Files

236 lines
7.3 KiB
Python
Raw Permalink Normal View History

# Copyright (c) 2016-2017 Tom Zander <tomz@freedommail.ch>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import codecs
2017-01-17 10:27:25 +01:00
class CMF_ValueType:
    """Numeric codes identifying the kind of value that follows a tag.

    The codes are consecutive integers starting at zero, in the order listed.
    """

    (
        # 0: var-int encoded; by definition a positive number.
        PositiveNumber,
        # 1: var-int encoded; by definition a negative number.
        NegativeNumber,
        # 2: a var-int length followed by that many bytes. Utf8 encoded, and
        # never terminated by a closing zero.
        String,
        # 3: laid out exactly like String, but the bytes carry no encoding.
        ByteArray,
        # 4: boolean true; no bytes follow.
        BoolTrue,
        # 5: boolean false; no bytes follow.
        BoolFalse,
    ) = range(6)
2017-01-17 10:27:25 +01:00
def serialize(data, offset, value):
    """Write `value` as a var-int into `data`, starting at index `offset`.

    Each byte carries seven bits of the number; every byte except the last
    one written to the stream has its high bit set. One is subtracted after
    each shift, which is what unserialize() adds back while decoding.

    Returns the number of bytes written.
    """
    last = 0  # index, relative to offset, of the byte being produced
    while True:
        # Bytes are produced least-significant first; all but the very first
        # of them end up in front of another byte and get the high bit.
        continuation = 0x80 if last != 0 else 0
        data[offset + last] = (value & 0x7F) | continuation
        if value <= 0x7F:
            break
        value = (value >> 7) - 1
        last += 1

    # The stream wants the most significant byte first: reverse in place.
    low = offset
    high = offset + last
    while low < high:
        data[low], data[high] = data[high], data[low]
        low += 1
        high -= 1
    return last + 1
def unserialize(data, dataSize, position):
    """Decode one var-int from `data`, starting at index `position`.

    data     -- indexable sequence of integer byte values.
    dataSize -- index one past the last byte that may be read; decoding never
                touches data[dataSize] or anything after it.
    position -- index of the first byte of the var-int; must not be negative.

    Returns a tuple (next_position, value) where next_position is the index
    just behind the last byte of the var-int.

    Raises Exception when no terminating byte (high bit clear) is found
    within 8 bytes or before dataSize is reached.
    """
    assert(position >= 0)
    result = 0
    pos = position
    # Stop at the declared end of the stream as well as at the size limit, so
    # a truncated number is reported instead of being completed from bytes
    # that do not belong to the stream.
    while (pos - position) < 8 and pos < dataSize:
        byte = data[pos]
        pos += 1
        result = (result << 7) | (byte & 0x7F)
        if (byte & 0x80) == 0:
            # High bit clear: this was the last byte of the number.
            return pos, result
        # More bytes follow; undo the "- 1" serialize() applied per shift.
        result += 1
    raise Exception("Reading VarInt past stream-size")
2017-01-17 10:27:25 +01:00
2017-01-16 16:53:09 -05:00
def arraycopy(source, sourcePos, dest, destPos):
    """Copy the elements of `source` from `sourcePos` onwards into `dest`.

    The first copied element lands at dest[destPos], the next one at
    dest[destPos + 1] and so on. Elements are assigned one by one, so `dest`
    is never resized by this function.

    sourcePos is expected to be non-negative; when it is at or beyond the end
    of `source` nothing is copied.
    """
    # Only the elements from sourcePos onwards are available; counting
    # len(source) of them would index past the end for any sourcePos > 0.
    count = len(source) - sourcePos
    for step in range(count):
        dest[destPos + step] = source[sourcePos + step]
"""
Message builder creates a generic message in the form of
name: value
pairs, in a flat list.
The unique part is that the value is typed and for variable-length structures
(like a string) a length is included.
The effect is that you can fully parse a structure without having any prior knowledge
of the fields, the expected content in the fields and the size.
You can compare this to an XML stream where some items are stored with tags or attributes
that are unknown to the reader, without this affecting the reader's ability to parse them
or to write them out again unchanged.
"""
2017-01-17 10:27:25 +01:00
class MessageBuilder:
    """Writes tagged, typed values into a caller-supplied buffer.

    Every add_* method writes a header made of the tag and the value type at
    the current position, then the value itself where the type has one, and
    moves the position behind what was written.
    """

    def __init__(self, buffer, position):
        """Start writing into `buffer` at index `position`."""
        self.buffer = buffer
        self.position = position

    def add_int(self, tag, value):
        """Append a number; its sign is expressed through the value type."""
        if value >= 0:
            kind = CMF_ValueType.PositiveNumber
        else:
            kind = CMF_ValueType.NegativeNumber
            # Only the magnitude is stored.
            value = -value
        self.__write(tag, kind)
        self.position += serialize(self.buffer, self.position, value)

    def add_string(self, tag, value):
        """Append `value` as a String; it is encoded to utf-8 here."""
        self.__write(tag, CMF_ValueType.String)
        self.__append_sized(codecs.encode(value, 'utf-8'))

    def add_bytes(self, tag, value):
        """Append `value` as a ByteArray, copied as-is."""
        self.__write(tag, CMF_ValueType.ByteArray)
        self.__append_sized(value)

    def add_bool(self, tag, value):
        """Append a boolean; the value type alone carries it."""
        kind = CMF_ValueType.BoolTrue if value else CMF_ValueType.BoolFalse
        self.__write(tag, kind)

    def get_position(self):
        """Return the index at which the next byte will be written."""
        return self.position

    def __append_sized(self, payload):
        # Length as a var-int, followed by the payload elements themselves.
        size = len(payload)
        self.position += serialize(self.buffer, self.position, size)
        arraycopy(payload, 0, self.buffer, self.position)
        self.position += size

    def __write(self, tag, value_type):
        # Header byte: tag in the upper five bits, value type in the lower
        # three.
        if tag >= 31:
            # Does not fit in five bits: set all tag bits and let the real
            # tag follow the header byte as a var-int.
            self.buffer[self.position] = value_type | 0xF8
            self.position += serialize(self.buffer, self.position + 1, tag) + 1
        else:
            assert(tag < 32)
            self.buffer[self.position] = (tag << 3) + value_type
            self.position = self.position + 1
2017-01-17 10:27:25 +01:00
class MessageParser(object):
    """Steps through the tagged values stored in a buffer, one per next().

    After next() returns Type.FoundTag, `tag` holds the tag of the value that
    was read. Numbers and booleans are available in `value`; strings and byte
    arrays are left in the input and fetched with string_value().
    """

    class Type:
        # Possible results of next().
        FoundTag = 0
        EndOfDocument = 1
        Error = 3

    class Lazy:
        # Where the value of the current tag lives.
        ValueParsed = 0  # decoded into self.value
        ByteArray = 1  # still in self.data, located by dataStart/dataLength
        String = 2  # as ByteArray, but the value type was String

    def __init__(self, data, position, length):
        """Parse `length` bytes of `data`, starting at index `position`."""
        self.data = data
        self.position = position
        self.endPosition = position + length
        self.tag = -1
        # Defined from the start so string_value() is usable before next().
        self.value = None
        self.valueState = MessageParser.Lazy.ValueParsed
        self.dataStart = -1
        self.dataLength = -1

    def next(self):
        """Read the next tag and its value.

        Returns Type.FoundTag when a value was read, Type.EndOfDocument when
        the position has reached the end of the parsed range, and Type.Error
        for an unknown value type, a tag above 0xFFFF, or a string/byte-array
        whose declared length reaches past the available data.

        Exceptions raised by unserialize() are not caught here.
        """
        if self.endPosition <= self.position:
            return MessageParser.Type.EndOfDocument

        # Forget the lazy slice of the previous tag; otherwise string_value()
        # would keep returning it for a number or boolean read now.
        self.valueState = MessageParser.Lazy.ValueParsed

        header = self.data[self.position]
        data_type = header & 0x07
        self.tag = header >> 3
        if self.tag == 31:  # the tag is stored in the next byte(s)
            self.position, long_tag = unserialize(
                self.data, self.endPosition, self.position + 1)
            # Step back onto the last byte of the tag: every branch below
            # expects the value to start at position + 1.
            self.position -= 1
            if long_tag > 0xFFFF:
                return MessageParser.Type.Error
            self.tag = long_tag

        if data_type in (CMF_ValueType.PositiveNumber,
                         CMF_ValueType.NegativeNumber):
            self.position, value = unserialize(
                self.data, self.endPosition, self.position + 1)
            if data_type == CMF_ValueType.NegativeNumber:
                value *= -1
        elif data_type in (CMF_ValueType.String, CMF_ValueType.ByteArray):
            # `value` is the payload length here, and that is what ends up in
            # self.value; the payload itself is only located, not copied.
            start, value = unserialize(
                self.data, self.endPosition, self.position + 1)
            end = start + value
            if end > self.endPosition or end > len(self.data):
                # need more bytes
                return MessageParser.Type.Error
            if data_type == CMF_ValueType.ByteArray:
                self.valueState = MessageParser.Lazy.ByteArray
            else:
                self.valueState = MessageParser.Lazy.String
            self.dataStart = start
            self.dataLength = value
            self.position = end
        elif data_type == CMF_ValueType.BoolTrue:
            value = True
            self.position += 1
        elif data_type == CMF_ValueType.BoolFalse:
            value = False
            self.position += 1
        else:
            return MessageParser.Type.Error

        self.value = value
        return MessageParser.Type.FoundTag

    def string_value(self):
        """Return the current value.

        For a string or byte array this is the slice of the input data that
        holds the payload (no decoding is applied); for anything else it is
        self.value.
        """
        lazy_states = (MessageParser.Lazy.ByteArray, MessageParser.Lazy.String)
        if self.valueState in lazy_states:
            return self.data[self.dataStart:self.dataStart + self.dataLength]
        return self.value

    def consumed(self):
        """Return the current read position within the data."""
        return self.position

    def consume(self, num_bytes):
        """Move the read position forward by `num_bytes` (not negative)."""
        assert(num_bytes >= 0)
        self.position += num_bytes