Skip to content

Commit

Permalink
binary and bits encode decode
Browse files Browse the repository at this point in the history
  • Loading branch information
kvakvs committed May 24, 2017
1 parent 1d082f2 commit 1029ec4
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 19 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ run:
$(PY) test1.py

test:
$(PY) test/dist_etf_encode_test.py && \
$(PY) test/dist_etf_decode_test.py
$(PY) test/dist_etf_decode_test.py && \
$(PY) test/dist_etf_transitive_test.py
77 changes: 65 additions & 12 deletions Pyrlang/Dist/etf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
ETF_VERSION_TAG = 131

TAG_NEW_FLOAT_EXT = 70
TAG_BIT_BINARY_EXT = 77
TAG_COMPRESSED = 80
TAG_SMALL_INT = 97
TAG_INT = 98
Expand All @@ -21,6 +22,7 @@
TAG_NIL_EXT = 106
TAG_STRING_EXT = 107
TAG_LIST_EXT = 108
TAG_BINARY_EXT = 109
TAG_NEW_REF_EXT = 114
TAG_MAP_EXT = 116

Expand All @@ -33,8 +35,11 @@ class ETFEncodeException(Exception):
pass


def incomplete_data():
raise ETFDecodeException("Incomplete data")
def incomplete_data(where=""):
    """ Signal that the input ended before a complete term could be decoded.

        :param where: Optional description of what was being decoded when the
            data ran out; when given, it is appended to the error message.
        :raises ETFDecodeException: Always.
    """
    # The location suffix is only added when a description was supplied;
    # with no description the plain message is used.
    if where:
        raise ETFDecodeException("Incomplete data at " + where)
    raise ETFDecodeException("Incomplete data")


def binary_to_term(data: bytes):
Expand Down Expand Up @@ -67,11 +72,11 @@ def binary_to_term_2(data: bytes):
if tag == TAG_ATOM_EXT:
len_data = len(data)
if len_data < 3:
return incomplete_data()
return incomplete_data("decoding length for an atom name")

len_expected = util.u16(data, 1) + 3
if len_expected > len_data:
return incomplete_data()
return incomplete_data("decoding text for an atom")

name = data[3:len_expected]
if name == b'true':
Expand All @@ -89,7 +94,7 @@ def binary_to_term_2(data: bytes):
if tag == TAG_STRING_EXT:
len_data = len(data)
if len_data < 3:
return incomplete_data()
return incomplete_data("decoding length for a string")

len_expected = util.u16(data, 1) + 3

Expand All @@ -99,6 +104,8 @@ def binary_to_term_2(data: bytes):
return data[3:len_expected].decode("utf8"), data[len_expected:]

if tag == TAG_LIST_EXT:
if len(data) < 5:
return incomplete_data("decoding length for a list")
len_expected = util.u32(data, 1)
result = term.List()
tail = data[5:]
Expand All @@ -114,6 +121,8 @@ def binary_to_term_2(data: bytes):
return result, tail

if tag == TAG_SMALL_TUPLE_EXT:
if len(data) < 2:
return incomplete_data("decoding length for a small tuple")
len_expected = data[1]
result = []
tail = data[2:]
Expand All @@ -125,6 +134,8 @@ def binary_to_term_2(data: bytes):
return tuple(result), tail

if tag == TAG_LARGE_TUPLE_EXT:
if len(data) < 5:
return incomplete_data("decoding length for a large tuple")
len_expected = util.u32(data, 1)
result = []
tail = data[5:]
Expand All @@ -136,9 +147,13 @@ def binary_to_term_2(data: bytes):
return tuple(result), tail

if tag == TAG_SMALL_INT:
if len(data) < 2:
return incomplete_data("decoding a 8-bit small uint")
return data[1], data[2:]

if tag == TAG_INT:
if len(data) < 5:
return incomplete_data("decoding a 32-bit int")
return util.i32(data, 1), data[5:]

if tag == TAG_PID_EXT:
Expand All @@ -151,6 +166,8 @@ def binary_to_term_2(data: bytes):
return pid, tail[9:]

if tag == TAG_NEW_REF_EXT:
if len(data) < 2:
return incomplete_data("decoding length for a new-ref")
term_len = util.u16(data, 1)
node, tail = binary_to_term_2(data[3:])
creation = tail[0]
Expand All @@ -161,6 +178,8 @@ def binary_to_term_2(data: bytes):
return ref, tail[id_len + 1:]

if tag == TAG_MAP_EXT:
if len(data) < 5:
return incomplete_data("decoding length for a map")
len_expected = util.u32(data, 1)
result = {}
tail = data[5:]
Expand All @@ -172,8 +191,31 @@ def binary_to_term_2(data: bytes):

return result, tail

if tag == TAG_BINARY_EXT:
len_data = len(data)
if len_data < 5:
return incomplete_data("decoding length for a binary")
len_expected = util.u32(data, 1) + 5
if len_expected > len_data:
return incomplete_data("decoding data for a binary")

bin1 = term.Binary(data=data[5:len_expected])
return bin1, data[len_expected:]

if tag == TAG_BIT_BINARY_EXT:
len_data = len(data)
if len_data < 6:
return incomplete_data("decoding length for a bit-binary")
len_expected = util.u32(data, 1) + 6
lbb = data[5]
if len_expected > len_data:
return incomplete_data("decoding data for a bit-binary")

bin1 = term.Binary(data=data[6:len_expected], last_byte_bits=lbb)
return bin1, data[len_expected:]

if tag == TAG_NEW_FLOAT_EXT:
(result, ) = struct.unpack(">d", data[1:9])
(result,) = struct.unpack(">d", data[1:9])
return result, data[10:]

raise ETFDecodeException("Unknown tag %d" % data[0])
Expand Down Expand Up @@ -235,7 +277,7 @@ def _pack_atom(text: str) -> bytes:


# TODO: maybe move this into pid class
def _pack_pid(val: term.Pid) -> bytes:
def _pack_pid(val) -> bytes:
data = bytes([TAG_PID_EXT]) + \
term_to_binary_2(val.node_) + \
util.to_u32(val.id_) + \
Expand All @@ -245,7 +287,7 @@ def _pack_pid(val: term.Pid) -> bytes:


# TODO: maybe move this into ref class
def _pack_ref(val: term.Reference) -> bytes:
def _pack_ref(val) -> bytes:
    """ Encode a reference value as a NEW_REF_EXT term.

        The output is, in order: the tag byte, a 16-bit field holding
        ``len(val.id_) // 4``, the encoded node (``val.node_``), one byte
        with ``val.creation_``, and finally the raw ``val.id_`` bytes.
    """
    id_count = util.to_u16(len(val.id_) // 4)
    node_bytes = term_to_binary_2(val.node_)
    creation_byte = bytes([val.creation_])
    return bytes([TAG_NEW_REF_EXT]) + id_count + node_bytes \
        + creation_byte + val.id_
Expand All @@ -272,11 +314,16 @@ def _pack_float(val):
return bytes([TAG_NEW_FLOAT_EXT]) + struct.pack(">d", val)


def term_to_binary_2(val):
if type(val) == bytes:
return _pack_string(val)
def _pack_binary(data, last_byte_bits):
    """ Encode raw bytes as a BINARY_EXT or BIT_BINARY_EXT term.

        :param data: The payload bytes.
        :param last_byte_bits: Bit count for the last byte. The value 8
            selects BINARY_EXT; any other value selects BIT_BINARY_EXT,
            which carries this count in one extra byte before the payload.
        :return: Tag byte, 32-bit payload length, then the payload
            (preceded by the bit-count byte for BIT_BINARY_EXT).
    """
    if last_byte_bits != 8:
        header = bytes([TAG_BIT_BINARY_EXT]) + util.to_u32(len(data))
        return header + bytes([last_byte_bits]) + data

    header = bytes([TAG_BINARY_EXT]) + util.to_u32(len(data))
    return header + data

elif type(val) == str:

def term_to_binary_2(val):
if type(val) == str:
return _pack_str(val)

elif type(val) == list:
Expand Down Expand Up @@ -309,6 +356,12 @@ def term_to_binary_2(val):
elif isinstance(val, term.Reference):
return _pack_ref(val)

elif type(val) == bytes:
return _pack_binary(val, 8)

elif isinstance(val, term.Binary):
return _pack_binary(val.bytes_, val.last_byte_bits_)

return term_to_binary_2(_serialize_object(val))
# obj_data = term_to_binary_2(_serialize_object(val))
# print(util.hex_bytes(obj_data))
Expand Down
11 changes: 8 additions & 3 deletions Pyrlang/Dist/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,14 @@ def _handle_socket_read(receiver, socket):
return _handle_connect_disconnect


def hex_bytes(s: bytes):
def hex_bytes(data: bytes, sep: str= " "):
""" Format a bytes() object as a hex dump """
return " ".join("{:02x}".format(c) for c in s)
return sep.join("{:02x}".format(bval) for bval in data)


def dec_bytes(data: bytes, sep: str = " "):
    """ Render every byte of ``data`` as a decimal number, joined by ``sep`` """
    decimals = map(str, data)
    return sep.join(decimals)


def schedule(delay, func, *args, **kw_args):
Expand All @@ -92,6 +97,6 @@ def schedule(delay, func, *args, **kw_args):
gevent.spawn_later(delay, schedule, delay, func, *args, **kw_args)


__all__ = ['make_handler', 'hex_bytes', 'schedule',
__all__ = ['make_handler', 'hex_bytes', 'dec_bytes', 'schedule',
'u16', 'u32', 'i32',
'to_u16', 'to_u32', 'to_i32']
34 changes: 34 additions & 0 deletions Pyrlang/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from future.utils import python_2_unicode_compatible
from builtins import chr

from Pyrlang.Dist import util

ATOM_MARKER = "pyrlang.Atom"
PID_MARKER = "pyrlang.Pid"

Expand Down Expand Up @@ -108,3 +110,35 @@ def __repr__(self) -> str:

def __str__(self) -> str:
return self.__repr__()


class Binary:
    """ Represents a bytes object, with last byte optionally incomplete.
        Bit objects have last_byte_bits < 8.

        Instances compare by value (payload and bit count) and are hashable
        with a matching hash, so they can be used as dictionary keys or set
        members. Do not modify the attributes while an instance is stored
        in a dict or set.
    """
    def __init__(self, data: bytes, last_byte_bits: int = 8) -> None:
        # Payload bytes
        self.bytes_ = data
        # Bit count for the last byte; 8 (the default) is a plain binary
        self.last_byte_bits_ = last_byte_bits

    def __repr__(self) -> str:
        """ Format as ``<<b1,b2,...>>``, with a ``:bits`` suffix when the
            last byte bit count is not 8.
        """
        lbb = self.last_byte_bits_
        if lbb == 8:
            return "<<%s>>" % util.dec_bytes(self.bytes_, ",")
        else:
            return "<<%s:%d>>" % (util.dec_bytes(self.bytes_, ","), lbb)

    def __str__(self) -> str:
        return self.__repr__()

    def equals(self, other) -> bool:
        """ True when ``other`` is a Binary with the same payload bytes and
            the same last byte bit count.
        """
        return isinstance(other, Binary) \
            and self.bytes_ == other.bytes_ \
            and self.last_byte_bits_ == other.last_byte_bits_

    __eq__ = equals

    def __ne__(self, other):
        return not self.equals(other)

    def __hash__(self) -> int:
        # Defining __eq__ without __hash__ makes a class unhashable in
        # Python 3; hash the same fields that equals() compares so equal
        # objects always hash alike.
        return hash((self.bytes_, self.last_byte_bits_))


__all__ = ['Atom', 'Pid', 'Binary', 'Reference']
2 changes: 1 addition & 1 deletion test/dist_etf_decode_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

sys.path.insert(0, '.')

from Pyrlang.Dist import etf, util
from Pyrlang.Dist import etf
from Pyrlang import term


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

sys.path.insert(0, '.')

from Pyrlang.Dist import etf, util
from Pyrlang.Dist import etf
from Pyrlang import term


Expand Down Expand Up @@ -44,6 +44,22 @@ def test_float(self):
self.assertEqual(val1, val2)
self.assertEqual(tail, b'')

def test_binary(self):
    """ Round-trip a binary: decode the encoded bytes, encode the result
        again, and expect the original bytes back with no leftover tail.
    """
    # 131 = ETF version tag, 109 = BINARY_EXT, 32-bit length 1, payload 34
    encoded = bytes([131, 109, 0, 0, 0, 1, 34])
    decoded, remainder = etf.binary_to_term(encoded)
    reencoded = etf.term_to_binary(decoded)
    self.assertEqual(encoded, reencoded)
    self.assertEqual(remainder, b'')

def test_binary_bits(self):
    """ Round-trip a bit-binary: decode the encoded bytes, encode the
        result again, and expect the original bytes back with no leftover
        tail.
    """
    # 131 = ETF version tag, 77 = BIT_BINARY_EXT, 32-bit length 1,
    # last byte bit count 2, payload 192
    encoded = bytes([131, 77, 0, 0, 0, 1, 2, 192])
    decoded, remainder = etf.binary_to_term(encoded)
    reencoded = etf.term_to_binary(decoded)
    self.assertEqual(encoded, reencoded)
    self.assertEqual(remainder, b'')


if __name__ == '__main__':
unittest.main()

0 comments on commit 1029ec4

Please sign in to comment.