Skip to content

Commit a97ba25

Browse files
Add support for unicode characters
1 parent dce101a commit a97ba25

File tree

2 files changed

+28
-20
lines changed

2 files changed

+28
-20
lines changed

nrepl/bencode.py

+22-20
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from io import StringIO
1818

1919
import sys
20+
from io import BytesIO
21+
import array
2022

2123
# Some code so we can use different features without worrying about versions.
2224
PY2 = sys.version_info[0] == 2
@@ -42,11 +44,11 @@ def _read_int(s, terminator=None, init_data=None):
4244
break
4345
else:
4446
int_chrs.append(c)
45-
return int(''.join(int_chrs))
47+
return int(b''.join(int_chrs))
4648

4749

4850
def _read_bytes(s, n):
49-
data = StringIO('')
51+
data = BytesIO()
5052
cnt = 0
5153
while cnt < n:
5254
m = s.read(n - cnt)
@@ -85,17 +87,17 @@ def _read_map(s):
8587
return dict(zip(i, i))
8688

8789

88-
# Dispatch table used by _read_datum: delimiter -> reader function taking
# the input stream. The b"e" and EOF (None) entries both map to a reader
# that ignores the stream and returns None.
_read_fns = {
    b"i": _read_int,
    b"l": _read_list,
    b"d": _read_map,
    b"e": lambda _: None,
    # EOF
    None: lambda _: None,
}
9496

9597

9698
def _read_datum(s):
    """Read one bencoded value from stream *s* and return it.

    The delimiter fetched by ``_read_delimiter`` selects a reader from
    ``_read_fns``. A delimiter with no entry there is handed to
    ``_read_bytes`` as the number of bytes to read.

    Returns None when the delimiter is the empty byte string
    (presumably a read at end of stream -- confirm against
    ``_read_delimiter``).
    """
    delim = _read_delimiter(s)
    # Compare by value, not identity: `is not b''` only works when the
    # interpreter happens to reuse a single empty-bytes object, and it
    # triggers a SyntaxWarning on Python 3.8+.
    if delim != b'':
        reader = _read_fns.get(delim, lambda stream: _read_bytes(stream, delim))
        return reader(s)
    return None
100102

101103

@@ -104,32 +106,32 @@ def _write_datum(x, out):
104106
# x = x.encode("UTF-8")
105107
# TODO revisit encodings, this is surely not right. Python
106108
# (2.x, anyway) conflates bytes and strings, but 3.x does not...
107-
out.write(str(len(x)))
108-
out.write(":")
109-
out.write(x)
109+
out.write(str(len(x.encode('utf-8'))).encode('utf-8'))
110+
out.write(b":")
111+
out.write(x.encode('utf-8'))
110112
elif isinstance(x, int):
111-
out.write("i")
112-
out.write(str(x))
113-
out.write("e")
113+
out.write(b"i")
114+
out.write(str(x).encode('utf-8'))
115+
out.write(b"e")
114116
elif isinstance(x, (list, tuple)):
115-
out.write("l")
117+
out.write(b"l")
116118
for v in x:
117119
_write_datum(v, out)
118-
out.write("e")
120+
out.write(b"e")
119121
elif isinstance(x, dict):
120-
out.write("d")
122+
out.write(b"d")
121123
for k, v in x.items():
122124
_write_datum(k, out)
123125
_write_datum(v, out)
124-
out.write("e")
126+
out.write(b"e")
125127
out.flush()
126128

127129

128130
def encode(v):
    """Bencode *v* and return the encoded form as text.

    *v* may be a string, integer, list, or dict. The value is written as
    bytes into an in-memory buffer by ``_write_datum`` and the buffer
    contents are decoded from UTF-8 before being returned.
    """
    buf = BytesIO()
    _write_datum(v, buf)
    raw = buf.getvalue()
    return raw.decode('utf-8')
133135

134136

135137
def decode_file(file):
@@ -142,7 +144,7 @@ def decode_file(file):
142144

143145
def decode(string):
    """Generator that yields decoded values from the input string.

    *string* may be text or bytes. Text is encoded to UTF-8 first; bytes
    are used as-is, so already-encoded input (for example data read from
    a socket) no longer fails on the ``.encode`` call. The resulting
    byte stream is handed to ``decode_file``, whose return value is
    returned unchanged.
    """
    data = string if isinstance(string, bytes) else string.encode('utf-8')
    return decode_file(BytesIO(data))
146148

147149

148150
class BencodeIO(object):

test.py

+6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#!/usr/bin/env python
2+
# coding=utf-8
3+
24
import os, unittest, subprocess, re, signal, time
35
import nrepl
46
from collections import OrderedDict
@@ -29,6 +31,10 @@ def test_empty_string (self):
2931
self.assertEqual([['spam', '', 'a', 'ab']], list(decode('l4:spam0:1:a2:abe')))
3032
self.assertEqual([{'spam': ''}], list(decode('d4:spam0:e')))
3133

34+
def test_unicode_string(self):
    """A non-ASCII string decodes and encodes with a UTF-8 byte-length prefix."""
    text = u'á'
    wire = u'2:á'
    # Decoding the wire form yields the single original string...
    self.assertEqual([text], list(decode(wire)))
    # ...and encoding the string reproduces the same wire form.
    self.assertEqual(wire, encode(text))
37+
3238
class REPLTest (unittest.TestCase):
3339
def setUp (self):
3440
# this here only to accommodate travis, which puts leiningen @ lein2

0 commit comments

Comments
 (0)