From cfbe4e48c8d12663bafcbf738a63b9ea94c2fc4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Tue, 11 Jul 2017 11:12:44 +0900 Subject: [PATCH 01/12] escaping of text nodes to prevent XSS --- json2html/jsonconv.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/json2html/jsonconv.py b/json2html/jsonconv.py index 2c33002..ebf85fe 100644 --- a/json2html/jsonconv.py +++ b/json2html/jsonconv.py @@ -17,7 +17,7 @@ -------- """ -import sys +import sys, cgi if sys.version_info[:2] < (2, 7): from ordereddict import OrderedDict @@ -34,7 +34,7 @@ text_types = [str] class Json2Html: - def convert(self, json="", table_attributes='border="1"', clubbing=True, encode=False): + def convert(self, json="", table_attributes='border="1"', clubbing=True, encode=False, escape=True): """ Convert JSON to HTML Table format """ @@ -42,6 +42,7 @@ def convert(self, json="", table_attributes='border="1"', clubbing=True, encode= # eg: table_attributes = 'class = "table table-bordered sortable"' self.table_init_markup = "" % table_attributes self.clubbing = clubbing + self.escape = escape json_input = None if not json: json_input = {} @@ -56,7 +57,7 @@ def convert(self, json="", table_attributes='border="1"', clubbing=True, encode= def column_headers_from_list_of_dicts(self, json_input): """ - This method is required to implement clubbing. + This method is required to implement clubbing. It tries to come up with column headers for your input """ if not json_input \ @@ -78,12 +79,15 @@ def convert_json_node(self, json_input): """ Dispatch JSON input according to the outermost type and process it to generate the super awesome HTML format. - We try to adhere to duck typing such that users can just pass all kinds - of funky objects to json2html that *behave* like dicts and lists and other + We try to adhere to duck typing such that users can just pass all kinds + of funky objects to json2html that *behave* like dicts and lists and other basic JSON types. """ if type(json_input) in text_types: - return text(json_input) + if self.escape: + return cgi.escape(text(json_input)) + else: + return text(json_input) if hasattr(json_input, 'items'): return self.convert_object(json_input) if hasattr(json_input, '__iter__') and hasattr(json_input, '__getitem__'): From 262c3148a9e8f2f3ba3aa9fa277fa50ba519aa3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Tue, 11 Jul 2017 11:13:28 +0900 Subject: [PATCH 02/12] small beauty fix, use tuples for text types --- json2html/jsonconv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/json2html/jsonconv.py b/json2html/jsonconv.py index ebf85fe..06d8aef 100644 --- a/json2html/jsonconv.py +++ b/json2html/jsonconv.py @@ -28,10 +28,10 @@ if sys.version_info[:2] < (3, 0): text = unicode - text_types = [unicode, str] + text_types = (unicode, str) else: text = str - text_types = [str] + text_types = (str,) class Json2Html: def convert(self, json="", table_attributes='border="1"', clubbing=True, encode=False, escape=True): From ef4efcfb22a6f12ba2d72d3f8ac70f4ace64b661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Tue, 11 Jul 2017 11:18:46 +0900 Subject: [PATCH 03/12] added a test for xss - fails because we passed a non-json string. let's fix this first --- test/run_tests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/run_tests.py b/test/run_tests.py index 8dc210b..85c46c0 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -188,13 +188,19 @@ def test_none(self): u'' ) + def test_xss(self): + self.assertEqual( + json2html.convert(""), + u"<script></script>" + ) + def test_all(self): for test_definition in self.test_json: _json = test_definition['json'] _clubbing = "no_clubbing" not in test_definition['filename'] print("testing %s" %(test_definition['filename'])) self.assertEqual( - json2html.convert(json = _json, clubbing=_clubbing, encode=True), + json2html.convert(json=_json, clubbing=_clubbing, encode=True), test_definition['output'].encode('ascii', 'xmlcharrefreplace') ) #testing whether we can call convert with a positional args instead of keyword arg From 092d6e5f66e9017883493bdc47b05d5c9af58e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Tue, 11 Jul 2017 11:36:06 +0900 Subject: [PATCH 04/12] added additional tests, catched a json.loads exception where the user probably only wanted to pass a text node. rather edge-casey, but hey... --- json2html/jsonconv.py | 10 +++++++++- test/xss_check1.json | 3 +++ test/xss_check1.txt | 1 + test/xss_check2.json | 3 +++ test/xss_check2.txt | 1 + 5 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 test/xss_check1.json create mode 100644 test/xss_check1.txt create mode 100644 test/xss_check2.json create mode 100644 test/xss_check2.txt diff --git a/json2html/jsonconv.py b/json2html/jsonconv.py index 06d8aef..f932a30 100644 --- a/json2html/jsonconv.py +++ b/json2html/jsonconv.py @@ -47,7 +47,15 @@ def convert(self, json="", table_attributes='border="1"', clubbing=True, encode= if not json: json_input = {} elif type(json) in text_types: - json_input = json_parser.loads(json, object_pairs_hook=OrderedDict) + try: + json_input = json_parser.loads(json, object_pairs_hook=OrderedDict) + except ValueError as e: + #so the string passed here is actually not a json string + # - let's analyze whether we want to pass on the error or use the string as-is as a text node + if u"Expecting property name" in text(e): + #if this specific json loads error is raised, then the user probably actually wanted to pass json, but made a mistake + raise e + json_input = json else: json_input = json converted = self.convert_json_node(json_input) diff --git a/test/xss_check1.json b/test/xss_check1.json new file mode 100644 index 0000000..ec0b05d --- /dev/null +++ b/test/xss_check1.json @@ -0,0 +1,3 @@ +{ + "":"blub" +} \ No newline at end of file diff --git a/test/xss_check1.txt b/test/xss_check1.txt new file mode 100644 index 0000000..ebb4b21 --- /dev/null +++ b/test/xss_check1.txt @@ -0,0 +1 @@ +
<script></script>blub
\ No newline at end of file diff --git a/test/xss_check2.json b/test/xss_check2.json new file mode 100644 index 0000000..15b15a2 --- /dev/null +++ b/test/xss_check2.json @@ -0,0 +1,3 @@ +{ + "blub":"" +} \ No newline at end of file diff --git a/test/xss_check2.txt b/test/xss_check2.txt new file mode 100644 index 0000000..259978a --- /dev/null +++ b/test/xss_check2.txt @@ -0,0 +1 @@ +
blub<script></script>
\ No newline at end of file From d58ed8a068bbd45d2607b9b066fa631ffacfbc8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Tue, 11 Jul 2017 11:39:24 +0900 Subject: [PATCH 05/12] added testcase without escaping --- test/run_tests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/run_tests.py b/test/run_tests.py index 85c46c0..27e1b78 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -193,6 +193,10 @@ def test_xss(self): json2html.convert(""), u"<script></script>" ) + self.assertEqual( + json2html.convert("", escape=False), + u"" + ) def test_all(self): for test_definition in self.test_json: From 63d707f51670d9f7dfdcb2b6de3f4a8d7ab554cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Tue, 11 Jul 2017 11:42:54 +0900 Subject: [PATCH 06/12] documentation --- README.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.rst b/README.rst index 37cb068..a3c54b6 100644 --- a/README.rst +++ b/README.rst @@ -54,6 +54,8 @@ Argument Description `clubbing` turn on[default]/off clubbing of list with same keys of a dict / Array of objects with same key --------------------- ---------------- `encode` turn on/off[default] encoding of result to escaped html, compatible with any browser +--------------------- ---------------- +`escape` turn on[default]/off escaping of html tags in text nodes (prevents XSS attacks in case you pass untrusted data to json2html) ===================== ================ Installation @@ -216,6 +218,7 @@ Contributors * Python 3 support ; Added integration tests for Python 2.6, 3.4 and 3.5 such that support doesn't break. * Can now also do the proper encoding for you (disabled by default to not break backwards compatibility). * Can now handle non-JSON objects on a best-effort principle. + * Now by default escapes html in text nodes to prevent XSS attacks. 2. Daniel Lekic: [@lekic](https://github.com/lekic) * Fixed issue with one-item lists not rendering correctly. From 9cc63b4a59a6deb3701242b2ae640dca493a6d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Mon, 21 Aug 2017 11:33:15 +0900 Subject: [PATCH 07/12] trying to create less string instances in convert_object --- json2html/jsonconv.py | 20 ++++++++++---------- test/benchmark.py | 33 +++++++++++++++++++++++++++++++++ test/run_tests.py | 4 ++-- 3 files changed, 45 insertions(+), 12 deletions(-) create mode 100644 test/benchmark.py diff --git a/json2html/jsonconv.py b/json2html/jsonconv.py index b724aec..b8751cf 100644 --- a/json2html/jsonconv.py +++ b/json2html/jsonconv.py @@ -163,15 +163,15 @@ def convert_object(self, json_input): """ if not json_input: return "" #avoid empty tables - converted_output = self.table_init_markup + "" - converted_output += "".join([ - "%s%s" %( - self.convert_json_node(k), - self.convert_json_node(v) - ) - for k, v in json_input.items() - ]) - converted_output += '' - return converted_output + return "%s%s" %( + self.table_init_markup, + "".join([ + "%s%s" %( + self.convert_json_node(k), + self.convert_json_node(v) + ) + for k, v in json_input.items() + ]) + ) json2html = Json2Html() diff --git a/test/benchmark.py b/test/benchmark.py new file mode 100644 index 0000000..1e78700 --- /dev/null +++ b/test/benchmark.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +import os, sys + +lib_path = os.path.abspath(os.path.join('..')) +sys.path.append(lib_path) + +from functools import wraps +from time import time +from json2html import * + +def timing(f): + @wraps(f) + def wrap(*args, **kw): + ts = time() + result = f(*args, **kw) + te = time() + print 'func:%r args:[%r, %r] took: %2.4f sec' % \ + (f.__name__, args, kw, te-ts) + return result + return wrap + +@timing +def run(nesting=1000): + benchdata = {} + current_head = benchdata + for i in xrange(nesting): + current_head["test"] = {} + current_head = current_head["test"] + current_head["finally"] = "glob" + json2html.convert(benchdata) + +sys.setrecursionlimit(100000) +run(int(sys.argv[1]) if len(sys.argv) > 1 else 1000) \ No newline at end of file diff --git a/test/run_tests.py b/test/run_tests.py index 9ba80a7..27e1b78 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -133,7 +133,7 @@ def items(self): #clubbed with single element self.assertEqual( json2html.convert([binary_dict([1, 2], u"blübi")]), - u'
onetwo
  • 1
  • 2
blübi
' + u'
onetwo
  • 1
  • 2
blübi
' ) #clubbed with two elements self.assertEqual( @@ -141,7 +141,7 @@ def items(self): binary_dict([1, 2], u"blübi"), binary_dict("foo", "bar") ]), - u'
onetwo
  • 1
  • 2
blübi
foobar
' + u'
onetwo
  • 1
  • 2
blübi
foobar
' ) #not clubbed, second element has different keys self.assertEqual( From 3aa4937e77592b63aaeed47b87baaddb35ed9f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Mon, 21 Aug 2017 14:50:35 +0900 Subject: [PATCH 08/12] cleaning up some code - string concatenations with + are discouraged due to performance reasons --- json2html/jsonconv.py | 47 ++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/json2html/jsonconv.py b/json2html/jsonconv.py index b8751cf..1e7f3ca 100644 --- a/json2html/jsonconv.py +++ b/json2html/jsonconv.py @@ -131,23 +131,42 @@ def convert_list(self, list_input): if not list_input: return "" converted_output = "" - column_headers = None if self.clubbing: column_headers = self.column_headers_from_list_of_dicts(list_input) if column_headers is not None: - converted_output += self.table_init_markup - converted_output += '' - converted_output += '' + ''.join(column_headers) + '' - converted_output += '' - converted_output += '' - for list_entry in list_input: - converted_output += '' - converted_output += ''.join([self.convert_json_node(list_entry[column_header]) for column_header in - column_headers]) - converted_output += '' - converted_output += '' - converted_output += '' - return converted_output + header = ( + '' + '%s' + '' + ) %(''.join(column_headers)) + body = ( + '' + '%s' + '' + ) %( + ''.join([ + ''.join([ + self.convert_json_node(list_entry[column_header]) + for column_header in column_headers + ]) + for list_entry in list_input + ]) + ) + return '%s%s%s' %(self.table_init_markup, header, body) + + # converted_output += self.table_init_markup + # converted_output += '' + # converted_output += '' + ''.join(column_headers) + '' + # converted_output += '' + # converted_output += '' + # for list_entry in list_input: + # converted_output += '' + # converted_output += ''.join([self.convert_json_node(list_entry[column_header]) for column_header in + # column_headers]) + # converted_output += '' + # converted_output += '' + # converted_output += '' + # return converted_output #so you don't want or need clubbing eh? This makes @muellermichel very sad... ;( #alright, let's fall back to a basic list here... From f77070f4e2bf5d7232fe07a5ef358e81d92d7c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Mon, 21 Aug 2017 15:22:20 +0900 Subject: [PATCH 09/12] nicer syntax --- json2html/jsonconv.py | 15 +++++---------- test/run_tests.py | 14 ++++++++------ 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/json2html/jsonconv.py b/json2html/jsonconv.py index 1e7f3ca..bec974b 100644 --- a/json2html/jsonconv.py +++ b/json2html/jsonconv.py @@ -139,18 +139,13 @@ def convert_list(self, list_input): '%s' '' ) %(''.join(column_headers)) - body = ( - '' - '%s' - '' - ) %( - ''.join([ - ''.join([ + body = '%s' %( + '%s' %(''.join([( + '%s' %(''.join([ self.convert_json_node(list_entry[column_header]) for column_header in column_headers - ]) - for list_entry in list_input - ]) + ])) + ) for list_entry in list_input])) ) return '%s%s%s' %(self.table_init_markup, header, body) diff --git a/test/run_tests.py b/test/run_tests.py index 27e1b78..82cbb0d 100644 --- a/test/run_tests.py +++ b/test/run_tests.py @@ -133,15 +133,17 @@ def items(self): #clubbed with single element self.assertEqual( json2html.convert([binary_dict([1, 2], u"blübi")]), - u'
onetwo
  • 1
  • 2
blübi
' + u'
onetwo
  • 1
  • 2
blübi
' ) #clubbed with two elements + converted = json2html.convert([ + binary_dict([1, 2], u"blübi"), + binary_dict("foo", "bar") + ]) self.assertEqual( - json2html.convert([ - binary_dict([1, 2], u"blübi"), - binary_dict("foo", "bar") - ]), - u'
onetwo
  • 1
  • 2
blübi
foobar
' + converted, + u'
onetwo
  • 1
  • 2
blübi
foobar
', + converted ) #not clubbed, second element has different keys self.assertEqual( From e19412f50029183c8920d853662a08b98ca5497b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Mon, 21 Aug 2017 15:29:50 +0900 Subject: [PATCH 10/12] fixed test issues --- json2html/jsonconv.py | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/json2html/jsonconv.py b/json2html/jsonconv.py index bec974b..ee45064 100644 --- a/json2html/jsonconv.py +++ b/json2html/jsonconv.py @@ -131,6 +131,7 @@ def convert_list(self, list_input): if not list_input: return "" converted_output = "" + column_headers = None if self.clubbing: column_headers = self.column_headers_from_list_of_dicts(list_input) if column_headers is not None: @@ -140,35 +141,20 @@ def convert_list(self, list_input): '' ) %(''.join(column_headers)) body = '%s' %( - '%s' %(''.join([( + '%s' %(''.join([ '%s' %(''.join([ self.convert_json_node(list_entry[column_header]) for column_header in column_headers ])) - ) for list_entry in list_input])) + for list_entry in list_input + ])) ) return '%s%s%s' %(self.table_init_markup, header, body) - # converted_output += self.table_init_markup - # converted_output += '' - # converted_output += '' + ''.join(column_headers) + '' - # converted_output += '' - # converted_output += '' - # for list_entry in list_input: - # converted_output += '' - # converted_output += ''.join([self.convert_json_node(list_entry[column_header]) for column_header in - # column_headers]) - # converted_output += '' - # converted_output += '' - # converted_output += '' - # return converted_output - - #so you don't want or need clubbing eh? This makes @muellermichel very sad... ;( - #alright, let's fall back to a basic list here... - converted_output = '
  • ' - converted_output += '
  • '.join([self.convert_json_node(child) for child in list_input]) - converted_output += '
' - return converted_output + #no clubbing required here - columnb headers are not consistent across list + return '
  • %s
' %( + '
  • '.join([self.convert_json_node(child) for child in list_input]) + ) def convert_object(self, json_input): """ From 905e8e485eb316274e4b80c56e51a5ec18164ed3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Mon, 21 Aug 2017 15:32:52 +0900 Subject: [PATCH 11/12] typo --- json2html/jsonconv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/json2html/jsonconv.py b/json2html/jsonconv.py index ee45064..f9410ca 100644 --- a/json2html/jsonconv.py +++ b/json2html/jsonconv.py @@ -151,7 +151,7 @@ def convert_list(self, list_input): ) return '%s%s%s' %(self.table_init_markup, header, body) - #no clubbing required here - columnb headers are not consistent across list + #no clubbing required here - column headers are not consistent across list return '
    • %s
    ' %( '
  • '.join([self.convert_json_node(child) for child in list_input]) ) From 537c3e6c7e2c6ca2780acffb68a03123c3fcbb4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20M=C3=BCller?= Date: Mon, 21 Aug 2017 15:37:03 +0900 Subject: [PATCH 12/12] a bit more compact --- json2html/jsonconv.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/json2html/jsonconv.py b/json2html/jsonconv.py index f9410ca..c4053ae 100644 --- a/json2html/jsonconv.py +++ b/json2html/jsonconv.py @@ -135,11 +135,7 @@ def convert_list(self, list_input): if self.clubbing: column_headers = self.column_headers_from_list_of_dicts(list_input) if column_headers is not None: - header = ( - '' - '%s' - '' - ) %(''.join(column_headers)) + header = '%s' %(''.join(column_headers)) body = '%s' %( '%s' %(''.join([ '%s' %(''.join([