diff --git a/svglib/svglib.py b/svglib/svglib.py
index 3efe17f..0fe6433 100755
--- a/svglib/svglib.py
+++ b/svglib/svglib.py
@@ -1040,20 +1040,6 @@ def convertPolygon(self, node):
return shape
- def clean_text(self, text, preserve_space):
- """Text cleaning as per https://www.w3.org/TR/SVG/text.html#WhiteSpace
- """
- if text is None:
- return
- if preserve_space:
- text = text.replace('\r\n', ' ').replace('\n', ' ').replace('\t', ' ')
- else:
- text = text.replace('\r\n', '').replace('\n', '').replace('\t', ' ')
- text = text.strip()
- while ' ' in text:
- text = text.replace(' ', ' ')
- return text
-
def convertText(self, node):
attrConv = self.attrConverter
xml_space = node.getAttribute(f"{{{XML_NS}}}space")
@@ -1075,25 +1061,22 @@ def convertText(self, node):
fs = attrConv.findAttr(node, "font-size") or str(DEFAULT_FONT_SIZE)
fs = attrConv.convertLength(fs)
x, y = self.convert_length_attrs(node, 'x', 'y', em_base=fs)
- for c in itertools.chain([node], node.getchildren()):
+ for subnode, text, is_tail in iter_text_node(node, preserve_space):
+ if not text:
+ continue
has_x, has_y = False, False
dx, dy = 0, 0
baseLineShift = 0
- if node_name(c) in ('text', 'tspan'):
- text = self.clean_text(c.text, preserve_space)
- if not text:
- continue
- x1, y1, dx, dy = self.convert_length_attrs(c, 'x', 'y', 'dx', 'dy', em_base=fs)
- has_x, has_y = (c.attrib.get('x', '') != '', c.attrib.get('y', '') != '')
+ if not is_tail:
+ x1, y1, dx, dy = self.convert_length_attrs(subnode, 'x', 'y', 'dx', 'dy', em_base=fs)
+ has_x, has_y = (subnode.attrib.get('x', '') != '', subnode.attrib.get('y', '') != '')
dx0 = dx0 + (dx[0] if isinstance(dx, list) else dx)
dy0 = dy0 + (dy[0] if isinstance(dy, list) else dy)
- baseLineShift = c.attrib.get("baseline-shift", '0')
- if baseLineShift in ("sub", "super", "baseline"):
- baseLineShift = {"sub": -fs/2, "super": fs/2, "baseline": 0}[baseLineShift]
- else:
- baseLineShift = attrConv.convertLength(baseLineShift, em_base=fs)
+ baseLineShift = subnode.attrib.get("baseline-shift", '0')
+ if baseLineShift in ("sub", "super", "baseline"):
+ baseLineShift = {"sub": -fs/2, "super": fs/2, "baseline": 0}[baseLineShift]
else:
- continue
+ baseLineShift = attrConv.convertLength(baseLineShift, em_base=fs)
frag_lengths.append(stringWidth(text, ff, fs))
@@ -1124,8 +1107,8 @@ def convertText(self, node):
new_y = char_dy + (last_y if char_y is None else char_y)
shape = String(new_x, -(new_y - baseLineShift), char)
self.applyStyleOnShape(shape, node)
- if node_name(c) == 'tspan':
- self.applyStyleOnShape(shape, c)
+ if node_name(subnode) == 'tspan':
+ self.applyStyleOnShape(shape, subnode)
gr.add(shape)
last_x = new_x
last_y = new_y
@@ -1135,8 +1118,8 @@ def convertText(self, node):
new_y = (y1 + dy) if has_y else (y + dy0)
shape = String(new_x, -(new_y - baseLineShift), text)
self.applyStyleOnShape(shape, node)
- if node_name(c) == 'tspan':
- self.applyStyleOnShape(shape, c)
+ if node_name(subnode) == 'tspan':
+ self.applyStyleOnShape(shape, subnode)
gr.add(shape)
gr.scale(1, -1)
@@ -1516,6 +1499,35 @@ def node_name(node):
pass
+def iter_text_node(node, preserve_space, level=0):
+    """
+    Recursively iterate through text node and its children, including node tails.
+
+    Yields ``(subnode, text, is_tail)`` tuples in document order:
+
+    * ``(node, text, False)`` for the node's own leading text. ``text`` is
+      the cleaned ``node.text``, or ``None`` when the node has no text.
+    * ``(parent, tail, True)`` for the text that follows a nested child's
+      closing tag. It is attributed to the child's parent, because the
+      tail is rendered in the parent's context.
+
+    ``preserve_space`` is forwarded to ``clean_text``. ``level`` is the
+    recursion depth (0 for the node the caller passed in); callers should
+    leave it at its default.
+    """
+    # FIXME: determine if text is last, then strip_end=True, else strip_end=False
+    text = clean_text(node.text, preserve_space) if node.text else None
+    yield node, text, False
+    for child in node.getchildren():
+        yield from iter_text_node(child, preserve_space, level=level + 1)
+    if level == 0:
+        # The tail of the top-level node is whatever follows its closing
+        # tag, i.e. it lies outside the text element and must not be
+        # reported as part of its content.
+        return
+    # Leading whitespace of a tail separates it from the preceding child
+    # text, so only the end is stripped.
+    tail = clean_text(node.tail, preserve_space, strip_start=False) if node.tail else None
+    if tail is not None:
+        yield node.getparent(), tail, True
+
+
+def clean_text(text, preserve_space, strip_start=True, strip_end=True):
+    """Text cleaning as per https://www.w3.org/TR/SVG/text.html#WhiteSpace
+
+    Newlines (``\\r\\n`` and ``\\n``) and tabs are each replaced by one space.
+    Unless ``preserve_space`` is true, leading whitespace is removed when
+    ``strip_start`` is set, trailing whitespace is removed when
+    ``strip_end`` is set, and every run of consecutive spaces is collapsed
+    to a single space.
+
+    Returns the cleaned string, or ``None`` when ``text`` is ``None``.
+    """
+    if text is None:
+        return None
+    text = text.replace('\r\n', ' ').replace('\n', ' ').replace('\t', ' ')
+    if not preserve_space:
+        if strip_start:
+            text = text.lstrip()
+        if strip_end:
+            text = text.rstrip()
+        # The needle must be exactly TWO spaces: searching for a single
+        # space would never shrink the text and the loop would not end.
+        # Building it with a multiplication keeps the literal unambiguous.
+        double_space = ' ' * 2
+        while double_space in text:
+            text = text.replace(double_space, ' ')
+    return text
+
+
def copy_shape_properties(source_shape, dest_shape):
for prop, val in source_shape.getProperties().items():
try:
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 07433f7..e4d5438 100755
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -685,6 +685,7 @@ def test_tspan_position(self):
Subtitle
Complete
+ The end
''')))
@@ -697,6 +698,7 @@ def test_tspan_position(self):
assert main_group.contents[0].contents[2].y == -33.487
assert main_group.contents[0].contents[3].x == 10 + (3 * 28)
assert main_group.contents[0].contents[3].y == -20 - (1.5 * 28)
+ assert main_group.contents[0].contents[4].text == 'The end'
class TestRectNode: