Merge pull request #26 from BroadbandForum/feature/misc-improvements

clayrisser · web-flow · commit e72768ab155e · 2020-06-07T02:33:19.000-05:00
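Miscellaneous improvements: heading id/class support, 'any' cross-references for local links, configurable auto_toc_tree maxdepth/numbered (will add more detail later)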
diff --git a/sphinx_markdown_parser/markdown_parser.py b/sphinx_markdown_parser/markdown_parser.py
@@ -6,12 +6,15 @@
 import html
 import markdown
 from markdown import util
+import os.path
 import urllib.parse
 
 from pydash import _
 import re
 import yaml
 
+from sphinx import addnodes
+
 __all__ = ['MarkdownParser']
 
 TAGS_INLINE = set("""
@@ -277,7 +280,11 @@ def pop_node(self):
 
     def new_section(self, heading):
         section = nodes.section()
-        anchor = to_html_anchor("".join(heading.itertext()))
+        if heading.get('id'):
+            anchor_text = heading.get('id')
+        else:
+            anchor_text = "".join(heading.itertext())
+        anchor = to_html_anchor(anchor_text)
         section['ids'] = [anchor]
         section['names'] = [anchor]
         return section
@@ -291,7 +298,10 @@ def start_new_section(self, lvl, heading):
         self.reset_w_old()
         self.parse_stack_h.append(lvl)
         assert isinstance(self.parse_stack_w[-1], nodes.section)
-        return nodes.title()
+        title = nodes.title()
+        if heading.get('class'):
+            title['classes'] = heading.get('class').split()
+        return title
 
     def visit_script(self, node):
         if node.attrib.get("type", "").split(";")[0] == "math/tex":
@@ -356,15 +366,43 @@ def visit_em(self, node):
     def visit_br(self, node):
         return nodes.Text('\n')
 
+    # note: logic is based on CommonMarkParser.visit_link()
     def visit_a(self, node):
         reference = nodes.reference()
         href = node.attrib.pop('href', '')
-        try:
-            r = urllib.parse.urlparse(href)
-            if r.path.endswith(".md"):
-              href = urllib.parse.urlunparse(r._replace(path = r.path[:-3] + ".html"))
-        except:
-            pass
+        url_check = urllib.parse.urlparse(href)
+        if not url_check.scheme and not url_check.fragment:
+            # remove .md or .markdown extension
+            href_root, href_ext = os.path.splitext(href)
+            if href_ext in {'.md', '.markdown'}:
+                href = href_root
+
+            # remove leading '~'; it causes link text to be last component only
+            last_only = href.startswith('~')
+            if last_only:
+                href = href[1:]
+
+            # always add an 'any' reference
+            reference = addnodes.pending_xref(reftarget=href, reftype='any',
+                                              refdomain=None, refexplicit=True,
+                                              refwarn=True)
+
+            # generate default link text
+            text = re.sub(r'.+\.', '', href) if last_only else href
+
+            # add default text if none was supplied, so that [](url) can be
+            # used to get ReST link behaviour
+            def adjust_text(node_):
+                node_text = (node_.text or '').strip()
+                if not node_text:
+                    node_.text = text
+
+            if not list(node):
+                adjust_text(node)
+            else:
+                # note: link text must be ` ` (with a space) to get code style
+                adjust_text(list(node)[0])
+
         reference['refuri'] = href
         return reference
 
diff --git a/sphinx_markdown_parser/transform.py b/sphinx_markdown_parser/transform.py
@@ -11,7 +11,6 @@
 
 from .states import DummyStateMachine
 
-
 class AutoStructify(transforms.Transform):
     """Automatically try to transform blocks to sphinx directives.
 
@@ -39,6 +38,8 @@ def __init__(self, *args, **kwargs):
     suffix_set = set(['md', 'rst'])
 
     default_config = {
+        'auto_toc_tree_maxdepth': 1,
+        'auto_toc_tree_numbered': None,
         'auto_toc_tree_section': None,
         'commonmark_suffixes': ['.md'],
         'enable_auto_doc_ref': False,
@@ -125,6 +126,8 @@ def auto_toc_tree(self, node):  # pylint: disable=too-many-branches
         """
         if not self.config['enable_auto_toc_tree']:
             return None
+        maxdepth = self.config.get('auto_toc_tree_maxdepth', 1)
+        numbered = self.config.get('auto_toc_tree_numbered', None)
         # when auto_toc_tree_section is set
         # only auto generate toctree under the specified section title
         sec = self.config['auto_toc_tree_section']
@@ -147,11 +150,12 @@ def auto_toc_tree(self, node):  # pylint: disable=too-many-branches
             if title.astext().strip() != sec:
                 return None
 
-        numbered = None
-        if isinstance(node, nodes.bullet_list):
+        if numbered is not None:
+            pass
+        elif isinstance(node, nodes.bullet_list):
             numbered = 0
         elif isinstance(node, nodes.enumerated_list):
-            numbered = 1
+            numbered = 999
 
         if numbered is None:
             return None
@@ -168,10 +172,18 @@ def auto_toc_tree(self, node):  # pylint: disable=too-many-branches
             ref = par.children[0]
             if isinstance(ref, addnodes.pending_xref):
                 ref = ref.children[0]
-            if not isinstance(ref, nodes.reference):
+            if isinstance(ref, nodes.Text):
+                text = ref.astext()
+                title, uri, docpath = text, text, None
+            elif isinstance(ref, nodes.reference):
+                # TODO check that this can't happen and, if so, get rid of
+                #      parse_ref()
+                self.reporter.warning('AutoStructify unexpected reference '
+                                      '%r' % ref)
+                title, uri, docpath = self.parse_ref(ref)
+            else:
                 return None
-            title, uri, docpath = self.parse_ref(ref)
-            if title is None or uri.startswith('#'):
+            if uri.startswith('#'):
                 return None
             if docpath:
                 refs.append((title, docpath))
@@ -183,10 +195,11 @@ def auto_toc_tree(self, node):  # pylint: disable=too-many-branches
         return self.state_machine.run_directive(
             'toctree',
             options={
-                'maxdepth': 1,
+                'caption': sec,
+                'maxdepth': maxdepth,
                 'numbered': numbered
             },
-            content=['%s <%s>' % (k, v) for k, v in refs]
+            content=[v for _, v in refs]
         )
 
     def auto_inline_code(self, node):