From 750c6ab0cef97cc98abebda1005aeb1d716c2c86 Mon Sep 17 00:00:00 2001 From: William Lupton Date: Wed, 3 Jun 2020 10:18:53 +0100 Subject: [PATCH] Miscellaneous improvements (will add more detail later) --- sphinx_markdown_parser/markdown_parser.py | 54 +++++++++++++++++++---- sphinx_markdown_parser/transform.py | 31 +++++++++---- 2 files changed, 68 insertions(+), 17 deletions(-) diff --git a/sphinx_markdown_parser/markdown_parser.py b/sphinx_markdown_parser/markdown_parser.py index b331aff..602a3e9 100644 --- a/sphinx_markdown_parser/markdown_parser.py +++ b/sphinx_markdown_parser/markdown_parser.py @@ -6,12 +6,15 @@ import html import markdown from markdown import util +import os.path import urllib.parse from pydash import _ import re import yaml +from sphinx import addnodes + __all__ = ['MarkdownParser'] TAGS_INLINE = set(""" @@ -275,7 +278,11 @@ def pop_node(self): def new_section(self, heading): section = nodes.section() - anchor = to_html_anchor("".join(heading.itertext())) + if heading.get('id'): + anchor_text = heading.get('id') + else: + anchor_text = "".join(heading.itertext()) + anchor = to_html_anchor(anchor_text) section['ids'] = [anchor] section['names'] = [anchor] return section @@ -289,7 +296,10 @@ def start_new_section(self, lvl, heading): self.reset_w_old() self.parse_stack_h.append(lvl) assert isinstance(self.parse_stack_w[-1], nodes.section) - return nodes.title() + title = nodes.title() + if heading.get('class'): + title['classes'] = heading.get('class').split() + return title def visit_script(self, node): if node.attrib.get("type", "").split(";")[0] == "math/tex": @@ -354,15 +364,43 @@ def visit_em(self, node): def visit_br(self, node): return nodes.Text('\n') + # note: logic is based on CommonMarkParser.visit_link() def visit_a(self, node): reference = nodes.reference() href = node.attrib.pop('href', '') - try: - r = urllib.parse.urlparse(href) - if r.path.endswith(".md"): - href = urllib.parse.urlunparse(r._replace(path = r.path[:-3] + ".html")) - except: - pass + url_check = urllib.parse.urlparse(href) + if not url_check.scheme and not url_check.fragment: + # remove .md or .markdown extension + href_root, href_ext = os.path.splitext(href) + if href_ext in {'.md', '.markdown'}: + href = href_root + + # remove leading '~'; it causes link text to be last component only + last_only = href.startswith('~') + if last_only: + href = href[1:] + + # always add an 'any' reference + reference = addnodes.pending_xref(reftarget=href, reftype='any', + refdomain=None, refexplicit=True, + refwarn=True) + + # generate default link text + text = re.sub(r'.+\.', '', href) if last_only else href + + # add text if none was supplied, so can use [](url) to get ReST + # link behaviour + def adjust_text(node_): + node_text = (node_.text or '').strip() + if not node_text: + node_.text = text + + if not list(node): + adjust_text(node) + else: + # note: have to use ` ` (with a space) to get code style + adjust_text(list(node)[0]) + reference['refuri'] = href return reference diff --git a/sphinx_markdown_parser/transform.py b/sphinx_markdown_parser/transform.py index 22064f6..15846d9 100644 --- a/sphinx_markdown_parser/transform.py +++ b/sphinx_markdown_parser/transform.py @@ -11,7 +11,6 @@ from .states import DummyStateMachine - class AutoStructify(transforms.Transform): """Automatically try to transform blocks to sphinx directives. @@ -39,6 +38,8 @@ def __init__(self, *args, **kwargs): suffix_set = set(['md', 'rst']) default_config = { + 'auto_toc_tree_maxdepth': 1, + 'auto_toc_tree_numbered': None, 'auto_toc_tree_section': None, 'commonmark_suffixes': ['.md'], 'enable_auto_doc_ref': False, @@ -125,6 +126,8 @@ def auto_toc_tree(self, node): # pylint: disable=too-many-branches """ if not self.config['enable_auto_toc_tree']: return None + maxdepth = self.config.get('auto_toc_tree_maxdepth', 1) + numbered = self.config.get('auto_toc_tree_numbered', None) # when auto_toc_tree_section is set # only auto generate toctree under the specified section title sec = self.config['auto_toc_tree_section'] @@ -147,11 +150,12 @@ def auto_toc_tree(self, node): # pylint: disable=too-many-branches if title.astext().strip() != sec: return None - numbered = None - if isinstance(node, nodes.bullet_list): + if numbered is not None: + pass + elif isinstance(node, nodes.bullet_list): numbered = 0 elif isinstance(node, nodes.enumerated_list): - numbered = 1 + numbered = 999 if numbered is None: return None @@ -168,10 +172,18 @@ def auto_toc_tree(self, node): # pylint: disable=too-many-branches ref = par.children[0] if isinstance(ref, addnodes.pending_xref): ref = ref.children[0] - if not isinstance(ref, nodes.reference): + if isinstance(ref, nodes.Text): + text = ref.astext() + title, uri, docpath = text, text, None + elif isinstance(ref, nodes.reference): + # TODO check that this can't happen and, if so, get rid of + # parse_ref() + self.reporter.warning('AutoStructify unexpected reference ' + '%r' % ref) + title, uri, docpath = self.parse_ref(ref) + else: return None - title, uri, docpath = self.parse_ref(ref) - if title is None or uri.startswith('#'): + if uri.startswith('#'): return None if docpath: refs.append((title, docpath)) @@ -183,10 +195,11 @@ def auto_toc_tree(self, node): # pylint: disable=too-many-branches return self.state_machine.run_directive( 'toctree', options={ - 'maxdepth': 1, + 'caption': sec, + 'maxdepth': maxdepth, 'numbered': numbered }, - content=['%s <%s>' % (k, v) for k, v in refs] + content=[v for _, v in refs] ) def auto_inline_code(self, node):