Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 46 additions & 8 deletions sphinx_markdown_parser/markdown_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@
import html
import markdown
from markdown import util
import os.path
import urllib.parse

from pydash import _
import re
import yaml

from sphinx import addnodes

__all__ = ['MarkdownParser']

TAGS_INLINE = set("""
Expand Down Expand Up @@ -275,7 +278,11 @@ def pop_node(self):

def new_section(self, heading):
section = nodes.section()
anchor = to_html_anchor("".join(heading.itertext()))
if heading.get('id'):
anchor_text = heading.get('id')
else:
anchor_text = "".join(heading.itertext())
anchor = to_html_anchor(anchor_text)
section['ids'] = [anchor]
section['names'] = [anchor]
return section
Expand All @@ -289,7 +296,10 @@ def start_new_section(self, lvl, heading):
self.reset_w_old()
self.parse_stack_h.append(lvl)
assert isinstance(self.parse_stack_w[-1], nodes.section)
return nodes.title()
title = nodes.title()
if heading.get('class'):
title['classes'] = heading.get('class').split()
return title

def visit_script(self, node):
if node.attrib.get("type", "").split(";")[0] == "math/tex":
Expand Down Expand Up @@ -354,15 +364,43 @@ def visit_em(self, node):
def visit_br(self, node):
return nodes.Text('\n')

# note: logic is based on CommonMarkParser.visit_link()
def visit_a(self, node):
reference = nodes.reference()
href = node.attrib.pop('href', '')
try:
r = urllib.parse.urlparse(href)
if r.path.endswith(".md"):
href = urllib.parse.urlunparse(r._replace(path = r.path[:-3] + ".html"))
except:
pass
url_check = urllib.parse.urlparse(href)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@wlupton The previous code added `.html`. Is this now redundant (does Sphinx or other code add it automatically), or has the behaviour actually changed? Some of my projects depend on this behaviour.

Copy link
Contributor Author

@wlupton wlupton Jun 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just did a test with this input:

These should all work:

* See [this file](extensions.html) for more info
* See [this file](extensions.md) for more info
* See [this file](extensions) for more info

It's actually the first one that doesn't work (the other two do). It's not detected as a link at all. Is that expected?

But I guess this also needs testing where there's an explicit link to an HTML file outside the "sphinx domain"?

Update:

I'd missed that I had the following sphinx warning:

introduction.md:: WARNING: None:any reference target not found: extensions.html

So if this does need to work, the link must not be treated as an `any` reference. Using a `file:` URL should be sufficient to guarantee this.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change it at all? What was wrong with the old version?

if not url_check.scheme and not url_check.fragment:
# remove .md or .markdown extension
href_root, href_ext = os.path.splitext(href)
if href_ext in {'.md', '.markdown'}:
href = href_root

# remove leading '~'; it causes link text to be last component only
last_only = href.startswith('~')
if last_only:
href = href[1:]

# always add an 'any' reference
reference = addnodes.pending_xref(reftarget=href, reftype='any',
refdomain=None, refexplicit=True,
refwarn=True)

# generate default link text
text = re.sub(r'.+\.', '', href) if last_only else href

# add text if none was supplied, so can use [](url) to get ReST
# link behaviour
def adjust_text(node_):
node_text = (node_.text or '').strip()
if not node_text:
node_.text = text

if not list(node):
adjust_text(node)
else:
# note: have to use ` ` (with a space) to get code style
adjust_text(list(node)[0])

reference['refuri'] = href
return reference

Expand Down
31 changes: 22 additions & 9 deletions sphinx_markdown_parser/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

from .states import DummyStateMachine


class AutoStructify(transforms.Transform):
"""Automatically try to transform blocks to sphinx directives.

Expand Down Expand Up @@ -39,6 +38,8 @@ def __init__(self, *args, **kwargs):
suffix_set = set(['md', 'rst'])

default_config = {
'auto_toc_tree_maxdepth': 1,
'auto_toc_tree_numbered': None,
'auto_toc_tree_section': None,
'commonmark_suffixes': ['.md'],
'enable_auto_doc_ref': False,
Expand Down Expand Up @@ -125,6 +126,8 @@ def auto_toc_tree(self, node): # pylint: disable=too-many-branches
"""
if not self.config['enable_auto_toc_tree']:
return None
maxdepth = self.config.get('auto_toc_tree_maxdepth', 1)
numbered = self.config.get('auto_toc_tree_numbered', None)
# when auto_toc_tree_section is set
# only auto generate toctree under the specified section title
sec = self.config['auto_toc_tree_section']
Expand All @@ -147,11 +150,12 @@ def auto_toc_tree(self, node): # pylint: disable=too-many-branches
if title.astext().strip() != sec:
return None

numbered = None
if isinstance(node, nodes.bullet_list):
if numbered is not None:
pass
elif isinstance(node, nodes.bullet_list):
numbered = 0
elif isinstance(node, nodes.enumerated_list):
numbered = 1
numbered = 999

if numbered is None:
return None
Expand All @@ -168,10 +172,18 @@ def auto_toc_tree(self, node): # pylint: disable=too-many-branches
ref = par.children[0]
if isinstance(ref, addnodes.pending_xref):
ref = ref.children[0]
if not isinstance(ref, nodes.reference):
if isinstance(ref, nodes.Text):
text = ref.astext()
title, uri, docpath = text, text, None
elif isinstance(ref, nodes.reference):
# TODO check that this can't happen and, if so, get rid of
# parse_ref()
self.reporter.warning('AutoStructify unexpected reference '
'%r' % ref)
title, uri, docpath = self.parse_ref(ref)
else:
return None
title, uri, docpath = self.parse_ref(ref)
if title is None or uri.startswith('#'):
if uri.startswith('#'):
return None
if docpath:
refs.append((title, docpath))
Expand All @@ -183,10 +195,11 @@ def auto_toc_tree(self, node): # pylint: disable=too-many-branches
return self.state_machine.run_directive(
'toctree',
options={
'maxdepth': 1,
'caption': sec,
'maxdepth': maxdepth,
'numbered': numbered
},
content=['%s <%s>' % (k, v) for k, v in refs]
content=[v for _, v in refs]
)

def auto_inline_code(self, node):
Expand Down