Skip to content

Commit 99425b4

Browse files
authored
Improve type annotations (add more and fix wrong ones)
The origins of these are three-fold:
* Merging in stubs from https://github.com/python/typeshed/tree/main/stubs/Markdown using "merge-pyi"
  - Note: we can consider these annotations to be the important ones because it's what people have been adding according to their own need
* Double-checking around places where stubs were already added from the above, particularly conflicts with annotations that got added in this repository already
  + Taking the opportunity to declare a generic "Registry of T" class
* Running mypy and eliminating the most glaring errors it reported
1 parent c53d5d7 commit 99425b4

16 files changed

+130
-100
lines changed

docs/changelog.md

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
### Fixed
1414

1515
* Fix a performance problem with HTML extraction where large HTML input could trigger quadratic line counting behavior (PR#1392).
16+
* Improve and expand type annotations in the code base (#1394)
1617

1718
## [3.5] -- 2023-10-06
1819

markdown/blockparser.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,12 @@
3030
from __future__ import annotations
3131

3232
import xml.etree.ElementTree as etree
33-
from typing import TYPE_CHECKING, Sequence, Any
33+
from typing import TYPE_CHECKING, Iterable, Any
3434
from . import util
3535

3636
if TYPE_CHECKING: # pragma: no cover
3737
from markdown import Markdown
38+
from .blockprocessors import BlockProcessor
3839

3940

4041
class State(list):
@@ -59,7 +60,7 @@ def set(self, state: Any):
5960
""" Set a new state. """
6061
self.append(state)
6162

62-
def reset(self):
63+
def reset(self) -> None:
6364
""" Step back one step in nested state. """
6465
self.pop()
6566

@@ -92,11 +93,11 @@ def __init__(self, md: Markdown):
9293
[`blockprocessors`][markdown.blockprocessors].
9394
9495
"""
95-
self.blockprocessors = util.Registry()
96+
self.blockprocessors: util.Registry[BlockProcessor] = util.Registry()
9697
self.state = State()
9798
self.md = md
9899

99-
def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree:
100+
def parseDocument(self, lines: Iterable[str]) -> etree.ElementTree:
100101
""" Parse a Markdown document into an `ElementTree`.
101102
102103
Given a list of lines, an `ElementTree` object (not just a parent
@@ -116,7 +117,7 @@ def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree:
116117
self.parseChunk(self.root, '\n'.join(lines))
117118
return etree.ElementTree(self.root)
118119

119-
def parseChunk(self, parent: etree.Element, text: str):
120+
def parseChunk(self, parent: etree.Element, text: str) -> None:
120121
""" Parse a chunk of Markdown text and attach to given `etree` node.
121122
122123
While the `text` argument is generally assumed to contain multiple
@@ -134,7 +135,7 @@ def parseChunk(self, parent: etree.Element, text: str):
134135
"""
135136
self.parseBlocks(parent, text.split('\n\n'))
136137

137-
def parseBlocks(self, parent: etree.Element, blocks: Sequence[str]):
138+
def parseBlocks(self, parent: etree.Element, blocks: list[str]) -> None:
138139
""" Process blocks of Markdown text and attach to given `etree` node.
139140
140141
Given a list of `blocks`, each `blockprocessor` is stepped through

markdown/blockprocessors.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def lastChild(self, parent: etree.Element) -> etree.Element | None:
8282
else:
8383
return None
8484

85-
def detab(self, text: str, length: int = None) -> str:
85+
def detab(self, text: str, length: int | None = None) -> tuple[str, str]:
8686
""" Remove a tab from the front of each line of the given text. """
8787
if length is None:
8888
length = self.tab_length
@@ -105,7 +105,7 @@ def looseDetab(self, text: str, level: int = 1) -> str:
105105
lines[i] = lines[i][self.tab_length*level:]
106106
return '\n'.join(lines)
107107

108-
def test(self, parent: etree.Element, block: list[str]) -> bool:
108+
def test(self, parent: etree.Element, block: str) -> bool:
109109
""" Test for block type. Must be overridden by subclasses.
110110
111111
As the parser loops through processors, it will call the `test`
@@ -214,7 +214,7 @@ def run(self, parent, blocks):
214214
self.create_item(sibling, block)
215215
self.parser.state.reset()
216216

217-
def create_item(self, parent: etree.Element, block: str):
217+
def create_item(self, parent: etree.Element, block: str) -> None:
218218
""" Create a new `li` and parse the block with it as the parent. """
219219
li = etree.SubElement(parent, 'li')
220220
self.parser.parseBlocks(li, [block])
@@ -329,7 +329,7 @@ class OListProcessor(BlockProcessor):
329329

330330
TAG: str = 'ol'
331331
""" The tag used for the wrapping element. """
332-
STARTSWITH: int = '1'
332+
STARTSWITH: str = '1'
333333
"""
334334
The integer (as a string) with which the list starts. For example, if a list is initialized as
335335
`3. Item`, then the `ol` tag will be assigned an HTML attribute of `start="3"`. Default: `"1"`.
@@ -342,7 +342,7 @@ class OListProcessor(BlockProcessor):
342342
This is the list of types which can be mixed.
343343
"""
344344

345-
def __init__(self, parser):
345+
def __init__(self, parser: BlockParser):
346346
super().__init__(parser)
347347
# Detect an item (`1. item`). `group(1)` contains contents of item.
348348
self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1))
@@ -448,7 +448,7 @@ class UListProcessor(OListProcessor):
448448
TAG: str = 'ul'
449449
""" The tag used for the wrapping element. """
450450

451-
def __init__(self, parser):
451+
def __init__(self, parser: BlockParser):
452452
super().__init__(parser)
453453
# Detect an item (`* item`, `+ item` or `- item`). `group(1)` contains contents of item.
454454
self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1))

markdown/core.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import sys
2424
import logging
2525
import importlib
26-
from typing import TYPE_CHECKING, Any, TextIO, Callable
26+
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, Sequence, TextIO
2727
from . import util
2828
from .preprocessors import build_preprocessors
2929
from .blockprocessors import build_block_parser
@@ -76,7 +76,7 @@ class Markdown:
7676

7777
doc_tag = "div" # Element used to wrap document - later removed
7878

79-
output_formats: dict[str, Callable[Element]] = {
79+
output_formats: ClassVar[dict[str, Callable[[Element], str]]] = {
8080
'html': to_html_string,
8181
'xhtml': to_xhtml_string,
8282
}
@@ -156,7 +156,11 @@ def build_parser(self) -> Markdown:
156156
self.postprocessors = build_postprocessors(self)
157157
return self
158158

159-
def registerExtensions(self, extensions: list[Extension | str], configs: dict[str, dict[str, Any]]) -> Markdown:
159+
def registerExtensions(
160+
self,
161+
extensions: Sequence[Extension | str],
162+
configs: Mapping[str, Mapping[str, Any]]
163+
) -> Markdown:
160164
"""
161165
Load a list of extensions into an instance of the `Markdown` class.
162166
@@ -188,7 +192,7 @@ def registerExtensions(self, extensions: list[Extension | str], configs: dict[st
188192
)
189193
return self
190194

191-
def build_extension(self, ext_name: str, configs: dict[str, Any]) -> Extension:
195+
def build_extension(self, ext_name: str, configs: Mapping[str, Any]) -> Extension:
192196
"""
193197
Build extension from a string name, then return an instance using the given `configs`.
194198

markdown/extensions/__init__.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
from __future__ import annotations
2929

30-
from typing import TYPE_CHECKING, Any
30+
from typing import TYPE_CHECKING, Any, Mapping, Sequence
3131
from ..util import parseBoolValue
3232

3333
if TYPE_CHECKING: # pragma: no cover
@@ -37,7 +37,7 @@
3737
class Extension:
3838
""" Base class for extensions to subclass. """
3939

40-
config: dict[str, list[Any, str]] = {}
40+
config: Mapping[str, list] = {}
4141
"""
4242
Default configuration for an extension.
4343
@@ -91,7 +91,7 @@ def getConfigInfo(self) -> list[tuple[str, str]]:
9191
"""
9292
return [(key, self.config[key][1]) for key in self.config.keys()]
9393

94-
def setConfig(self, key: str, value: Any):
94+
def setConfig(self, key: str, value: Any) -> None:
9595
"""
9696
Set a configuration option.
9797
@@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any):
112112
value = parseBoolValue(value, preserve_none=True)
113113
self.config[key][0] = value
114114

115-
def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]):
115+
def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]):
116116
"""
117117
Loop through a collection of configuration options, passing each to
118118
[`setConfig`][markdown.extensions.Extension.setConfig].
@@ -129,7 +129,7 @@ def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]):
129129
for key, value in items:
130130
self.setConfig(key, value)
131131

132-
def extendMarkdown(self, md: Markdown):
132+
def extendMarkdown(self, md: Markdown) -> None:
133133
"""
134134
Add the various processors and patterns to the Markdown Instance.
135135

markdown/extensions/attr_list.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ def run(self, doc: Element):
146146
self.assign_attrs(elem, m.group(1))
147147
elem.tail = elem.tail[m.end():]
148148

149-
def assign_attrs(self, elem: Element, attrs: dict[str, str]):
149+
def assign_attrs(self, elem: Element, attrs: str) -> None:
150150
""" Assign `attrs` to element. """
151151
for k, v in get_attrs(attrs):
152152
if k == '.':

markdown/extensions/footnotes.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,14 @@ def extendMarkdown(self, md):
9898
# Insert a postprocessor after amp_substitute processor
9999
md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)
100100

101-
def reset(self):
101+
def reset(self) -> None:
102102
""" Clear footnotes on reset, and prepare for distinct document. """
103-
self.footnotes = OrderedDict()
103+
self.footnotes: OrderedDict[str, str] = OrderedDict()
104104
self.unique_prefix += 1
105105
self.found_refs = {}
106106
self.used_refs = set()
107107

108-
def unique_ref(self, reference, found=False):
108+
def unique_ref(self, reference, found: bool = False):
109109
""" Get a unique reference if there are duplicates. """
110110
if not found:
111111
return reference
@@ -144,7 +144,7 @@ def finder(element):
144144
res = finder(root)
145145
return res
146146

147-
def setFootnote(self, id, text):
147+
def setFootnote(self, id, text) -> None:
148148
""" Store a footnote for later retrieval. """
149149
self.footnotes[id] = text
150150

@@ -159,7 +159,7 @@ def makeFootnoteId(self, id):
159159
else:
160160
return 'fn{}{}'.format(self.get_separator(), id)
161161

162-
def makeFootnoteRefId(self, id, found=False):
162+
def makeFootnoteRefId(self, id, found: bool = False):
163163
""" Return footnote back-link id. """
164164
if self.getConfig("UNIQUE_IDS"):
165165
return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
@@ -329,7 +329,7 @@ class FootnotePostTreeprocessor(Treeprocessor):
329329
def __init__(self, footnotes):
330330
self.footnotes = footnotes
331331

332-
def add_duplicates(self, li, duplicates):
332+
def add_duplicates(self, li, duplicates) -> None:
333333
""" Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc. """
334334
for link in li.iter('a'):
335335
# Find the link that needs to be duplicated.
@@ -355,7 +355,7 @@ def get_num_duplicates(self, li):
355355
link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest)
356356
return self.footnotes.found_refs.get(link_id, 0)
357357

358-
def handle_duplicates(self, parent):
358+
def handle_duplicates(self, parent) -> None:
359359
""" Find duplicate footnotes and format and add the duplicates. """
360360
for li in list(parent):
361361
# Check number of duplicate footnotes and insert

markdown/extensions/meta.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def extendMarkdown(self, md):
4444
self.md = md
4545
md.preprocessors.register(MetaPreprocessor(md), 'meta', 27)
4646

47-
def reset(self):
47+
def reset(self) -> None:
4848
self.md.Meta = {}
4949

5050

markdown/extensions/smarty.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def _addPatterns(self, md, patterns, serie, priority):
193193
name = 'smarty-%s-%d' % (serie, ind)
194194
self.inlinePatterns.register(pattern, name, priority-ind)
195195

196-
def educateDashes(self, md):
196+
def educateDashes(self, md) -> None:
197197
emDashesPattern = SubstituteTextPattern(
198198
r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md
199199
)
@@ -203,13 +203,13 @@ def educateDashes(self, md):
203203
self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50)
204204
self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45)
205205

206-
def educateEllipses(self, md):
206+
def educateEllipses(self, md) -> None:
207207
ellipsesPattern = SubstituteTextPattern(
208208
r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md
209209
)
210210
self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10)
211211

212-
def educateAngledQuotes(self, md):
212+
def educateAngledQuotes(self, md) -> None:
213213
leftAngledQuotePattern = SubstituteTextPattern(
214214
r'\<\<', (self.substitutions['left-angle-quote'],), md
215215
)
@@ -219,7 +219,7 @@ def educateAngledQuotes(self, md):
219219
self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40)
220220
self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35)
221221

222-
def educateQuotes(self, md):
222+
def educateQuotes(self, md) -> None:
223223
lsquo = self.substitutions['left-single-quote']
224224
rsquo = self.substitutions['right-single-quote']
225225
ldquo = self.substitutions['left-double-quote']
@@ -243,7 +243,7 @@ def educateQuotes(self, md):
243243

244244
def extendMarkdown(self, md):
245245
configs = self.getConfigs()
246-
self.inlinePatterns = Registry()
246+
self.inlinePatterns: Registry[HtmlInlineProcessor] = Registry()
247247
if configs['smart_ellipses']:
248248
self.educateEllipses(md)
249249
if configs['smart_quotes']:

markdown/extensions/toc.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def get_name(el):
7171
return ''.join(text).strip()
7272

7373

74-
def stashedHTML2text(text, md, strip_entities=True):
74+
def stashedHTML2text(text, md, strip_entities: bool = True):
7575
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
7676
def _html_sub(m):
7777
""" Substitute raw html with plain text. """
@@ -198,7 +198,7 @@ def iterparent(self, node):
198198
yield node, child
199199
yield from self.iterparent(child)
200200

201-
def replace_marker(self, root, elem):
201+
def replace_marker(self, root, elem) -> None:
202202
""" Replace marker with elem. """
203203
for (p, c) in self.iterparent(root):
204204
text = ''.join(c.itertext()).strip()
@@ -219,14 +219,14 @@ def replace_marker(self, root, elem):
219219
p[i] = elem
220220
break
221221

222-
def set_level(self, elem):
222+
def set_level(self, elem) -> None:
223223
""" Adjust header level according to base level. """
224224
level = int(elem.tag[-1]) + self.base_level
225225
if level > 6:
226226
level = 6
227227
elem.tag = 'h%d' % level
228228

229-
def add_anchor(self, c, elem_id):
229+
def add_anchor(self, c, elem_id) -> None:
230230
anchor = etree.Element("a")
231231
anchor.text = c.text
232232
anchor.attrib["href"] = "#" + elem_id
@@ -238,7 +238,7 @@ def add_anchor(self, c, elem_id):
238238
c.remove(c[0])
239239
c.append(anchor)
240240

241-
def add_permalink(self, c, elem_id):
241+
def add_permalink(self, c, elem_id) -> None:
242242
permalink = etree.Element("a")
243243
permalink.text = ("%spara;" % AMP_SUBSTITUTE
244244
if self.use_permalinks is True
@@ -399,7 +399,7 @@ def extendMarkdown(self, md):
399399
tocext = self.TreeProcessorClass(md, self.getConfigs())
400400
md.treeprocessors.register(tocext, 'toc', 5)
401401

402-
def reset(self):
402+
def reset(self) -> None:
403403
self.md.toc = ''
404404
self.md.toc_tokens = []
405405

markdown/htmlparser.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ def get_endtag_text(self, tag: str) -> str:
156156
# Failed to extract from raw data. Assume well formed and lowercase.
157157
return '</{}>'.format(tag)
158158

159-
def handle_starttag(self, tag: str, attrs: dict[str, str]):
159+
def handle_starttag(self, tag: str, attrs: list[tuple[str, str]]):
160160
# Handle tags that should always be empty and do not specify a closing tag
161161
if tag in self.empty_tags:
162162
self.handle_startendtag(tag, attrs)
@@ -235,7 +235,7 @@ def handle_empty_tag(self, data: str, is_block: bool):
235235
else:
236236
self.cleandoc.append(data)
237237

238-
def handle_startendtag(self, tag: str, attrs: dict[str, str]):
238+
def handle_startendtag(self, tag: str, attrs: list[tuple[str, str]]):
239239
self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag))
240240

241241
def handle_charref(self, name: str):
@@ -277,7 +277,7 @@ def parse_html_declaration(self, i: int) -> int:
277277
# As `__starttag_text` is private, all references to it must be in this subclass.
278278
# The last few lines of `parse_starttag` are reversed so that `handle_starttag`
279279
# can override `cdata_mode` in certain situations (in a code span).
280-
__starttag_text = None
280+
__starttag_text: str | None = None
281281

282282
def get_starttag_text(self) -> str:
283283
"""Return full source of start tag: `<...>`."""

0 commit comments

Comments
 (0)