Improve type annotations (add more and fix wrong ones)

oprypin · web-flow · commit 99425b47bc5b · 2023-10-30T13:03:22.000-04:00
The origins of these are three-fold:
* Merging in stubs from https://github.com/python/typeshed/tree/main/stubs/Markdown using "merge-pyi"
  - Note: we can consider these annotations to be the important ones because they are what people have been adding according to their own needs
* Double-checking around places where stubs were already added from the above, particularly conflicts with annotations that had already been added in this repository
  + Taking the opportunity to declare a generic "Registry of T" class
* Running mypy and eliminating the most glaring errors it reported
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Fixed
 
 * Fix a performance problem with HTML extraction where large HTML input could trigger quadratic line counting behavior (PR#1392).
+* Improve and expand type annotations in the code base (#1394)
 
 ## [3.5] -- 2023-10-06
 
diff --git a/markdown/blockparser.py b/markdown/blockparser.py
@@ -30,11 +30,12 @@
 from __future__ import annotations
 
 import xml.etree.ElementTree as etree
-from typing import TYPE_CHECKING, Sequence, Any
+from typing import TYPE_CHECKING, Iterable, Any
 from . import util
 
 if TYPE_CHECKING:  # pragma: no cover
     from markdown import Markdown
+    from .blockprocessors import BlockProcessor
 
 
 class State(list):
@@ -59,7 +60,7 @@ def set(self, state: Any):
         """ Set a new state. """
         self.append(state)
 
-    def reset(self):
+    def reset(self) -> None:
         """ Step back one step in nested state. """
         self.pop()
 
@@ -92,11 +93,11 @@ def __init__(self, md: Markdown):
                 [`blockprocessors`][markdown.blockprocessors].
 
         """
-        self.blockprocessors = util.Registry()
+        self.blockprocessors: util.Registry[BlockProcessor] = util.Registry()
         self.state = State()
         self.md = md
 
-    def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree:
+    def parseDocument(self, lines: Iterable[str]) -> etree.ElementTree:
         """ Parse a Markdown document into an `ElementTree`.
 
         Given a list of lines, an `ElementTree` object (not just a parent
@@ -116,7 +117,7 @@ def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree:
         self.parseChunk(self.root, '\n'.join(lines))
         return etree.ElementTree(self.root)
 
-    def parseChunk(self, parent: etree.Element, text: str):
+    def parseChunk(self, parent: etree.Element, text: str) -> None:
         """ Parse a chunk of Markdown text and attach to given `etree` node.
 
         While the `text` argument is generally assumed to contain multiple
@@ -134,7 +135,7 @@ def parseChunk(self, parent: etree.Element, text: str):
         """
         self.parseBlocks(parent, text.split('\n\n'))
 
-    def parseBlocks(self, parent: etree.Element, blocks: Sequence[str]):
+    def parseBlocks(self, parent: etree.Element, blocks: list[str]) -> None:
         """ Process blocks of Markdown text and attach to given `etree` node.
 
         Given a list of `blocks`, each `blockprocessor` is stepped through
diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
@@ -82,7 +82,7 @@ def lastChild(self, parent: etree.Element) -> etree.Element | None:
         else:
             return None
 
-    def detab(self, text: str, length: int = None) -> str:
+    def detab(self, text: str, length: int | None = None) -> tuple[str, str]:
         """ Remove a tab from the front of each line of the given text. """
         if length is None:
             length = self.tab_length
@@ -105,7 +105,7 @@ def looseDetab(self, text: str, level: int = 1) -> str:
                 lines[i] = lines[i][self.tab_length*level:]
         return '\n'.join(lines)
 
-    def test(self, parent: etree.Element, block: list[str]) -> bool:
+    def test(self, parent: etree.Element, block: str) -> bool:
         """ Test for block type. Must be overridden by subclasses.
 
         As the parser loops through processors, it will call the `test`
@@ -214,7 +214,7 @@ def run(self, parent, blocks):
             self.create_item(sibling, block)
         self.parser.state.reset()
 
-    def create_item(self, parent: etree.Element, block: str):
+    def create_item(self, parent: etree.Element, block: str) -> None:
         """ Create a new `li` and parse the block with it as the parent. """
         li = etree.SubElement(parent, 'li')
         self.parser.parseBlocks(li, [block])
@@ -329,7 +329,7 @@ class OListProcessor(BlockProcessor):
 
     TAG: str = 'ol'
     """ The tag used for the wrapping element. """
-    STARTSWITH: int = '1'
+    STARTSWITH: str = '1'
     """
     The integer (as a string) with which the list starts. For example, if a list is initialized as
     `3. Item`, then the `ol` tag will be assigned an HTML attribute of `start="3"`. Default: `"1"`.
@@ -342,7 +342,7 @@ class OListProcessor(BlockProcessor):
     This is the list of types which can be mixed.
     """
 
-    def __init__(self, parser):
+    def __init__(self, parser: BlockParser):
         super().__init__(parser)
         # Detect an item (`1. item`). `group(1)` contains contents of item.
         self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1))
@@ -448,7 +448,7 @@ class UListProcessor(OListProcessor):
     TAG: str = 'ul'
     """ The tag used for the wrapping element. """
 
-    def __init__(self, parser):
+    def __init__(self, parser: BlockParser):
         super().__init__(parser)
         # Detect an item (`* item`). `group(1)` contains contents of item.
         self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1))
diff --git a/markdown/core.py b/markdown/core.py
@@ -23,7 +23,7 @@
 import sys
 import logging
 import importlib
-from typing import TYPE_CHECKING, Any, TextIO, Callable
+from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, Sequence, TextIO
 from . import util
 from .preprocessors import build_preprocessors
 from .blockprocessors import build_block_parser
@@ -76,7 +76,7 @@ class Markdown:
 
     doc_tag = "div"     # Element used to wrap document - later removed
 
-    output_formats: dict[str, Callable[Element]] = {
+    output_formats: ClassVar[dict[str, Callable[[Element], str]]] = {
         'html':   to_html_string,
         'xhtml':  to_xhtml_string,
     }
@@ -156,7 +156,11 @@ def build_parser(self) -> Markdown:
         self.postprocessors = build_postprocessors(self)
         return self
 
-    def registerExtensions(self, extensions: list[Extension | str], configs: dict[str, dict[str, Any]]) -> Markdown:
+    def registerExtensions(
+        self,
+        extensions: Sequence[Extension | str],
+        configs: Mapping[str, Mapping[str, Any]]
+    ) -> Markdown:
         """
         Load a list of extensions into an instance of the `Markdown` class.
 
@@ -188,7 +192,7 @@ def registerExtensions(self, extensions: list[Extension | str], configs: dict[st
                 )
         return self
 
-    def build_extension(self, ext_name: str, configs: dict[str, Any]) -> Extension:
+    def build_extension(self, ext_name: str, configs: Mapping[str, Any]) -> Extension:
         """
         Build extension from a string name, then return an instance using the given `configs`.
 
diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py
@@ -27,7 +27,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Mapping, Sequence
 from ..util import parseBoolValue
 
 if TYPE_CHECKING:  # pragma: no cover
@@ -37,7 +37,7 @@
 class Extension:
     """ Base class for extensions to subclass. """
 
-    config: dict[str, list[Any, str]] = {}
+    config: Mapping[str, list] = {}
     """
     Default configuration for an extension.
 
@@ -91,7 +91,7 @@ def getConfigInfo(self) -> list[tuple[str, str]]:
         """
         return [(key, self.config[key][1]) for key in self.config.keys()]
 
-    def setConfig(self, key: str, value: Any):
+    def setConfig(self, key: str, value: Any) -> None:
         """
         Set a configuration option.
 
@@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any):
             value = parseBoolValue(value, preserve_none=True)
         self.config[key][0] = value
 
-    def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]):
+    def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]):
         """
         Loop through a collection of configuration options, passing each to
         [`setConfig`][markdown.extensions.Extension.setConfig].
@@ -129,7 +129,7 @@ def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]):
         for key, value in items:
             self.setConfig(key, value)
 
-    def extendMarkdown(self, md: Markdown):
+    def extendMarkdown(self, md: Markdown) -> None:
         """
         Add the various processors and patterns to the Markdown Instance.
 
diff --git a/markdown/extensions/attr_list.py b/markdown/extensions/attr_list.py
@@ -146,7 +146,7 @@ def run(self, doc: Element):
                         self.assign_attrs(elem, m.group(1))
                         elem.tail = elem.tail[m.end():]
 
-    def assign_attrs(self, elem: Element, attrs: dict[str, str]):
+    def assign_attrs(self, elem: Element, attrs: str) -> None:
         """ Assign `attrs` to element. """
         for k, v in get_attrs(attrs):
             if k == '.':
diff --git a/markdown/extensions/footnotes.py b/markdown/extensions/footnotes.py
@@ -98,14 +98,14 @@ def extendMarkdown(self, md):
         # Insert a postprocessor after amp_substitute processor
         md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)
 
-    def reset(self):
+    def reset(self) -> None:
         """ Clear footnotes on reset, and prepare for distinct document. """
-        self.footnotes = OrderedDict()
+        self.footnotes: OrderedDict[str, str] = OrderedDict()
         self.unique_prefix += 1
         self.found_refs = {}
         self.used_refs = set()
 
-    def unique_ref(self, reference, found=False):
+    def unique_ref(self, reference, found: bool = False):
         """ Get a unique reference if there are duplicates. """
         if not found:
             return reference
@@ -144,7 +144,7 @@ def finder(element):
         res = finder(root)
         return res
 
-    def setFootnote(self, id, text):
+    def setFootnote(self, id, text) -> None:
         """ Store a footnote for later retrieval. """
         self.footnotes[id] = text
 
@@ -159,7 +159,7 @@ def makeFootnoteId(self, id):
         else:
             return 'fn{}{}'.format(self.get_separator(), id)
 
-    def makeFootnoteRefId(self, id, found=False):
+    def makeFootnoteRefId(self, id, found: bool = False):
         """ Return footnote back-link id. """
         if self.getConfig("UNIQUE_IDS"):
             return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
@@ -329,7 +329,7 @@ class FootnotePostTreeprocessor(Treeprocessor):
     def __init__(self, footnotes):
         self.footnotes = footnotes
 
-    def add_duplicates(self, li, duplicates):
+    def add_duplicates(self, li, duplicates) -> None:
         """ Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc. """
         for link in li.iter('a'):
             # Find the link that needs to be duplicated.
@@ -355,7 +355,7 @@ def get_num_duplicates(self, li):
         link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest)
         return self.footnotes.found_refs.get(link_id, 0)
 
-    def handle_duplicates(self, parent):
+    def handle_duplicates(self, parent) -> None:
         """ Find duplicate footnotes and format and add the duplicates. """
         for li in list(parent):
             # Check number of duplicates footnotes and insert
diff --git a/markdown/extensions/meta.py b/markdown/extensions/meta.py
@@ -44,7 +44,7 @@ def extendMarkdown(self, md):
         self.md = md
         md.preprocessors.register(MetaPreprocessor(md), 'meta', 27)
 
-    def reset(self):
+    def reset(self) -> None:
         self.md.Meta = {}
 
 
diff --git a/markdown/extensions/smarty.py b/markdown/extensions/smarty.py
@@ -193,7 +193,7 @@ def _addPatterns(self, md, patterns, serie, priority):
             name = 'smarty-%s-%d' % (serie, ind)
             self.inlinePatterns.register(pattern, name, priority-ind)
 
-    def educateDashes(self, md):
+    def educateDashes(self, md) -> None:
         emDashesPattern = SubstituteTextPattern(
             r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md
         )
@@ -203,13 +203,13 @@ def educateDashes(self, md):
         self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50)
         self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45)
 
-    def educateEllipses(self, md):
+    def educateEllipses(self, md) -> None:
         ellipsesPattern = SubstituteTextPattern(
             r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md
         )
         self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10)
 
-    def educateAngledQuotes(self, md):
+    def educateAngledQuotes(self, md) -> None:
         leftAngledQuotePattern = SubstituteTextPattern(
             r'\<\<', (self.substitutions['left-angle-quote'],), md
         )
@@ -219,7 +219,7 @@ def educateAngledQuotes(self, md):
         self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40)
         self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35)
 
-    def educateQuotes(self, md):
+    def educateQuotes(self, md) -> None:
         lsquo = self.substitutions['left-single-quote']
         rsquo = self.substitutions['right-single-quote']
         ldquo = self.substitutions['left-double-quote']
@@ -243,7 +243,7 @@ def educateQuotes(self, md):
 
     def extendMarkdown(self, md):
         configs = self.getConfigs()
-        self.inlinePatterns = Registry()
+        self.inlinePatterns: Registry[HtmlInlineProcessor] = Registry()
         if configs['smart_ellipses']:
             self.educateEllipses(md)
         if configs['smart_quotes']:
diff --git a/markdown/extensions/toc.py b/markdown/extensions/toc.py
@@ -71,7 +71,7 @@ def get_name(el):
     return ''.join(text).strip()
 
 
-def stashedHTML2text(text, md, strip_entities=True):
+def stashedHTML2text(text, md, strip_entities: bool = True):
     """ Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
     def _html_sub(m):
         """ Substitute raw html with plain text. """
@@ -198,7 +198,7 @@ def iterparent(self, node):
                 yield node, child
                 yield from self.iterparent(child)
 
-    def replace_marker(self, root, elem):
+    def replace_marker(self, root, elem) -> None:
         """ Replace marker with elem. """
         for (p, c) in self.iterparent(root):
             text = ''.join(c.itertext()).strip()
@@ -219,14 +219,14 @@ def replace_marker(self, root, elem):
                         p[i] = elem
                         break
 
-    def set_level(self, elem):
+    def set_level(self, elem) -> None:
         """ Adjust header level according to base level. """
         level = int(elem.tag[-1]) + self.base_level
         if level > 6:
             level = 6
         elem.tag = 'h%d' % level
 
-    def add_anchor(self, c, elem_id):
+    def add_anchor(self, c, elem_id) -> None:
         anchor = etree.Element("a")
         anchor.text = c.text
         anchor.attrib["href"] = "#" + elem_id
@@ -238,7 +238,7 @@ def add_anchor(self, c, elem_id):
             c.remove(c[0])
         c.append(anchor)
 
-    def add_permalink(self, c, elem_id):
+    def add_permalink(self, c, elem_id) -> None:
         permalink = etree.Element("a")
         permalink.text = ("%spara;" % AMP_SUBSTITUTE
                           if self.use_permalinks is True
@@ -399,7 +399,7 @@ def extendMarkdown(self, md):
         tocext = self.TreeProcessorClass(md, self.getConfigs())
         md.treeprocessors.register(tocext, 'toc', 5)
 
-    def reset(self):
+    def reset(self) -> None:
         self.md.toc = ''
         self.md.toc_tokens = []
 
diff --git a/markdown/htmlparser.py b/markdown/htmlparser.py
@@ -156,7 +156,7 @@ def get_endtag_text(self, tag: str) -> str:
             # Failed to extract from raw data. Assume well formed and lowercase.
             return '</{}>'.format(tag)
 
-    def handle_starttag(self, tag: str, attrs: dict[str, str]):
+    def handle_starttag(self, tag: str, attrs: list[tuple[str, str]]):
         # Handle tags that should always be empty and do not specify a closing tag
         if tag in self.empty_tags:
             self.handle_startendtag(tag, attrs)
@@ -235,7 +235,7 @@ def handle_empty_tag(self, data: str, is_block: bool):
         else:
             self.cleandoc.append(data)
 
-    def handle_startendtag(self, tag: str, attrs: dict[str, str]):
+    def handle_startendtag(self, tag: str, attrs: list[tuple[str, str]]):
         self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag))
 
     def handle_charref(self, name: str):
@@ -277,7 +277,7 @@ def parse_html_declaration(self, i: int) -> int:
     # As `__starttag_text` is private, all references to it must be in this subclass.
     # The last few lines of `parse_starttag` are reversed so that `handle_starttag`
     # can override `cdata_mode` in certain situations (in a code span).
-    __starttag_text = None
+    __starttag_text: str | None = None
 
     def get_starttag_text(self) -> str:
         """Return full source of start tag: `<...>`."""
diff --git a/markdown/inlinepatterns.py b/markdown/inlinepatterns.py
diff --git a/markdown/postprocessors.py b/markdown/postprocessors.py
diff --git a/markdown/preprocessors.py b/markdown/preprocessors.py
diff --git a/markdown/treeprocessors.py b/markdown/treeprocessors.py
diff --git a/markdown/util.py b/markdown/util.py