Python-Markdown · oprypin · Oct 31, 2023 · Oct 31, 2023 · Oct 31, 2023 · Nov 1, 2023
diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml
@@ -71,7 +71,7 @@ jobs:
       fail-fast: false
       max-parallel: 4
       matrix:
-        tox-env: [flake8, pep517check, checkspelling]
+        tox-env: [mypy, flake8, pep517check, checkspelling]
 
     env:
       TOXENV: ${{ matrix.tox-env }}

diff --git a/markdown/__main__.py b/markdown/__main__.py
@@ -24,6 +24,11 @@
 import codecs
 import warnings
 import markdown
+import logging
+from logging import DEBUG, WARNING, CRITICAL
+from typing import Any, Callable, IO, Mapping
+
+yaml_load: Callable[[IO], Any]
 try:
     # We use `unsafe_load` because users may need to pass in actual Python
     # objects. As this is only available from the CLI, the user has much
@@ -32,18 +37,16 @@
 except ImportError:  # pragma: no cover
     try:
         # Fall back to PyYAML <5.1
-        from yaml import load as yaml_load
+        from yaml import load as yaml_load  # type: ignore
     except ImportError:
         # Fall back to JSON
         from json import load as yaml_load
 
-import logging
-from logging import DEBUG, WARNING, CRITICAL
 
 logger = logging.getLogger('MARKDOWN')
 
 
-def parse_options(args=None, values=None):
+def parse_options(args=None, values=None) -> tuple[Mapping[str, Any], bool]:
     """
     Define and parse `optparse` options for command-line usage.
     """

diff --git a/markdown/blockprocessors.py b/markdown/blockprocessors.py
@@ -121,7 +121,7 @@ def test(self, parent: etree.Element, block: str) -> bool:
             parent: An `etree` element which will be the parent of the block.
             block: A block of text from the source which has been split at blank lines.
         """
-        pass  # pragma: no cover
+        raise NotImplementedError()  # pragma: no cover
 
     def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
         """ Run processor. Must be overridden by subclasses.
@@ -147,7 +147,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
             parent: An `etree` element which is the parent of the current block.
             blocks: A list of all remaining blocks of the document.
         """
-        pass  # pragma: no cover
+        raise NotImplementedError()  # pragma: no cover
 
 
 class ListIndentProcessor(BlockProcessor):
@@ -167,18 +167,18 @@ class ListIndentProcessor(BlockProcessor):
     LIST_TYPES = ['ul', 'ol']
     """ Types of lists this processor can operate on. """
 
-    def __init__(self, *args):
+    def __init__(self, *args) -> None:
         super().__init__(*args)
         self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return block.startswith(' '*self.tab_length) and \
             not self.parser.state.isstate('detabbed') and \
             (parent.tag in self.ITEM_TYPES or
-                (len(parent) and parent[-1] is not None and
+                (len(parent) > 0 and parent[-1] is not None and
                     (parent[-1].tag in self.LIST_TYPES)))
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         level, sibling = self.get_level(parent, block)
         block = self.looseDetab(block, level)
@@ -251,10 +251,10 @@ def get_level(self, parent: etree.Element, block: str) -> tuple[int, etree.Eleme
 class CodeBlockProcessor(BlockProcessor):
     """ Process code blocks. """
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return block.startswith(' '*self.tab_length)
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         sibling = self.lastChild(parent)
         block = blocks.pop(0)
         theRest = ''
@@ -286,10 +286,10 @@ class BlockQuoteProcessor(BlockProcessor):
 
     RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return bool(self.RE.search(block)) and not util.nearing_recursion_limit()
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         m = self.RE.search(block)
         if m:
@@ -353,10 +353,10 @@ def __init__(self, parser: BlockParser):
         self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' %
                                     (self.tab_length, self.tab_length * 2 - 1))
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return bool(self.RE.match(block))
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         # Check for multiple items in one block.
         items = self.get_items(blocks.pop(0))
         sibling = self.lastChild(parent)
@@ -417,7 +417,7 @@ def run(self, parent, blocks):
 
     def get_items(self, block: str) -> list[str]:
         """ Break a block into list items. """
-        items = []
+        items: list[str] = []
         for line in block.split('\n'):
             m = self.CHILD_RE.match(line)
             if m:
@@ -426,7 +426,9 @@ def get_items(self, block: str) -> list[str]:
                 if not items and self.TAG == 'ol':
                     # Detect the integer value of first list item
                     INTEGER_RE = re.compile(r'(\d+)')
-                    self.STARTSWITH = INTEGER_RE.match(m.group(1)).group()
+                    int_match = INTEGER_RE.match(m.group(1))
+                    assert int_match is not None
+                    self.STARTSWITH = int_match.group()
                 # Append to the list
                 items.append(m.group(3))
             elif self.INDENT_RE.match(line):
@@ -460,10 +462,10 @@ class HashHeaderProcessor(BlockProcessor):
     # Detect a header at start of any line in block
     RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return bool(self.RE.search(block))
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         m = self.RE.search(block)
         if m:
@@ -491,10 +493,10 @@ class SetextHeaderProcessor(BlockProcessor):
     # Detect Setext-style header. Must be first 2 lines of block.
     RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return bool(self.RE.match(block))
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         lines = blocks.pop(0).split('\n')
         # Determine level. `=` is 1 and `-` is 2.
         if lines[1].startswith('='):
@@ -517,15 +519,15 @@ class HRProcessor(BlockProcessor):
     # Detect hr on any line of a block.
     SEARCH_RE = re.compile(RE, re.MULTILINE)
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         m = self.SEARCH_RE.search(block)
         if m:
             # Save match object on class instance so we can use it later.
             self.match = m
             return True
         return False
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         match = self.match
         # Check for lines in block before `hr`.
@@ -545,10 +547,10 @@ def run(self, parent, blocks):
 class EmptyBlockProcessor(BlockProcessor):
     """ Process blocks that are empty or start with an empty line. """
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return not block or block.startswith('\n')
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         filler = '\n\n'
         if block:
@@ -575,10 +577,10 @@ class ReferenceProcessor(BlockProcessor):
         r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE
     )
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return True
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
         block = blocks.pop(0)
         m = self.RE.search(block)
         if m:
@@ -601,10 +603,10 @@ def run(self, parent, blocks):
 class ParagraphProcessor(BlockProcessor):
     """ Process Paragraph blocks. """
 
-    def test(self, parent, block):
+    def test(self, parent: etree.Element, block: str) -> bool:
         return True
 
-    def run(self, parent, blocks):
+    def run(self, parent: etree.Element, blocks: list[str]) -> None:
         block = blocks.pop(0)
         if block.strip():
             # Not a blank block. Add to parent, otherwise throw it away.

diff --git a/markdown/core.py b/markdown/core.py
@@ -23,7 +23,7 @@
 import sys
 import logging
 import importlib
-from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, Sequence, TextIO
+from typing import TYPE_CHECKING, Any, BinaryIO, Callable, ClassVar, Mapping, Sequence
 from . import util
 from .preprocessors import build_preprocessors
 from .blockprocessors import build_block_parser
@@ -36,6 +36,7 @@
 
 if TYPE_CHECKING:  # pragma: no cover
     from xml.etree.ElementTree import Element
+    from markdown.extensions.toc import TocToken
 
 __all__ = ['Markdown', 'markdown', 'markdownFromFile']
 
@@ -85,7 +86,11 @@ class Markdown:
     callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`.
     """
 
-    def __init__(self, **kwargs):
+    toc_tokens: list[TocToken]
+    toc: str
+    Meta: dict[str, Any]
+
+    def __init__(self, **kwargs: Any):
         """
         Creates a new Markdown instance.
 
@@ -159,7 +164,7 @@ def build_parser(self) -> Markdown:
     def registerExtensions(
         self,
         extensions: Sequence[Extension | str],
-        configs: Mapping[str, Mapping[str, Any]]
+        configs: Mapping[str, dict[str, Any]]
     ) -> Markdown:
         """
         Load a list of extensions into an instance of the `Markdown` class.
@@ -183,7 +188,7 @@ def registerExtensions(
                     'Successfully loaded extension "%s.%s".'
                     % (ext.__class__.__module__, ext.__class__.__name__)
                 )
-            elif ext is not None:
+            elif ext is not None:  # type: ignore[unreachable]
                 raise TypeError(
                     'Extension "{}.{}" must be of type: "{}.{}"'.format(
                         ext.__class__.__module__, ext.__class__.__name__,
@@ -387,8 +392,8 @@ def convert(self, source: str) -> str:
 
     def convertFile(
         self,
-        input: str | TextIO | None = None,
-        output: str | TextIO | None = None,
+        input: str | BinaryIO | None = None,
+        output: str | BinaryIO | None = None,
         encoding: str | None = None,
     ) -> Markdown:
         """
@@ -417,15 +422,13 @@ def convertFile(
         # Read the source
         if input:
             if isinstance(input, str):
-                input_file = codecs.open(input, mode="r", encoding=encoding)
+                with codecs.open(input, mode="r", encoding=encoding) as input_file:
+                    text = input_file.read()
             else:
-                input_file = codecs.getreader(encoding)(input)
-            text = input_file.read()
-            input_file.close()
+                with codecs.getreader(encoding)(input) as input_file:
+                    text = input_file.read()
         else:
             text = sys.stdin.read()
-            if not isinstance(text, str):  # pragma: no cover
-                text = text.decode(encoding)
 
         text = text.lstrip('\ufeff')  # remove the byte-order mark
 
@@ -442,18 +445,14 @@ def convertFile(
                 output_file.close()
             else:
                 writer = codecs.getwriter(encoding)
-                output_file = writer(output, errors="xmlcharrefreplace")
-                output_file.write(html)
+                output_writer = writer(output, errors="xmlcharrefreplace")
+                output_writer.write(html)
                 # Don't close here. User may want to write more.
         else:
             # Encode manually and write bytes to stdout.
-            html = html.encode(encoding, "xmlcharrefreplace")
-            try:
-                # Write bytes directly to buffer (Python 3).
-                sys.stdout.buffer.write(html)
-            except AttributeError:  # pragma: no cover
-                # Probably Python 2, which works with bytes by default.
-                sys.stdout.write(html)
+            html_bytes = html.encode(encoding, "xmlcharrefreplace")
+            # Write the encoded bytes directly to stdout's underlying binary buffer.
+            sys.stdout.buffer.write(html_bytes)
 
         return self
 
@@ -489,7 +488,13 @@ def markdown(text: str, **kwargs: Any) -> str:
     return md.convert(text)
 
 
-def markdownFromFile(**kwargs: Any):
+def markdownFromFile(
+    *,
+    input: str | BinaryIO | None = None,
+    output: str | BinaryIO | None = None,
+    encoding: str | None = None,
+    **kwargs: Any
+) -> None:
     """
     Read Markdown text from a file and write output to a file or a stream.
 
@@ -498,13 +503,11 @@ def markdownFromFile(**kwargs: Any):
     [`convert`][markdown.Markdown.convert].
 
     Keyword arguments:
-        input (str | TextIO): A file name or readable object.
-        output (str | TextIO): A file name or writable object.
-        encoding (str): Encoding of input and output.
+        input: A file name or a readable binary stream.
+        output: A file name or a writable binary stream.
+        encoding: Encoding of input and output.
         **kwargs: Any arguments accepted by the `Markdown` class.
 
     """
     md = Markdown(**kwargs)
-    md.convertFile(kwargs.get('input', None),
-                   kwargs.get('output', None),
-                   kwargs.get('encoding', None))
+    md.convertFile(input, output, encoding)
diff --git a/markdown/extensions/__init__.py b/markdown/extensions/__init__.py
@@ -27,7 +27,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Mapping, Sequence
+from typing import TYPE_CHECKING, Any, Iterable, Mapping
 from ..util import parseBoolValue
 
 if TYPE_CHECKING:  # pragma: no cover
@@ -53,7 +53,7 @@ class Extension:
     if a default is not set for each option.
     """
 
-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs) -> None:
         """ Initiate Extension and set up configs. """
         self.setConfigs(kwargs)
 
@@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any) -> None:
             value = parseBoolValue(value, preserve_none=True)
         self.config[key][0] = value
 
-    def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]):
+    def setConfigs(self, items: Mapping[str, Any] | Iterable[tuple[str, Any]]) -> None:
         """
         Loop through a collection of configuration options, passing each to
         [`setConfig`][markdown.extensions.Extension.setConfig].