Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix all mypy typechecking errors, add a lot of type annotations #1399

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ jobs:
fail-fast: false
max-parallel: 4
matrix:
tox-env: [flake8, pep517check, checkspelling]
tox-env: [mypy, flake8, pep517check, checkspelling]

env:
TOXENV: ${{ matrix.tox-env }}
Expand Down
11 changes: 7 additions & 4 deletions markdown/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
import codecs
import warnings
import markdown
import logging
from logging import DEBUG, WARNING, CRITICAL
from typing import Any, Callable, IO, Mapping

yaml_load: Callable[[IO], Any]
try:
# We use `unsafe_load` because users may need to pass in actual Python
# objects. As this is only available from the CLI, the user has much
Expand All @@ -32,18 +37,16 @@
except ImportError: # pragma: no cover
try:
# Fall back to PyYAML <5.1
from yaml import load as yaml_load
from yaml import load as yaml_load # type: ignore
except ImportError:
# Fall back to JSON
from json import load as yaml_load

import logging
from logging import DEBUG, WARNING, CRITICAL

logger = logging.getLogger('MARKDOWN')


def parse_options(args=None, values=None):
def parse_options(args=None, values=None) -> tuple[Mapping[str, Any], bool]:
"""
Define and parse `optparse` options for command-line usage.
"""
Expand Down
54 changes: 28 additions & 26 deletions markdown/blockprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test(self, parent: etree.Element, block: str) -> bool:
parent: An `etree` element which will be the parent of the block.
block: A block of text from the source which has been split at blank lines.
"""
pass # pragma: no cover
raise NotImplementedError() # pragma: no cover

def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
""" Run processor. Must be overridden by subclasses.
Expand All @@ -147,7 +147,7 @@ def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
parent: An `etree` element which is the parent of the current block.
blocks: A list of all remaining blocks of the document.
"""
pass # pragma: no cover
raise NotImplementedError() # pragma: no cover


class ListIndentProcessor(BlockProcessor):
Expand All @@ -167,18 +167,18 @@ class ListIndentProcessor(BlockProcessor):
LIST_TYPES = ['ul', 'ol']
""" Types of lists this processor can operate on. """

def __init__(self, *args):
def __init__(self, *args) -> None:
super().__init__(*args)
self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
return block.startswith(' '*self.tab_length) and \
not self.parser.state.isstate('detabbed') and \
(parent.tag in self.ITEM_TYPES or
(len(parent) and parent[-1] is not None and
(len(parent) > 0 and parent[-1] is not None and
(parent[-1].tag in self.LIST_TYPES)))

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
level, sibling = self.get_level(parent, block)
block = self.looseDetab(block, level)
Expand Down Expand Up @@ -251,10 +251,10 @@ def get_level(self, parent: etree.Element, block: str) -> tuple[int, etree.Eleme
class CodeBlockProcessor(BlockProcessor):
""" Process code blocks. """

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
return block.startswith(' '*self.tab_length)

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> None:
sibling = self.lastChild(parent)
block = blocks.pop(0)
theRest = ''
Expand Down Expand Up @@ -286,10 +286,10 @@ class BlockQuoteProcessor(BlockProcessor):

RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
return bool(self.RE.search(block)) and not util.nearing_recursion_limit()

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
m = self.RE.search(block)
if m:
Expand Down Expand Up @@ -353,10 +353,10 @@ def __init__(self, parser: BlockParser):
self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' %
(self.tab_length, self.tab_length * 2 - 1))

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
return bool(self.RE.match(block))

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> None:
# Check for multiple items in one block.
items = self.get_items(blocks.pop(0))
sibling = self.lastChild(parent)
Expand Down Expand Up @@ -417,7 +417,7 @@ def run(self, parent, blocks):

def get_items(self, block: str) -> list[str]:
""" Break a block into list items. """
items = []
items: list[str] = []
for line in block.split('\n'):
m = self.CHILD_RE.match(line)
if m:
Expand All @@ -426,7 +426,9 @@ def get_items(self, block: str) -> list[str]:
if not items and self.TAG == 'ol':
# Detect the integer value of first list item
INTEGER_RE = re.compile(r'(\d+)')
self.STARTSWITH = INTEGER_RE.match(m.group(1)).group()
int_match = INTEGER_RE.match(m.group(1))
assert int_match is not None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what you are trying to accomplish here. Does this make it possible for Markdown content to cause an error? If so, that would be unacceptable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With lines 429-431 reverted:

$ mypy markdown
markdown/blockprocessors.py:429: error: Item "None" of "Match[str] | None" has no attribute "group"  [union-attr]
Found 1 error in 1 file (checked 33 source files)

  • The code before:

    self.STARTSWITH = RE.match(string).group()

    where RE.match(string) can be None, and None.group() is an error

  • The code after:

    int_match = RE.match(string)
    assert int_match is not None
    self.STARTSWITH = int_match.group()

The old code does not check for the None case, in which it would fail with an AttributeError. mypy does not let that slide and exposes a potential bug.

The new code directly checks for it and errors with a clearer message.

There is no change in which situations do or do not produce an error.

TBD: It is worth checking whether this bug case can actually happen (in current code already). In any case we should be happy that mypy flagged this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, now we know about a bug which we didn't know about before. That is good. But this is not the way to address it. Under no circumstances should source text cause Markdown to raise an error. In fact, that is one of the primary goals of the project as documented on the home page. Therefore, this is not the proper way to fix the bug. The error needs to be silenced and some reasonable text needs to be included in the output (depending on the conditions under which the issue arises).

self.STARTSWITH = int_match.group()
# Append to the list
items.append(m.group(3))
elif self.INDENT_RE.match(line):
Expand Down Expand Up @@ -460,10 +462,10 @@ class HashHeaderProcessor(BlockProcessor):
# Detect a header at start of any line in block
RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
return bool(self.RE.search(block))

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
m = self.RE.search(block)
if m:
Expand Down Expand Up @@ -491,10 +493,10 @@ class SetextHeaderProcessor(BlockProcessor):
# Detect Setext-style header. Must be first 2 lines of block.
RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
return bool(self.RE.match(block))

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> None:
lines = blocks.pop(0).split('\n')
# Determine level. `=` is 1 and `-` is 2.
if lines[1].startswith('='):
Expand All @@ -517,15 +519,15 @@ class HRProcessor(BlockProcessor):
# Detect hr on any line of a block.
SEARCH_RE = re.compile(RE, re.MULTILINE)

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
m = self.SEARCH_RE.search(block)
if m:
# Save match object on class instance so we can use it later.
self.match = m
return True
return False

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
match = self.match
# Check for lines in block before `hr`.
Expand All @@ -545,10 +547,10 @@ def run(self, parent, blocks):
class EmptyBlockProcessor(BlockProcessor):
""" Process blocks that are empty or start with an empty line. """

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
return not block or block.startswith('\n')

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
filler = '\n\n'
if block:
Expand All @@ -575,10 +577,10 @@ class ReferenceProcessor(BlockProcessor):
r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE
)

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
return True

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> bool:
block = blocks.pop(0)
m = self.RE.search(block)
if m:
Expand All @@ -601,10 +603,10 @@ def run(self, parent, blocks):
class ParagraphProcessor(BlockProcessor):
""" Process Paragraph blocks. """

def test(self, parent, block):
def test(self, parent: etree.Element, block: str) -> bool:
return True

def run(self, parent, blocks):
def run(self, parent: etree.Element, blocks: list[str]) -> None:
block = blocks.pop(0)
if block.strip():
# Not a blank block. Add to parent, otherwise throw it away.
Expand Down
59 changes: 31 additions & 28 deletions markdown/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import sys
import logging
import importlib
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, Sequence, TextIO
from typing import TYPE_CHECKING, Any, BinaryIO, Callable, ClassVar, Mapping, Sequence
from . import util
from .preprocessors import build_preprocessors
from .blockprocessors import build_block_parser
Expand All @@ -36,6 +36,7 @@

if TYPE_CHECKING: # pragma: no cover
from xml.etree.ElementTree import Element
from markdown.extensions.toc import TocToken

__all__ = ['Markdown', 'markdown', 'markdownFromFile']

Expand Down Expand Up @@ -85,7 +86,11 @@ class Markdown:
callable which accepts an [`Element`][xml.etree.ElementTree.Element] and returns a `str`.
"""

def __init__(self, **kwargs):
toc_tokens: list[TocToken]
toc: str
Meta: dict[str, Any]

def __init__(self, **kwargs: Any):
"""
Creates a new Markdown instance.

Expand Down Expand Up @@ -159,7 +164,7 @@ def build_parser(self) -> Markdown:
def registerExtensions(
self,
extensions: Sequence[Extension | str],
configs: Mapping[str, Mapping[str, Any]]
configs: Mapping[str, dict[str, Any]]
) -> Markdown:
"""
Load a list of extensions into an instance of the `Markdown` class.
Expand All @@ -183,7 +188,7 @@ def registerExtensions(
'Successfully loaded extension "%s.%s".'
% (ext.__class__.__module__, ext.__class__.__name__)
)
elif ext is not None:
elif ext is not None: # type: ignore[unreachable]
raise TypeError(
'Extension "{}.{}" must be of type: "{}.{}"'.format(
ext.__class__.__module__, ext.__class__.__name__,
Expand Down Expand Up @@ -387,8 +392,8 @@ def convert(self, source: str) -> str:

def convertFile(
self,
input: str | TextIO | None = None,
output: str | TextIO | None = None,
input: str | BinaryIO | None = None,
output: str | BinaryIO | None = None,
encoding: str | None = None,
) -> Markdown:
"""
Expand Down Expand Up @@ -417,15 +422,13 @@ def convertFile(
# Read the source
if input:
if isinstance(input, str):
input_file = codecs.open(input, mode="r", encoding=encoding)
with codecs.open(input, mode="r", encoding=encoding) as input_file:
text = input_file.read()
else:
input_file = codecs.getreader(encoding)(input)
text = input_file.read()
input_file.close()
with codecs.getreader(encoding)(input) as input_file:
text = input_file.read()
else:
text = sys.stdin.read()
if not isinstance(text, str): # pragma: no cover
text = text.decode(encoding)

text = text.lstrip('\ufeff') # remove the byte-order mark

Expand All @@ -442,18 +445,14 @@ def convertFile(
output_file.close()
else:
writer = codecs.getwriter(encoding)
output_file = writer(output, errors="xmlcharrefreplace")
output_file.write(html)
output_writer = writer(output, errors="xmlcharrefreplace")
output_writer.write(html)
Comment on lines -443 to +444
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why this change was made at all. Note that the Contributing Guide states:

Legacy code which does not follow the guidelines should only be updated if and when other changes (bug fix, feature addition, etc.) are being made to that section of code. While new features should be given names that follow modern Python naming conventions, existing names should be preserved to avoid backward incompatible changes.

I realize that in this instance the variable name is not exposed outside of this method so there is no concern over a backward incompatible change, but the general principle remains. Please, let's refrain from making unnecessary changes just for personal preference.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

100% of the code body changes in this pull request are in response to error messages produced by mypy. You don't need to remind me about avoiding unrelated changes, because I'm always the first one to say that as well.

This particular change is because mypy doesn't like that these two differently-typed values share the same variable name.


With lines 425-429 reverted:

$ mypy markdown                                             
markdown/core.py:427: error: Incompatible types in assignment (expression has type "StreamReader", variable has type "StreamReaderWriter")  [assignment]
Found 1 error in 1 file (checked 33 source files)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I embrace Python not being strongly typed. If that means making changes like this, then no thank you.

# Don't close here. User may want to write more.
else:
# Encode manually and write bytes to stdout.
html = html.encode(encoding, "xmlcharrefreplace")
try:
# Write bytes directly to buffer (Python 3).
sys.stdout.buffer.write(html)
except AttributeError: # pragma: no cover
# Probably Python 2, which works with bytes by default.
sys.stdout.write(html)
html_bytes = html.encode(encoding, "xmlcharrefreplace")
# Write bytes directly to buffer (Python 3).
sys.stdout.buffer.write(html_bytes)

return self

Expand Down Expand Up @@ -489,7 +488,13 @@ def markdown(text: str, **kwargs: Any) -> str:
return md.convert(text)


def markdownFromFile(**kwargs: Any):
def markdownFromFile(
*,
input: str | BinaryIO | None = None,
output: str | BinaryIO | None = None,
encoding: str | None = None,
**kwargs: Any
) -> None:
Comment on lines -485 to +491
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't recall why the specific keyword parameters were removed some years ago, but why did you feel the need to add them back in? I'm not opposed to it if there is a good reason related to the scope of this PR, but I would like to hear the reason.

However, I am more concerned about why you added positional arguments (`*`).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function's signature doesn't change, I'm just formalizing the de-facto signature. The * is to start named-only parameters, not positional-only parameters. They were also named-only before.

"""
Read Markdown text from a file and write output to a file or a stream.

Expand All @@ -498,13 +503,11 @@ def markdownFromFile(**kwargs: Any):
[`convert`][markdown.Markdown.convert].

Keyword arguments:
input (str | TextIO): A file name or readable object.
output (str | TextIO): A file name or writable object.
encoding (str): Encoding of input and output.
input: A file name or readable object.
output: A file name or writable object.
encoding: Encoding of input and output.
**kwargs: Any arguments accepted by the `Markdown` class.

"""
md = Markdown(**kwargs)
md.convertFile(kwargs.get('input', None),
kwargs.get('output', None),
kwargs.get('encoding', None))
md.convertFile(input, output, encoding)
6 changes: 3 additions & 3 deletions markdown/extensions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Iterable, Mapping
from ..util import parseBoolValue

if TYPE_CHECKING: # pragma: no cover
Expand All @@ -53,7 +53,7 @@ class Extension:
if a default is not set for each option.
"""

def __init__(self, **kwargs):
def __init__(self, **kwargs) -> None:
""" Initiate Extension and set up configs. """
self.setConfigs(kwargs)

Expand Down Expand Up @@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any) -> None:
value = parseBoolValue(value, preserve_none=True)
self.config[key][0] = value

def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]):
def setConfigs(self, items: Mapping[str, Any] | Iterable[tuple[str, Any]]) -> None:
"""
Loop through a collection of configuration options, passing each to
[`setConfig`][markdown.extensions.Extension.setConfig].
Expand Down
Loading