diff --git a/codebasin/finder.py b/codebasin/finder.py index b60254e..97781a0 100644 --- a/codebasin/finder.py +++ b/codebasin/finder.py @@ -14,9 +14,15 @@ from tqdm import tqdm -from codebasin import CodeBase, file_parser, preprocessor +from codebasin import CodeBase, file_parser from codebasin.language import FileLanguage -from codebasin.preprocessor import CodeNode, Node, Platform, SourceTree, Visit +from codebasin.preprocessor import ( + CodeNode, + Node, + Preprocessor, + SourceTree, + Visit, +) log = logging.getLogger(__name__) @@ -114,20 +120,26 @@ def get_setmap(self, codebase: CodeBase) -> dict[frozenset, int]: setmap[platform] += node.num_lines return setmap - def associate(self, filename: str, platform: Platform) -> None: + def associate(self, filename: str, preprocessor: Preprocessor) -> None: """ - Update the association for the provided filename and platform. + Update the association for `filename` using `preprocessor`. """ tree = self.get_tree(filename) association = self.get_map(filename) if tree is None or association is None: raise RuntimeError(f"Missing tree or association for '{filename}'") + + if preprocessor.platform_name is None: + raise RuntimeError(f"Cannot associate '{filename}' with 'None'") + branch_taken = [] def associator(node: Node) -> Visit: - association[node].add(platform.name) - active = node.evaluate_for_platform( - platform=platform, + association[node].add(preprocessor.platform_name) + + # TODO: Consider inverting, so preprocessor calls the function. 
+ active = node.evaluate( + preprocessor=preprocessor, filename=self._get_realpath(filename), state=self, ) @@ -228,28 +240,26 @@ def _potential_file_generator( leave=False, disable=not show_progress, ): - file_platform = Platform(p, rootdir) - - for path in e["include_paths"]: - file_platform.add_include_path(path) - - for definition in e["defines"]: - macro = preprocessor.macro_from_definition_string(definition) - file_platform.define(macro.name, macro) + preprocessor = Preprocessor( + platform_name=p, + include_paths=e["include_paths"], + defines=e["defines"], + ) # Process include files. - # These modify the file_platform instance, but we throw away + # These modify the preprocessor instance, but we throw away # the active nodes after processing is complete. for include in e["include_files"]: - include_file = file_platform.find_include_file( + include_file = preprocessor.find_include_file( include, os.path.dirname(e["file"]), ) if include_file: state.insert_file(include_file) - state.associate(include_file, file_platform) + state.associate(include_file, preprocessor) # Process the file, to build a list of associate nodes - state.associate(e["file"], file_platform) + # TODO: Consider inverting, so preprocessor calls the function. + state.associate(e["file"], preprocessor) return state diff --git a/codebasin/preprocessor.py b/codebasin/preprocessor.py index f309a4c..f56ce00 100644 --- a/codebasin/preprocessor.py +++ b/codebasin/preprocessor.py @@ -16,6 +16,7 @@ from copy import copy from dataclasses import dataclass, field from enum import Enum +from pathlib import Path from typing import Any import numpy as np @@ -559,7 +560,7 @@ def is_end_node() -> bool: """ return False - def evaluate_for_platform(self, **kwargs: Any) -> bool: + def evaluate(self, **kwargs: Any) -> bool: """ Determine if the children of this node are active, by evaluating the statement. 
@@ -610,7 +611,7 @@ class FileNode(Node): def __str__(self) -> str: return str(self.filename) - def evaluate_for_platform(self, **kwargs: Any) -> bool: + def evaluate(self, **kwargs: Any) -> bool: """ Since a FileNode is always used as a root node, we are only interested in its children. @@ -720,9 +721,11 @@ class PragmaNode(DirectiveNode): expr: list[Token] - def evaluate_for_platform(self, **kwargs: Any) -> bool: + def evaluate(self, **kwargs: Any) -> bool: if self.expr and str(self.expr[0]) == "once": - kwargs["platform"].add_include_to_skip(kwargs["filename"]) + kwargs["preprocessor"].get_file_info( + kwargs["filename"], + ).is_include_once = True return False @@ -736,14 +739,14 @@ class DefineNode(DirectiveNode): args: list[Identifier] | None = None value: list[Token] | None = None - def evaluate_for_platform(self, **kwargs: Any) -> bool: + def evaluate(self, **kwargs: Any) -> bool: """ Add a definition into the platform, and return false """ if self.value is None: raise RuntimeError("Cannot expand macro to None") macro = make_macro(self.identifier, self.args, self.value) - kwargs["platform"].define(self.identifier.token, macro) + kwargs["preprocessor"].define(macro) return False @@ -755,11 +758,11 @@ class UndefNode(DirectiveNode): identifier: Identifier - def evaluate_for_platform(self, **kwargs: Any) -> bool: + def evaluate(self, **kwargs: Any) -> bool: """ Add a definition into the platform, and return false """ - kwargs["platform"].undefine(self.identifier.token) + kwargs["preprocessor"].undefine(self.identifier) return False @@ -800,7 +803,7 @@ class IncludeNode(DirectiveNode): value: IncludePath | list[Token] - def evaluate_for_platform(self, **kwargs: Any) -> bool: + def evaluate(self, **kwargs: Any) -> bool: """ Extract the filename from the #include. This cannot happen when parsing because of "computed includes" like #include FOO. 
After @@ -814,24 +817,31 @@ def evaluate_for_platform(self, **kwargs: Any) -> bool: include_path = self.value.path is_system_include = self.value.system else: - expansion = MacroExpander(kwargs["platform"]).expand(self.value) + expansion = MacroExpander(kwargs["preprocessor"]).expand( + self.value, + ) path_obj = DirectiveParser(expansion).include_path() include_path = path_obj.path is_system_include = path_obj.system this_path = os.path.dirname(kwargs["filename"]) - include_file = kwargs["platform"].find_include_file( + include_file = kwargs["preprocessor"].find_include_file( include_path, this_path, is_system_include, ) - if include_file and kwargs["platform"].process_include(include_file): + if ( + include_file + and not kwargs["preprocessor"] + .get_file_info(include_file) + .is_include_once + ): # include files use the same language as the file itself, # irrespective of file extension. lang = kwargs["state"].langs[kwargs["filename"]] kwargs["state"].insert_file(include_file, lang) - kwargs["state"].associate(include_file, kwargs["platform"]) + kwargs["state"].associate(include_file, kwargs["preprocessor"]) if not include_file: filename = kwargs["filename"] @@ -858,9 +868,11 @@ class IfNode(DirectiveNode): def is_start_node() -> bool: return True - def evaluate_for_platform(self, **kwargs: Any) -> bool: + def evaluate(self, **kwargs: Any) -> bool: # Perform macro substitution with tokens - expanded_tokens = MacroExpander(kwargs["platform"]).expand(self.expr) + expanded_tokens = MacroExpander(kwargs["preprocessor"]).expand( + self.expr, + ) # Evaluate the expanded tokens return ExpressionEvaluator(expanded_tokens).evaluate() @@ -891,7 +903,7 @@ class ElseNode(DirectiveNode): def is_cont_node() -> bool: return True - def evaluate_for_platform(self, **kwargs: Any) -> bool: + def evaluate(self, **kwargs: Any) -> bool: return True @@ -1646,74 +1658,132 @@ def replace( return substituted_tokens -class Platform: +class Preprocessor: """ - Represents a platform, and 
everything associated with a platform. - Contains a list of definitions, and include paths. + Represents a specific instance of a preprocessor, including: + - Active macro definitions + - Includes that should only be processed once + - The name of the platform associated with this preprocessor """ - def __init__(self, name: str, _root_dir: str) -> None: - self._definitions: dict[str, Macro | MacroFunction] = {} - self._skip_includes: list[str] = [] - self._include_paths: list[str] = [] - self._root_dir = _root_dir - self.name = name - self.found_incl: dict[str, str | None] = {} - - def add_include_path(self, path: str) -> None: + @dataclass + class FileInfo: """ - Insert a new path into the list of include paths for this - platform. + Stores information the Preprocessor knows about a file. """ - self._include_paths.append(path) - def undefine(self, identifier: str) -> None: - """ - Undefine a macro for this platform, if it's defined. - """ - if identifier in self._definitions: - del self._definitions[identifier] + is_include_once: bool = False - def define(self, identifier: str, macro: Macro | MacroFunction) -> None: + def __init__( + self, + *, + platform_name: str | None = None, + include_paths: list[str | os.PathLike[str]] | None = None, + defines: list[str] | None = None, + ) -> None: + if platform_name is None: + self.platform_name = None + elif not isinstance(platform_name, str): + raise TypeError("'platform_name' must be a string.") + else: + self.platform_name = platform_name + + self._include_paths: list[Path] + if include_paths is None: + self._include_paths = [] + elif not isinstance(include_paths, list): + raise TypeError("'include_paths' must be a list of paths.") + elif not all( + [isinstance(p, (str, os.PathLike)) for p in include_paths], + ): + raise TypeError( + "Each path in 'include_paths' must be PathLike.", + ) + else: + self._include_paths = [Path(p) for p in include_paths] + + self._definitions: dict[str, Macro | MacroFunction] + if defines 
is None: + self._definitions = {} + elif not isinstance(defines, list): + raise TypeError("'defines' must be a list of strings.") + elif not all([isinstance(d, str) for d in defines]): + raise TypeError("'defines' must be a list of strings.") + else: + self._definitions = {} + for definition in defines: + macro = macro_from_definition_string(definition) + self.define(macro) + + self._file_info: dict[str, Preprocessor.FileInfo] = {} + self._found_incl: dict[str, str | None] = {} + + def define(self, macro: Macro | MacroFunction) -> None: """ - Define a new macro for this platform, only if it's not already - defined. + Define a macro, as if the preprocessor encountered #define. + If the macro is already defined, this has no effect. + + Parameters + ---------- + macro: Macro + The macro to define. """ - if identifier not in self._definitions: - self._definitions[identifier] = macro + # TODO: Check if this is consistent with other preprocessors. + if macro.name not in self._definitions: + self._definitions[macro.name] = macro - def add_include_to_skip(self, fn: str) -> None: + def undefine(self, identifier: Identifier) -> None: """ - Add an include file to the skip list for this platform. The file will - not be processed when encountered in the include directives. + Undefine a previously defined macro. + + Parameters + ---------- + identifier: Identifier + The identifier associated with the macro. """ - if fn not in self._skip_includes: - self._skip_includes.append(fn) + if identifier.token in self._definitions: + del self._definitions[identifier.token] - def process_include(self, fn: str) -> bool: + def get_macro( + self, + identifier: Identifier, + ) -> Macro | MacroFunction | None: """ - Return a boolean stating if this include file should be - processed or skipped. + Returns + ------- + Macro | MacroFunction | None + The macro associated with `identifier`, or None. 
""" - return fn not in self._skip_includes + if identifier.token in self._definitions: + return self._definitions[identifier.token] + return None - # FIXME: This should return a bool, but the usage relies on a str. - def is_defined(self, identifier: str) -> str: + def has_macro(self, identifier: Identifier) -> bool: """ - Return a string representing whether the macro named by 'identifier' is - defined. + Returns + ------- + bool + True if `identifier` is defined and False otherwise. """ - if identifier in self._definitions: - return "1" - return "0" + return self.get_macro(identifier) is not None - def get_macro(self, identifier: str) -> Macro | MacroFunction | None: + def get_file_info(self, filename: str) -> Preprocessor.FileInfo: """ - Return either a macro definition (if it's defined), or None. + Access information the preprocessor has about `filename`. + + Parameters + ---------- + filename: str + The name of the file of interest. + + Returns + ------- + FileInfo + The `FileInfo` associated with this file. """ - if identifier in self._definitions: - return self._definitions[identifier] - return None + if filename not in self._file_info: + self._file_info[filename] = Preprocessor.FileInfo() + return self._file_info[filename] def find_include_file( self, @@ -1722,35 +1792,39 @@ def find_include_file( is_system_include: bool = False, ) -> str | None: """ - Determine and return the full path to an include file, named - 'filename' using the include paths for this platform. + Determine and return the full path to `filename`. - System includes do not include the rootdir, while local includes - do. - """ - try: - return self.found_incl[filename] - except KeyError: - pass + Parameters + ---------- + filename: str + The name of the include file to find. - include_file = None + this_path: str + The path where the preprocessor is currently running. + + is_system_include: bool, default: False + Whether the include file is a system header or not. 
+ + Returns + ------- + str | None + The full path to `filename` if it was found and `None` otherwise. + """ + if filename in self._found_incl: + return self._found_incl[filename] local_paths = [] if not is_system_include: local_paths += [this_path] - # Determine the path to the include file, if it exists for path in local_paths + self._include_paths: test_path = os.path.abspath(os.path.join(path, filename)) if os.path.isfile(test_path): - include_file = test_path - self.found_incl[filename] = include_file - return include_file + self._found_incl[filename] = test_path + return test_path # TODO: Check this optimization is always valid. - if include_file is not None: - raise RuntimeError(f"Expected 'None', got '{filename}'") - self.found_incl[filename] = None + self._found_incl[filename] = None return None @@ -1823,8 +1897,8 @@ class MacroExpander: A specialized token parser for recognizing and expanding macros. """ - def __init__(self, platform: Platform) -> None: - self.platform = platform + def __init__(self, preprocessor: Preprocessor) -> None: + self.preprocessor = preprocessor self.parser_stack: list[ExpanderHelper] = [] self.no_expand: list[str] = [] @@ -1926,7 +2000,10 @@ def defined(self, identifier: Identifier) -> NumericalConstant: """ Expand a call to defined(X) or defined X. 
""" - value = self.platform.is_defined(str(identifier)) + if self.preprocessor.has_macro(identifier): + value = "1" + else: + value = "0" return NumericalConstant( "EXPANSION", identifier.col, @@ -1996,7 +2073,7 @@ def expand( self.replace_tok(itok) continue - macro_lookup = self.platform.get_macro(ctok.token) + macro_lookup = self.preprocessor.get_macro(ctok) if not macro_lookup: self.parser_stack[-1].pos -= 1 self.replace_tok(ctok) diff --git a/tests/macro_expansion/test_macro_expansion.py b/tests/macro_expansion/test_macro_expansion.py index 7b8a1b3..4933b8c 100644 --- a/tests/macro_expansion/test_macro_expansion.py +++ b/tests/macro_expansion/test_macro_expansion.py @@ -6,7 +6,7 @@ from pathlib import Path from codebasin import CodeBase, finder, preprocessor -from codebasin.preprocessor import Platform +from codebasin.preprocessor import Preprocessor class TestMacroExpansion(unittest.TestCase): @@ -63,7 +63,7 @@ def test_cat(self): test_str = "CATTEST=first ## 2" macro = preprocessor.macro_from_definition_string(test_str) tokens = preprocessor.Lexer("CATTEST").tokenize() - p = Platform("Test", self.rootdir) + p = Preprocessor(platform_name="Test") p._definitions = {macro.name: macro} expanded_tokens = preprocessor.MacroExpander(p).expand(tokens) expected_tokens = preprocessor.Lexer("first2").tokenize() @@ -76,7 +76,7 @@ def test_stringify_quote(self): test_str = "STR(x)= #x" macro = preprocessor.macro_from_definition_string(test_str) tokens = preprocessor.Lexer('STR(foo("4 + 5"))').tokenize() - p = Platform("Test", self.rootdir) + p = Preprocessor(platform_name="Test") p._definitions = {macro.name: macro} expanded_tokens = preprocessor.MacroExpander(p).expand(tokens) expected_tokens = preprocessor.Lexer('"foo(\\"4 + 5\\")"').tokenize() @@ -90,7 +90,7 @@ def test_stringify_ws(self): macro = preprocessor.macro_from_definition_string(test_str) to_expand_str = r'STR(L + 2-2 "\" \n")' tokens = preprocessor.Lexer(to_expand_str).tokenize() - p = Platform("Test", 
self.rootdir) + p = Preprocessor(platform_name="Test") p._definitions = {macro.name: macro} expanded_tokens = preprocessor.MacroExpander(p).expand(tokens) expected_str = r'TEST "L + 2-2 \"\\\" \\n\""' @@ -104,7 +104,7 @@ def test_stringify_nested(self): mac_xstr = preprocessor.macro_from_definition_string("xstr(s)=str(s)") mac_str = preprocessor.macro_from_definition_string("str(s)=#s") mac_def = preprocessor.macro_from_definition_string("foo=4") - p = Platform("Test", self.rootdir) + p = Preprocessor(platform_name="Test") p._definitions = {x.name: x for x in [mac_xstr, mac_str, mac_def]} tokens = preprocessor.Lexer("str(foo)").tokenize() @@ -149,7 +149,7 @@ def test_variadic(self): tokens = preprocessor.Lexer( 'eprintf("%d, %f, %e", a, b, c)', ).tokenize() - p = Platform("Test", self.rootdir) + p = Preprocessor(platform_name="Test") p._definitions = {macro.name: macro} expanded_tokens = preprocessor.MacroExpander(p).expand(tokens) self.assertTrue(len(expanded_tokens) == len(expected_expansion)) @@ -173,7 +173,7 @@ def test_self_reference_macros_1(self): def_string = "FOO=(4 + FOO)" macro = preprocessor.macro_from_definition_string(def_string) tokens = preprocessor.Lexer("FOO").tokenize() - p = Platform("Test", self.rootdir) + p = Preprocessor(platform_name="Test") p._definitions = {macro.name: macro} expanded_tokens = preprocessor.MacroExpander(p).expand(tokens) self.assertTrue(len(expanded_tokens) == len(expected_expansion)) @@ -202,7 +202,7 @@ def test_self_reference_macros_2(self): def_string = "FOO=FOO" macro = preprocessor.macro_from_definition_string(def_string) tokens = preprocessor.Lexer("FOO").tokenize() - p = Platform("Test", self.rootdir) + p = Preprocessor(platform_name="Test") p._definitions = {macro.name: macro} expanded_tokens = preprocessor.MacroExpander(p).expand(tokens) self.assertTrue(len(expanded_tokens) == len(expected_expansion)) @@ -227,7 +227,7 @@ def test_self_reference_macros_3(self): def_string = "foo(x)=bar x" macro = 
preprocessor.macro_from_definition_string(def_string) tokens = preprocessor.Lexer("foo(foo) (2)").tokenize() - p = Platform("Test", self.rootdir) + p = Preprocessor(platform_name="Test") p._definitions = {macro.name: macro} expanded_tokens = preprocessor.MacroExpander(p).expand(tokens) expected_tokens = preprocessor.Lexer("bar foo (2)").tokenize() @@ -271,7 +271,7 @@ def test_indirect_self_reference_macros(self): x_tokens = preprocessor.Lexer("x").tokenize() y_tokens = preprocessor.Lexer("y").tokenize() - p = Platform("Test", self.rootdir) + p = Preprocessor(platform_name="Test") p._definitions = {x_macro.name: x_macro, y_macro.name: y_macro} x_expanded_tokens = preprocessor.MacroExpander(p).expand(x_tokens) diff --git a/tests/operators/test_operators.py b/tests/operators/test_operators.py index 59aa008..161eba8 100644 --- a/tests/operators/test_operators.py +++ b/tests/operators/test_operators.py @@ -6,7 +6,7 @@ from pathlib import Path from codebasin import CodeBase, finder, preprocessor -from codebasin.preprocessor import Platform +from codebasin.preprocessor import Preprocessor class TestOperators(unittest.TestCase): @@ -53,7 +53,7 @@ def test_operators(self): def test_paths(self): input_str = r"FUNCTION(looks/2like/a/path/with_/bad%%identifiers)" tokens = preprocessor.Lexer(input_str).tokenize() - p = Platform("Test", self.rootdir) + p = Preprocessor(platform_name="Test") macro = preprocessor.macro_from_definition_string("FUNCTION(x)=#x") p._definitions = {macro.name: macro} _ = preprocessor.MacroExpander(p).expand(tokens) diff --git a/tests/preprocessor/test_preprocessor.py b/tests/preprocessor/test_preprocessor.py new file mode 100644 index 0000000..8c9f1bf --- /dev/null +++ b/tests/preprocessor/test_preprocessor.py @@ -0,0 +1,99 @@ +# Copyright (C) 2019-2024 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause + +import logging +import unittest +from pathlib import Path + +import codebasin +from codebasin.preprocessor import Identifier, Preprocessor + 
+ +class TestPreprocessor(unittest.TestCase): + """ + Test Preprocessor class. + """ + + @classmethod + def setUpClass(self): + logging.disable() + + def test_constructor(self): + """Check arguments are handled correctly""" + preprocessor = Preprocessor( + platform_name="name", + include_paths=["/path/to/include"], + defines=["MACRO"], + ) + self.assertEqual(preprocessor.platform_name, "name") + self.assertCountEqual( + preprocessor._include_paths, + [Path("/path/to/include")], + ) + + macro = codebasin.preprocessor.macro_from_definition_string("MACRO") + self.assertCountEqual(preprocessor._definitions, {"MACRO": macro}) + + def test_constructor_validation(self): + """Check arguments are valid""" + + with self.assertRaises(TypeError): + Preprocessor(platform_name=1) + + with self.assertRaises(TypeError): + Preprocessor(include_paths="/not/a/list") + + with self.assertRaises(TypeError): + Preprocessor(include_paths=1) + + with self.assertRaises(TypeError): + Preprocessor(include_paths=["/path/to/include", 1]) + + with self.assertRaises(TypeError): + Preprocessor(defines="NOT_A_LIST") + + with self.assertRaises(TypeError): + Preprocessor(defines=1) + + with self.assertRaises(TypeError): + Preprocessor(defines=["MACRO", 1]) + + def test_define(self): + """Check implementation of define""" + macro = codebasin.preprocessor.macro_from_definition_string("MACRO=x") + identifier = Identifier("Unknown", -1, False, "MACRO") + + preprocessor = Preprocessor() + self.assertFalse(preprocessor.has_macro(identifier)) + self.assertIsNone(preprocessor.get_macro(identifier)) + + preprocessor.define(macro) + self.assertTrue(preprocessor.has_macro(identifier)) + self.assertEqual(preprocessor.get_macro(identifier), macro) + + def test_undefine(self): + """Check implementation of undefine""" + macro = codebasin.preprocessor.macro_from_definition_string("MACRO=x") + identifier = Identifier("Unknown", -1, False, "MACRO") + + preprocessor = Preprocessor() + preprocessor.define(macro) + 
preprocessor.undefine(identifier) + + self.assertFalse(preprocessor.has_macro(identifier)) + self.assertIsNone(preprocessor.get_macro(identifier)) + + def test_get_file_info(self): + """Check implementation of get_file_info""" + preprocessor = Preprocessor() + + info = preprocessor.get_file_info("filename") + self.assertFalse(info.is_include_once) + + preprocessor.get_file_info("filename").is_include_once = True + info = preprocessor.get_file_info("filename") + self.assertTrue(info.is_include_once) + + +if __name__ == "__main__": + unittest.main()