Skip to content

Enforce header entity length in grammar #15

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
554 changes: 10 additions & 544 deletions __init__.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions __main__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import sys
import json
import argparse
from . import parse, ValidationError
from . import parse, CollectedValidationErrors

def main():
parser = argparse.ArgumentParser(description="Parse and validate STEP file.")
Expand All @@ -22,11 +22,11 @@ def main():
if not args.json:
print("Valid", file=sys.stderr)
exit(0)
except ValidationError as exc:
except CollectedValidationErrors as exc:
if not args.json:
print(exc, file=sys.stderr)
else:
json.dump(exc.asdict(), sys.stdout)
json.dump([e.asdict() for e in exc.errors], sys.stdout, indent=2)
exit(1)

if __name__ == '__main__':
Expand Down
29 changes: 29 additions & 0 deletions fixtures/fail_multiple_duplicate_ids.ifc
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
ISO-10303-21;
HEADER;
FILE_DESCRIPTION(('ViewDefinition [CoordinationView]'),'2;1');
FILE_NAME('','2022-05-04T08:08:30',(''),(''),'IfcOpenShell-0.7.0','IfcOpenShell-0.7.0','');
FILE_SCHEMA(('IFC4'));
ENDSEC;
DATA;
#1=IFCPERSON($,$,'',$,$,$,$,$);
#2=IFCORGANIZATION($,'',$,$,$);
#3=IFCPERSONANDORGANIZATION(#1,#2,$);
#4=IFCAPPLICATION(#2,'0.7.0','IfcOpenShell-0.7.0','');
#5=IFCOWNERHISTORY(#3,#4,$,.ADDED.,$,#3,#4,1651651710);
#6=IFCDIRECTION((1.,0.,0.));
#7=IFCDIRECTION((0.,0.,1.));
#8=IFCCARTESIANPOINT((0.,0.,0.));
#9=IFCAXIS2PLACEMENT3D(#8,#7,#6);
#10=IFCDIRECTION((0.,1.,0.));
#11=IFCGEOMETRICREPRESENTATIONCONTEXT($,'Model',3,1.E-05,#9,#10);
#12=IFCDIMENSIONALEXPONENTS(0,0,0,0,0,0,0);
#13=IFCSIUNIT(*,.LENGTHUNIT.,$,.METRE.);
#14=IFCSIUNIT(*,.AREAUNIT.,$,.SQUARE_METRE.);
#15=IFCSIUNIT(*,.VOLUMEUNIT.,$,.CUBIC_METRE.);
#16=IFCSIUNIT(*,.PLANEANGLEUNIT.,$,.RADIAN.);
#18=IFCMEASUREWITHUNIT(IFCPLANEANGLEMEASURE(0.017453292519943295),#16);
#18=IFCCONVERSIONBASEDUNIT(#12,.PLANEANGLEUNIT.,'DEGREE',#17);
#19=IFCUNITASSIGNMENT((#13,#14,#15,#18));
#19=IFCPROJECT('2AyG2X0sb16Bjd4gQc07yZ',#5,'',$,$,$,$,(#11),#19);
ENDSEC;
END-ISO-10303-21;
30 changes: 30 additions & 0 deletions fixtures/fail_multiple_wrong_header_fields.ifc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
ISO-10303-21;
HEADER;
FILE_DESCRIPTION(('ViewDefinition [ReferenceView_V1.2]', 'ExchangeRequirement [Any]'));
FILE_NAME('Header.ifc','2025-02-13T15:58:45',('tricott'),('Trimble Inc.'),'TrimBimToIFC rel. 4.0.2','Example - Example - 2025.0','IFC4 model', '');
FILE_SCHEMA(('IFC4'));
ENDSEC;
DATA;
#1=IFCPERSON($,$,'',$,$,$,$,$);
#2=IFCORGANIZATION($,'',$,$,$);
#3=IFCPERSONANDORGANIZATION(#1,#2,$);
#4=IFCAPPLICATION(#2,'v0.7.0-6c9e130ca','IfcOpenShell-v0.7.0-6c9e130ca','');
#5=IFCOWNERHISTORY(#3,#4,$,.NOTDEFINED.,$,#3,#4,1700419055);
#6=IFCDIRECTION((1.,0.,0.));
#7=IFCDIRECTION((0.,0.,1.));
#8=IFCCARTESIANPOINT((0.,0.,0.));
#9=IFCAXIS2PLACEMENT3D(#8,#7,#6);
#10=IFCDIRECTION((0.,1.));
#11=IFCGEOMETRICREPRESENTATIONCONTEXT($,'Model',3,1.E-05,#9,#10);
#12=IFCDIMENSIONALEXPONENTS(0,0,0,0,0,0,0);
#13=IFCSIUNIT(*,.LENGTHUNIT.,$,.METRE.);
#14=IFCSIUNIT(*,.AREAUNIT.,$,.SQUARE_METRE.);
#15=IFCSIUNIT(*,.VOLUMEUNIT.,$,.CUBIC_METRE.);
#16=IFCSIUNIT(*,.PLANEANGLEUNIT.,$,.RADIAN.);
#17=IFCMEASUREWITHUNIT(IFCPLANEANGLEMEASURE(0.017453292519943295),#16);
#18=IFCCONVERSIONBASEDUNIT(#12,.PLANEANGLEUNIT.,'DEGREE',#17);
#19=IFCUNITASSIGNMENT((#13,#14,#15,#18));
#20=IFCPROJECT('0iDmeiiLP3AOllitM2Favn',#5,'',$,$,$,$,(#11),#19);
#21=IFCSITE('3rg2jGkIH10RFhrQsGZKRk',#5,$,$,$,$,$,$,$,$,$,$,$,$);
ENDSEC;
END-ISO-10303-21;
30 changes: 30 additions & 0 deletions fixtures/fail_too_many_header_entity_fields.ifc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
ISO-10303-21;
HEADER;
FILE_DESCRIPTION(('ViewDefinition [ReferenceView_V1.2]', 'ExchangeRequirement [Any]'),'2;1');
FILE_NAME('Header.ifc','2025-02-13T15:58:45',('tricott'),('Trimble Inc.'),'TrimBimToIFC rel. 4.0.2','Example - Example - 2025.0','IFC4 model', '');
FILE_SCHEMA(('IFC4'));
ENDSEC;
DATA;
#1=IFCPERSON($,$,'',$,$,$,$,$);
#2=IFCORGANIZATION($,'',$,$,$);
#3=IFCPERSONANDORGANIZATION(#1,#2,$);
#4=IFCAPPLICATION(#2,'v0.7.0-6c9e130ca','IfcOpenShell-v0.7.0-6c9e130ca','');
#5=IFCOWNERHISTORY(#3,#4,$,.NOTDEFINED.,$,#3,#4,1700419055);
#6=IFCDIRECTION((1.,0.,0.));
#7=IFCDIRECTION((0.,0.,1.));
#8=IFCCARTESIANPOINT((0.,0.,0.));
#9=IFCAXIS2PLACEMENT3D(#8,#7,#6);
#10=IFCDIRECTION((0.,1.));
#11=IFCGEOMETRICREPRESENTATIONCONTEXT($,'Model',3,1.E-05,#9,#10);
#12=IFCDIMENSIONALEXPONENTS(0,0,0,0,0,0,0);
#13=IFCSIUNIT(*,.LENGTHUNIT.,$,.METRE.);
#14=IFCSIUNIT(*,.AREAUNIT.,$,.SQUARE_METRE.);
#15=IFCSIUNIT(*,.VOLUMEUNIT.,$,.CUBIC_METRE.);
#16=IFCSIUNIT(*,.PLANEANGLEUNIT.,$,.RADIAN.);
#17=IFCMEASUREWITHUNIT(IFCPLANEANGLEMEASURE(0.017453292519943295),#16);
#18=IFCCONVERSIONBASEDUNIT(#12,.PLANEANGLEUNIT.,'DEGREE',#17);
#19=IFCUNITASSIGNMENT((#13,#14,#15,#18));
#20=IFCPROJECT('0iDmeiiLP3AOllitM2Favn',#5,'',$,$,$,$,(#11),#19);
#21=IFCSITE('3rg2jGkIH10RFhrQsGZKRk',#5,$,$,$,$,$,$,$,$,$,$,$,$);
ENDSEC;
END-ISO-10303-21;
Empty file added parser/__init__.py
Empty file.
108 changes: 108 additions & 0 deletions parser/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from lark.exceptions import UnexpectedToken

class _ValidationError(Exception):
    """Abstract root of the validation exception hierarchy.

    Only subclasses may be instantiated; constructing the base class itself
    raises ``TypeError``.
    """

    def __init__(self, *args, **kwargs):
        # Guard against the base class being used as a concrete error.
        if type(self) is _ValidationError:
            raise TypeError("Do not raise _ValidationError directly.")
        super().__init__(*args, **kwargs)

class ErrorCollector:
    """Accumulates validation errors so they can be reported in one go."""

    def __init__(self):
        # Errors are kept in the order in which they were added.
        self.errors = list()

    def add(self, error):
        """Record one more error."""
        self.errors.append(error)

    def raise_if_any(self):
        """Raise CollectedValidationErrors when at least one error was added."""
        if not self.errors:
            return
        raise CollectedValidationErrors(self.errors)

class CollectedValidationErrors(_ValidationError):
    """Aggregate exception carrying every validation error that was collected."""

    def __init__(self, errors):
        self.errors = errors

    def asdict(self, with_message=True):
        """Return the ``asdict`` output of each contained error, in order."""
        serialized = []
        for error in self.errors:
            serialized.append(error.asdict(with_message=with_message))
        return serialized

    def __str__(self):
        summary = f"{len(self.errors)} validation error(s) collected:\n"
        details = "\n\n".join(map(str, self.errors))
        return summary + details

class SyntaxError(_ValidationError):
    """Parse failure reported by the grammar, wrapping the underlying parser exception.

    ``filecontent`` is the full text that was parsed (used to quote the
    offending line) and ``exception`` is the parser exception, which must
    expose ``line`` and ``column``.
    """

    def __init__(self, filecontent, exception):
        self.filecontent = filecontent
        self.exception = exception

    def asdict(self, with_message=True):
        """Describe the error as a plain dict.

        :param with_message: when true, add a ``"message"`` key holding ``str(self)``.
        """
        exc = self.exception
        token = getattr(exc, "token", None)
        if token is not None:
            found_type = token.type.lower()
            found_value = token.value
            accepted = exc.accepts
        else:
            # Exceptions without a token (lark's UnexpectedCharacters) expose
            # the offending character as `char` and the permitted terminals as
            # `allowed`; reading `.token` on them would raise AttributeError.
            found_type = "character"
            found_value = getattr(exc, "char", "")
            accepted = getattr(exc, "allowed", None) or ()

        result = {
            "type": (
                "unexpected_token"
                if isinstance(exc, UnexpectedToken)
                else "unexpected_character"
            ),
            "lineno": exc.line,
            "column": exc.column,
            "found_type": found_type,
            "found_value": found_value,
            # Anonymous terminals are filtered out of the expected list.
            "expected": sorted(x for x in accepted if "__ANON" not in x),
            "line": self.filecontent.split("\n")[exc.line - 1],
        }
        if with_message:
            result["message"] = str(self)
        return result

    def __str__(self):
        d = self.asdict(with_message=False)
        expected = d["expected"]
        if len(expected) == 1:
            exp = expected[0]
        else:
            exp = f"one of {' '.join(expected)}"

        # The caret must be shifted by the width of the "NNNNN | " gutter so
        # that it lands underneath the reported column of the quoted line.
        gutter = f"{d['lineno']:05d} | "
        caret = " " * (len(gutter) + d["column"] - 1) + "^"

        return (
            f"On line {d['lineno']} column {d['column']}:\n"
            f"Unexpected {d['found_type']} ('{d['found_value']}')\n"
            f"Expecting {exp}\n"
            f"{gutter}{d['line']}\n"
            f"{caret}"
        )


class DuplicateNameError(_ValidationError):
    """Raised when an instance name is defined more than once.

    ``linenumbers`` holds the 1-based line numbers involved; only the first
    one is used for reporting.
    """

    def __init__(self, filecontent, name, linenumbers):
        self.name = name
        self.filecontent = filecontent
        self.linenumbers = linenumbers

    def asdict(self, with_message=True):
        """Describe the error as a plain dict, optionally with a ``"message"`` key."""
        first_line = self.linenumbers[0]
        payload = {
            "type": "duplicate_name",
            "name": self.name,
            "lineno": first_line,
            "line": self.filecontent.split("\n")[first_line - 1],
        }
        if with_message:
            payload["message"] = str(self)
        return payload

    def __str__(self):
        d = self.asdict(with_message=False)
        rows = [
            f"On line {d['lineno']}:\nDuplicate instance name #{d['name']}",
            f"{d['lineno']:05d} | {d['line']}",
            # Underline the quoted line, skipping the 8-character gutter.
            " " * 8 + "^" * len(d["line"].rstrip()),
        ]
        return "\n".join(rows)


class HeaderFieldError(_ValidationError):
    """Raised when a HEADER field has an unexpected number of parameters."""

    def __init__(self, field, found_len, expected_len):
        self.field = field
        self.found_len = found_len
        self.expected_len = expected_len

    def asdict(self, with_message=True):
        """Describe the error as a plain dict, optionally with a ``"message"`` key."""
        payload = {
            "type": "invalid_header_field",
            "field": self.field,
            "expected_field_count": self.expected_len,
            "actual_field_count": self.found_len,
        }
        if with_message:
            payload["message"] = str(self)
        return payload

    def __str__(self):
        what = f"Invalid number of parameters for HEADER field '{self.field}'. "
        counts = f"Expected {self.expected_len}, found {self.found_len}."
        return what + counts
108 changes: 108 additions & 0 deletions parser/file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import types
import re
import numbers
import itertools

from .parse import parse, ParseResult
from .grammar import HEADER_FIELDS
from .transformer import entity_instance

try:
from .mvd_info import MvdInfo, LARK_AVAILABLE
except ImportError: # in case of running module locally (e.g. test_parser.py)
from mvd_info import MvdInfo, LARK_AVAILABLE

class file:
    """
    A somewhat compatible interface (but very limited) to ifcopenshell.file

    Wraps a parse result: ``header_`` holds the raw header mapping and
    ``data_`` maps an instance id to the list of instances parsed for it.
    """

    def __init__(self, result:ParseResult):
        self.header_ = result.header
        self.data_ = result.entities

    @property
    def schema_identifier(self) -> str:
        """First schema identifier listed in the FILE_SCHEMA header entity."""
        return self.header_["FILE_SCHEMA"][0][0]

    @property
    def schema(self) -> str:
        """General IFC schema version: IFC2X3, IFC4, IFC4X3."""
        prefixes = ("IFC", "X", "_ADD", "_TC")
        reg = "".join(f"(?P<{s}>{s}\\d+)?" for s in prefixes)
        match = re.match(reg, self.schema_identifier)
        parts = []
        # Only the major ("IFC") and minor ("X") components are part of the
        # general schema name; addenda and corrigenda are dropped.
        for prefix in prefixes[:2]:
            text = match.group(prefix)
            number = int(text[len(prefix):]) if text else None
            if number:
                parts.append(f"{prefix}{number}")
        return "".join(parts)

    @property
    def schema_version(self) -> tuple[int, int, int, int]:
        """Numeric representation of the full IFC schema version.

        E.g. IFC4X3_ADD2 is represented as (4, 3, 2, 0).
        """
        # This class has no `wrapped_data`; the schema string comes from the
        # parsed header instead.
        schema = self.schema_identifier
        version = []
        for prefix in ("IFC", "X", "_ADD", "_TC"):
            number = re.search(prefix + r"(\d)", schema)
            version.append(int(number.group(1)) if number else 0)
        return tuple(version)

    @property
    def header(self):
        """Header entities as a namespace keyed by lower-cased field name.

        Each entry in HEADER_FIELDS is instantiated with the values parsed
        for that field (no values when the field is absent from the header).
        """
        header = {}
        for field_name, namedtuple_class in HEADER_FIELDS.items():
            field_data = self.header_.get(field_name.upper(), [])
            header[field_name.lower()] = namedtuple_class(*field_data)

        return types.SimpleNamespace(**header)

    @property
    def mvd(self):
        """``MvdInfo`` built from the header, or None when it is unavailable."""
        if not LARK_AVAILABLE or MvdInfo is None:
            return None
        return MvdInfo(self.header)

    def __getitem__(self, key: numbers.Integral) -> entity_instance:
        return self.by_id(key)

    def by_id(self, id: int) -> entity_instance:
        """Return an IFC entity instance filtered by IFC ID.

        :param id: STEP numerical identifier
        :type id: int

        :raises RuntimeError: If `id` is not found or multiple definitions exist for `id`.

        :rtype: entity_instance
        """
        ns = self.data_.get(id, [])
        if not ns:
            raise RuntimeError(f"Instance with id {id} not found")
        if len(ns) > 1:
            raise RuntimeError(f"Duplicate definition for id {id}")
        return ns[0]

    def by_type(self, type: str) -> list[entity_instance]:
        """Return IFC objects filtered by IFC Type and wrapped with the entity_instance class.

        The comparison is case-insensitive and matches the exact type name only.

        :rtype: list[entity_instance]
        """
        type_lc = type.lower()
        return [
            ent
            for ent in itertools.chain.from_iterable(self.data_.values())
            if ent.type.lower() == type_lc
        ]

def open(fn, only_header= False) -> file:
    """Parse the file named ``fn`` and wrap the result in a ``file`` object.

    ``only_header`` is forwarded unchanged to ``parse``.
    """
    parsed = parse(filename=fn, only_header=only_header)
    return file(parsed)
Loading