Skip to content

Commit 21b13a6

Browse files
committed
feat: add derivation expression evaluator
1 parent c6a5fcf commit 21b13a6

14 files changed

+3939
-1
lines changed

.devcontainer/Dockerfile

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Dev container image: Python 3.10 plus a JDK (installed through SDKMAN) and the
# ANTLR tool jar used to regenerate the SubstraitType lexer/parser.
FROM mcr.microsoft.com/vscode/devcontainers/python:3.10-buster
# Install SDKMAN, Java and the ANTLR jar into the vscode user's home directory.
USER vscode
RUN curl -s "https://get.sdkman.io" | bash
# sdkman-init.sh has to be `source`d, which needs bash rather than the default /bin/sh.
SHELL ["/bin/bash", "-c"]
RUN source "/home/vscode/.sdkman/bin/sdkman-init.sh" && sdk install java 20.0.2-graalce
# Download over HTTPS; -f makes curl fail the build on an HTTP error instead of
# saving the error page under the jar's name.
RUN mkdir -p /home/vscode/lib && cd /home/vscode/lib && curl -fL -O https://www.antlr.org/download/antlr-4.13.1-complete.jar
# Docker does not expand "~" in ENV values, so a "~/lib/..." value would be resolved
# later by whichever shell/user consumes it (e.g. /root/lib for root). Use the
# absolute path of the file downloaded above.
ENV ANTLR_JAR="/home/vscode/lib/antlr-4.13.1-complete.jar"
USER root

.devcontainer/devcontainer.json

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"name": "substrait-python-devcontainer",
3+
"build": {
4+
"context": "..",
5+
"dockerfile": "Dockerfile"
6+
},
7+
8+
// Features to add to the dev container. More info: https://containers.dev/features.
9+
// "features": {
10+
// "ghcr.io/devcontainers/features/nix:1": {}
11+
// },
12+
13+
// Use 'forwardPorts' to make a list of ports inside the container available locally.
14+
// "forwardPorts": [],
15+
16+
// Use 'postCreateCommand' to run commands after the container is created.
17+
// "postCreateCommand": "poetry install"
18+
19+
// Configure tool-specific properties.
20+
// "customizations": {},
21+
22+
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
23+
// "remoteUser": "root"
24+
}

Makefile

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# `antlr` names a task, not a file it produces; declare it phony so it always runs
# even if a file or directory called `antlr` ever appears next to the Makefile.
.PHONY: antlr
# Regenerate the Python lexer/parser from SubstraitType.g4.
# ANTLR_JAR must point at the ANTLR "complete" jar (the dev container sets it).
antlr:
	java -jar ${ANTLR_JAR} -o src/substrait/gen/antlr -Dlanguage=Python3 SubstraitType.g4

SubstraitType.g4

+209
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
grammar SubstraitType;
2+
3+
//
4+
fragment A : [aA];
5+
fragment B : [bB];
6+
fragment C : [cC];
7+
fragment D : [dD];
8+
fragment E : [eE];
9+
fragment F : [fF];
10+
fragment G : [gG];
11+
fragment H : [hH];
12+
fragment I : [iI];
13+
fragment J : [jJ];
14+
fragment K : [kK];
15+
fragment L : [lL];
16+
fragment M : [mM];
17+
fragment N : [nN];
18+
fragment O : [oO];
19+
fragment P : [pP];
20+
fragment Q : [qQ];
21+
fragment R : [rR];
22+
fragment S : [sS];
23+
fragment T : [tT];
24+
fragment U : [uU];
25+
fragment V : [vV];
26+
fragment W : [wW];
27+
fragment X : [xX];
28+
fragment Y : [yY];
29+
fragment Z : [zZ];
30+
31+
32+
If : I F;
33+
Then : T H E N;
34+
Else : E L S E;
35+
36+
// TYPES
37+
Boolean : B O O L E A N;
38+
I8 : I '8';
39+
I16 : I '16';
40+
I32 : I '32';
41+
I64 : I '64';
42+
FP32 : F P '32';
43+
FP64 : F P '64';
44+
String : S T R I N G;
45+
Binary : B I N A R Y;
46+
Timestamp: T I M E S T A M P;
47+
TimestampTZ: T I M E S T A M P '_' T Z;
48+
Date : D A T E;
49+
Time : T I M E;
50+
IntervalYear: I N T E R V A L '_' Y E A R;
51+
IntervalDay: I N T E R V A L '_' D A Y;
52+
IntervalCompound: I N T E R V A L '_' C O M P O U N D;
53+
UUID : U U I D;
54+
Decimal : D E C I M A L;
55+
PrecisionTimestamp: P R E C I S I O N '_' T I M E S T A M P;
56+
PrecisionTimestampTZ: P R E C I S I O N '_' T I M E S T A M P '_' T Z;
57+
FixedChar: F I X E D C H A R;
58+
VarChar : V A R C H A R;
59+
FixedBinary: F I X E D B I N A R Y;
60+
Struct : S T R U C T;
61+
NStruct : N S T R U C T;
62+
List : L I S T;
63+
Map : M A P;
64+
ANY : A N Y;
65+
UserDefined: U '!';
66+
67+
68+
// OPERATIONS
69+
And : A N D;
70+
Or : O R;
71+
Assign : ':=';
72+
73+
// COMPARE
74+
Eq : '=';
75+
NotEquals: '!=';
76+
Gte : '>=';
77+
Lte : '<=';
78+
Gt : '>';
79+
Lt : '<';
80+
Bang : '!';
81+
82+
83+
// MATH
84+
Plus : '+';
85+
Minus : '-';
86+
Asterisk : '*';
87+
ForwardSlash : '/';
88+
Percent : '%';
89+
90+
// ORGANIZE
91+
OBracket : '[';
92+
CBracket : ']';
93+
OParen : '(';
94+
CParen : ')';
95+
SColon : ';';
96+
Comma : ',';
97+
QMark : '?';
98+
Colon : ':';
99+
SingleQuote: '\'';
100+
101+
102+
Number
103+
: '-'? Int
104+
;
105+
106+
Identifier
107+
: ('a'..'z' | 'A'..'Z' | '_' | '$') ('a'..'z' | 'A'..'Z' | '_' | '$' | Digit)*
108+
;
109+
110+
LineComment
111+
: '//' ~[\r\n]* -> channel(HIDDEN)
112+
;
113+
114+
BlockComment
115+
: ( '/*'
116+
( '/'* BlockComment
117+
| ~[/*]
118+
| '/'+ ~[/*]
119+
| '*'+ ~[/*]
120+
)*
121+
'*'*
122+
'*/'
123+
) -> channel(HIDDEN)
124+
;
125+
126+
Whitespace
127+
: [ \t]+ -> channel(HIDDEN)
128+
;
129+
130+
Newline
131+
: ( '\r' '\n'?
132+
| '\n'
133+
)
134+
;
135+
136+
137+
fragment Int
138+
: '1'..'9' Digit*
139+
| '0'
140+
;
141+
142+
fragment Digit
143+
: '0'..'9'
144+
;
145+
146+
start: expr EOF;
147+
148+
scalarType
149+
: Boolean #Boolean
150+
| I8 #i8
151+
| I16 #i16
152+
| I32 #i32
153+
| I64 #i64
154+
| FP32 #fp32
155+
| FP64 #fp64
156+
| String #string
157+
| Binary #binary
158+
| Timestamp #timestamp
159+
| TimestampTZ #timestampTz
160+
| Date #date
161+
| Time #time
162+
| IntervalYear #intervalYear
163+
| UUID #uuid
164+
| UserDefined Identifier #userDefined
165+
;
166+
167+
parameterizedType
168+
: FixedChar isnull='?'? Lt len=numericParameter Gt #fixedChar
169+
| VarChar isnull='?'? Lt len=numericParameter Gt #varChar
170+
| FixedBinary isnull='?'? Lt len=numericParameter Gt #fixedBinary
171+
| Decimal isnull='?'? Lt precision=numericParameter Comma scale=numericParameter Gt #decimal
172+
| IntervalDay isnull='?'? Lt precision=numericParameter Gt #intervalDay
173+
| IntervalCompound isnull='?'? Lt precision=numericParameter Gt #intervalCompound
174+
| PrecisionTimestamp isnull='?'? Lt precision=numericParameter Gt #precisionTimestamp
175+
| PrecisionTimestampTZ isnull='?'? Lt precision=numericParameter Gt #precisionTimestampTZ
176+
| Struct isnull='?'? Lt expr (Comma expr)* Gt #struct
177+
| NStruct isnull='?'? Lt Identifier expr (Comma Identifier expr)* Gt #nStruct
178+
| List isnull='?'? Lt expr Gt #list
179+
| Map isnull='?'? Lt key=expr Comma value=expr Gt #map
180+
;
181+
182+
numericParameter
183+
: Number #numericLiteral
184+
| Identifier #numericParameterName
185+
| expr #numericExpression
186+
;
187+
188+
anyType: ANY;
189+
190+
type
191+
: scalarType isnull='?'?
192+
| parameterizedType
193+
| anyType isnull='?'?
194+
;
195+
196+
// : (OParen innerExpr CParen | innerExpr)
197+
198+
// Expression rule.
//
// ANTLR4 derives operator precedence from the ORDER of the left-recursive
// alternatives: an earlier alternative binds tighter, and every operator listed
// inside a single alternative shares one precedence level. The binary operators
// are therefore split into tiers (multiplicative > additive > comparison > and > or)
// so that e.g. `1 + 2 * 3` parses as `1 + (2 * 3)` instead of `(1 + 2) * 3`.
// All tiers reuse the #BinaryExpr label and the left/op/right element labels, so
// they still produce a single BinaryExprContext class in the generated parser.
expr
  : OParen expr CParen #ParenExpression
  | Identifier Eq expr Newline+ (Identifier Eq expr Newline+)* finalType=type Newline* #MultilineDefinition
  | type #TypeLiteral
  | number=Number #LiteralNumber
  | identifier=Identifier isnull='?'? #TypeParam
  | Identifier OParen (expr (Comma expr)*)? CParen #FunctionCall
  | left=expr op=(Asterisk | ForwardSlash) right=expr #BinaryExpr
  | left=expr op=(Plus | Minus) right=expr #BinaryExpr
  | left=expr op=(Lt | Gt | Lte | Gte | Eq | NotEquals) right=expr #BinaryExpr
  | left=expr op=And right=expr #BinaryExpr
  | left=expr op=Or right=expr #BinaryExpr
  | If ifExpr=expr Then thenExpr=expr Else elseExpr=expr #IfExpr
  | (Bang) expr #NotExpr
  | ifExpr=expr QMark thenExpr=expr Colon elseExpr=expr #Ternary
  ;

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ authors = [{name = "Substrait contributors", email = "[email protected]
55
license = {text = "Apache-2.0"}
66
readme = "README.md"
77
requires-python = ">=3.8.1"
8-
dependencies = ["protobuf >= 3.20"]
8+
# The lexer/parser under substrait/gen/antlr are generated with ANTLR 4.13.1; the
# runtime has to come from the same 4.13 line to load the generated code.
dependencies = ["protobuf >= 3.20", "antlr4-python3-runtime >= 4.13, < 4.14"]
99
dynamic = ["version"]
1010

1111
[tool.setuptools_scm]
+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
from typing import Optional
2+
from antlr4 import InputStream, CommonTokenStream
3+
from substrait.gen.antlr.SubstraitTypeLexer import SubstraitTypeLexer
4+
from substrait.gen.antlr.SubstraitTypeParser import SubstraitTypeParser
5+
from substrait.gen.proto.type_pb2 import Type
6+
7+
8+
def _nullability_kwargs(ctx) -> dict:
    """Return proto keyword arguments marking a type nullable when `?` was parsed.

    `isnull` is the label the grammar attaches to the optional `?` token; it is
    None when the suffix is absent, in which case nullability is left unset so
    types written without `?` produce the same message as before.
    """
    if ctx.isnull is not None:
        return {"nullability": Type.NULLABILITY_NULLABLE}
    return {}


def _evaluate_binary(x, values: dict):
    """Evaluate a BinaryExpr node by applying its operator to both operands."""
    left = _evaluate(x.left, values)
    right = _evaluate(x.right, values)

    # The AND / OR keywords are case-insensitive in the grammar; symbols are unaffected.
    op = x.op.text.lower()

    if op == "+":
        return left + right
    elif op == "-":
        return left - right
    elif op == "*":
        return left * right
    elif op == ">":
        return left > right
    elif op == ">=":
        return left >= right
    elif op == "<":
        return left < right
    elif op == "<=":
        return left <= right
    elif op == "=":
        return left == right
    elif op == "!=":
        return left != right
    elif op == "and":
        return bool(left) and bool(right)
    elif op == "or":
        return bool(left) or bool(right)
    else:
        raise Exception(f"Unknown binary op {x.op.text}")


def _evaluate_type(x, values: dict):
    """Build the protobuf `Type` described by a `type` parse node."""
    scalar_type = x.scalarType()
    parametrized_type = x.parameterizedType()

    if scalar_type is not None:
        # For scalar types the `?` suffix is attached to the enclosing `type` node.
        nullable = _nullability_kwargs(x)
        scalar_kinds = (
            (SubstraitTypeParser.I8Context, "i8", Type.I8),
            (SubstraitTypeParser.I16Context, "i16", Type.I16),
            (SubstraitTypeParser.I32Context, "i32", Type.I32),
            (SubstraitTypeParser.I64Context, "i64", Type.I64),
            (SubstraitTypeParser.Fp32Context, "fp32", Type.FP32),
            (SubstraitTypeParser.Fp64Context, "fp64", Type.FP64),
            (SubstraitTypeParser.BooleanContext, "bool", Type.Boolean),
        )
        for ctx_class, field, message in scalar_kinds:
            if isinstance(scalar_type, ctx_class):
                return Type(**{field: message(**nullable)})
        raise Exception(f"Unknown scalar type {type(scalar_type)}")

    if parametrized_type is not None:
        if isinstance(parametrized_type, SubstraitTypeParser.DecimalContext):
            precision = _evaluate(parametrized_type.precision, values)
            scale = _evaluate(parametrized_type.scale, values)
            # For parameterized types the `?` suffix sits on the type's own node.
            return Type(
                decimal=Type.Decimal(
                    precision=precision,
                    scale=scale,
                    **_nullability_kwargs(parametrized_type),
                )
            )
        raise Exception(f"Unknown parametrized type {type(parametrized_type)}")

    raise Exception("either scalar_type or parametrized_type is required")


def _evaluate(x, values: dict):
    """Recursively evaluate a SubstraitType parse-tree node.

    Args:
        x: a parser context produced by SubstraitTypeParser.
        values: mapping from parameter name to its value; None is treated as
            an empty mapping.

    Returns:
        An int or bool for arithmetic / comparison / logical expressions, or a
        protobuf `Type` message for type literals and multiline definitions.

    Raises:
        KeyError: a referenced parameter name is not present in `values`.
        Exception: the node, operator, function or type is not supported.
    """
    if values is None:
        values = {}

    if isinstance(x, SubstraitTypeParser.BinaryExprContext):
        return _evaluate_binary(x, values)
    elif isinstance(x, SubstraitTypeParser.LiteralNumberContext):
        return int(x.number.text)
    elif isinstance(x, SubstraitTypeParser.TypeParamContext):
        return values[x.identifier.text]
    elif isinstance(x, SubstraitTypeParser.NumericParameterNameContext):
        return values[x.Identifier().symbol.text]
    elif isinstance(x, SubstraitTypeParser.ParenExpressionContext):
        return _evaluate(x.expr(), values)
    elif isinstance(x, SubstraitTypeParser.FunctionCallContext):
        exprs = [_evaluate(e, values) for e in x.expr()]
        func = x.Identifier().symbol.text

        # Pass the list itself: min(*[v]) / max(*[v]) would try to iterate the
        # single value and raise TypeError for a one-argument call.
        if func == "min":
            return min(exprs)
        elif func == "max":
            return max(exprs)
        else:
            raise Exception(f"Unknown function {func}")
    elif isinstance(x, SubstraitTypeParser.TypeContext):
        return _evaluate_type(x, values)
    elif isinstance(x, SubstraitTypeParser.NumericExpressionContext):
        return _evaluate(x.expr(), values)
    elif isinstance(
        x, (SubstraitTypeParser.TernaryContext, SubstraitTypeParser.IfExprContext)
    ):
        # `c ? a : b` and `if c then a else b` share the ifExpr/thenExpr/elseExpr
        # labels. Only the selected branch is evaluated, so an error in the
        # branch that is not taken cannot fail the whole expression.
        if _evaluate(x.ifExpr, values):
            return _evaluate(x.thenExpr, values)
        return _evaluate(x.elseExpr, values)
    elif isinstance(x, SubstraitTypeParser.NotExprContext):
        return not _evaluate(x.expr(), values)
    elif isinstance(x, SubstraitTypeParser.MultilineDefinitionContext):
        # Work on a copy so the intermediate assignments do not leak into the
        # mapping supplied by the caller.
        scope = dict(values)

        for i, e in zip(x.Identifier(), x.expr()):
            # Each definition may refer to the names assigned on earlier lines.
            scope[i.symbol.text] = _evaluate(e, scope)

        return _evaluate(x.finalType, scope)
    elif isinstance(x, SubstraitTypeParser.TypeLiteralContext):
        return _evaluate(x.type_(), values)
    else:
        raise Exception(f"Unknown token type {type(x)}")
96+
97+
98+
def evaluate(x: str, values: Optional[dict] = None):
    """Parse a derivation expression and evaluate it.

    Args:
        x: source text of the expression, in the SubstraitType grammar.
        values: optional mapping from parameter name to value. It is copied
            before evaluation, so the caller's dict is never modified.

    Returns:
        Whatever `_evaluate` produces for the parsed `expr` node.
    """
    lexer = SubstraitTypeLexer(InputStream(x))
    stream = CommonTokenStream(lexer)
    parser = SubstraitTypeParser(stream)
    # The default of None used to be passed straight through and then
    # subscripted; always hand the evaluator a real, private dict instead.
    scope = dict(values) if values is not None else {}
    return _evaluate(parser.expr(), scope)

0 commit comments

Comments
 (0)