Skip to content

Commit cb3d58e

Browse files
committed
add get_valid_identifier
1 parent aea01d4 commit cb3d58e

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed

tools/generated_classes.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from typing import Any, Union
2+
3+
class AggregateExpression:
    """
    Plain container pairing an aggregate with an optional label.

    Parameters
    ----------
    agg: string
        Stored unchanged on ``self.agg``.
    label: string or None, default None
        Stored unchanged on ``self.label``; ``None`` when not supplied.
    """

    # ``label`` defaults to None, so its annotation must say so explicitly:
    # implicit Optional (``label: str = None``) is rejected by strict type
    # checkers. ``Union[str, None]`` is used because ``Union`` is the name
    # this module already imports from ``typing``.
    def __init__(self, agg: str, label: Union[str, None] = None):
        self.agg = agg
        self.label = label
7+
8+
9+
class AggregateTransform:
    """
    Thin wrapper that holds a single aggregate value.

    Parameters
    ----------
    value:
        Stored unchanged on ``self.value``. The annotation lists the accepted
        types as forward references (``"Argmax"``, ``"Argmin"``, ...); those
        classes are not defined in this block, so they stay as strings.
    """

    def __init__(
        self,
        value: Union[
            "Argmax",
            "Argmin",
            "Avg",
            "Count",
            "Max",
            "Min",
            "First",
            "Last",
            "Median",
            "Mode",
            "Product",
            "Quantile",
            "Stddev",
            "StddevPop",
            "Sum",
            "Variance",
            "VarPop",
        ],
    ):
        self.value = value

tools/schema_wrapper/utils.py

+64-1
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,67 @@
3737
"boolean": "bool",
3838
"array": "list",
3939
"null": "None",
40-
}
40+
}
41+
42+
43+
def get_valid_identifier(
    prop: str,
    replacement_character: str = "",
    allow_unicode: bool = False,
    url_decode: bool = True,
) -> str:
    """
    Turn an arbitrary property name into a string usable as a Python identifier.

    The name is optionally URL-decoded, ``[]`` is spelled out as ``Array``,
    every non-word character is swapped for ``replacement_character``, and
    the result is padded with underscores where it would otherwise be empty,
    start with a digit or non-word character, or collide with a keyword.

    Parameters
    ----------
    prop: string
        Name of the property to convert.
    replacement_character: string, default ''
        Text substituted for each character that is not a word character.
    allow_unicode: boolean, default False
        If True, non-ASCII word characters are kept; otherwise only
        ``[a-zA-Z0-9_]`` survive the substitution.
    url_decode: boolean, default True
        If True, percent-escapes in ``prop`` are decoded before anything else.

    Returns
    -------
    string
        The sanitised name.

    Examples
    --------
    >>> get_valid_identifier("my-var")
    'myvar'

    >>> get_valid_identifier("if")
    'if_'

    >>> get_valid_identifier("$schema", "_")
    '_schema'

    >>> get_valid_identifier("$*#$")
    '_'

    >>> get_valid_identifier("Name%3Cstring%3E")
    'Namestring'
    """
    # Percent-escapes are resolved first so the characters they hide go
    # through the same filtering as everything else.
    name = urllib.parse.unquote(prop) if url_decode else prop

    # "[]" is rewritten before filtering; otherwise both brackets would
    # simply be replaced like any other non-word character.
    name = name.replace("[]", "Array")

    # re.ASCII restricts \W's complement to [a-zA-Z0-9_]; re.UNICODE keeps
    # any Unicode word character.
    word_flags = re.ASCII
    if allow_unicode:
        word_flags = re.UNICODE

    # An empty result (everything was stripped) collapses to a lone underscore.
    name = re.sub(r"\W", replacement_character, name, flags=word_flags) or "_"

    # An identifier may not begin with a digit; a leading non-word character
    # can only come from the replacement text and gets the same prefix.
    if re.match(r"^[\d\W]", name) is not None:
        name = "_" + name

    # Reserved words get a trailing underscore so they stop being keywords.
    return name + "_" if keyword.iskeyword(name) else name

0 commit comments

Comments
 (0)