@@ -127,7 +127,7 @@ def convert_bigquery_schema(
     return data_contract_specification


-def import_table_fields(table_fields):
+def import_table_fields(table_fields, in_array=False):
     imported_fields = {}
     for field in table_fields:
         field_name = field.get("name")
@@ -136,20 +136,38 @@ def import_table_fields(table_fields):
         imported_fields[field_name].description = field.get("description")

         if field.get("type") == "RECORD":
-            imported_fields[field_name].type = "object"
-            imported_fields[field_name].fields = import_table_fields(field.get("fields"))
+            if field.get("mode") == "REPEATED":
+                imported_fields[field_name].type = "array"
+                imported_fields[field_name].items = Field(
+                    type="record", fields=import_table_fields(field.get("fields"), in_array=True))
+            else:
+                imported_fields[field_name].type = "object"
+                imported_fields[field_name].fields = import_table_fields(field.get("fields"), in_array=True)
+
         elif field.get("type") == "STRUCT":
             imported_fields[field_name].type = "struct"
-            imported_fields[field_name].fields = import_table_fields(field.get("fields"))
+            imported_fields[field_name].fields = import_table_fields(field.get("fields"), in_array=in_array)
         elif field.get("type") == "RANGE":
             # This is a range of date/datetime/timestamp but multiple values
             # So we map it to an array
             imported_fields[field_name].type = "array"
             imported_fields[field_name].items = Field(
-                type=map_type_from_bigquery(field["rangeElementType"].get("type"))
+                type=map_type_from_bigquery(field["rangeElementType"].get("type"), in_array=True)
             )
-        else:  # primitive type
-            imported_fields[field_name].type = map_type_from_bigquery(field.get("type"))
+        elif field.get("type") == "GEOGRAPHY":
+            imported_fields[field_name].type = map_type_from_bigquery(field.get("type"), in_array=in_array)
+            imported_fields[field_name].config = {"bigqueryType": "GEOGRAPHY"}
+        elif field.get("type") == "JSON":
+            imported_fields[field_name].type = map_type_from_bigquery(field.get("type"), in_array=in_array)
+            imported_fields[field_name].config = {"bigqueryType": "JSON"}
+
+        else:
+            if field.get("mode") == "REPEATED":  # not a RECORD, so a plain array such as ARRAY<STRING> or ARRAY<INTEGER>
+                imported_fields[field_name].type = "array"
+                imported_fields[field_name].items = Field(
+                    type=map_type_from_bigquery(field.get("type"), in_array=True))
+            else:  # primitive type
+                imported_fields[field_name].type = map_type_from_bigquery(field.get("type"), in_array=in_array)

         if field.get("type") == "STRING":
             # in bigquery both string and bytes have maxLength but in the datacontracts
@@ -167,13 +185,13 @@ def import_table_fields(table_fields):
     return imported_fields


-def map_type_from_bigquery(bigquery_type_str: str):
+def map_type_from_bigquery(bigquery_type_str: str, in_array=False):
     if bigquery_type_str == "STRING":
         return "string"
     elif bigquery_type_str == "BYTES":
         return "bytes"
     elif bigquery_type_str == "INTEGER":
-        return "int"
+        return "bigint" if in_array else "int"
     elif bigquery_type_str == "INT64":
         return "bigint"
     elif bigquery_type_str == "FLOAT":
@@ -187,9 +205,9 @@ def map_type_from_bigquery(bigquery_type_str: str):
     elif bigquery_type_str == "DATE":
         return "date"
     elif bigquery_type_str == "TIME":
-        return "timestamp_ntz"
+        return "timestamp_tz"
     elif bigquery_type_str == "DATETIME":
-        return "timestamp"
+        return "timestamp_ntz"
     elif bigquery_type_str == "NUMERIC":
         return "numeric"
     elif bigquery_type_str == "BIGNUMERIC":
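For context, a minimal sketch of how a table schema with repeated columns would be imported after this change. The schema below, the field names, and the commented import path (`datacontract.imports.bigquery_importer`) are illustrative assumptions, not taken from this diff or the test suite; the expected mapping is read off the logic above, where `INTEGER` is widened to `bigint` whenever it appears inside an array.

```python
# Minimal sketch, assuming a BigQuery table schema in REST API / `bq show --schema` style,
# with a repeated record column and a repeated primitive column.
bigquery_schema = [
    {
        "name": "line_items",  # ARRAY<STRUCT<...>> column
        "type": "RECORD",
        "mode": "REPEATED",
        "fields": [
            {"name": "sku", "type": "STRING", "mode": "REQUIRED"},
            {"name": "quantity", "type": "INTEGER", "mode": "NULLABLE"},
        ],
    },
    {"name": "tag_ids", "type": "INTEGER", "mode": "REPEATED"},  # ARRAY<INT64>-style column
]

# Assuming the importer module path (not shown in this excerpt):
# from datacontract.imports.bigquery_importer import import_table_fields
# fields = import_table_fields(bigquery_schema)
#
# Expected result per the logic above (illustrative, not generated output):
#   line_items: type=array, items.type=record,
#               items.fields.sku.type=string,
#               items.fields.quantity.type=bigint   # INTEGER inside an array -> bigint
#   tag_ids:    type=array, items.type=bigint
```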