Skip to content

Commit 148a2d5

Browse files
committed
Switch to new SQLAlchemy dialect for CrateDB
This includes the `FloatVector` SQLAlchemy type.
1 parent: 3a00d8f; commit: 148a2d5

File tree

6 files changed, with 30 additions and 163 deletions.

6 files changed

+30
-163
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## In progress
44
- Add support for container types `ARRAY`, `OBJECT`, and `FLOAT_VECTOR`.
55
- Improve write operations to be closer to `target-postgres`.
6+
- Switch to new SQLAlchemy dialect for CrateDB.
67

78
## 2023-12-08 v0.0.1
89
- Make it work. It can run the canonical Meltano GitHub -> DB example.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,10 @@ dynamic = [
8989
"version",
9090
]
9191
dependencies = [
92-
"crate[sqlalchemy]<1",
9392
"cratedb-toolkit",
9493
"importlib-resources; python_version<'3.9'", # "meltanolabs-target-postgres==0.0.9",
9594
"meltanolabs-target-postgres @ git+https://github.com/singer-contrib/meltanolabs-target-postgres.git@pgvector",
95+
"sqlalchemy-cratedb[vector]",
9696
]
9797
optional-dependencies.all = [
9898
"meltano-target-cratedb[vector]",

target_cratedb/connector.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77
from datetime import datetime
88

99
import sqlalchemy as sa
10-
from crate.client.sqlalchemy.types import ObjectType, ObjectTypeImpl, _ObjectArray
1110
from singer_sdk import typing as th
1211
from singer_sdk.helpers._typing import is_array_type, is_boolean_type, is_integer_type, is_number_type, is_object_type
12+
from sqlalchemy_cratedb.type import FloatVector, ObjectType
13+
from sqlalchemy_cratedb.type.array import _ObjectArray
14+
from sqlalchemy_cratedb.type.object import ObjectTypeImpl
1315
from target_postgres.connector import NOTYPE, PostgresConnector
1416

1517
from target_cratedb.sqlalchemy.patch import polyfill_refresh_after_dml_engine
16-
from target_cratedb.sqlalchemy.vector import FloatVector
1718

1819

1920
class CrateDBConnector(PostgresConnector):
@@ -226,6 +227,9 @@ def _get_type_sort_key(
226227
if isinstance(sql_type, NOTYPE):
227228
return 0, _len
228229

230+
if not hasattr(sql_type, "python_type"):
231+
raise TypeError(f"Resolving type for sort key failed: {sql_type}")
232+
229233
_pytype = t.cast(type, sql_type.python_type)
230234
if issubclass(_pytype, (str, bytes)):
231235
return 900, _len

target_cratedb/sqlalchemy/patch.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from _decimal import Decimal
22
from datetime import datetime
3+
from typing import Any, Union
34

5+
import crate.client.http
46
import sqlalchemy as sa
5-
from crate.client.http import CrateJsonEncoder
6-
from crate.client.sqlalchemy.dialect import ARRAY, TYPES_MAP, DateTime
7-
from crate.client.sqlalchemy.types import _ObjectArray
8-
from sqlalchemy.sql import sqltypes
7+
from sqlalchemy_cratedb.dialect import TYPES_MAP, DateTime
8+
from sqlalchemy_cratedb.type.array import _ObjectArray
99

1010

1111
def patch_sqlalchemy():
@@ -19,20 +19,21 @@ def patch_types():
1919
2020
TODO: Upstream to crate-python.
2121
"""
22-
TYPES_MAP["bigint"] = sqltypes.BIGINT
23-
TYPES_MAP["bigint_array"] = ARRAY(sqltypes.BIGINT)
24-
TYPES_MAP["long"] = sqltypes.BIGINT
25-
TYPES_MAP["long_array"] = ARRAY(sqltypes.BIGINT)
26-
TYPES_MAP["real"] = sqltypes.DOUBLE
27-
TYPES_MAP["real_array"] = ARRAY(sqltypes.DOUBLE)
28-
TYPES_MAP["timestamp without time zone"] = sqltypes.TIMESTAMP
29-
TYPES_MAP["timestamp with time zone"] = sqltypes.TIMESTAMP
22+
# abc()
23+
TYPES_MAP["bigint"] = sa.BIGINT
24+
TYPES_MAP["bigint_array"] = sa.ARRAY(sa.BIGINT)
25+
TYPES_MAP["long"] = sa.BIGINT
26+
TYPES_MAP["long_array"] = sa.ARRAY(sa.BIGINT)
27+
TYPES_MAP["real"] = sa.DOUBLE
28+
TYPES_MAP["real_array"] = sa.ARRAY(sa.DOUBLE)
29+
TYPES_MAP["timestamp without time zone"] = sa.TIMESTAMP
30+
TYPES_MAP["timestamp with time zone"] = sa.TIMESTAMP
3031

3132
# TODO: Can `ARRAY` be inherited from PostgreSQL's
3233
# `ARRAY`, to make type checking work?
3334

3435
def as_generic(self, allow_nulltype: bool = False):
35-
return sqltypes.ARRAY
36+
return sa.ARRAY
3637

3738
_ObjectArray.as_generic = as_generic
3839

@@ -58,14 +59,14 @@ def patch_json_encoder():
5859
TODO: Upstream to crate-python.
5960
"""
6061

61-
json_encoder_default = CrateJsonEncoder.default
62+
json_encoder_default = crate.client.http.json_encoder
6263

63-
def default(self, o):
64-
if isinstance(o, Decimal):
65-
return float(o)
66-
return json_encoder_default(o)
64+
def json_encoder_new(obj: Any) -> Union[int, str, float]:
65+
if isinstance(obj, Decimal):
66+
return float(obj)
67+
return json_encoder_default(obj)
6768

68-
CrateJsonEncoder.default = default
69+
crate.client.http.json_encoder = json_encoder_new
6970

7071

7172
def polyfill_refresh_after_dml_engine(engine: sa.Engine):

target_cratedb/sqlalchemy/vector.py

Lines changed: 0 additions & 139 deletions
This file was deleted.

target_cratedb/tests/test_standard_target.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
import jsonschema
1010
import pytest
1111
import sqlalchemy as sa
12-
from crate.client.sqlalchemy.types import ObjectTypeImpl
1312
from singer_sdk.exceptions import MissingKeyPropertiesError
1413
from singer_sdk.testing import sync_end_to_end
14+
from sqlalchemy_cratedb.type import FloatVector
15+
from sqlalchemy_cratedb.type.object import ObjectTypeImpl
1516
from target_postgres.tests.samples.aapl.aapl import Fundamentals
1617
from target_postgres.tests.samples.sample_tap_countries.countries_tap import (
1718
SampleTapCountries,
@@ -21,7 +22,6 @@
2122
from target_cratedb.connector import CrateDBConnector
2223
from target_cratedb.sinks import MELTANO_CRATEDB_STRATEGY_DIRECT
2324
from target_cratedb.sqlalchemy.patch import polyfill_refresh_after_dml_engine
24-
from target_cratedb.sqlalchemy.vector import FloatVector
2525
from target_cratedb.target import TargetCrateDB
2626

2727
try:

0 commit comments

Comments
 (0)