Skip to content

Commit b8224ff

Browse files
author
Biel Llobera
committed
feat: added better connection support in catalog
1 parent 8071909 commit b8224ff

3 files changed

Lines changed: 393 additions & 3 deletions

File tree

schemas/catalog.schema.json

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@
1111
{
1212
"additionalProperties": true,
1313
"type": "object"
14+
},
15+
{
16+
"$ref": "#/$defs/LocalFsConnectionSchema"
17+
},
18+
{
19+
"$ref": "#/$defs/GoogleCloudPlatformConnectionSchema"
1420
}
1521
],
1622
"description": "Airflow connection ID or dictionary representing a connection",
@@ -189,6 +195,12 @@
189195
{
190196
"additionalProperties": true,
191197
"type": "object"
198+
},
199+
{
200+
"$ref": "#/$defs/LocalFsConnectionSchema"
201+
},
202+
{
203+
"$ref": "#/$defs/GoogleCloudPlatformConnectionSchema"
192204
}
193205
],
194206
"description": "Airflow connection ID or dictionary representing a connection",
@@ -336,6 +348,12 @@
336348
{
337349
"additionalProperties": true,
338350
"type": "object"
351+
},
352+
{
353+
"$ref": "#/$defs/LocalFsConnectionSchema"
354+
},
355+
{
356+
"$ref": "#/$defs/GoogleCloudPlatformConnectionSchema"
339357
}
340358
],
341359
"description": "Airflow connection ID or dictionary representing a connection",
@@ -480,6 +498,224 @@
480498
"title": "ExcelSaveOptions",
481499
"type": "object"
482500
},
501+
"GoogleCloudPlatformConnectionExtraSchema": {
502+
"description": "Extra options for a Google Cloud Storage\u2013style connection.\n\nExactly one of the key sources may be provided, depending on how\ncredentials are supplied.\n\nAttributes:\n keyfile_dict: Service account credentials as a mapping (already-parsed\n JSON). Useful when credentials are injected as structured data.\n keyfile: Service account credentials as a raw JSON string.\n key_path: Filesystem path to a service account key file accessible at\n runtime.",
503+
"properties": {
504+
"keyfile_dict": {
505+
"anyOf": [
506+
{
507+
"additionalProperties": true,
508+
"type": "object"
509+
},
510+
{
511+
"type": "null"
512+
}
513+
],
514+
"default": null,
515+
"description": "Service account credentials as a mapping (parsed JSON). Use when credentials are injected as structured data.",
516+
"title": "Keyfile Dict"
517+
},
518+
"keyfile": {
519+
"anyOf": [
520+
{
521+
"type": "string"
522+
},
523+
{
524+
"type": "null"
525+
}
526+
],
527+
"default": null,
528+
"description": "Service account credentials as a raw JSON string. Mutually exclusive with 'keyfile_dict' and 'key_path'.",
529+
"title": "Keyfile"
530+
},
531+
"key_path": {
532+
"anyOf": [
533+
{
534+
"type": "string"
535+
},
536+
{
537+
"type": "null"
538+
}
539+
],
540+
"default": null,
541+
"description": "Path to a service account key file on the local or worker filesystem. Mutually exclusive with 'keyfile_dict' and 'keyfile'.",
542+
"title": "Key Path"
543+
}
544+
},
545+
"title": "GoogleCloudPlatformConnectionExtraSchema",
546+
"type": "object"
547+
},
548+
"GoogleCloudPlatformConnectionSchema": {
549+
"description": "Google Cloud Storage\u2013style connection.\n\nDescribes access to objects stored in a GCS-compatible bucket. Credentials\ncan be provided via a parsed dictionary, raw JSON, or a file path.\n\nAttributes:\n conn_type: Constant discriminator for this connection type\n (``\"google_cloud_platform\"``).\n extra: Provider-specific options, including credential sources.",
550+
"properties": {
551+
"host": {
552+
"anyOf": [
553+
{
554+
"type": "string"
555+
},
556+
{
557+
"type": "null"
558+
}
559+
],
560+
"default": null,
561+
"description": "Network host or endpoint (e.g., domain name or IP). Leave unset if not applicable.",
562+
"title": "Host"
563+
},
564+
"schema": {
565+
"anyOf": [
566+
{
567+
"type": "string"
568+
},
569+
{
570+
"type": "null"
571+
}
572+
],
573+
"default": null,
574+
"description": "Logical schema, namespace, or protocol segment associated with the connection (e.g., database name, URI scheme).",
575+
"title": "Schema"
576+
},
577+
"login": {
578+
"anyOf": [
579+
{
580+
"type": "string"
581+
},
582+
{
583+
"type": "null"
584+
}
585+
],
586+
"default": null,
587+
"description": "Username or identity used for authentication.",
588+
"title": "Login"
589+
},
590+
"password": {
591+
"anyOf": [
592+
{
593+
"type": "string"
594+
},
595+
{
596+
"type": "null"
597+
}
598+
],
599+
"default": null,
600+
"description": "Secret or token used for authentication (keep secure).",
601+
"title": "Password"
602+
},
603+
"conn_type": {
604+
"const": "google_cloud_platform",
605+
"default": "google_cloud_platform",
606+
"description": "Connection type discriminator. Always \"google_cloud_platform\" for this provider.",
607+
"title": "Conn Type",
608+
"type": "string"
609+
},
610+
"extra": {
611+
"anyOf": [
612+
{
613+
"$ref": "#/$defs/GoogleCloudPlatformConnectionExtraSchema"
614+
},
615+
{
616+
"type": "null"
617+
}
618+
],
619+
"default": null,
620+
"description": "Type-specific options including credential configuration."
621+
}
622+
},
623+
"title": "GoogleCloudPlatformConnectionSchema",
624+
"type": "object"
625+
},
626+
"LocalFsConnectionExtraSchema": {
627+
"description": "Extra options for a local filesystem connection.\n\nAttributes:\n base_path: Absolute base path on the local filesystem used to resolve\n relative dataset or resource paths.",
628+
"properties": {
629+
"base_path": {
630+
"description": "Absolute base path on the local filesystem used to resolve relative dataset or resource paths.",
631+
"title": "Base Path",
632+
"type": "string"
633+
}
634+
},
635+
"required": [
636+
"base_path"
637+
],
638+
"title": "LocalFsConnectionExtraSchema",
639+
"type": "object"
640+
},
641+
"LocalFsConnectionSchema": {
642+
"description": "Local filesystem connection.\n\nDescribes access to data stored on the same machine or mounted volumes.\n\nAttributes:\n conn_type: Constant discriminator for this connection type (``\"fs\"``).\n extra: Local filesystem\u2013specific configuration (e.g., base path).",
643+
"properties": {
644+
"host": {
645+
"anyOf": [
646+
{
647+
"type": "string"
648+
},
649+
{
650+
"type": "null"
651+
}
652+
],
653+
"default": null,
654+
"description": "Network host or endpoint (e.g., domain name or IP). Leave unset if not applicable.",
655+
"title": "Host"
656+
},
657+
"schema": {
658+
"anyOf": [
659+
{
660+
"type": "string"
661+
},
662+
{
663+
"type": "null"
664+
}
665+
],
666+
"default": null,
667+
"description": "Logical schema, namespace, or protocol segment associated with the connection (e.g., database name, URI scheme).",
668+
"title": "Schema"
669+
},
670+
"login": {
671+
"anyOf": [
672+
{
673+
"type": "string"
674+
},
675+
{
676+
"type": "null"
677+
}
678+
],
679+
"default": null,
680+
"description": "Username or identity used for authentication.",
681+
"title": "Login"
682+
},
683+
"password": {
684+
"anyOf": [
685+
{
686+
"type": "string"
687+
},
688+
{
689+
"type": "null"
690+
}
691+
],
692+
"default": null,
693+
"description": "Secret or token used for authentication (keep secure).",
694+
"title": "Password"
695+
},
696+
"conn_type": {
697+
"const": "fs",
698+
"default": "fs",
699+
"description": "Connection type discriminator. Always \"fs\" for local filesystem.",
700+
"title": "Conn Type",
701+
"type": "string"
702+
},
703+
"extra": {
704+
"anyOf": [
705+
{
706+
"$ref": "#/$defs/LocalFsConnectionExtraSchema"
707+
},
708+
{
709+
"type": "null"
710+
}
711+
],
712+
"default": null,
713+
"description": "Type-specific options for local filesystem access."
714+
}
715+
},
716+
"title": "LocalFsConnectionSchema",
717+
"type": "object"
718+
},
483719
"ParquetEntry": {
484720
"description": "Catalog entry describing a Parquet dataset.\n\nAttributes:\n file_format: Literal string identifying the file format: `'parquet'`.\n load_options: Optional configuration controlling Parquet *reading*\n behavior (see :class:`ParquetLoadOptions`).\n save_options: Optional configuration controlling Parquet *writing*\n behavior (see :class:`ParquetSaveOptions`).",
485721
"properties": {
@@ -491,6 +727,12 @@
491727
{
492728
"additionalProperties": true,
493729
"type": "object"
730+
},
731+
{
732+
"$ref": "#/$defs/LocalFsConnectionSchema"
733+
},
734+
{
735+
"$ref": "#/$defs/GoogleCloudPlatformConnectionSchema"
494736
}
495737
],
496738
"description": "Airflow connection ID or dictionary representing a connection",

src/smallcat/catalog.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import yaml
1414
from pydantic import BaseModel, Field
1515

16-
from smallcat.connections import ConnectionProtocol
16+
from smallcat.connections import ConnectionProtocol, SupportedConnectionSchemas
1717
from smallcat.datasets.base_dataset import BaseDataset
1818
from smallcat.datasets.csv_dataset import CSVDataset, CSVLoadOptions, CSVSaveOptions
1919
from smallcat.datasets.delta_table_dataset import (
@@ -44,7 +44,7 @@ class EntryBase(BaseModel, ABC):
4444
the connection configuration (e.g., credentials, file path, etc.).
4545
"""
4646

47-
connection: str | dict = Field(
47+
connection: str | dict | SupportedConnectionSchemas = Field(
4848
...,
4949
description="Airflow connection ID or dictionary representing a connection",
5050
)
@@ -68,6 +68,8 @@ def get_connection(self) -> dict | ConnectionProtocol:
6868
except ImportError:
6969
from airflow.hooks.base import BaseHook # type: ignore[attr-defined,no-redef] # noqa: I001
7070
return BaseHook.get_connection(conn_id=self.connection)
71+
if isinstance(self.connection, SupportedConnectionSchemas):
72+
return self.connection.model_dump()
7173
return self.connection
7274

7375
@abstractmethod

0 commit comments

Comments
 (0)