Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bmsdna/lakeapi/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class BasicConfig:
token_retrieval_func: "Optional[Callable[[SourceUri, str], TokenCredential]]"
should_hide_col_name: Callable[[str], bool]
default_copy_local: bool = False
max_route_init_time: int = 200 # seconds


def _should_hide_colname(name: str):
Expand All @@ -79,6 +80,7 @@ def get_default_config():
schema_cache_ttl=5 * 60, # 5 minutes
token_retrieval_func=None,
should_hide_col_name=_should_hide_colname,
max_route_init_time=200,
)


Expand Down
110 changes: 60 additions & 50 deletions bmsdna/lakeapi/core/route.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from time import time
from typing import Literal, Tuple, cast

from fastapi import APIRouter
Expand All @@ -24,6 +25,7 @@ def init_routes(configs: Configs, basic_config: BasicConfig):
all_lake_api_routers.append((basic_config, configs))
router = APIRouter()
metadata = []
now_time = time()
with ExecutionContextManager(
basic_config.default_engine,
basic_config.default_chunk_size,
Expand All @@ -34,59 +36,67 @@ def init_routes(configs: Configs, basic_config: BasicConfig):
if isinstance(config.api_method, str)
else config.api_method
)
try:
from bmsdna.lakeapi.core.datasource import Datasource

assert config.datasource is not None
with Datasource(
config.version_str,
config.tag,
config.name,
config=config.datasource,
sql_context=mgr.get_context(config.engine),
basic_config=basic_config,
accounts=configs.accounts,
) as realdataframe:
try:
schema = get_schema_cached(
basic_config,
realdataframe,
config.datasource.get_unique_hash(),
)
if schema is None:
logger.warning(
f"Could not get response type for {config.route}. Path does not exist:{realdataframe}"
)
except Exception as err:
logger.warning(
f"Could not get schema for {config.route}. Error:{err}",
exc_info=err,
)
schema = None

metadata.append(
{
"name": config.name,
"tag": config.tag,
"route": config.route,
"methods": methods,
"file_type": config.datasource.file_type,
"uri": config.datasource.uri,
"version": config.version,
"schema": {n: str(schema.field(n).type) for n in schema.names}
if schema
else None,
}
)

except Exception as err:
import traceback

print(traceback.format_exc())
if time() - now_time > basic_config.max_route_init_time:
logger.warning(
f"Could not get response type for {config.route}. Error:{err}"
f"Route initialization time exceeded {basic_config.max_route_init_time}s. Skipping schema retrieval for remaining routes."
)
schema = None
else:
try:
from bmsdna.lakeapi.core.datasource import Datasource

assert config.datasource is not None
with Datasource(
config.version_str,
config.tag,
config.name,
config=config.datasource,
sql_context=mgr.get_context(config.engine),
basic_config=basic_config,
accounts=configs.accounts,
) as realdataframe:
try:
schema = get_schema_cached(
basic_config,
realdataframe,
config.datasource.get_unique_hash(),
)
if schema is None:
logger.warning(
f"Could not get response type for {config.route}. Path does not exist:{realdataframe}"
)
except Exception as err:
logger.warning(
f"Could not get schema for {config.route}. Error:{err}",
exc_info=err,
)
schema = None

metadata.append(
{
"name": config.name,
"tag": config.tag,
"route": config.route,
"methods": methods,
"file_type": config.datasource.file_type,
"uri": config.datasource.uri,
"version": config.version,
"schema": {
n: str(schema.field(n).type) for n in schema.names
}
if schema
else None,
}
)

except Exception as err:
import traceback

print(traceback.format_exc())
logger.warning(
    f"Could not get response type for {config.route}. Error:{err}"
)
schema = None

response_model = (
get_response_model(
Expand Down
8 changes: 5 additions & 3 deletions bmsdna/lakeapi/utils/meta_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
DeltaTableMeta,
duckdb_apply_storage_options,
)
from deltalake2db.duckdb import apply_storage_options_fsspec
from deltalake2db.azure_helper import get_account_name_from_path
from typing import Optional


Expand All @@ -18,11 +16,15 @@
_global_duck_con: Optional[duckdb.DuckDBPyConnection] = None
_global_duck_meta_engine: Optional[DuckDBMetaEngine] = None

DELTA_META_ENGINE = os.getenv("DELTA_META_ENGINE")


def get_deltalake_meta(use_polars: bool, uri: SourceUri):
global _global_duck_con
global _global_duck_meta_engine
if use_polars:
if (
use_polars and not DELTA_META_ENGINE == "duckdb"
) or DELTA_META_ENGINE == "polars":
ab_uri, ab_opts = uri.get_uri_options(flavor="object_store")

meta_engine = PolarsMetaEngine()
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "bmsdna-lakeapi"
version = "0.27.3"
version = "0.27.4"
description = ""
authors = [{ name = "DWH Team", email = "[email protected]" }]
dependencies = [
Expand Down
5 changes: 5 additions & 0 deletions tests/test_app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from fastapi.testclient import TestClient
import sys
import polars as pl
Expand Down Expand Up @@ -468,6 +469,8 @@ def test_fake_arrow(client: TestClient):


def test_all_metadata(client: TestClient):
no_sql = os.getenv("NO_SQL_SERVER", "0") == "1"

response = client.get("/metadata")
assert response.status_code == 200
jsd = response.json()
Expand All @@ -476,6 +479,8 @@ def test_all_metadata(client: TestClient):
name = item["name"]
tag = item["tag"]
route = item["route"]
if tag == "mssql" and no_sql:
continue
meta_detail_route = route + f"/metadata_detail?%24engine={e}"
print(meta_detail_route)
response = client.get(meta_detail_route)
Expand Down
8 changes: 8 additions & 0 deletions tests/test_mssql.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
from fastapi.testclient import TestClient
import pytest
import os

no_sql = os.getenv("NO_SQL_SERVER", "0") == "1"


@pytest.mark.skipif(no_sql, reason="No SQL Server available")
def test_simple_department(client: TestClient):
response = client.get(
"/api/v1/mssql/mssql_department?format=json&limit=50",
Expand All @@ -10,6 +15,7 @@ def test_simple_department(client: TestClient):
assert len(departments) == 16


@pytest.mark.skipif(no_sql, reason="No SQL Server available")
def test_filter_group_name(client: TestClient):
response = client.get(
"/api/v1/mssql/mssql_department?format=json&limit=100&GroupName=Research%20and%20Development",
Expand All @@ -19,6 +25,7 @@ def test_filter_group_name(client: TestClient):
assert len(tables) == 3


@pytest.mark.skipif(no_sql, reason="No SQL Server available")
def test_filter_offset(client: TestClient):
response = client.get(
"/api/v1/mssql/mssql_department?format=json&limit=100&offset=10",
Expand All @@ -28,6 +35,7 @@ def test_filter_offset(client: TestClient):
assert len(tables) == 6


@pytest.mark.skipif(no_sql, reason="No SQL Server available")
def test_metadata_detail(client: TestClient):
response = client.get(
"/api/v1/mssql/mssql_department/metadata_detail",
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading