Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion bmsdna/lakeapi/context/df_duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from bmsdna.lakeapi.core.log import get_logger
import multiprocessing
from .source_uri import SourceUri
import csv


logger = get_logger(__name__)
Expand Down Expand Up @@ -154,7 +155,19 @@ async def write_nd_json(self, file_name: str):

async def write_csv(self, file_name: str, *, separator: str):
if not ENABLE_COPY_TO:
return await super().write_csv(file_name, separator=separator)
query = get_sql(self.original_sql, dialect="duckdb")
await run_in_threadpool(self.con.execute, query)
assert self.con.description is not None
col_names = [d[0] for d in self.con.description]
with open(file_name, "w", newline="", encoding="utf-8") as csvfile:
writer = csv.DictWriter(
csvfile, fieldnames=col_names, delimiter=separator
)
writer.writeheader()
while chunk := self.con.fetchmany(self.chunk_size):
for row in chunk:
writer.writerow(dict(zip(col_names, row)))
return
query = get_sql(self.original_sql, dialect="duckdb")
uuidstr = _get_temp_table_name()
full_query = f"""CREATE TEMP VIEW {uuidstr} AS {query};
Expand Down
24 changes: 18 additions & 6 deletions bmsdna/lakeapi/utils/meta_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
DeltaTableMeta,
duckdb_apply_storage_options,
)
from deltalake2db.duckdb import apply_storage_options_fsspec
from deltalake2db.azure_helper import get_account_name_from_path
from typing import Optional


Expand All @@ -28,12 +30,22 @@ def get_deltalake_meta(use_polars: bool, uri: SourceUri):
ab_uri, ab_opts = uri.get_uri_options(flavor="original")

if not uri.is_local():
duckdb_apply_storage_options(
_global_duck_con,
ab_uri,
ab_opts,
use_fsspec=os.getenv("DUCKDB_DELTA_USE_FSSPEC", "0") == "1",
)
if os.getenv("DUCKDB_DELTA_USE_FSSPEC", "0") == "1" and "://" in ab_uri:
account_name_path = get_account_name_from_path(ab_uri)
fake_protocol = apply_storage_options_fsspec(
_global_duck_con,
ab_uri,
ab_opts or {},
account_name_path=account_name_path,
)
ab_uri = fake_protocol + "://" + ab_uri.split("://")[1]
else:
duckdb_apply_storage_options(
_global_duck_con,
ab_uri,
ab_opts,
use_fsspec=os.getenv("DUCKDB_DELTA_USE_FSSPEC", "0") == "1",
)
meta_engine = DuckDBMetaEngine(_global_duck_con)

if mt := _cached_meta.get(uri):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "bmsdna-lakeapi"
version = "0.27.0"
version = "0.27.1"
description = ""
authors = [{ name = "DWH Team", email = "[email protected]" }]
dependencies = [
Expand Down
8 changes: 4 additions & 4 deletions tests/test_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def call_api_1(engine, format):

duration = end - start
print(f"Engine {engine} took {duration} seconds with format {format}")
assert duration < 1.0
assert duration < 1.5

def call_api_2(engine, format):
start = time.time()
Expand All @@ -37,7 +37,7 @@ def call_api_2(engine, format):

duration = end - start
print(f"Engine {engine} took {duration} seconds with format {format}")
assert duration < 1.0
assert duration < 1.5

def call_api_3(engine, format):
start = time.time()
Expand All @@ -49,7 +49,7 @@ def call_api_3(engine, format):

duration = end - start
print(f"Engine {engine} took {duration} seconds with format {format}")
assert duration < 1.0
assert duration < 1.5

def call_api_4(engine, format):
start = time.time()
Expand All @@ -61,7 +61,7 @@ def call_api_4(engine, format):

duration = end - start
print(f"Engine {engine} took {duration} seconds with format {format}")
assert duration < 1.0
assert duration < 1.5

tasks = []
for _ in range(100):
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading