Skip to content

Commit fca3494

Browse files
committed
Add to pandas example (#5)
1 parent b19d519 commit fca3494

File tree

3 files changed

+31
-13
lines changed

3 files changed

+31
-13
lines changed

Diff for: examples/to_pandas.py

+31
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
#!python3
2+
import os
3+
import pyarrow as pa
4+
import chdb
5+
6+
# Resolve the sample parquet file relative to the directory of this script.
# The path is built with "/" because it is embedded in a SQL string below.
current_dir = os.path.dirname(os.path.abspath(__file__))
test_parquet = current_dir + "/../contrib/arrow/cpp/submodules/parquet-testing/data/alltypes_dictionary.parquet"

# Query the parquet file through chdb, asking for the "Arrow" output format.
sql = f"select * from file('{test_parquet}', Parquet)"
res = chdb.query(sql, "Arrow")

# Dump the raw bytes of the result buffer.
print("\nresult from chdb:")
print(res.get_memview().tobytes())
14+
15+
def to_arrowTable(res):
    """Convert an Arrow-format chdb query result into an arrow table.

    ``res`` must expose ``get_memview()``; the returned buffer is handed to
    ``pa.RecordBatchFileReader`` and everything it contains is read at once.
    """
    reader = pa.RecordBatchFileReader(res.get_memview())
    return reader.read_all()
19+
20+
def to_df(res):
    """Convert an Arrow-format chdb query result into a pandas dataframe.

    The result is first turned into an arrow table via ``to_arrowTable`` and
    then converted with ``to_pandas(use_threads=True)``.
    """
    return to_arrowTable(res).to_pandas(use_threads=True)
25+
26+
# Show the query result after reading it into an arrow table.
print("\nresult from chdb to pyarrow:")
print(to_arrowTable(res))

# Show the same query result after converting it to a pandas dataframe.
print("\nresult from chdb to pandas:")
print(to_df(res))

Diff for: pybind/libtest.py

-8
This file was deleted.

Diff for: pybind/readarrow.py

-5
This file was deleted.

0 commit comments

Comments
 (0)