Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/workflows/build_linux_arm64_wheels-gh.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,11 @@ jobs:
sudo rm -f dist/*linux_aarch64.whl
ls -lh dist
shell: bash
- name: Setup core dump collection
run: |
mkdir -p tmp/core
echo "tmp/core/core.%p" | sudo tee /proc/sys/kernel/core_pattern
ulimit -c unlimited
- name: Test wheel on all Python versions
run: |
export PATH="$HOME/.pyenv/bin:$PATH"
Expand All @@ -171,6 +176,25 @@ jobs:
pyenv shell --unset
done
continue-on-error: false
- name: Check and upload core files if present
if: always()
run: |
if ls tmp/core/core.* >/dev/null 2>&1; then
echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV
tar -czvf core-files-linux-aarch64.tar.gz tmp/core/core.*
echo "Core files tar created: core-files-linux-aarch64.tar.gz"
ls -lh core-files-linux-aarch64.tar.gz
else
echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV
echo "No core files found in tmp/core"
fi
continue-on-error: true
- name: Upload core files if present
if: always() && env.CORE_FILES_FOUND == 'true'
uses: actions/upload-artifact@v4
with:
name: core-files-linux-aarch64
path: core-files-linux-aarch64.tar.gz
- name: Upload wheels to release
if: startsWith(github.ref, 'refs/tags/v')
run: |
Expand Down
24 changes: 24 additions & 0 deletions .github/workflows/build_linux_x86_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ jobs:
sudo rm -f dist/*-linux_x86_64.whl
ls -lh dist
shell: bash
- name: Setup core dump collection
run: |
mkdir -p tmp/core
echo "tmp/core/core.%p" | sudo tee /proc/sys/kernel/core_pattern
ulimit -c unlimited
- name: Test wheel on all Python versions
run: |
export PATH="$HOME/.pyenv/bin:$PATH"
Expand All @@ -170,6 +175,25 @@ jobs:
pyenv shell --unset
done
continue-on-error: false
- name: Check and upload core files if present
if: always()
run: |
if ls tmp/core/core.* >/dev/null 2>&1; then
echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV
tar -czvf core-files-linux-x86_64.tar.gz tmp/core/core.*
echo "Core files tar created: core-files-linux-x86_64.tar.gz"
ls -lh core-files-linux-x86_64.tar.gz
else
echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV
echo "No core files found in tmp/core"
fi
continue-on-error: true
- name: Upload core files artifact
if: always() && env.CORE_FILES_FOUND == 'true'
uses: actions/upload-artifact@v4
with:
name: core-files-linux-x86_64
path: core-files-linux-x86_64.tar.gz
- name: Upload wheels to release
if: startsWith(github.ref, 'refs/tags/v')
run: |
Expand Down
25 changes: 25 additions & 0 deletions .github/workflows/build_macos_arm64_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,12 @@ jobs:
eval "$(pyenv init -)"
pyenv shell 3.8
python -m wheel tags --platform-tag=macosx_11_0_arm64 --remove dist/*.whl
- name: Setup core dump
run: |
mkdir -p tmp/core
sudo sysctl kern.corefile=$PWD/tmp/core/core.%P
sudo sysctl kern.coredump=1
ulimit -c unlimited
- name: Test wheel on all Python versions
run: |
export PATH="$HOME/.pyenv/bin:$PATH"
Expand All @@ -170,6 +176,25 @@ jobs:
pyenv shell --unset
done
continue-on-error: false
- name: Check and upload core files if present
if: always()
run: |
if ls tmp/core/core.* >/dev/null 2>&1; then
echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV
tar -czvf core-files-macos-arm64.tar.gz tmp/core/core.*
echo "Core files tar created: core-files-macos-arm64.tar.gz"
ls -lh core-files-macos-arm64.tar.gz
else
echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV
echo "No core files found in tmp/core"
fi
continue-on-error: true
- name: Upload core files artifact
if: always() && env.CORE_FILES_FOUND == 'true'
uses: actions/upload-artifact@v4
with:
name: core-files-macos-arm64
path: core-files-macos-arm64.tar.gz
- name: Show files
run: ls -lh dist
shell: bash
Expand Down
25 changes: 25 additions & 0 deletions .github/workflows/build_macos_x86_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,12 @@ jobs:
eval "$(pyenv init -)"
pyenv shell 3.8
python -m wheel tags --platform-tag=macosx_10_15_x86_64 --remove dist/*.whl
- name: Setup core dump collection
run: |
mkdir -p tmp/core
sudo sysctl kern.corefile=$PWD/tmp/core/core.%P
sudo sysctl kern.coredump=1
ulimit -c unlimited
- name: Test wheel on all Python versions
run: |
export PATH="$HOME/.pyenv/bin:$PATH"
Expand All @@ -170,6 +176,25 @@ jobs:
pyenv shell --unset
done
continue-on-error: false
- name: Check and upload core files if present
if: always()
run: |
if ls tmp/core/core.* >/dev/null 2>&1; then
echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV
tar -czvf core-files-macos-x86_64.tar.gz tmp/core/core.*
echo "Core files tar created: core-files-macos-x86_64.tar.gz"
ls -lh core-files-macos-x86_64.tar.gz
else
echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV
echo "No core files found in tmp/core"
fi
continue-on-error: true
- name: Upload core files artifact
if: always() && env.CORE_FILES_FOUND == 'true'
uses: actions/upload-artifact@v4
with:
name: core-files-macos-x86_64
path: core-files-macos-x86_64.tar.gz
- name: Show files
run: ls -lh dist
shell: bash
Expand Down
136 changes: 120 additions & 16 deletions chdb/dbapi/cursors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# You can use it to load large dataset.
RE_INSERT_VALUES = re.compile(
r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)"
+ r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))"
+ r"(\(\s*(?:%s|%\(.+\)s|\?)\s*(?:,\s*(?:%s|%\(.+\)s|\?)\s*)*\))"
+ r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z",
re.IGNORECASE | re.DOTALL,
)
Expand Down Expand Up @@ -99,6 +99,49 @@ def _escape_args(self, args, conn):
# Worst case it will throw a Value error
return conn.escape(args)

def _format_query(self, query, args, conn):
"""Format query with arguments supporting ? and % placeholders."""
if args is None or ('?' not in query and '%' not in query):
return query

escaped_args = self._escape_args(args, conn)
if not isinstance(escaped_args, (tuple, list)):
escaped_args = (escaped_args,)

result = []
arg_index = 0
max_args = len(escaped_args)
i = 0
query_len = len(query)
in_string = False
quote_char = None

while i < query_len:
char = query[i]
if not in_string:
if char in ("'", '"'):
in_string = True
quote_char = char
elif arg_index < max_args:
if char == '?':
result.append(str(escaped_args[arg_index]))
arg_index += 1
i += 1
continue
elif char == '%' and i + 1 < query_len and query[i + 1] == 's':
result.append(str(escaped_args[arg_index]))
arg_index += 1
i += 2
continue
elif char == quote_char and (i == 0 or query[i - 1] != '\\'):
in_string = False
quote_char = None

result.append(char)
i += 1

return ''.join(result)

def mogrify(self, query, args=None):
"""
Returns the exact string that is sent to the database by calling the
Expand All @@ -107,11 +150,7 @@ def mogrify(self, query, args=None):
This method follows the extension to the DB API 2.0 followed by Psycopg.
"""
conn = self._get_db()

if args is not None:
query = query % self._escape_args(args, conn)

return query
return self._format_query(query, args, conn)

def execute(self, query, args=None):
"""Execute a query
Expand All @@ -124,12 +163,11 @@ def execute(self, query, args=None):
:return: Number of affected rows
:rtype: int

If args is a list or tuple, %s can be used as a placeholder in the query.
If args is a list or tuple, ? can be used as a placeholder in the query.
If args is a dict, %(name)s can be used as a placeholder in the query.
Also supports %s placeholder for backward compatibility.
"""
if args is not None:
query = query % self._escape_args(args, self.connection)

query = self._format_query(query, args, self.connection)
self._cursor.execute(query)

# Get description from column names and types
Expand Down Expand Up @@ -183,32 +221,98 @@ def executemany(self, query, args):
self.rowcount = sum(self.execute(query, arg) for arg in args)
return self.rowcount

def _find_placeholder_positions(self, query):
positions = []
i = 0
query_len = len(query)
in_string = False
quote_char = None

while i < query_len:
char = query[i]
if not in_string:
if char in ("'", '"'):
in_string = True
quote_char = char
elif char == '?':
positions.append((i, 1)) # (position, length)
elif char == '%' and i + 1 < query_len and query[i + 1] == 's':
positions.append((i, 2))
i += 1
elif char == quote_char and (i == 0 or query[i - 1] != '\\'):
in_string = False
quote_char = None
i += 1

return positions

def _do_execute_many(
self, prefix, values, postfix, args, max_stmt_length, encoding
):
conn = self._get_db()
escape = self._escape_args
if isinstance(prefix, str):
prefix = prefix.encode(encoding)
if isinstance(postfix, str):
postfix = postfix.encode(encoding)

# Pre-compute placeholder positions
placeholder_positions = self._find_placeholder_positions(values)

sql = prefix
args = iter(args)
v = values % escape(next(args), conn)

if not placeholder_positions:
values_bytes = values.encode(encoding, "surrogateescape") if isinstance(values, str) else values
sql += values_bytes
rows = 0
for _ in args:
if len(sql) + len(values_bytes) + len(postfix) + 2 > max_stmt_length:
rows += self.execute(sql + postfix)
sql = prefix + values_bytes
else:
sql += ",".encode(encoding)
sql += values_bytes
rows += self.execute(sql + postfix)
self.rowcount = rows
return rows

template_parts = []
last_pos = 0
for pos, length in placeholder_positions:
template_parts.append(values[last_pos:pos])
last_pos = pos + length
template_parts.append(values[last_pos:])

def format_values_fast(escaped_arg):
if len(escaped_arg) != len(placeholder_positions):
return values
result = template_parts[0]
for i, val in enumerate(escaped_arg):
result += str(val) + template_parts[i + 1]
return result

def format_values_with_positions(arg):
escaped_arg = self._escape_args(arg, conn)
if not isinstance(escaped_arg, (tuple, list)):
escaped_arg = (escaped_arg,)
return format_values_fast(escaped_arg)

v = format_values_with_positions(next(args))
if isinstance(v, str):
v = v.encode(encoding, "surrogateescape")
sql += v
rows = 0

for arg in args:
v = values % escape(arg, conn)
v = format_values_with_positions(arg)
if isinstance(v, str):
v = v.encode(encoding, "surrogateescape")
if len(sql) + len(v) + len(postfix) + 1 > max_stmt_length:
if len(sql) + len(v) + len(postfix) + 2 > max_stmt_length: # +2 for comma
rows += self.execute(sql + postfix)
sql = prefix
sql = prefix + v
else:
sql += ",".encode(encoding)
sql += v
sql += v
rows += self.execute(sql + postfix)
self.rowcount = rows
return rows
Expand Down
Loading
Loading