diff --git a/.github/workflows/build_linux_arm64_wheels-gh.yml b/.github/workflows/build_linux_arm64_wheels-gh.yml index bdc8b7a5ca5..d4e8b7ca4b2 100644 --- a/.github/workflows/build_linux_arm64_wheels-gh.yml +++ b/.github/workflows/build_linux_arm64_wheels-gh.yml @@ -158,6 +158,11 @@ jobs: sudo rm -f dist/*linux_aarch64.whl ls -lh dist shell: bash + - name: Setup core dump collection + run: | + mkdir -p tmp/core + echo "tmp/core/core.%p" | sudo tee /proc/sys/kernel/core_pattern + ulimit -c unlimited - name: Test wheel on all Python versions run: | export PATH="$HOME/.pyenv/bin:$PATH" @@ -171,6 +176,25 @@ jobs: pyenv shell --unset done continue-on-error: false + - name: Check and upload core files if present + if: always() + run: | + if ls tmp/core/core.* >/dev/null 2>&1; then + echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV + tar -czvf core-files-linux-aarch64.tar.gz tmp/core/core.* + echo "Core files tar created: core-files-linux-aarch64.tar.gz" + ls -lh core-files-linux-aarch64.tar.gz + else + echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV + echo "No core files found in tmp/core" + fi + continue-on-error: true + - name: Upload core files if present + if: always() && env.CORE_FILES_FOUND == 'true' + uses: actions/upload-artifact@v4 + with: + name: core-files-linux-aarch64 + path: core-files-linux-aarch64.tar.gz - name: Upload wheels to release if: startsWith(github.ref, 'refs/tags/v') run: | diff --git a/.github/workflows/build_linux_x86_wheels.yml b/.github/workflows/build_linux_x86_wheels.yml index a3d799d3679..8a7ba35f8d2 100644 --- a/.github/workflows/build_linux_x86_wheels.yml +++ b/.github/workflows/build_linux_x86_wheels.yml @@ -157,6 +157,11 @@ jobs: sudo rm -f dist/*-linux_x86_64.whl ls -lh dist shell: bash + - name: Setup core dump collection + run: | + mkdir -p tmp/core + echo "tmp/core/core.%p" | sudo tee /proc/sys/kernel/core_pattern + ulimit -c unlimited - name: Test wheel on all Python versions run: | export PATH="$HOME/.pyenv/bin:$PATH" @@ -170,6 +175,25 @@ jobs: pyenv shell --unset done continue-on-error: false + - name: Check and upload core files if present + if: always() + run: | + if ls tmp/core/core.* >/dev/null 2>&1; then + echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV + tar -czvf core-files-linux-x86_64.tar.gz tmp/core/core.* + echo "Core files tar created: core-files-linux-x86_64.tar.gz" + ls -lh core-files-linux-x86_64.tar.gz + else + echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV + echo "No core files found in tmp/core" + fi + continue-on-error: true + - name: Upload core files artifact + if: always() && env.CORE_FILES_FOUND == 'true' + uses: actions/upload-artifact@v4 + with: + name: core-files-linux-x86_64 + path: core-files-linux-x86_64.tar.gz - name: Upload wheels to release if: startsWith(github.ref, 'refs/tags/v') run: | diff --git a/.github/workflows/build_macos_arm64_wheels.yml b/.github/workflows/build_macos_arm64_wheels.yml index f1cfbcb5222..13e70935898 100644 --- a/.github/workflows/build_macos_arm64_wheels.yml +++ b/.github/workflows/build_macos_arm64_wheels.yml @@ -157,6 +157,12 @@ jobs: eval "$(pyenv init -)" pyenv shell 3.8 python -m wheel tags --platform-tag=macosx_11_0_arm64 --remove dist/*.whl + - name: Setup core dump + run: | + mkdir -p tmp/core + sudo sysctl kern.corefile=$PWD/tmp/core/core.%P + sudo sysctl kern.coredump=1 + ulimit -c unlimited - name: Test wheel on all Python versions run: | export PATH="$HOME/.pyenv/bin:$PATH" @@ -170,6 +176,25 @@ jobs: pyenv shell --unset done continue-on-error: false + - name: Check and upload core files if present + if: always() + run: | + if ls tmp/core/core.* >/dev/null 2>&1; then + echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV + tar -czvf core-files-macos-arm64.tar.gz tmp/core/core.* + echo "Core files tar created: core-files-macos-arm64.tar.gz" + ls -lh core-files-macos-arm64.tar.gz + else + echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV + echo "No core files found in tmp/core" + fi + continue-on-error: true + - name: Upload core files artifact + if: always() && env.CORE_FILES_FOUND == 'true' + uses: actions/upload-artifact@v4 + with: + name: core-files-macos-arm64 + path: core-files-macos-arm64.tar.gz - name: Show files run: ls -lh dist shell: bash diff --git a/.github/workflows/build_macos_x86_wheels.yml b/.github/workflows/build_macos_x86_wheels.yml index d1376a44228..bda715ea6a3 100644 --- a/.github/workflows/build_macos_x86_wheels.yml +++ b/.github/workflows/build_macos_x86_wheels.yml @@ -157,6 +157,12 @@ jobs: eval "$(pyenv init -)" pyenv shell 3.8 python -m wheel tags --platform-tag=macosx_10_15_x86_64 --remove dist/*.whl + - name: Setup core dump collection + run: | + mkdir -p tmp/core + sudo sysctl kern.corefile=$PWD/tmp/core/core.%P + sudo sysctl kern.coredump=1 + ulimit -c unlimited - name: Test wheel on all Python versions run: | export PATH="$HOME/.pyenv/bin:$PATH" @@ -170,6 +176,25 @@ jobs: pyenv shell --unset done continue-on-error: false + - name: Check and upload core files if present + if: always() + run: | + if ls tmp/core/core.* >/dev/null 2>&1; then + echo "CORE_FILES_FOUND=true" >> $GITHUB_ENV + tar -czvf core-files-macos-x86_64.tar.gz tmp/core/core.* + echo "Core files tar created: core-files-macos-x86_64.tar.gz" + ls -lh core-files-macos-x86_64.tar.gz + else + echo "CORE_FILES_FOUND=false" >> $GITHUB_ENV + echo "No core files found in tmp/core" + fi + continue-on-error: true + - name: Upload core files artifact + if: always() && env.CORE_FILES_FOUND == 'true' + uses: actions/upload-artifact@v4 + with: + name: core-files-macos-x86_64 + path: core-files-macos-x86_64.tar.gz - name: Show files run: ls -lh dist shell: bash diff --git a/chdb/dbapi/cursors.py b/chdb/dbapi/cursors.py index c2b464ac247..cf54e87eaeb 100644 --- a/chdb/dbapi/cursors.py +++ b/chdb/dbapi/cursors.py @@ -6,7 +6,7 @@ # You can use it to load large dataset. RE_INSERT_VALUES = re.compile( r"\s*((?:INSERT|REPLACE)\b.+\bVALUES?\s*)" - + r"(\(\s*(?:%s|%\(.+\)s)\s*(?:,\s*(?:%s|%\(.+\)s)\s*)*\))" + + r"(\(\s*(?:%s|%\(.+\)s|\?)\s*(?:,\s*(?:%s|%\(.+\)s|\?)\s*)*\))" + r"(\s*(?:ON DUPLICATE.*)?);?\s*\Z", re.IGNORECASE | re.DOTALL, ) @@ -99,6 +99,49 @@ def _escape_args(self, args, conn): # Worst case it will throw a Value error return conn.escape(args) + def _format_query(self, query, args, conn): + """Format query with arguments supporting ? and % placeholders.""" + if args is None or ('?' not in query and '%' not in query): + return query + + escaped_args = self._escape_args(args, conn) + if not isinstance(escaped_args, (tuple, list)): + escaped_args = (escaped_args,) + + result = [] + arg_index = 0 + max_args = len(escaped_args) + i = 0 + query_len = len(query) + in_string = False + quote_char = None + + while i < query_len: + char = query[i] + if not in_string: + if char in ("'", '"'): + in_string = True + quote_char = char + elif arg_index < max_args: + if char == '?': + result.append(str(escaped_args[arg_index])) + arg_index += 1 + i += 1 + continue + elif char == '%' and i + 1 < query_len and query[i + 1] == 's': + result.append(str(escaped_args[arg_index])) + arg_index += 1 + i += 2 + continue + elif char == quote_char and (i == 0 or query[i - 1] != '\\'): + in_string = False + quote_char = None + + result.append(char) + i += 1 + + return ''.join(result) + def mogrify(self, query, args=None): """ Returns the exact string that is sent to the database by calling the @@ -107,11 +150,7 @@ def mogrify(self, query, args=None): This method follows the extension to the DB API 2.0 followed by Psycopg. """ conn = self._get_db() - - if args is not None: - query = query % self._escape_args(args, conn) - - return query + return self._format_query(query, args, conn) def execute(self, query, args=None): """Execute a query @@ -124,12 +163,11 @@ def execute(self, query, args=None): :return: Number of affected rows :rtype: int - If args is a list or tuple, %s can be used as a placeholder in the query. + If args is a list or tuple, ? can be used as a placeholder in the query. If args is a dict, %(name)s can be used as a placeholder in the query. + Also supports %s placeholder for backward compatibility. """ - if args is not None: - query = query % self._escape_args(args, self.connection) - + query = self._format_query(query, args, self.connection) self._cursor.execute(query) # Get description from column names and types @@ -183,32 +221,98 @@ def executemany(self, query, args): self.rowcount = sum(self.execute(query, arg) for arg in args) return self.rowcount + def _find_placeholder_positions(self, query): + positions = [] + i = 0 + query_len = len(query) + in_string = False + quote_char = None + + while i < query_len: + char = query[i] + if not in_string: + if char in ("'", '"'): + in_string = True + quote_char = char + elif char == '?': + positions.append((i, 1)) # (position, length) + elif char == '%' and i + 1 < query_len and query[i + 1] == 's': + positions.append((i, 2)) + i += 1 + elif char == quote_char and (i == 0 or query[i - 1] != '\\'): + in_string = False + quote_char = None + i += 1 + + return positions + def _do_execute_many( self, prefix, values, postfix, args, max_stmt_length, encoding ): conn = self._get_db() - escape = self._escape_args if isinstance(prefix, str): prefix = prefix.encode(encoding) if isinstance(postfix, str): postfix = postfix.encode(encoding) + + # Pre-compute placeholder positions + placeholder_positions = self._find_placeholder_positions(values) + sql = prefix args = iter(args) - v = values % escape(next(args), conn) + + if not placeholder_positions: + values_bytes = values.encode(encoding, "surrogateescape") if isinstance(values, str) else values + sql += values_bytes + rows = 0 + for _ in args: + if len(sql) + len(values_bytes) + len(postfix) + 2 > max_stmt_length: + rows += self.execute(sql + postfix) + sql = prefix + values_bytes + else: + sql += ",".encode(encoding) + sql += values_bytes + rows += self.execute(sql + postfix) + self.rowcount = rows + return rows + + template_parts = [] + last_pos = 0 + for pos, length in placeholder_positions: + template_parts.append(values[last_pos:pos]) + last_pos = pos + length + template_parts.append(values[last_pos:]) + + def format_values_fast(escaped_arg): + if len(escaped_arg) != len(placeholder_positions): + return values + result = template_parts[0] + for i, val in enumerate(escaped_arg): + result += str(val) + template_parts[i + 1] + return result + + def format_values_with_positions(arg): + escaped_arg = self._escape_args(arg, conn) + if not isinstance(escaped_arg, (tuple, list)): + escaped_arg = (escaped_arg,) + return format_values_fast(escaped_arg) + + v = format_values_with_positions(next(args)) if isinstance(v, str): v = v.encode(encoding, "surrogateescape") sql += v rows = 0 + for arg in args: - v = values % escape(arg, conn) + v = format_values_with_positions(arg) if isinstance(v, str): v = v.encode(encoding, "surrogateescape") - if len(sql) + len(v) + len(postfix) + 1 > max_stmt_length: + if len(sql) + len(v) + len(postfix) + 2 > max_stmt_length: # +2 for comma rows += self.execute(sql + postfix) - sql = prefix + sql = prefix + v else: sql += ",".encode(encoding) - sql += v + sql += v rows += self.execute(sql + postfix) self.rowcount = rows return rows diff --git a/tests/test_dbapi_persistence.py b/tests/test_dbapi_persistence.py index 5cb3d270cbb..4dc70ae336f 100644 --- a/tests/test_dbapi_persistence.py +++ b/tests/test_dbapi_persistence.py @@ -35,6 +35,57 @@ def test_persistence(self): row = cur2.fetchone() self.assertEqual(("he", 32), row) + def test_placeholder1(self): + conn = dbapi.connect(path=test_state_dir) + cur = conn.cursor() + + cur.execute("CREATE DATABASE test ENGINE = Atomic;") + cur.execute( + "CREATE TABLE test.users (id UInt64, name String, age UInt32) " + "ENGINE = MergeTree ORDER BY id;" + ) + + cur.execute("INSERT INTO test.users (id, name, age) VALUES (?, ?, ?)", + (1, 'Alice', 25)) + + cur.execute("SELECT name, age FROM test.users WHERE id = ? AND age > ?", + (1, 20)) + row = cur.fetchone() + self.assertEqual(("Alice", 25), row) + + data = [(2, 'Bob', 30), (3, 'Charlie', 35), (4, 'David', 28)] + cur.executemany("INSERT INTO test.users (id, name, age) VALUES (?, ?, ?)", + data) + + cur.execute("SELECT COUNT(*) FROM test.users WHERE id > 1") + count = cur.fetchone()[0] + self.assertEqual(3, count) + cur.execute("SELECT name FROM test.users WHERE age = ? ORDER BY id", (30,)) + result = cur.fetchone() + self.assertEqual(("Bob",), result) + cur.close() + conn.close() + + def test_placeholder2(self): + conn = dbapi.connect(path=test_state_dir) + cur = conn.cursor() + + # Create table + cur.execute("CREATE DATABASE compat ENGINE = Atomic;") + cur.execute( + "CREATE TABLE compat.test (id UInt64, value String) " + "ENGINE = MergeTree ORDER BY id;" + ) + cur.execute("INSERT INTO compat.test (id, value) VALUES (%s, %s)", + (1, 'test_value')) + + cur.execute("SELECT value FROM compat.test") + result = cur.fetchone() + self.assertEqual(("test_value",), result) + + cur.close() + conn.close() + if __name__ == "__main__": unittest.main()