Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion exaudfclient/.bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ build:valgrind --copt -O1
build:valgrind -c dbg
build:valgrind --copt -g
build:valgrind --strip=never
build:valgrind --copt -DVALGRIND_ACTIVE
build:valgrind --copt -DVALGRIND_ACTIVE
build:swig-string-as-buffer --copt='-DSWIG_STRING_AS_BYTES_ENABLED=1' --action_env=SWIG_STRING_AS_BYTES_ENABLED=1
3 changes: 2 additions & 1 deletion exaudfclient/base/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
package(default_visibility = ["//visibility:public"])
exports_files(["filter_swig_code.py", "build_integrated.py",
"create_binary_wrapper.sh", "create_binary_wrapper_valgrind.sh",
"create_binary_wrapper_valgrind_massif.sh", "exaudfclient.template.sh"])
"create_binary_wrapper_valgrind_massif.sh", "exaudfclient.template.sh",
"add_encoding_decoding.py"])

load("//:variables.bzl", "VM_ENABLED_DEFINES")

Expand Down
29 changes: 29 additions & 0 deletions exaudfclient/base/add_encoding_decoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os
import sys
from pathlib import Path

# Generated code for builds where strings cross the SWIG boundary as bytes:
# decodeUTF8 turns bytes into str, encodeUTF8 turns str into bytes, and any
# other value is passed through unchanged.
CONVERT_TO_BYTES = """
decodeUTF8 = lambda x: x.decode(encoding='utf-8') if isinstance(x, bytes) else x
encodeUTF8 = lambda x: x.encode(encoding='utf-8') if isinstance(x, str) else x
"""

# Generated code for the default build: both helpers return their argument
# untouched.
IDENTITY = """
decodeUTF8 = lambda x: x
encodeUTF8 = lambda x: x
"""


def add_encoding_decoding(target):
    """Write the ``decodeUTF8``/``encodeUTF8`` helper definitions to *target*.

    The converting variant (``CONVERT_TO_BYTES``) is written when the
    environment variable ``SWIG_STRING_AS_BYTES_ENABLED`` is present; only its
    presence is checked, its value is not inspected. Otherwise the no-op
    variant (``IDENTITY``) is written.

    Args:
        target: Path of the file to create or overwrite (written as UTF-8).
    """
    bytes_mode = "SWIG_STRING_AS_BYTES_ENABLED" in os.environ
    snippet = CONVERT_TO_BYTES if bytes_mode else IDENTITY

    with open(target, "w", encoding="utf-8") as out_file:
        out_file.write(snippet)


if __name__ == "__main__":
    # At least one positional argument is required: the path of the file to
    # generate. Any further arguments are ignored.
    cli_args = sys.argv[1:]
    if not cli_args:
        print('Usage: add_encoding_decoding.py target')
        sys.exit(1)
    add_encoding_decoding(cli_args[0])
1 change: 1 addition & 0 deletions exaudfclient/base/exaudflib/swig/swig_common.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef SWIG_COMMON_H
#define SWIG_COMMON_H

#include <cstdint>
#include <vector>
#include <string>
#include <cstdint>
Expand Down
12 changes: 5 additions & 7 deletions exaudfclient/base/python/exascript_python_preset_core.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
import sys
unicode = str
decodeUTF8 = lambda x: x
encodeUTF8 = lambda x: x

from exascript_python import *
import decimal
import datetime
import imp
import types

class ExaUDFError(Exception):
    """Exception type declared by the Python UDF preset script.

    It adds no attributes or behaviour on top of ``Exception``.
    """

def clean_stacktrace_line(line):
import re
match = re.match("""^\s+File "(.+)", line ([0-9]+), in (.+)$""",line)
match = re.match(r"""^\s+File "(.+)", line ([0-9]+), in (.+)$""",line)
if match is not None:
filename=match.group(1)
lineno=match.group(2)
Expand Down Expand Up @@ -83,14 +81,14 @@ def ci(x, tbl):
colprec = self.__meta.inputColumnPrecision(x)
colscale = self.__meta.inputColumnScale(x)
colsize = self.__meta.inputColumnSize(x)
coltn = self.__meta.inputColumnTypeName(x)
coltn = decodeUTF8(self.__meta.inputColumnTypeName(x))
elif tbl == 'output':
colname = decodeUTF8(self.__meta.outputColumnName(x))
coltype = self.__meta.outputColumnType(x)
colprec = self.__meta.outputColumnPrecision(x)
colscale = self.__meta.outputColumnScale(x)
colsize = self.__meta.outputColumnSize(x)
coltn = self.__meta.outputColumnTypeName(x)
coltn = decodeUTF8(self.__meta.outputColumnTypeName(x))
class exacolumn:
def __init__(self, cn, ct, st, cp, cs, l):
self.name = cn
Expand Down Expand Up @@ -126,7 +124,7 @@ def import_script(self, script):
modobj = self.__modules[str(code)]
else:
print("%%% new code", modname, repr(code), code in self.__modules)
modobj = imp.new_module(modname)
modobj = types.ModuleType(modname)
modobj.__file__ = "<%s>" % modname
modobj.__dict__['exa'] = self
self.__modules[str(code)] = modobj
Expand Down
22 changes: 10 additions & 12 deletions exaudfclient/base/python/exascript_python_wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
pyextdataframe_pkg = None

unicode = str
decodeUTF8 = lambda x: x
encodeUTF8 = lambda x: x
long = int

if 'LIBPYEXADATAFRAME_DIR' in os.environ:
Expand Down Expand Up @@ -45,10 +43,10 @@ def resget():
return val
return resget
def convert_date(x):
val = datetime.datetime.strptime(x, "%Y-%m-%d")
val = datetime.datetime.strptime(decodeUTF8(x), "%Y-%m-%d")
return datetime.date(val.year, val.month, val.day)
def convert_timestamp(x):
return datetime.datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f")
return datetime.datetime.strptime(decodeUTF8(x), "%Y-%m-%d %H:%M:%S.%f")
self.__incoltypes = []
for col in range(self.__meta.inputColumnCount()):
self.__incoltypes.append(self.__meta.inputColumnType(col))
Expand All @@ -67,8 +65,8 @@ def convert_timestamp(x):
data[colname] = rd(inp.getInt64, inp.wasNull, col)
elif self.__incoltypes[col] == NUMERIC:
if self.__meta.inputColumnScale(col) == 0:
data[colname] = rd(inp.getNumeric, inp.wasNull, col, lambda x: int(str(x)))
else: data[colname] = rd(inp.getNumeric, inp.wasNull, col, lambda x: decimal.Decimal(str(x)))
data[colname] = rd(inp.getNumeric, inp.wasNull, col, lambda x: int(decodeUTF8(x)))
else: data[colname] = rd(inp.getNumeric, inp.wasNull, col, lambda x: decimal.Decimal(decodeUTF8(x)))
elif self.__incoltypes[col] == DATE:
data[colname] = rd(inp.getDate, inp.wasNull, col, convert_date)
elif self.__incoltypes[col] == TIMESTAMP:
Expand Down Expand Up @@ -138,7 +136,7 @@ def emit(self, *output):
elif type(v) in (int, long):
if self.__outcoltypes[k] == INT32: self.__out.setInt32(k, int(v))
elif self.__outcoltypes[k] == INT64: self.__out.setInt64(k, int(v))
elif self.__outcoltypes[k] == NUMERIC: self.__out.setNumeric(k, str(int(v)))
elif self.__outcoltypes[k] == NUMERIC: self.__out.setNumeric(k, encodeUTF8(str(int(v))))
elif self.__outcoltypes[k] == DOUBLE: self.__out.setDouble(k, float(v))
else:
raise RuntimeError(u"E-UDF-CL-SL-PYTHON-1091: emit column '%s' is of type %s but data given have type %s" \
Expand All @@ -147,7 +145,7 @@ def emit(self, *output):
if self.__outcoltypes[k] == DOUBLE: self.__out.setDouble(k, float(v))
elif self.__outcoltypes[k] == INT32: self.__out.setInt32(k, int(v))
elif self.__outcoltypes[k] == INT64: self.__out.setInt64(k, int(v))
elif self.__outcoltypes[k] == NUMERIC: self.__out.setInt64(k, str(v))
elif self.__outcoltypes[k] == NUMERIC: self.__out.setInt64(k, encodeUTF8(str(v)))
else:
raise RuntimeError(u"E-UDF-CL-SL-PYTHON-1092: emit column '%s' is of type %s but data given have type %s" \
% (decodeUTF8(self.__meta.outputColumnName(k)), type_names.get(self.__outcoltypes[k], 'UNKONWN'), str(type(v))))
Expand All @@ -156,15 +154,15 @@ def emit(self, *output):
raise RuntimeError(u"E-UDF-CL-SL-PYTHON-1093: emit column '%s' is of type %s but data given have type %s" \
% (decodeUTF8(self.__meta.outputColumnName(k)), type_names.get(self.__outcoltypes[k], 'UNKONWN'), str(type(v))))
self.__out.setBoolean(k, bool(v))
elif type(v) in (str, unicode):
elif type(v) in (str, unicode, bytes):
v = encodeUTF8(v)
vl = len(v)
if self.__outcoltypes[k] != STRING:
raise RuntimeError(u"E-UDF-CL-SL-PYTHON-1094: emit column '%s' is of type %s but data given have type %s" \
% (decodeUTF8(self.__meta.outputColumnName(k)), type_names.get(self.__outcoltypes[k], 'UNKONWN'), str(type(v))))
self.__out.setString(k, v, vl)
elif type(v) == decimal.Decimal:
if self.__outcoltypes[k] == NUMERIC: self.__out.setNumeric(k, str(v))
if self.__outcoltypes[k] == NUMERIC: self.__out.setNumeric(k, encodeUTF8(str(v)))
elif self.__outcoltypes[k] == INT32: self.__out.setInt32(k, int(v))
elif self.__outcoltypes[k] == INT64: self.__out.setInt64(k, int(v))
elif self.__outcoltypes[k] == DOUBLE: self.__out.setDouble(k, float(v))
Expand All @@ -175,12 +173,12 @@ def emit(self, *output):
if self.__outcoltypes[k] != DATE:
raise RuntimeError("E-UDF-CL-SL-PYTHON-1096: emit column '%s' is of type %s but data given have type %s" \
% (decodeUTF8(self.__meta.outputColumnName(k)), type_names.get(self.__outcoltypes[k], 'UNKONWN'), str(type(v))))
self.__out.setDate(k, v.isoformat())
self.__out.setDate(k, encodeUTF8(v.isoformat()))
elif type(v) == datetime.datetime:
if self.__outcoltypes[k] != TIMESTAMP:
raise RuntimeError("E-UDF-CL-SL-PYTHON-1097: emit column '%s' is of type %s but data given have type %s" \
% (decodeUTF8(self.__meta.outputColumnName(k)), type_names.get(self.__outcoltypes[k], 'UNKONWN'), str(type(v))))
self.__out.setTimestamp(k, v.isoformat(' '))
self.__out.setTimestamp(k, encodeUTF8(v.isoformat(' ')))
else: raise RuntimeError("E-UDF-CL-SL-PYTHON-1098: data type %s is not supported" % str(type(v)))
msg = self.__out.checkException()
if msg: raise RuntimeError("F-UDF-CL-SL-PYTHON-1099: "+msg)
Expand Down
15 changes: 12 additions & 3 deletions exaudfclient/base/python/python3/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ genrule(
swig -v $$INCLUDES -O -DEXTERNAL_PROCESS -Wall -c++ -python -py3 -addextern -module exascript_python -o "../$(location exascript_python_tmp.cc)" exaudflib/exascript.i
""",
outs = ["exascript_python_tmp.cc", "exascript_python.py"],
srcs = ["//base/exaudflib:exascript.i","//base/exaudflib:swig/script_data_transfer_objects_wrapper.h"]
srcs = ["//base/exaudflib:exascript.i","//base/exaudflib:swig/script_data_transfer_objects_wrapper.h"],
)

genrule(
Expand All @@ -35,14 +35,23 @@ genrule(
tools = ["//base/python:extend_exascript_python_preset_py.sh"]
)

# Produces exascript_encoding_decoding.py by running add_encoding_decoding.py
# with the output location as its single argument. The rule has no source
# inputs; the script itself is supplied through `tools`.
genrule(
    name = "gen_encoding_decoding",
    srcs = [],
    outs = ["exascript_encoding_decoding.py"],
    cmd = 'python3 "$(location //base:add_encoding_decoding.py)" "$(location exascript_encoding_decoding.py)"',
    tools = ["//base:add_encoding_decoding.py"],
)

genrule(
name = "exascript_python_int",
cmd = """
cp $(SRCS) .
python3 $(location //base:build_integrated.py) "$(location exascript_python_int.h)" "exascript_python.py" "exascript_python_wrap.py" "exascript_python_preset.py"
python3 $(location //base:build_integrated.py) "$(location exascript_python_int.h)" "exascript_python.py" "exascript_python_wrap.py" "exascript_python_preset.py" "exascript_encoding_decoding.py"
""",
outs = ["exascript_python_int.h"],
srcs = [":exascript_python_tmp_cc", "//base/python:exascript_python_wrap.py", ":extend_exascript_python_preset_py"],
srcs = [":exascript_python_tmp_cc", "//base/python:exascript_python_wrap.py",
":extend_exascript_python_preset_py", ":gen_encoding_decoding"],
tools = ["//base:build_integrated.py"]
)

Expand Down
50 changes: 42 additions & 8 deletions exaudfclient/base/python/python3/python_ext_dataframe.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,23 @@ inline void checkPyObjIsNotNull(const PyObject *obj) {
throw std::runtime_error("F-UDF-CL-SL-PYTHON-1142");
}

/**
 * Returns a NEW reference to the object that represents the given Python
 * string on the SWIG side.
 *
 * - With SWIG_STRING_AS_BYTES_ENABLED defined, this is the result of calling
 *   pyString.encode() with no arguments (Python's default encoding for
 *   str.encode is UTF-8).
 * - Otherwise the same object is returned with its reference count increased.
 *
 * @param pyString Borrowed reference; may be NULL when the call that produced
 *                 it failed.
 * @return New reference owned by the caller, or NULL if pyString was NULL or
 *         the encode() call failed (a Python exception is then pending).
 */
inline PyObject* convertPyStringToPyBytes(PyObject* pyString) {
    // Callers pass the raw result of calls such as PyObject_Str(), which is
    // NULL on failure. Propagate the failure instead of dereferencing NULL.
    if (pyString == NULL)
        return NULL;
#ifdef SWIG_STRING_AS_BYTES_ENABLED
    return PyObject_CallMethod(pyString, "encode", NULL);
#else
    Py_INCREF(pyString);
    return pyString;
#endif
}

/**
 * Counterpart of convertPyStringToPyBytes: returns a NEW reference to a
 * Python str for a value received from the SWIG side.
 *
 * - With SWIG_STRING_AS_BYTES_ENABLED defined, this is the result of calling
 *   pyBytes.decode() with no arguments (Python's default encoding for
 *   bytes.decode is UTF-8).
 * - Otherwise the same object is returned with its reference count increased.
 *
 * @param pyBytes Borrowed reference; may be NULL when the call that produced
 *                it failed.
 * @return New reference owned by the caller, or NULL if pyBytes was NULL or
 *         the decode() call failed (a Python exception is then pending).
 */
inline PyObject* convertPyBytesToPyString(PyObject* pyBytes) {
    // A failed method call yields NULL rather than Py_None, so a caller that
    // only rules out Py_None can still hand NULL in here. Propagate it
    // instead of dereferencing NULL.
    if (pyBytes == NULL)
        return NULL;
#ifdef SWIG_STRING_AS_BYTES_ENABLED
    return PyObject_CallMethod(pyBytes, "decode", NULL);
#else
    Py_INCREF(pyBytes);
    return pyBytes;
#endif
}


struct ColumnInfo
Expand Down Expand Up @@ -923,8 +940,11 @@ inline void handleEmitPyInt(
break;
}
case SWIGVMContainers::NUMERIC:
pyValue.reset(PyObject_Str(pyInt));
{
PyPtr pyStrInt(PyObject_Str(pyInt));
pyValue.reset(convertPyStringToPyBytes(pyStrInt.get()));
break;
}
case SWIGVMContainers::DOUBLE:
{
double value = PyFloat_AsDouble(pyInt);
Expand Down Expand Up @@ -977,8 +997,11 @@ inline void handleEmitPyFloat(
break;
}
case SWIGVMContainers::NUMERIC:
pyValue.reset(PyObject_Str(pyFloat));
{
PyPtr pyStrFloat(PyObject_Str(pyFloat));
pyValue.reset(convertPyStringToPyBytes(pyStrFloat.get()));
break;
}
case SWIGVMContainers::DOUBLE:
{
//pyFloat points to a 'borrowed' reference. We need to explicitly increase the ref counter here, as pyValue will decrease it again later.
Expand Down Expand Up @@ -1020,15 +1043,21 @@ inline void handleEmitPyDecimal(
case SWIGVMContainers::INT64:
case SWIGVMContainers::INT32:
{
std::cout << "handleEmitPyDecimal: INT" << std::endl;
PyPtr pyInt(PyObject_CallMethodObjArgs(pyDecimal, pyIntMethodName.get(), NULL));
pyValue.reset(pyInt.release());
break;
}
case SWIGVMContainers::NUMERIC:
pyValue.reset(PyObject_Str(pyDecimal));
{
std::cout << "handleEmitPyDecimal: NUMERIC" << std::endl;
PyPtr pyStrDecimal(PyObject_Str(pyDecimal));
pyValue.reset(convertPyStringToPyBytes(pyStrDecimal.get()));
break;
}
case SWIGVMContainers::DOUBLE:
{
std::cout << "handleEmitPyDecimal: DOUBLE" << std::endl;
PyPtr pyFloat(PyObject_CallMethodObjArgs(pyDecimal, pyFloatMethodName.get(), NULL));
pyValue.reset(pyFloat.release());
break;
Expand Down Expand Up @@ -1067,11 +1096,12 @@ inline void handleEmitPyStr(
case SWIGVMContainers::STRING:
{
Py_ssize_t size = -1;
PyPtr bytesString(convertPyStringToPyBytes(pyString));
const char *str = PyUnicode_AsUTF8AndSize(pyString, &size);
if (!str && size < 0)
throw std::runtime_error("F-UDF-CL-SL-PYTHON-1137: invalid size of string");
PyPtr pySize(PyLong_FromSsize_t(size));
pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), pyString, pySize.get(), NULL));
pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), bytesString.get(), pySize.get(), NULL));
break;
}
default:
Expand Down Expand Up @@ -1105,7 +1135,8 @@ inline void handleEmitPyDate(
case SWIGVMContainers::DATE:
{
PyPtr pyIsoDate(PyObject_CallMethodObjArgs(pyDate, pyIsoformatMethodName.get(), NULL));
pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), pyIsoDate.get(), NULL));
PyPtr bytesIsoDate(convertPyStringToPyBytes(pyIsoDate.get()));
pyResult.reset(PyObject_CallMethodObjArgs(resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), bytesIsoDate.get(), NULL));
break;
}
default:
Expand Down Expand Up @@ -1141,8 +1172,9 @@ inline void handleEmitPyTimestamp(
// it to the generated string.
PyPtr pyTzLocalize(PyObject_CallMethod(pyTimestamp, "tz_localize", "z", NULL));
PyPtr pyIsoDatetime(PyObject_CallMethod(pyTzLocalize.get(), "isoformat", "s", " "));
PyPtr bytesDateTime(convertPyStringToPyBytes(pyIsoDatetime.get()));
pyResult.reset(PyObject_CallMethodObjArgs(
resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), pyIsoDatetime.get(), NULL));
resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), bytesDateTime.get(), NULL));
break;
}
default:
Expand Down Expand Up @@ -1177,8 +1209,9 @@ inline void handleEmitNpyDateTime(
case SWIGVMContainers::TIMESTAMP:
{
PyPtr pyIsoDatetime(PyObject_CallMethod(pyTimestamp, "isoformat", "s", " "));
PyPtr bytesDateTime(convertPyStringToPyBytes(pyIsoDatetime.get()));
pyResult.reset(PyObject_CallMethodObjArgs(
resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), pyIsoDatetime.get(), NULL));
resultHandler, pyColSetMethods[c].second.get(), pyColSetMethods[c].first.get(), bytesDateTime.get(), NULL));
break;
}
default:
Expand Down Expand Up @@ -1377,7 +1410,8 @@ void emit(PyObject *resultHandler, std::vector<ColumnInfo>& colInfo, PyObject *d

PyPtr pyCheckException(PyObject_CallMethodObjArgs(resultHandler, pyCheckExceptionMethodName.get(), NULL));
if (pyCheckException.get() != Py_None) {
const char *exMsg = PyUnicode_AsUTF8(pyCheckException.get());
PyPtr exceptionAsPyBytes(convertPyBytesToPyString(pyCheckException.get()));
const char *exMsg = PyUnicode_AsUTF8(exceptionAsPyBytes.get());
if (exMsg) {
std::stringstream ss;
ss << "F-UDF-CL-SL-PYTHON-1075: emit(): " << exMsg;
Expand Down
17 changes: 12 additions & 5 deletions exaudfclient/base/python/pythoncontainer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -194,16 +194,22 @@ PythonVMImpl::PythonVMImpl(bool checkOnly): m_checkOnly(checkOnly)
check("F-UDF-CL-SL-PYTHON-1010");
if (exatable == NULL) throw PythonVM::exception("F-UDF-CL-SL-PYTHON-1011: Failed to import code module");

code = Py_CompileString(integrated_exascript_encoding_decoding_py, "<EXASCRIPTDECENC>", Py_file_input); check("F-UDF-CL-SL-PYTHON-1143");
if (code == NULL) {check("F-UDF-CL-SL-PYTHON-1144");}

PyEval_EvalCode(code, globals, globals); check("F-UDF-CL-SL-PYTHON-1145");
Py_DECREF(code);

code = Py_CompileString(integrated_exascript_python_preset_py, "<EXASCRIPTPP>", Py_file_input); check("F-UDF-CL-SL-PYTHON-1012");
if (code == NULL) {check("F-UDF-CL-SL-PYTHON-1013");}

PyEval_EvalCode(code, globals, globals); check("F-UDF-CL-SL-PYTHON-1014");
PyEval_EvalCode(code, globals, globals); check("F-UDF-CL-SL-PYTHON-1014");
Py_DECREF(code);

PyObject *runobj = PyDict_GetItemString(globals, "__pythonvm_wrapped_parse"); check("F-UDF-CL-SL-PYTHON-1016");
//PyObject *retvalue = PyObject_CallFunction(runobj, NULL); check();
PyObject *retvalue = PyObject_CallFunctionObjArgs(runobj, globals, NULL); check("F-UDF-CL-SL-PYTHON-1017");
Py_XDECREF(retvalue); retvalue = NULL;
PyObject *runobj = PyDict_GetItemString(globals, "__pythonvm_wrapped_parse"); check("F-UDF-CL-SL-PYTHON-1016");
//PyObject *retvalue = PyObject_CallFunction(runobj, NULL); check();
PyObject *retvalue = PyObject_CallFunctionObjArgs(runobj, globals, NULL); check("F-UDF-CL-SL-PYTHON-1017");
Py_XDECREF(retvalue); retvalue = NULL;

code = Py_CompileString(integrated_exascript_python_wrap_py, "<EXASCRIPT>", Py_file_input); check("F-UDF-CL-SL-PYTHON-1018");
if (code == NULL) throw PythonVM::exception("Failed to compile wrapping script");
Expand Down Expand Up @@ -489,6 +495,7 @@ const char* PythonVMImpl::singleCall(single_call_function_id_e fn, const Executi
PyObject* p3str = PyUnicode_AsEncodedString(repr, "utf-8", "ignore");
const char *bytes = PyBytes_AS_STRING(p3str);
singleCallResult = string(bytes);
std::cout << "pythoncontainer: singleCallResult='" << singleCallResult << "'" << std::endl;
Py_XDECREF(retvalue); retvalue = NULL;
return singleCallResult.c_str();
}
Loading
Loading