diff --git a/autowrap/ConversionProvider.py b/autowrap/ConversionProvider.py index 3d35115..1c7522a 100644 --- a/autowrap/ConversionProvider.py +++ b/autowrap/ConversionProvider.py @@ -134,6 +134,22 @@ def output_conversion( ) -> Optional[Union[Code, str]]: raise NotImplementedError() + def supports_delegation(self) -> bool: + """ + Return True if this converter should be invoked when the type + is an element inside a container (e.g., std::vector). + + By default, Cython handles conversion of standard types inside + containers automatically. When this returns True, container + converters will delegate to this converter instead. + + Note: Delegation currently only works for single-level nesting. + For example, vector where T supports delegation will work, + but vector> will NOT invoke T's converter - Cython + handles the conversion automatically in nested cases. + """ + return False + @staticmethod def _code_for_instantiate_object_from_iter(cpp_type: CppType, it: str) -> str: """ @@ -1198,6 +1214,62 @@ def output_conversion(self, cpp_type: CppType, input_cpp_var: str, output_py_var ) return code else: + # Check for delegating converters for key and/or value + key_has_delegation = self._has_delegating_converter(tt_key) + value_has_delegation = self._has_delegating_converter(tt_value) + + if key_has_delegation or value_has_delegation: + item_key = mangle("itemk_" + output_py_var) + item_val = mangle("itemv_" + output_py_var) + + # Build key conversion + if key_has_delegation: + key_converter = self.cr.get(tt_key) + elem_out = key_converter.output_conversion( + tt_key, "(deref(%s)).first" % it, item_key + ) + if elem_out is None: + key_out_code = "%s = (deref(%s)).first" % (item_key, it) + elif hasattr(elem_out, "render"): + key_out_code = elem_out.render() + else: + key_out_code = str(elem_out) + key_expr = item_key + else: + key_out_code = "" + key_expr = key_conv + + # Build value conversion + if value_has_delegation: + value_converter = self.cr.get(tt_value) + elem_out = value_converter.output_conversion( + tt_value, "(deref(%s)).second" % it, item_val + ) + if elem_out is None: + value_out_code = "%s = (deref(%s)).second" % (item_val, it) + elif hasattr(elem_out, "render"): + value_out_code = elem_out.render() + else: + value_out_code = str(elem_out) + value_expr = item_val + else: + value_out_code = "" + value_expr = "<%s>(deref(%s).second)" % (cy_tt_value, it) + + code = Code().add( + """ + |$output_py_var = dict() + |cdef libcpp_map[$cy_tt_key, $cy_tt_value].iterator $it = $input_cpp_var.begin() + |while $it != $input_cpp_var.end(): + | $key_out_code + | $value_out_code + | $output_py_var[$key_expr] = $value_expr + | inc($it) + """, + locals(), + ) + return code + value_conv = "<%s>(deref(%s).second)" % (cy_tt_value, it) code = Code().add( """ @@ -1211,6 +1283,16 @@ def output_conversion(self, cpp_type: CppType, input_cpp_var: str, output_py_var ) return code + def _has_delegating_converter(self, element_type: CppType) -> bool: + """Check if element type has a converter that supports delegation.""" + if not hasattr(self, "cr"): + return False + try: + converter = self.cr.get(element_type) + return converter.supports_delegation() + except (NameError, KeyError): + return False + class StdSetConverter(TypeConverterBase): def get_base_types(self) -> List[str]: @@ -1244,6 +1326,16 @@ def type_check_expression(self, cpp_type, arg_var): .render() ) + def _has_delegating_converter(self, element_type: CppType) -> bool: + """Check if element type has a converter that supports delegation.""" + if not hasattr(self, "cr"): + return False + try: + converter = self.cr.get(element_type) + return converter.supports_delegation() + except (NameError, KeyError): + return False + def input_conversion( self, cpp_type: CppType, argument_var: str, arg_num: int ) -> Tuple[Code, str, Union[Code, str]]: @@ -1321,6 +1413,76 @@ def input_conversion( else: cleanup_code = "del %s" % temp_var return code, "deref(%s)" % temp_var, cleanup_code + + elif self._has_delegating_converter(tt): + # Element type has a converter that supports delegation + item = "item%d" % arg_num + element_converter = self.cr.get(tt) + + elem_code, elem_call_as, elem_cleanup = element_converter.input_conversion( + tt, item, arg_num + ) + + code = Code().add( + """ + |cdef libcpp_set[$inner] * $temp_var = new libcpp_set[$inner]() + """, + locals(), + ) + + if hasattr(elem_code, "content") and elem_code.content: + code.add( + """ + |for $item in $argument_var: + """, + locals(), + ) + code.add(elem_code) + code.add( + """ + | $temp_var.insert(<$inner>$item) + """, + locals(), + ) + else: + code.add( + """ + |for $item in $argument_var: + | $temp_var.insert($elem_call_as) + """, + locals(), + ) + + cleanup_code = Code().add("") + if cpp_type.is_ref and not cpp_type.is_const: + conv_item = "conv_item%d" % arg_num + out_converter = self.cr.get(tt) + elem_out = out_converter.output_conversion(tt, "deref(%s)" % it, conv_item) + if elem_out is None: + elem_out_code = "%s = deref(%s)" % (conv_item, it) + elif hasattr(elem_out, "render"): + elem_out_code = elem_out.render() + else: + elem_out_code = str(elem_out) + cleanup_code = Code().add( + """ + |replace = set() + |cdef libcpp_set[$inner].iterator $it = $temp_var.begin() + |while $it != $temp_var.end(): + | $elem_out_code + | replace.add($conv_item) + | inc($it) + |$argument_var.clear() + |$argument_var.update(replace) + |del $temp_var + """, + locals(), + ) + else: + cleanup_code = Code().add("del %s" % temp_var) + + return code, "deref(%s)" % temp_var, cleanup_code + else: inner = self.converters.cython_type(tt) # cython cares for conversion of stl containers with std types: @@ -1385,6 +1547,34 @@ def output_conversion(self, cpp_type: CppType, input_cpp_var: str, output_py_var locals(), ) return code + + elif self._has_delegating_converter(tt): + # Element type has a converter that supports delegation + it = mangle("it_" + input_cpp_var) + item = mangle("item_" + output_py_var) + element_converter = self.cr.get(tt) + + elem_out = element_converter.output_conversion(tt, "deref(%s)" % it, item) + if elem_out is None: + elem_out_code = "%s = deref(%s)" % (item, it) + elif hasattr(elem_out, "render"): + elem_out_code = elem_out.render() + else: + elem_out_code = str(elem_out) + + code = Code().add( + """ + |$output_py_var = set() + |cdef libcpp_set[$inner].iterator $it = $input_cpp_var.begin() + |while $it != $input_cpp_var.end(): + | $elem_out_code + | $output_py_var.add($item) + | inc($it) + """, + locals(), + ) + return code + else: # cython cares for conversion of stl containers with std types: code = Code().add( @@ -1668,6 +1858,16 @@ def _perform_recursion( else: bottommost_code.content.extend(bottommost_code_callback.content) + def _has_delegating_converter(self, element_type: CppType) -> bool: + """Check if element type has a converter that supports delegation.""" + if not hasattr(self, "cr"): + return False + try: + converter = self.cr.get(element_type) + return converter.supports_delegation() + except (NameError, KeyError): + return False + def input_conversion( self, cpp_type: CppType, @@ -1886,8 +2086,81 @@ def input_conversion( return code, "deref(%s)" % temp_var, cleanup_code + elif self._has_delegating_converter(tt): + # Case 5: Element type has a converter that supports delegation + # Use explicit loop with element converter instead of letting Cython handle it + item = "item%s" % arg_num + conv_item = "conv_item%s" % arg_num + element_converter = self.cr.get(tt) + + # Get element input conversion + elem_code, elem_call_as, elem_cleanup = element_converter.input_conversion( + tt, item, arg_num + ) + + code = Code().add( + """ + |cdef libcpp_vector[$inner] * $temp_var = new libcpp_vector[$inner]() + """, + locals(), + ) + + # Add element conversion code (may include variable declarations) + if hasattr(elem_code, "content") and elem_code.content: + # Extract any if-block from elem_code and wrap in loop + code.add( + """ + |for $item in $argument_var: + """, + locals(), + ) + code.add(elem_code) + code.add( + """ + | $temp_var.push_back(<$inner>$item) + """, + locals(), + ) + else: + code.add( + """ + |for $item in $argument_var: + | $temp_var.push_back($elem_call_as) + """, + locals(), + ) + + cleanup_code = Code().add("") + if cpp_type.topmost_is_ref and not cpp_type.topmost_is_const: + it = mangle("it_" + argument_var) + out_converter = self.cr.get(tt) + elem_out = out_converter.output_conversion(tt, "deref(%s)" % it, conv_item) + if elem_out is None: + elem_out_code = "%s = deref(%s)" % (conv_item, it) + elif hasattr(elem_out, "render"): + elem_out_code = elem_out.render() + else: + elem_out_code = str(elem_out) + cleanup_code = Code().add( + """ + |replace = [] + |cdef libcpp_vector[$inner].iterator $it = $temp_var.begin() + |while $it != $temp_var.end(): + | $elem_out_code + | replace.append($conv_item) + | inc($it) + |$argument_var[:] = replace + |del $temp_var + """, + locals(), + ) + else: + cleanup_code = Code().add("del %s" % temp_var) + + return code, "deref(%s)" % temp_var, cleanup_code + else: - # Case 5: We wrap a regular type + # Case 6: We wrap a regular type inner = self.converters.cython_type(tt) # cython cares for conversion of stl containers with std types: code = Code().add( @@ -1982,6 +2255,35 @@ def output_conversion(self, cpp_type: CppType, input_cpp_var: str, output_py_var ) return code + elif self._has_delegating_converter(tt): + # Element type has a converter that supports delegation + # Use explicit loop with element converter + it = mangle("it_" + input_cpp_var) + item = mangle("item_" + output_py_var) + element_converter = self.cr.get(tt) + + # Get element output conversion + elem_out = element_converter.output_conversion(tt, "deref(%s)" % it, item) + if elem_out is None: + elem_out_code = "%s = deref(%s)" % (item, it) + elif hasattr(elem_out, "render"): + elem_out_code = elem_out.render() + else: + elem_out_code = str(elem_out) + + code = Code().add( + """ + |$output_py_var = [] + |cdef libcpp_vector[$inner].iterator $it = $input_cpp_var.begin() + |while $it != $input_cpp_var.end(): + | $elem_out_code + | $output_py_var.append($item) + | inc($it) + """, + locals(), + ) + return code + else: # cython cares for conversion of stl containers with std types: code = Code().add( @@ -2406,16 +2708,21 @@ def output_conversion( class StdStringUnicodeConverter(StdStringConverter): """ This converter deals with functions that expect a C++ std::string. - Note that this provider will NOT be picked up if it is located inside - a container (e.g. std::vector aka libcpp_vector). Please use the usual - StdStringConverter to at least get the typing right. + It can be used inside containers when delegation is enabled. It can only be used in function parameters (i.e. input). It can handle both bytes and unicode strings and converts to bytes internally. + + Note: Delegation only works for single-level containers (e.g., vector). + Nested containers like vector> or map> + are NOT supported - Cython will handle conversion automatically without UTF-8 encoding. """ def get_base_types(self) -> List[str]: return ["libcpp_utf8_string"] + def supports_delegation(self) -> bool: + return True + def matching_python_type(self, cpp_type: CppType) -> str: return "" # TODO can we use "basestring"? @@ -2446,17 +2753,22 @@ def type_check_expression(self, cpp_type: CppType, argument_var: str) -> str: class StdStringUnicodeOutputConverter(StdStringUnicodeConverter): """ This converter deals with functions that return a C++ std::string. - Note that this provider will NOT be picked up if it is located inside - a container (e.g. std::vector aka libcpp_vector). Please use the usual - StdStringConverter to at least get the typing right. + It can be used inside containers when delegation is enabled. It should only be used in function returns (i.e. output). It returns unicode strings to python and therefore expects the C++ function to return something that is decodable from utf8 (including ascii) + + Note: Delegation only works for single-level containers (e.g., vector). + Nested containers like vector> are NOT supported - + Cython will handle conversion automatically without UTF-8 decoding. """ def get_base_types(self) -> List[str]: return ["libcpp_utf8_output_string"] + def supports_delegation(self) -> bool: + return True + def matching_python_type_full(self, cpp_type: CppType) -> str: return "str" # python3 @@ -2590,6 +2902,16 @@ def matching_python_type_full(self, cpp_type: CppType) -> str: inner_conv_2.matching_python_type_full(tt_value), ) + def _has_delegating_converter(self, element_type: CppType) -> bool: + """Check if element type has a converter that supports delegation.""" + if not hasattr(self, "cr"): + return False + try: + converter = self.cr.get(element_type) + return converter.supports_delegation() + except (NameError, KeyError): + return False + def type_check_expression(self, cpp_type, arg_var): tt_key, tt_value = cpp_type.template_args inner_conv_1 = self.converters.get(tt_key) @@ -2628,10 +2950,19 @@ def input_conversion( loop_key = mangle("_loop_key_" + argument_var) loop_value = mangle("_loop_value_" + argument_var) + value_conv_code = None + key_conv_code = None + if cy_tt_value.is_enum: value_conv = "<%s> %s" % (cy_tt_value, loop_value) elif tt_value.base_type in self.converters.names_of_wrapper_classes: value_conv = "deref((<%s>%s).inst.get())" % (tt_value.base_type, loop_value) + elif self._has_delegating_converter(tt_value): + # Delegate to value converter + value_converter = self.cr.get(tt_value) + v_code, v_call_as, v_cleanup = value_converter.input_conversion(tt_value, loop_value, 0) + value_conv_code = v_code + value_conv = "<%s> %s" % (cy_tt_value, loop_value) else: value_conv = "<%s> %s" % (cy_tt_value, loop_value) @@ -2639,6 +2970,12 @@ def input_conversion( key_conv = "<%s> %s" % (cy_tt_key, loop_key) elif tt_key.base_type in self.converters.names_of_wrapper_classes: key_conv = "deref(<%s *> (<%s> %s).inst.get())" % (cy_tt_key, tt_key, loop_key) + elif self._has_delegating_converter(tt_key): + # Delegate to key converter + key_converter = self.cr.get(tt_key) + k_code, k_call_as, k_cleanup = key_converter.input_conversion(tt_key, loop_key, 0) + key_conv_code = k_code + key_conv = "<%s> %s" % (cy_tt_key, loop_key) else: key_conv = "<%s> %s" % (cy_tt_key, loop_key) @@ -2648,6 +2985,15 @@ def input_conversion( + libcpp_unordered_map[$cy_tt_key, $cy_tt_value]() |for $loop_key, $loop_value in $argument_var.items(): + """, + locals(), + ) + if key_conv_code is not None: + code.add(key_conv_code) + if value_conv_code is not None: + code.add(value_conv_code) + code.add( + """ | deref($temp_var)[ $key_conv ] = $value_conv """, locals(), @@ -2838,8 +3184,63 @@ def output_conversion(self, cpp_type: CppType, input_cpp_var: str, output_py_var ) return code - # Neither key nor value is wrapped + # Neither key nor value is wrapped - check for delegating converters else: + key_has_delegation = self._has_delegating_converter(tt_key) + value_has_delegation = self._has_delegating_converter(tt_value) + + if key_has_delegation or value_has_delegation: + item_key = mangle("itemk_" + output_py_var) + item_val = mangle("itemv_" + output_py_var) + + # Build key conversion + if key_has_delegation: + key_converter = self.cr.get(tt_key) + elem_out = key_converter.output_conversion( + tt_key, "(deref(%s)).first" % it, item_key + ) + if elem_out is None: + key_out_code = "%s = (deref(%s)).first" % (item_key, it) + elif hasattr(elem_out, "render"): + key_out_code = elem_out.render() + else: + key_out_code = str(elem_out) + key_expr = item_key + else: + key_out_code = "" + key_expr = "<%s>(deref(%s).first)" % (cy_tt_key, it) + + # Build value conversion + if value_has_delegation: + value_converter = self.cr.get(tt_value) + elem_out = value_converter.output_conversion( + tt_value, "(deref(%s)).second" % it, item_val + ) + if elem_out is None: + value_out_code = "%s = (deref(%s)).second" % (item_val, it) + elif hasattr(elem_out, "render"): + value_out_code = elem_out.render() + else: + value_out_code = str(elem_out) + value_expr = item_val + else: + value_out_code = "" + value_expr = "<%s>(deref(%s).second)" % (cy_tt_value, it) + + code = Code().add( + """ + |$output_py_var = dict() + |cdef libcpp_unordered_map[$cy_tt_key, $cy_tt_value].iterator $it = $input_cpp_var.begin() + |while $it != $input_cpp_var.end(): + | $key_out_code + | $value_out_code + | $output_py_var[$key_expr] = $value_expr + | inc($it) + """, + locals(), + ) + return code + key_conv = "<%s>(deref(%s).first)" % (cy_tt_key, it) value_conv = "<%s>(deref(%s).second)" % (cy_tt_value, it) code = Code().add( @@ -2889,6 +3290,16 @@ def matching_python_type_full(self, cpp_type: CppType) -> str: inner_conv = self.converters.get(tt) return "Set[%s]" % inner_conv.matching_python_type_full(tt) + def _has_delegating_converter(self, element_type: CppType) -> bool: + """Check if element type has a converter that supports delegation.""" + if not hasattr(self, "cr"): + return False + try: + converter = self.cr.get(element_type) + return converter.supports_delegation() + except (NameError, KeyError): + return False + def type_check_expression(self, cpp_type, arg_var): (tt,) = cpp_type.template_args inner_conv = self.converters.get(tt) @@ -2978,6 +3389,76 @@ def input_conversion( else: cleanup_code = "del %s" % temp_var return code, "deref(%s)" % temp_var, cleanup_code + + elif self._has_delegating_converter(tt): + # Element type has a converter that supports delegation + item = "item%d" % arg_num + element_converter = self.cr.get(tt) + + elem_code, elem_call_as, elem_cleanup = element_converter.input_conversion( + tt, item, arg_num + ) + + code = Code().add( + """ + |cdef libcpp_unordered_set[$inner] * $temp_var = new libcpp_unordered_set[$inner]() + """, + locals(), + ) + + if hasattr(elem_code, "content") and elem_code.content: + code.add( + """ + |for $item in $argument_var: + """, + locals(), + ) + code.add(elem_code) + code.add( + """ + | $temp_var.insert(<$inner>$item) + """, + locals(), + ) + else: + code.add( + """ + |for $item in $argument_var: + | $temp_var.insert($elem_call_as) + """, + locals(), + ) + + cleanup_code = Code().add("") + if cpp_type.is_ref and not cpp_type.is_const: + conv_item = "conv_item%d" % arg_num + out_converter = self.cr.get(tt) + elem_out = out_converter.output_conversion(tt, "deref(%s)" % it, conv_item) + if elem_out is None: + elem_out_code = "%s = deref(%s)" % (conv_item, it) + elif hasattr(elem_out, "render"): + elem_out_code = elem_out.render() + else: + elem_out_code = str(elem_out) + cleanup_code = Code().add( + """ + |replace = set() + |cdef libcpp_unordered_set[$inner].iterator $it = $temp_var.begin() + |while $it != $temp_var.end(): + | $elem_out_code + | replace.add($conv_item) + | inc($it) + |$argument_var.clear() + |$argument_var.update(replace) + |del $temp_var + """, + locals(), + ) + else: + cleanup_code = Code().add("del %s" % temp_var) + + return code, "deref(%s)" % temp_var, cleanup_code + else: # Primitive types - need explicit iteration item = "item%d" % arg_num @@ -3046,6 +3527,33 @@ def output_conversion(self, cpp_type: CppType, input_cpp_var: str, output_py_var locals(), ) return code + + elif self._has_delegating_converter(tt): + # Element type has a converter that supports delegation + item = mangle("item_" + output_py_var) + element_converter = self.cr.get(tt) + + elem_out = element_converter.output_conversion(tt, "deref(%s)" % it, item) + if elem_out is None: + elem_out_code = "%s = deref(%s)" % (item, it) + elif hasattr(elem_out, "render"): + elem_out_code = elem_out.render() + else: + elem_out_code = str(elem_out) + + code = Code().add( + """ + |$output_py_var = set() + |cdef libcpp_unordered_set[$inner].iterator $it = $input_cpp_var.begin() + |while $it != $input_cpp_var.end(): + | $elem_out_code + | $output_py_var.add($item) + | inc($it) + """, + locals(), + ) + return code + else: # Primitive types - need explicit iteration code = Code().add( diff --git a/tests/test_code_generator.py b/tests/test_code_generator.py index bfcf4bd..4d6eed7 100644 --- a/tests/test_code_generator.py +++ b/tests/test_code_generator.py @@ -398,6 +398,142 @@ def test_automatic_output_string_conversion(): assert msg == expected +def test_utf8_string_vector_conversion(): + """Test that UTF-8 strings in vectors are properly converted via delegation.""" + target = os.path.join(test_files, "generated", "libcpp_utf8_string_vector_test.pyx") + include_dirs = autowrap.parse_and_generate_code( + ["libcpp_utf8_string_vector_test.pxd"], + root=test_files, + target=target, + debug=True, + ) + + wrapped = autowrap.Utils.compile_and_import( + "libcpp_utf8_string_vector_wrapped", + [ + target, + ], + include_dirs, + ) + h = wrapped.Utf8VectorTest() + + # Test output conversion - vector of UTF-8 strings should become list of str + greetings = h.get_greetings() + assert isinstance(greetings, list) + assert len(greetings) == 4 + # All elements should be unicode strings (str in Python 3) + for s in greetings: + assert isinstance(s, str), f"Expected str, got {type(s)}" + assert "Hello" in greetings + assert "World" in greetings + assert "Привет" in greetings # Russian + assert "你好" in greetings # Chinese + + # Test input conversion - list of str/bytes should be accepted + input_strings = ["Test", "Тест", "测试"] # ASCII, Russian, Chinese + result = h.echo(input_strings) + assert isinstance(result, list) + assert len(result) == 3 + for s in result: + assert isinstance(s, str), f"Expected str, got {type(s)}" + assert result == input_strings + + # Test with bytes input + input_bytes = [b"Hello", b"World"] + result = h.echo(input_bytes) + assert isinstance(result, list) + assert len(result) == 2 + + # Test count function (input only) + count = h.count_strings(["a", "b", "c"]) + assert count == 3 + + +def test_utf8_containers_delegation(): + """Test UTF-8 delegation works for all container types: vector, set, map, unordered variants.""" + target = os.path.join(test_files, "generated", "libcpp_utf8_containers_test.pyx") + include_dirs = autowrap.parse_and_generate_code( + ["libcpp_utf8_containers_test.pxd"], + root=test_files, + target=target, + debug=True, + ) + + wrapped = autowrap.Utils.compile_and_import( + "libcpp_utf8_containers_wrapped", + [target], + include_dirs, + ) + h = wrapped.Utf8ContainersTest() + + # === Vector tests === + vec = h.get_vector() + assert isinstance(vec, list) + assert all(isinstance(s, str) for s in vec), "Vector elements should be str" + assert "Hello" in vec + assert "Привет" in vec # Russian + assert "你好" in vec # Chinese + + vec_echo = h.echo_vector(["Test", "Тест", "测试"]) + assert vec_echo == ["Test", "Тест", "测试"] + + # === Set tests === + s = h.get_set() + assert isinstance(s, set) + assert all(isinstance(elem, str) for elem in s), "Set elements should be str" + assert "Alpha" in s + assert "Бета" in s # Russian + assert "伽马" in s # Chinese + + s_echo = h.echo_set({"One", "Один", "一"}) + assert s_echo == {"One", "Один", "一"} + + # === Map tests (UTF-8 keys and values) === + m = h.get_map() + assert isinstance(m, dict) + assert all(isinstance(k, str) for k in m.keys()), "Map keys should be str" + assert all(isinstance(v, str) for v in m.values()), "Map values should be str" + assert m["greeting"] == "Привет" + assert m["farewell"] == "再见" + assert m["thanks"] == "شكرا" # Arabic + + m_echo = h.echo_map({"key1": "Значение", "key2": "值"}) + assert m_echo["key1"] == "Значение" + assert m_echo["key2"] == "值" + + # === Map with UTF-8 keys === + m_keys = h.get_map_utf8_keys() + assert isinstance(m_keys, dict) + assert all(isinstance(k, str) for k in m_keys.keys()), "Map keys should be str" + assert m_keys["один"] == 1 # Russian + assert m_keys["二"] == 2 # Chinese + assert m_keys["três"] == 3 # Portuguese + + # === Unordered set tests === + us = h.get_unordered_set() + assert isinstance(us, set) + assert all(isinstance(elem, str) for elem in us), "Unordered set elements should be str" + assert "Set1" in us + assert "Набор2" in us # Russian + assert "集合3" in us # Chinese + + us_echo = h.echo_unordered_set({"A", "Б", "丙"}) + assert us_echo == {"A", "Б", "丙"} + + # === Unordered map tests (both keys and values as UTF-8) === + um = h.get_unordered_map() + assert isinstance(um, dict) + assert all(isinstance(k, str) for k in um.keys()), "Unordered map keys should be str" + assert all(isinstance(v, str) for v in um.values()), "Unordered map values should be str" + assert um["key1"] == "Значение1" + assert um["key2"] == "值2" + assert um["key3"] == "قيمة3" # Arabic + + um_echo = h.echo_unordered_map({"a": "Альфа", "b": "贝塔"}) + assert um_echo["a"] == "Альфа" + assert um_echo["b"] == "贝塔" + + def test_wrap_ignore_foreign_cimports(): """ Test that wrap-ignored classes are not included in foreign cimports. diff --git a/tests/test_files/libcpp_utf8_containers_test.hpp b/tests/test_files/libcpp_utf8_containers_test.hpp new file mode 100644 index 0000000..582abdb --- /dev/null +++ b/tests/test_files/libcpp_utf8_containers_test.hpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include +#include + +class Utf8ContainersTest { + public: + Utf8ContainersTest(){} + + // Vector tests + std::vector get_vector() const { + return {"Hello", "Привет", "你好", "مرحبا"}; + } + + std::vector echo_vector(const std::vector& input) const { + return input; + } + + // Set tests + std::set get_set() const { + return {"Alpha", "Бета", "伽马"}; + } + + std::set echo_set(const std::set& input) const { + return input; + } + + // Map tests - UTF-8 values + std::map get_map() const { + return { + {"greeting", "Привет"}, + {"farewell", "再见"}, + {"thanks", "شكرا"} + }; + } + + std::map echo_map( + const std::map& input + ) const { + return input; + } + + // Map with UTF-8 keys + std::map get_map_utf8_keys() const { + return { + {"один", 1}, + {"二", 2}, + {"três", 3} + }; + } + + // Unordered set tests + std::unordered_set get_unordered_set() const { + return {"Set1", "Набор2", "集合3"}; + } + + std::unordered_set echo_unordered_set( + const std::unordered_set& input + ) const { + return input; + } + + // Unordered map tests + std::unordered_map get_unordered_map() const { + return { + {"key1", "Значение1"}, + {"key2", "值2"}, + {"key3", "قيمة3"} + }; + } + + std::unordered_map echo_unordered_map( + const std::unordered_map& input + ) const { + return input; + } +}; diff --git a/tests/test_files/libcpp_utf8_containers_test.pxd b/tests/test_files/libcpp_utf8_containers_test.pxd new file mode 100644 index 0000000..6e0708b --- /dev/null +++ b/tests/test_files/libcpp_utf8_containers_test.pxd @@ -0,0 +1,46 @@ +# cython: language_level=3 +from libcpp.string cimport string as libcpp_utf8_output_string +from libcpp.string cimport string as libcpp_utf8_string +from libcpp.string cimport string as libcpp_string +from libcpp.vector cimport vector as libcpp_vector +from libcpp.set cimport set as libcpp_set +from libcpp.map cimport map as libcpp_map +from libcpp.unordered_set cimport unordered_set as libcpp_unordered_set +from libcpp.unordered_map cimport unordered_map as libcpp_unordered_map + +cdef extern from "libcpp_utf8_containers_test.hpp": + cdef cppclass Utf8ContainersTest: + Utf8ContainersTest() + + # Vector - output decoded to str, input accepts str/bytes + libcpp_vector[libcpp_utf8_output_string] get_vector() + libcpp_vector[libcpp_utf8_output_string] echo_vector( + libcpp_vector[libcpp_utf8_string] + ) + + # Set - output decoded to str, input accepts str/bytes + libcpp_set[libcpp_utf8_output_string] get_set() + libcpp_set[libcpp_utf8_output_string] echo_set( + libcpp_set[libcpp_utf8_string] + ) + + # Map - both keys and values as UTF-8 + libcpp_map[libcpp_utf8_output_string, libcpp_utf8_output_string] get_map() + libcpp_map[libcpp_utf8_output_string, libcpp_utf8_output_string] echo_map( + libcpp_map[libcpp_utf8_string, libcpp_utf8_string] + ) + + # Map with UTF-8 keys + libcpp_map[libcpp_utf8_output_string, int] get_map_utf8_keys() + + # Unordered set + libcpp_unordered_set[libcpp_utf8_output_string] get_unordered_set() + libcpp_unordered_set[libcpp_utf8_output_string] echo_unordered_set( + libcpp_unordered_set[libcpp_utf8_string] + ) + + # Unordered map - both keys and values as UTF-8 + libcpp_unordered_map[libcpp_utf8_output_string, libcpp_utf8_output_string] get_unordered_map() + libcpp_unordered_map[libcpp_utf8_output_string, libcpp_utf8_output_string] echo_unordered_map( + libcpp_unordered_map[libcpp_utf8_string, libcpp_utf8_string] + ) diff --git a/tests/test_files/libcpp_utf8_string_vector_test.hpp b/tests/test_files/libcpp_utf8_string_vector_test.hpp new file mode 100644 index 0000000..49c3838 --- /dev/null +++ b/tests/test_files/libcpp_utf8_string_vector_test.hpp @@ -0,0 +1,19 @@ +#include +#include + +class Utf8VectorTest { + public: + Utf8VectorTest(){} + + std::vector get_greetings() const { + return {"Hello", "World", "Привет", "你好"}; + } + + std::vector echo(const std::vector& input) const { + return input; + } + + size_t count_strings(const std::vector& input) const { + return input.size(); + } +}; diff --git a/tests/test_files/libcpp_utf8_string_vector_test.pxd b/tests/test_files/libcpp_utf8_string_vector_test.pxd new file mode 100644 index 0000000..43af2a6 --- /dev/null +++ b/tests/test_files/libcpp_utf8_string_vector_test.pxd @@ -0,0 +1,11 @@ +# cython: language_level=3 +from libcpp.string cimport string as libcpp_utf8_output_string +from libcpp.string cimport string as libcpp_utf8_string +from libcpp.vector cimport vector as libcpp_vector + +cdef extern from "libcpp_utf8_string_vector_test.hpp": + cdef cppclass Utf8VectorTest: + Utf8VectorTest() + libcpp_vector[libcpp_utf8_output_string] get_greetings() + libcpp_vector[libcpp_utf8_output_string] echo(libcpp_vector[libcpp_utf8_string]) + size_t count_strings(libcpp_vector[libcpp_utf8_string])