-
Notifications
You must be signed in to change notification settings - Fork 982
Change strings::like() pattern parameter from string_scalar to string_view #20428
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 4 commits
6a69f5a
3878334
7dc915d
d853bf4
65fd897
eddf32a
b2a1a42
8b1a6a9
4706a90
60002a7
3f274f3
c9a02fc
967be2a
cd5e2ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -2,17 +2,12 @@ | |||||||
| # SPDX-License-Identifier: Apache-2.0 | ||||||||
| from libcpp.memory cimport unique_ptr | ||||||||
| from libcpp.utility cimport move | ||||||||
| from cython.operator import dereference | ||||||||
| from libcpp.string cimport string | ||||||||
|
|
||||||||
| from pylibcudf.column cimport Column | ||||||||
| from pylibcudf.libcudf.column.column cimport column | ||||||||
| from pylibcudf.libcudf.scalar.scalar cimport string_scalar | ||||||||
| from pylibcudf.libcudf.scalar.scalar_factories cimport ( | ||||||||
| make_string_scalar as cpp_make_string_scalar, | ||||||||
| ) | ||||||||
| from pylibcudf.libcudf.strings cimport contains as cpp_contains | ||||||||
| from pylibcudf.strings.regex_program cimport RegexProgram | ||||||||
| from pylibcudf.scalar cimport Scalar | ||||||||
| from pylibcudf.utils cimport _get_stream, _get_memory_resource | ||||||||
| from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource | ||||||||
| from rmm.pylibrmm.stream cimport Stream | ||||||||
|
|
@@ -139,8 +134,8 @@ cpdef Column matches_re( | |||||||
|
|
||||||||
| cpdef Column like( | ||||||||
| Column input, | ||||||||
| ColumnOrScalar pattern, | ||||||||
| Scalar escape_character=None, | ||||||||
| str pattern, | ||||||||
| str escape_character=None, | ||||||||
| Stream stream=None, | ||||||||
| DeviceMemoryResource mr=None, | ||||||||
| ): | ||||||||
|
|
@@ -154,9 +149,9 @@ cpdef Column like( | |||||||
| ---------- | ||||||||
| input : Column | ||||||||
| The input strings | ||||||||
| pattern : Column or Scalar | ||||||||
| Like patterns to match within each string | ||||||||
| escape_character : Scalar | ||||||||
| pattern : str | ||||||||
| Like pattern to match within each string | ||||||||
|
Comment on lines
+152
to
+153
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change not only changes
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. This interface is only needed by Spark. |
||||||||
| escape_character : str | ||||||||
| Optional character specifies the escape prefix. | ||||||||
| Default is no escape character. | ||||||||
|
|
||||||||
|
|
@@ -170,35 +165,18 @@ cpdef Column like( | |||||||
| mr = _get_memory_resource(mr) | ||||||||
|
|
||||||||
| if escape_character is None: | ||||||||
| escape_character = Scalar.from_libcudf( | ||||||||
| cpp_make_string_scalar("".encode(), stream.view(), mr.get_mr()) | ||||||||
| ) | ||||||||
| escape_character = "" | ||||||||
|
|
||||||||
| cdef string c_escape_character = escape_character.encode() | ||||||||
| cdef string c_pattern = pattern.encode() | ||||||||
|
|
||||||||
| cdef const string_scalar* c_escape_character = <const string_scalar*>( | ||||||||
| escape_character.c_obj.get() | ||||||||
| ) | ||||||||
| cdef const string_scalar* c_pattern | ||||||||
|
|
||||||||
| if ColumnOrScalar is Column: | ||||||||
| with nogil: | ||||||||
| result = cpp_contains.like( | ||||||||
| input.view(), | ||||||||
| pattern.view(), | ||||||||
| dereference(c_escape_character), | ||||||||
| stream.view(), | ||||||||
| mr.get_mr() | ||||||||
| ) | ||||||||
| elif ColumnOrScalar is Scalar: | ||||||||
| c_pattern = <const string_scalar*>(pattern.c_obj.get()) | ||||||||
| with nogil: | ||||||||
| result = cpp_contains.like( | ||||||||
| input.view(), | ||||||||
| dereference(c_pattern), | ||||||||
| dereference(c_escape_character), | ||||||||
| stream.view(), | ||||||||
| mr.get_mr() | ||||||||
| ) | ||||||||
| else: | ||||||||
| raise ValueError("pattern must be a Column or a Scalar") | ||||||||
| with nogil: | ||||||||
| result = cpp_contains.like( | ||||||||
| input.view(), | ||||||||
| c_pattern, | ||||||||
| c_escape_character, | ||||||||
| stream.view(), | ||||||||
| mr.get_mr() | ||||||||
| ) | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue: the lifetime of
Suggested change
|
||||||||
|
|
||||||||
| return Column.from_libcudf(move(result), stream, mr) | ||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Stream-order/lifetime question. How do we know the
string_viewlifetime is long enough to do the stream-ordered copy to the_databuffer without inserting a sync or copying to an internal host buffer first? What guarantees safety here?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a good point. Technically the output is not guaranteed to be ready unless the caller does sync so either I could add a sync or just add a comment to the doxygen indicating the parameter must stay alive until the caller does a sync on the input stream.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a comment to the doxygen warning about the lifetime scope of the parameters.