@@ -30,68 +30,77 @@ static auto ReleaseArrowSchema(void *ptr) noexcept -> void {
30
30
delete schema;
31
31
}
32
32
33
- static auto CumSum (const struct ArrowArrayView *array_view,
33
+ template <size_t OffsetSize>
34
+ static auto CumSum (struct ArrowArrayStream *array_stream,
34
35
struct ArrowArray *out, bool skipna) {
35
36
bool seen_na = false ;
36
37
std::stringstream ss{};
37
38
38
- for (int64_t i = 0 ; i < array_view->length ; i++) {
39
- const bool isna = ArrowArrayViewIsNull (array_view, i);
40
- if (!skipna && (seen_na || isna)) {
41
- seen_na = true ;
42
- ArrowArrayAppendNull (out, 1 );
43
- } else {
44
- if (!isna) {
45
- const auto std_sv = ArrowArrayViewGetStringUnsafe (array_view, i);
46
- ss << std::string_view{std_sv.data ,
47
- static_cast <size_t >(std_sv.size_bytes )};
39
+ int i = 0 ;
40
+ nanoarrow::ViewArrayStream array_stream_view (array_stream);
41
+ for (const auto &array : array_stream_view) {
42
+ for (const auto &sv : nanoarrow::ViewArrayAsBytes<OffsetSize>(&array)) {
43
+ ++i;
44
+ if (i == 50 ) {
45
+ }
46
+ if ((!sv || seen_na) && !skipna) {
47
+ seen_na = true ;
48
+ ArrowArrayAppendNull (out, 1 );
49
+ } else {
50
+ if (sv) {
51
+ ss << std::string_view{(*sv).data ,
52
+ static_cast <size_t >((*sv).size_bytes )};
53
+ }
54
+ const auto str = ss.str ();
55
+ const ArrowStringView asv{str.c_str (),
56
+ static_cast <int64_t >(str.size ())};
57
+ NANOARROW_THROW_NOT_OK (ArrowArrayAppendString (out, asv));
48
58
}
49
- const auto str = ss.str ();
50
- const ArrowStringView asv{str.c_str (), static_cast <int64_t >(str.size ())};
51
- NANOARROW_THROW_NOT_OK (ArrowArrayAppendString (out, asv));
52
59
}
53
60
}
54
61
}
55
62
63
+ // TODO: doesn't seem like all compilers in CI support this?
56
64
// template <typename T>
57
65
// concept MinOrMaxOp =
58
66
// std::same_as<T, std::less<>> || std::same_as<T, std::greater<>>;
59
67
60
- template <auto Op>
68
+ template <size_t OffsetSize, auto Op>
61
69
// requires MinOrMaxOp<decltype(Op)>
62
- static auto CumMinOrMax (const struct ArrowArrayView *array_view ,
70
+ static auto CumMinOrMax (struct ArrowArrayStream *array_stream ,
63
71
struct ArrowArray *out, bool skipna) {
64
72
bool seen_na = false ;
65
73
std::optional<std::string> current_str{};
66
74
67
- for (int64_t i = 0 ; i < array_view->length ; i++) {
68
- const bool isna = ArrowArrayViewIsNull (array_view, i);
69
- if (!skipna && (seen_na || isna)) {
70
- seen_na = true ;
71
- ArrowArrayAppendNull (out, 1 );
72
- } else {
73
- if (!isna || current_str) {
74
- if (!isna) {
75
- const auto asv = ArrowArrayViewGetStringUnsafe (array_view, i);
76
- const nb::str pyval{asv.data , static_cast <size_t >(asv.size_bytes )};
77
-
78
- if (current_str) {
79
- const nb::str pycurrent{current_str->data (), current_str->size ()};
80
- if (Op (pyval, pycurrent)) {
81
- current_str =
82
- std::string{asv.data , static_cast <size_t >(asv.size_bytes )};
75
+ nanoarrow::ViewArrayStream array_stream_view (array_stream);
76
+ for (const auto &array : array_stream_view) {
77
+ for (const auto &sv : nanoarrow::ViewArrayAsBytes<OffsetSize>(&array)) {
78
+ if ((!sv || seen_na) && !skipna) {
79
+ seen_na = true ;
80
+ ArrowArrayAppendNull (out, 1 );
81
+ } else {
82
+ if (sv || current_str) {
83
+ if (sv) {
84
+ const nb::str pyval{(*sv).data ,
85
+ static_cast <size_t >((*sv).size_bytes )};
86
+ if (current_str) {
87
+ const nb::str pycurrent{current_str->data (), current_str->size ()};
88
+ if (Op (pyval, pycurrent)) {
89
+ current_str = std::string{
90
+ (*sv).data , static_cast <size_t >((*sv).size_bytes )};
91
+ }
92
+ } else {
93
+ current_str = std::string{(*sv).data ,
94
+ static_cast <size_t >((*sv).size_bytes )};
83
95
}
84
- } else {
85
- current_str =
86
- std::string{asv.data , static_cast <size_t >(asv.size_bytes )};
87
96
}
88
- }
89
97
90
- struct ArrowStringView out_sv{
91
- current_str->data (), static_cast<int64_t>(current_str->size ())};
92
- NANOARROW_THROW_NOT_OK (ArrowArrayAppendString(out, out_sv));
93
- } else {
94
- ArrowArrayAppendEmpty (out, 1 );
98
+ struct ArrowStringView out_sv{
99
+ current_str->data (), static_cast<int64_t>(current_str->size ())};
100
+ NANOARROW_THROW_NOT_OK (ArrowArrayAppendString(out, out_sv));
101
+ } else {
102
+ ArrowArrayAppendEmpty (out, 1 );
103
+ }
95
104
}
96
105
}
97
106
}
@@ -131,7 +140,6 @@ class ArrowStringAccumulation {
131
140
switch (schema_view.type ) {
132
141
case NANOARROW_TYPE_STRING:
133
142
case NANOARROW_TYPE_LARGE_STRING:
134
- case NANOARROW_TYPE_STRING_VIEW:
135
143
break ;
136
144
default :
137
145
const auto error_message =
@@ -159,30 +167,29 @@ class ArrowStringAccumulation {
159
167
160
168
NANOARROW_THROW_NOT_OK (ArrowArrayStartAppending (uarray_out.get ()));
161
169
162
- nanoarrow::UniqueArray chunk{};
163
- int errcode{};
164
-
165
- while ((errcode = ArrowArrayStreamGetNext (stream_.get (), chunk.get (),
166
- nullptr ) == 0 ) &&
167
- chunk->release != nullptr ) {
168
- struct ArrowArrayView array_view{};
169
- NANOARROW_THROW_NOT_OK (
170
- ArrowArrayViewInitFromSchema (&array_view, schema_.get (), nullptr ));
171
-
172
- NANOARROW_THROW_NOT_OK (
173
- ArrowArrayViewSetArray (&array_view, chunk.get (), nullptr ));
174
-
175
- if (accumulation_ == " cumsum" ) {
176
- CumSum (&array_view, uarray_out.get (), skipna_);
177
- } else if (accumulation_ == " cummin" ) {
178
- CumMinOrMax<std::less{}>(&array_view, uarray_out.get (), skipna_);
179
- } else if (accumulation_ == " cummax" ) {
180
- CumMinOrMax<std::greater{}>(&array_view, uarray_out.get (), skipna_);
170
+ if (accumulation_ == " cumsum" ) {
171
+ if (schema_view.type == NANOARROW_TYPE_STRING) {
172
+ CumSum<32 >(stream_.get (), uarray_out.get (), skipna_);
181
173
} else {
182
- throw std::runtime_error ( " Unexpected branch " );
174
+ CumSum< 64 >(stream_. get (), uarray_out. get (), skipna_ );
183
175
}
184
176
185
- chunk.reset ();
177
+ } else if (accumulation_ == " cummin" ) {
178
+ if (schema_view.type == NANOARROW_TYPE_STRING) {
179
+ CumMinOrMax<32 , std::less{}>(stream_.get (), uarray_out.get (), skipna_);
180
+ } else {
181
+ CumMinOrMax<64 , std::less{}>(stream_.get (), uarray_out.get (), skipna_);
182
+ }
183
+ } else if (accumulation_ == " cummax" ) {
184
+ if (schema_view.type == NANOARROW_TYPE_STRING) {
185
+ CumMinOrMax<32 , std::greater{}>(stream_.get (), uarray_out.get (),
186
+ skipna_);
187
+ } else {
188
+ CumMinOrMax<64 , std::greater{}>(stream_.get (), uarray_out.get (),
189
+ skipna_);
190
+ }
191
+ } else {
192
+ throw std::runtime_error (" Unexpected branch" );
186
193
}
187
194
188
195
NANOARROW_THROW_NOT_OK (
0 commit comments