Skip to content

Commit 906b7a8

Browse files
committed
Resolve conflict in _parquet.pyx
2 parents ffc9335 + 56436e8 commit 906b7a8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1258
-926
lines changed

.github/workflows/verify_rc.yml

+182
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
name: Verify RC
19+
20+
on:
21+
push:
22+
tags:
23+
- "*-rc*"
24+
25+
permissions:
26+
contents: read
27+
28+
env:
29+
TEST_DEFAULT: "0"
30+
VERBOSE: "1"
31+
32+
jobs:
33+
apt:
34+
name: APT
35+
runs-on: ${{ matrix.runs-on }}
36+
timeout-minutes: 30
37+
strategy:
38+
fail-fast: false
39+
matrix:
40+
runs-on:
41+
- ubuntu-latest
42+
- ubuntu-24.04-arm
43+
env:
44+
TEST_APT: "1"
45+
steps:
46+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
47+
- name: Run
48+
run: |
49+
package_id=${GITHUB_REF_NAME%-rc*}
50+
version=${package_id#apache-arrow-}
51+
rc=${GITHUB_REF_NAME#*-rc}
52+
dev/release/verify-release-candidate.sh ${version} ${rc}
53+
54+
binary:
55+
name: Binary
56+
runs-on: ubuntu-latest
57+
timeout-minutes: 30
58+
env:
59+
TEST_BINARY: "1"
60+
steps:
61+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
62+
- name: Run
63+
run: |
64+
package_id=${GITHUB_REF_NAME%-rc*}
65+
version=${package_id#apache-arrow-}
66+
rc=${GITHUB_REF_NAME#*-rc}
67+
dev/release/verify-release-candidate.sh ${version} ${rc}
68+
69+
wheels-linux:
70+
name: Wheels Linux
71+
runs-on: ubuntu-latest
72+
timeout-minutes: 30
73+
strategy:
74+
fail-fast: false
75+
matrix:
76+
distro:
77+
- almalinux-8
78+
- conda
79+
- ubuntu-22.04
80+
- ubuntu-24.04
81+
env:
82+
TEST_WHEELS: "1"
83+
steps:
84+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
85+
- uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
86+
with:
87+
python-version: 3
88+
- name: Setup Archery
89+
run: python -m pip install -e dev/archery[docker]
90+
- name: Prepare
91+
run: |
92+
package_id=${GITHUB_REF_NAME%-rc*}
93+
echo "VERSION=${package_id#apache-arrow-}" >> ${GITHUB_ENV}
94+
echo "RC=${GITHUB_REF_NAME#*-rc}" >> ${GITHUB_ENV}
95+
distro=${{ matrix.distro }}
96+
if [ "${distro}" = "conda" ]; then
97+
echo "SERVICE=${distro}-verify-rc" >> ${GITHUB_ENV}
98+
else
99+
os=${distro%-*}
100+
version=${distro#*-}
101+
echo "SERVICE=${os}-verify-rc" >> ${GITHUB_ENV}
102+
echo "$(echo ${os} | tr a-z A-Z)=${version}" >> ${GITHUB_ENV}
103+
fi
104+
- name: Run
105+
run: |
106+
archery docker run \
107+
-e TEST_DEFAULT="${TEST_DEFAULT}" \
108+
-e TEST_WHEELS="${TEST_WHEELS}" \
109+
-e VERBOSE="${VERBOSE}" \
110+
-e VERIFY_RC="${RC}" \
111+
-e VERIFY_VERSION="${VERSION}" \
112+
${SERVICE}
113+
114+
wheels-macos:
115+
name: Wheels macOS
116+
runs-on: ${{ matrix.runs-on }}
117+
timeout-minutes: 30
118+
strategy:
119+
fail-fast: false
120+
matrix:
121+
runs-on:
122+
- macos-13
123+
- macos-14
124+
env:
125+
TEST_WHEELS: "1"
126+
steps:
127+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
128+
- name: Run
129+
run: |
130+
package_id=${GITHUB_REF_NAME%-rc*}
131+
version=${package_id#apache-arrow-}
132+
rc=${GITHUB_REF_NAME#*-rc}
133+
dev/release/verify-release-candidate.sh ${version} ${rc}
134+
135+
wheels-windows:
136+
name: Wheels Windows
137+
runs-on: windows-latest
138+
timeout-minutes: 45
139+
env:
140+
PYARROW_TEST_GDB: "OFF"
141+
TEST_WHEELS: "1"
142+
steps:
143+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
144+
- name: Prepare
145+
shell: bash
146+
run: |
147+
package_id=${GITHUB_REF_NAME%-rc*}
148+
echo "VERSION=${package_id#apache-arrow-}" >> ${GITHUB_ENV}
149+
echo "RC=${GITHUB_REF_NAME#*-rc}" >> ${GITHUB_ENV}
150+
- uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1
151+
- name: Install System Dependencies
152+
run: |
153+
choco install --no-progress --yes boost-msvc-14.1
154+
choco install --no-progress --yes wget
155+
- name: Download Timezone Database
156+
shell: bash
157+
run: ci/scripts/download_tz_database.sh
158+
- name: Run verification
159+
shell: cmd
160+
run: |
161+
dev/release/verify-release-candidate-wheels.bat %VERSION% %RC%
162+
163+
yum:
164+
name: Yum
165+
runs-on: ${{ matrix.runs-on }}
166+
timeout-minutes: 30
167+
strategy:
168+
fail-fast: false
169+
matrix:
170+
runs-on:
171+
- ubuntu-latest
172+
- ubuntu-24.04-arm
173+
env:
174+
TEST_YUM: "1"
175+
steps:
176+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
177+
- name: Run
178+
run: |
179+
package_id=${GITHUB_REF_NAME%-rc*}
180+
version=${package_id#apache-arrow-}
181+
rc=${GITHUB_REF_NAME#*-rc}
182+
dev/release/verify-release-candidate.sh ${version} ${rc}

c_glib/arrow-glib/basic-array.cpp

+76
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ G_BEGIN_DECLS
132132
* format data, you need to use #GArrowBinaryViewArrayBuilder to create
133133
* a new array.
134134
*
135+
* #GArrowStringViewArray is a class for variable-size string view array.
136+
* It can store zero or more string view data. If you don't have Arrow
137+
* format data, you need to use #GArrowStringViewArrayBuilder to create
138+
* a new array.
139+
*
135140
* #GArrowFixedSizeBinaryArray is a class for fixed size binary array.
136141
* It can store zero or more fixed size binary data. If you don't have
137142
* Arrow format data, you need to use
@@ -2602,6 +2607,77 @@ garrow_binary_view_array_get_value(GArrowBinaryViewArray *array, gint64 i)
26022607
return g_bytes_new_static(view.data(), view.length());
26032608
}
26042609

2610+
// Registers GArrowStringViewArray with the GObject type system, using
// GARROW_TYPE_BINARY_VIEW_ARRAY as its parent type.
G_DEFINE_TYPE(GArrowStringViewArray,
2611+
garrow_string_view_array,
2612+
GARROW_TYPE_BINARY_VIEW_ARRAY)
2613+
// Instance initializer expected by G_DEFINE_TYPE. The body is empty: no
// per-instance state is set up here.
static void
2614+
garrow_string_view_array_init(GArrowStringViewArray *object)
2615+
{
2616+
}
2617+
2618+
// Class initializer expected by G_DEFINE_TYPE. The body is empty: no
// virtual methods or properties are installed here.
static void
2619+
garrow_string_view_array_class_init(GArrowStringViewArrayClass *klass)
2620+
{
2621+
}
2622+
2623+
/**
2624+
* garrow_string_view_array_new:
2625+
* @length: The number of elements.
2626+
* @views: The view buffer.
2627+
* @data_buffers: (element-type GArrowBuffer): The data buffers.
2628+
* @null_bitmap: (nullable): The bitmap that shows null elements. The
2629+
* N-th element is null when the N-th bit is 0, not null otherwise.
2630+
* If the array has no null elements, the bitmap must be %NULL and
2631+
* @n_nulls is 0.
2632+
* @n_nulls: The number of null elements. If -1 is specified, the
2633+
* number of nulls is computed from @null_bitmap.
2634+
* @offset: The position of the first element.
2635+
*
2636+
* Returns: A newly created #GArrowStringViewArray.
2637+
*
2638+
* Since: 20.0.0
2639+
*/
2640+
GArrowStringViewArray *
2641+
garrow_string_view_array_new(gint64 length,
2642+
GArrowBuffer *views,
2643+
GList *data_buffers,
2644+
GArrowBuffer *null_bitmap,
2645+
gint64 n_nulls,
2646+
gint64 offset)
2647+
{
2648+
// Convert the GList of GArrowBuffer into the vector of arrow::Buffer that
// the arrow::StringViewArray constructor takes.
std::vector<std::shared_ptr<arrow::Buffer>> arrow_data_buffers;
2649+
for (GList *node = data_buffers; node; node = g_list_next(node)) {
2650+
arrow_data_buffers.push_back(garrow_buffer_get_raw(GARROW_BUFFER(node->data)));
2651+
}
2652+
// The data type is fixed to arrow::utf8_view(); every other argument is
// forwarded from the caller.
// NOTE(review): @null_bitmap is documented as nullable, so this presumably
// relies on garrow_buffer_get_raw() accepting NULL -- confirm.
auto arrow_string_view_array =
2653+
std::make_shared<arrow::StringViewArray>(arrow::utf8_view(),
2654+
length,
2655+
garrow_buffer_get_raw(views),
2656+
std::move(arrow_data_buffers),
2657+
garrow_buffer_get_raw(null_bitmap),
2658+
n_nulls,
2659+
offset);
2660+
// Wrap the C++ array in a new GObject by passing it as the "array"
// construct argument.
return GARROW_STRING_VIEW_ARRAY(g_object_new(GARROW_TYPE_STRING_VIEW_ARRAY,
2661+
"array",
2662+
&arrow_string_view_array,
2663+
nullptr));
2664+
}
2665+
2666+
/**
2667+
* garrow_string_view_array_get_value:
2668+
* @array: A #GArrowStringViewArray.
2669+
* @i: The index of the target value.
2670+
*
2671+
* Returns: (transfer full): The @i-th value.
*
* Since: 20.0.0
2672+
*/
2673+
GBytes *
2674+
garrow_string_view_array_get_value(GArrowStringViewArray *array, gint64 i)
2675+
{
2676+
auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
2677+
// Fetch the i-th value as a (pointer, length) view.
auto view = static_cast<arrow::StringViewArray *>(arrow_array.get())->GetView(i);
2678+
// g_bytes_new_static() wraps the pointer without copying the bytes.
// NOTE(review): the returned GBytes is presumably only valid while the
// array's underlying buffers stay alive -- confirm against callers.
return g_bytes_new_static(view.data(), view.length());
2679+
}
2680+
26052681
G_DEFINE_TYPE(GArrowDate32Array, garrow_date32_array, GARROW_TYPE_NUMERIC_ARRAY)
26062682

26072683
static void

c_glib/arrow-glib/basic-array.h

+25
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,31 @@ GARROW_AVAILABLE_IN_20_0
624624
GBytes *
625625
garrow_binary_view_array_get_value(GArrowBinaryViewArray *array, gint64 i);
626626

627+
/* GObject type declaration for GArrowStringViewArray, a derivable type
 * whose parent is GArrowBinaryViewArray. */
#define GARROW_TYPE_STRING_VIEW_ARRAY (garrow_string_view_array_get_type())
628+
GARROW_AVAILABLE_IN_20_0
629+
G_DECLARE_DERIVABLE_TYPE(GArrowStringViewArray,
630+
garrow_string_view_array,
631+
GARROW,
632+
STRING_VIEW_ARRAY,
633+
GArrowBinaryViewArray)
634+
/* Class structure: it holds only the parent class and declares no
 * additional members. */
struct _GArrowStringViewArrayClass
635+
{
636+
GArrowBinaryViewArrayClass parent_class;
637+
};
638+
639+
GARROW_AVAILABLE_IN_20_0
640+
GArrowStringViewArray *
641+
garrow_string_view_array_new(gint64 length,
642+
GArrowBuffer *views,
643+
GList *data_buffers,
644+
GArrowBuffer *null_bitmap,
645+
gint64 n_nulls,
646+
gint64 offset);
647+
648+
GARROW_AVAILABLE_IN_20_0
649+
GBytes *
650+
garrow_string_view_array_get_value(GArrowStringViewArray *array, gint64 i);
651+
627652
#define GARROW_TYPE_DATE32_ARRAY (garrow_date32_array_get_type())
628653
GARROW_AVAILABLE_IN_ALL
629654
G_DECLARE_DERIVABLE_TYPE(

c_glib/test/test-string-view-array.rb

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Checks that Arrow::StringViewArray.new accepts hand-built buffers and that
# the stored value can be read back.
class TestStringViewArray < Test::Unit::TestCase
19+
def test_new
20+
short_string_data = "test"
21+
short_view_buffer_space = 12
22+
# Build a single view entry by hand: a native-endian 32-bit length
# (pack("l")) followed by the string itself, NUL-padded to 12 bytes.
short_view_buffer = [short_string_data.size].pack("l")
23+
short_view_buffer += short_string_data.ljust(short_view_buffer_space, "\x00")
24+
25+
arrow_view_buffer = Arrow::Buffer.new(short_view_buffer)
26+
arrow_data_buffer = Arrow::Buffer.new(short_string_data)
27+
# One bitmap byte with bit 0 set.
bitmap = Arrow::Buffer.new([0b1].pack("C*"))
28+
29+
# Arguments: length, views, data buffers, null bitmap, n_nulls, offset.
string_view_array = Arrow::StringViewArray.new(1,
30+
arrow_view_buffer,
31+
[arrow_data_buffer],
32+
bitmap,
33+
0,
34+
0)
35+
assert do
36+
string_view_array.validate_full
37+
end
38+
assert_equal(short_string_data, string_view_array.get_value(0).to_s)
39+
end
40+
end

cpp/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}")
160160
set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support")
161161

162162
set(ARROW_LLVM_VERSIONS
163+
"20.1"
163164
"19.1"
164165
"18.1"
165166
"17.0"

cpp/src/arrow/acero/plan_test.cc

+17
Original file line numberDiff line numberDiff line change
@@ -1266,6 +1266,23 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumFilter) {
12661266
}
12671267
}
12681268

1269+
// A project node given two expressions but only one name must fail with an
// Invalid status whose message reports both sizes.
TEST(ExecPlanExecution, ProjectNamesSizeMismatch) {
1270+
auto input = MakeGroupableBatches();
1271+
1272+
Declaration plan = Declaration::Sequence(
1273+
{{"source", SourceNodeOptions{input.schema, input.gen(true, /*slow=*/false)}},
1274+
{"project", ProjectNodeOptions{
1275+
/*expressions=*/{field_ref("str"),
1276+
call("multiply", {field_ref("i32"), literal(2)})},
1277+
/*names=*/{"a"}}}}); // expected 2 names but only 1 provided
1278+
1279+
// The error is expected to surface when the declaration is run.
EXPECT_RAISES_WITH_MESSAGE_THAT(
1280+
Invalid,
1281+
::testing::HasSubstr(
1282+
"Project node's size of names 1 doesn't match size of expressions 2"),
1283+
DeclarationToTable(std::move(plan)));
1284+
}
1285+
12691286
TEST(ExecPlanExecution, SourceFilterProjectGroupedSumOrderBy) {
12701287
for (bool parallel : {false, true}) {
12711288
SCOPED_TRACE(parallel ? "parallel/merged" : "serial");

cpp/src/arrow/acero/project_node.cc

+6-1
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,13 @@ class ProjectNode : public MapNode {
5959
for (size_t i = 0; i < exprs.size(); ++i) {
6060
names[i] = exprs[i].ToString();
6161
}
62+
} else {
63+
ARROW_RETURN_IF(
64+
names.size() != exprs.size(),
65+
Status::Invalid("Project node's size of names " + std::to_string(names.size()) +
66+
" doesn't match size of expressions " +
67+
std::to_string(exprs.size())));
6268
}
63-
6469
FieldVector fields(exprs.size());
6570
int i = 0;
6671
for (auto& expr : exprs) {

0 commit comments

Comments
 (0)