Skip to content

Commit d13af71

Browse files
authored
Merge branch 'apache:main' into fix-dict-schema-inconsistency
2 parents 94255d5 + 42b215b commit d13af71

File tree

90 files changed

+4529
-3578
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

90 files changed

+4529
-3578
lines changed

.github/workflows/csharp.yml

-8
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,6 @@ jobs:
6565
uses: actions/checkout@v4
6666
with:
6767
fetch-depth: 0
68-
- name: Install Source Link
69-
shell: bash
70-
run: dotnet tool install --global sourcelink
7168
- name: Build
7269
shell: bash
7370
run: ci/scripts/csharp_build.sh $(pwd)
@@ -93,8 +90,6 @@ jobs:
9390
uses: actions/checkout@v4
9491
with:
9592
fetch-depth: 0
96-
- name: Install Source Link
97-
run: dotnet tool install --global sourcelink
9893
- name: Build
9994
shell: bash
10095
run: ci/scripts/csharp_build.sh $(pwd)
@@ -124,9 +119,6 @@ jobs:
124119
uses: actions/checkout@v4
125120
with:
126121
fetch-depth: 0
127-
- name: Install Source Link
128-
shell: bash
129-
run: dotnet tool install --global sourcelink
130122
- name: Build
131123
shell: bash
132124
run: ci/scripts/csharp_build.sh $(pwd)

.github/workflows/verify_rc.yml

+182
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
name: Verify RC
19+
20+
on:
21+
push:
22+
tags:
23+
- "*-rc*"
24+
25+
permissions:
26+
contents: read
27+
28+
env:
29+
TEST_DEFAULT: "0"
30+
VERBOSE: "1"
31+
32+
jobs:
33+
apt:
34+
name: APT
35+
runs-on: ${{ matrix.runs-on }}
36+
timeout-minutes: 30
37+
strategy:
38+
fail-fast: false
39+
matrix:
40+
runs-on:
41+
- ubuntu-latest
42+
- ubuntu-24.04-arm
43+
env:
44+
TEST_APT: "1"
45+
steps:
46+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
47+
- name: Run
48+
run: |
49+
package_id=${GITHUB_REF_NAME%-rc*}
50+
version=${package_id#apache-arrow-}
51+
rc=${GITHUB_REF_NAME#*-rc}
52+
dev/release/verify-release-candidate.sh ${version} ${rc}
53+
54+
binary:
55+
name: Binary
56+
runs-on: ubuntu-latest
57+
timeout-minutes: 30
58+
env:
59+
TEST_BINARY: "1"
60+
steps:
61+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
62+
- name: Run
63+
run: |
64+
package_id=${GITHUB_REF_NAME%-rc*}
65+
version=${package_id#apache-arrow-}
66+
rc=${GITHUB_REF_NAME#*-rc}
67+
dev/release/verify-release-candidate.sh ${version} ${rc}
68+
69+
wheels-linux:
70+
name: Wheels Linux
71+
runs-on: ubuntu-latest
72+
timeout-minutes: 30
73+
strategy:
74+
fail-fast: false
75+
matrix:
76+
distro:
77+
- almalinux-8
78+
- conda
79+
- ubuntu-22.04
80+
- ubuntu-24.04
81+
env:
82+
TEST_WHEELS: "1"
83+
steps:
84+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
85+
- uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
86+
with:
87+
python-version: 3
88+
- name: Setup Archery
89+
run: python -m pip install -e dev/archery[docker]
90+
- name: Prepare
91+
run: |
92+
package_id=${GITHUB_REF_NAME%-rc*}
93+
echo "VERSION=${package_id#apache-arrow-}" >> ${GITHUB_ENV}
94+
echo "RC=${GITHUB_REF_NAME#*-rc}" >> ${GITHUB_ENV}
95+
distro=${{ matrix.distro }}
96+
if [ "${distro}" = "conda" ]; then
97+
echo "SERVICE=${distro}-verify-rc" >> ${GITHUB_ENV}
98+
else
99+
os=${distro%-*}
100+
version=${distro#*-}
101+
echo "SERVICE=${os}-verify-rc" >> ${GITHUB_ENV}
102+
echo "$(echo ${os} | tr a-z A-Z)=${version}" >> ${GITHUB_ENV}
103+
fi
104+
- name: Run
105+
run: |
106+
archery docker run \
107+
-e TEST_DEFAULT="${TEST_DEFAULT}" \
108+
-e TEST_WHEELS="${TEST_WHEELS}" \
109+
-e VERBOSE="${VERBOSE}" \
110+
-e VERIFY_RC="${RC}" \
111+
-e VERIFY_VERSION="${VERSION}" \
112+
${SERVICE}
113+
114+
wheels-macos:
115+
name: Wheels macOS
116+
runs-on: ${{ matrix.runs-on }}
117+
timeout-minutes: 30
118+
strategy:
119+
fail-fast: false
120+
matrix:
121+
runs-on:
122+
- macos-13
123+
- macos-14
124+
env:
125+
TEST_WHEELS: "1"
126+
steps:
127+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
128+
- name: Run
129+
run: |
130+
package_id=${GITHUB_REF_NAME%-rc*}
131+
version=${package_id#apache-arrow-}
132+
rc=${GITHUB_REF_NAME#*-rc}
133+
dev/release/verify-release-candidate.sh ${version} ${rc}
134+
135+
wheels-windows:
136+
name: Wheels Windows
137+
runs-on: windows-latest
138+
timeout-minutes: 45
139+
env:
140+
PYARROW_TEST_GDB: "OFF"
141+
TEST_WHEELS: "1"
142+
steps:
143+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
144+
- name: Prepare
145+
shell: bash
146+
run: |
147+
package_id=${GITHUB_REF_NAME%-rc*}
148+
echo "VERSION=${package_id#apache-arrow-}" >> ${GITHUB_ENV}
149+
echo "RC=${GITHUB_REF_NAME#*-rc}" >> ${GITHUB_ENV}
150+
- uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3.1.1
151+
- name: Install System Dependencies
152+
run: |
153+
choco install --no-progress --yes boost-msvc-14.1
154+
choco install --no-progress --yes wget
155+
- name: Download Timezone Database
156+
shell: bash
157+
run: ci/scripts/download_tz_database.sh
158+
- name: Run verification
159+
shell: cmd
160+
run: |
161+
dev/release/verify-release-candidate-wheels.bat %VERSION% %RC%
162+
163+
yum:
164+
name: Yum
165+
runs-on: ${{ matrix.runs-on }}
166+
timeout-minutes: 30
167+
strategy:
168+
fail-fast: false
169+
matrix:
170+
runs-on:
171+
- ubuntu-latest
172+
- ubuntu-24.04-arm
173+
env:
174+
TEST_YUM: "1"
175+
steps:
176+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
177+
- name: Run
178+
run: |
179+
package_id=${GITHUB_REF_NAME%-rc*}
180+
version=${package_id#apache-arrow-}
181+
rc=${GITHUB_REF_NAME#*-rc}
182+
dev/release/verify-release-candidate.sh ${version} ${rc}

.pre-commit-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ repos:
195195
?^ci/scripts/c_glib_build\.sh$|
196196
?^ci/scripts/c_glib_test\.sh$|
197197
?^c_glib/test/run-test\.sh$|
198+
?^dev/release/07-binary-verify\.sh$|
198199
?^dev/release/utils-generate-checksum\.sh$|
199200
)
200201
- repo: https://github.com/trim21/pre-commit-mirror-meson

c_glib/arrow-glib/basic-array.cpp

+76
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ G_BEGIN_DECLS
132132
* format data, you need to use #GArrowBinaryViewArrayBuilder to create
133133
* a new array.
134134
*
135+
* #GArrowStringViewArray is a class for variable-size string view array.
136+
* It can store zero or more string view data. If you don't have Arrow
137+
* format data, you need to use #GArrowStringViewArrayBuilder to create
138+
* a new array.
139+
*
135140
* #GArrowFixedSizeBinaryArray is a class for fixed size binary array.
136141
* It can store zero or more fixed size binary data. If you don't have
137142
* Arrow format data, you need to use
@@ -2602,6 +2607,77 @@ garrow_binary_view_array_get_value(GArrowBinaryViewArray *array, gint64 i)
26022607
return g_bytes_new_static(view.data(), view.length());
26032608
}
26042609

2610+
G_DEFINE_TYPE(GArrowStringViewArray,
2611+
garrow_string_view_array,
2612+
GARROW_TYPE_BINARY_VIEW_ARRAY)
2613+
static void
2614+
garrow_string_view_array_init(GArrowStringViewArray *object)
2615+
{
2616+
}
2617+
2618+
static void
2619+
garrow_string_view_array_class_init(GArrowStringViewArrayClass *klass)
2620+
{
2621+
}
2622+
2623+
/**
2624+
* garrow_string_view_array_new:
2625+
* @length: The number of elements.
2626+
* @views: The view buffer.
2627+
* @data_buffers: (element-type GArrowBuffer): The data buffers.
2628+
* @null_bitmap: (nullable): The bitmap that shows null elements. The
2629+
* N-th element is null when the N-th bit is 0, not null otherwise.
2630+
* If the array has no null elements, the bitmap must be %NULL and
2631+
* @n_nulls is 0.
2632+
* @n_nulls: The number of null elements. If -1 is specified, the
2633+
* number of nulls is computed from @null_bitmap.
2634+
* @offset: The position of the first element.
2635+
*
2636+
* Returns: A newly created #GArrowStringViewArray.
2637+
*
2638+
* Since: 20.0.0
2639+
*/
2640+
GArrowStringViewArray *
2641+
garrow_string_view_array_new(gint64 length,
2642+
GArrowBuffer *views,
2643+
GList *data_buffers,
2644+
GArrowBuffer *null_bitmap,
2645+
gint64 n_nulls,
2646+
gint64 offset)
2647+
{
2648+
std::vector<std::shared_ptr<arrow::Buffer>> arrow_data_buffers;
2649+
for (GList *node = data_buffers; node; node = g_list_next(node)) {
2650+
arrow_data_buffers.push_back(garrow_buffer_get_raw(GARROW_BUFFER(node->data)));
2651+
}
2652+
auto arrow_string_view_array =
2653+
std::make_shared<arrow::StringViewArray>(arrow::utf8_view(),
2654+
length,
2655+
garrow_buffer_get_raw(views),
2656+
std::move(arrow_data_buffers),
2657+
garrow_buffer_get_raw(null_bitmap),
2658+
n_nulls,
2659+
offset);
2660+
return GARROW_STRING_VIEW_ARRAY(g_object_new(GARROW_TYPE_STRING_VIEW_ARRAY,
2661+
"array",
2662+
&arrow_string_view_array,
2663+
nullptr));
2664+
}
2665+
2666+
/**
2667+
* garrow_string_view_array_get_value:
2668+
* @array: A #GArrowStringViewArray.
2669+
* @i: The index of the target value.
2670+
*
2671+
* Returns: (transfer full): The @i-th value.
2672+
*/
2673+
GBytes *
2674+
garrow_string_view_array_get_value(GArrowStringViewArray *array, gint64 i)
2675+
{
2676+
auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
2677+
auto view = static_cast<arrow::StringViewArray *>(arrow_array.get())->GetView(i);
2678+
return g_bytes_new_static(view.data(), view.length());
2679+
}
2680+
26052681
G_DEFINE_TYPE(GArrowDate32Array, garrow_date32_array, GARROW_TYPE_NUMERIC_ARRAY)
26062682

26072683
static void

c_glib/arrow-glib/basic-array.h

+25
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,31 @@ GARROW_AVAILABLE_IN_20_0
624624
GBytes *
625625
garrow_binary_view_array_get_value(GArrowBinaryViewArray *array, gint64 i);
626626

627+
#define GARROW_TYPE_STRING_VIEW_ARRAY (garrow_string_view_array_get_type())
628+
GARROW_AVAILABLE_IN_20_0
629+
G_DECLARE_DERIVABLE_TYPE(GArrowStringViewArray,
630+
garrow_string_view_array,
631+
GARROW,
632+
STRING_VIEW_ARRAY,
633+
GArrowBinaryViewArray)
634+
struct _GArrowStringViewArrayClass
635+
{
636+
GArrowBinaryViewArrayClass parent_class;
637+
};
638+
639+
GARROW_AVAILABLE_IN_20_0
640+
GArrowStringViewArray *
641+
garrow_string_view_array_new(gint64 length,
642+
GArrowBuffer *views,
643+
GList *data_buffers,
644+
GArrowBuffer *null_bitmap,
645+
gint64 n_nulls,
646+
gint64 offset);
647+
648+
GARROW_AVAILABLE_IN_20_0
649+
GBytes *
650+
garrow_string_view_array_get_value(GArrowStringViewArray *array, gint64 i);
651+
627652
#define GARROW_TYPE_DATE32_ARRAY (garrow_date32_array_get_type())
628653
GARROW_AVAILABLE_IN_ALL
629654
G_DECLARE_DERIVABLE_TYPE(

c_glib/test/test-string-view-array.rb

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
class TestStringViewArray < Test::Unit::TestCase
19+
def test_new
20+
short_string_data = "test"
21+
short_view_buffer_space = 12
22+
short_view_buffer = [short_string_data.size].pack("l")
23+
short_view_buffer += short_string_data.ljust(short_view_buffer_space, "\x00")
24+
25+
arrow_view_buffer = Arrow::Buffer.new(short_view_buffer)
26+
arrow_data_buffer = Arrow::Buffer.new(short_string_data)
27+
bitmap = Arrow::Buffer.new([0b1].pack("C*"))
28+
29+
string_view_array = Arrow::StringViewArray.new(1,
30+
arrow_view_buffer,
31+
[arrow_data_buffer],
32+
bitmap,
33+
0,
34+
0)
35+
assert do
36+
string_view_array.validate_full
37+
end
38+
assert_equal(short_string_data, string_view_array.get_value(0).to_s)
39+
end
40+
end

ci/docker/conda.dockerfile

-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
2525
curl \
2626
gdb \
2727
libc6-dbg \
28-
patch \
2928
tzdata \
3029
wget && \
3130
apt-get clean && \

ci/docker/debian-12-cpp.dockerfile

-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,6 @@ RUN apt-get update -y -q && \
8585
ninja-build \
8686
nlohmann-json3-dev \
8787
npm \
88-
patch \
8988
pkg-config \
9089
protobuf-compiler-grpc \
9190
python3-dev \

0 commit comments

Comments
 (0)