From 4313a1e74371db9a50b3c1e7ab631bf351757bc4 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 27 Jan 2025 18:30:27 -0800 Subject: [PATCH 1/9] Build Arrow from scratch for dev cookbooks --- .../deploy_development_cookbooks.yml | 2 + cpp/code/CMakeLists.txt | 37 +++++++++++++++---- cpp/dev.yml | 3 -- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/.github/workflows/deploy_development_cookbooks.yml b/.github/workflows/deploy_development_cookbooks.yml index da0ec930..aed48fb2 100644 --- a/.github/workflows/deploy_development_cookbooks.yml +++ b/.github/workflows/deploy_development_cookbooks.yml @@ -66,6 +66,8 @@ jobs: run: echo ${CONDA_PREFIX} - name: Build cookbook + env: + ARROW_NIGHTLY: 1 run: make cpp - name: Upload cpp book diff --git a/cpp/code/CMakeLists.txt b/cpp/code/CMakeLists.txt index 46a15e97..23a0619c 100644 --- a/cpp/code/CMakeLists.txt +++ b/cpp/code/CMakeLists.txt @@ -24,14 +24,37 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") endif() # Add Arrow and other required packages -find_package(Arrow REQUIRED) -if(NOT ${ARROW_VERSION} VERSION_GREATER "9.0.0") - get_filename_component(ARROW_CMAKE_BASE_DIR ${Arrow_CONFIG} DIRECTORY) - list(INSERT CMAKE_MODULE_PATH 0 ${ARROW_CMAKE_BASE_DIR}) +if(DEFINED ENV{ARROW_NIGHTLY}) + set(CMAKE_BUILD_TYPE Debug) + set(ARROW_BUILD_SHARED True) + set(ARROW_DEPENDENCY_SOURCE "AUTO") + set(ARROW_SIMD_LEVEL NONE) # macOS-specific workaround + set(ARROW_ENABLE_THREADING ON) + + set(ARROW_ACERO ON) + set(ARROW_COMPUTE ON) + set(ARROW_DATASET ON) + set(ARROW_FILESYSTEM ON) + set(ARROW_IPC ON) + set(ARROW_FLIGHT ON) + set(ARROW_PARQUET ON) + + include(FetchContent) + + FetchContent_Declare(Arrow + GIT_REPOSITORY https://github.com/apache/arrow.git + GIT_TAG main + GIT_SHALLOW TRUE SOURCE_SUBDIR cpp + OVERRIDE_FIND_PACKAGE + ) + + FetchContent_MakeAvailable(Arrow) +else() + find_package(Arrow REQUIRED) + find_package(ArrowDataset REQUIRED) + find_package(ArrowFlight REQUIRED) + find_package(Parquet REQUIRED) 
endif() -find_package(ArrowDataset REQUIRED) -find_package(ArrowFlight REQUIRED) -find_package(Parquet REQUIRED) if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set(CMAKE_CXX_CLANG_TIDY "clang-tidy") diff --git a/cpp/dev.yml b/cpp/dev.yml index d0ddd068..384d87d0 100644 --- a/cpp/dev.yml +++ b/cpp/dev.yml @@ -16,15 +16,12 @@ name: cookbook-cpp-dev channels: - - arrow-nightlies - conda-forge dependencies: - python=3.9 - compilers - - arrow-nightlies::libarrow - sphinx - gtest - gmock - - arrow-nightlies::pyarrow - clang-tools - zlib From 091f450c78e25a066cc1be953402e12c2de0edf5 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 27 Jan 2025 18:49:55 -0800 Subject: [PATCH 2/9] Update contributing doc --- cpp/CONTRIBUTING.md | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CONTRIBUTING.md b/cpp/CONTRIBUTING.md index a82b37c8..f84a66dd 100644 --- a/cpp/CONTRIBUTING.md +++ b/cpp/CONTRIBUTING.md @@ -121,6 +121,7 @@ cmake build. For example: ``` mkdir cpp/code/build cd cpp/code/build +# Optional: Run `export ARROW_NIGHTLY=1` to build Arrow from git. cmake ../code -DCMAKE_BUILD_TYPE=Debug cmake --build . ctest From ad1ee61d42bf78a43ecd6bd8eb74dd60dfe70de4 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 27 Jan 2025 19:58:13 -0800 Subject: [PATCH 3/9] Reorder statements --- cpp/CONTRIBUTING.md | 2 +- cpp/code/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/CONTRIBUTING.md b/cpp/CONTRIBUTING.md index f84a66dd..2b9d8493 100644 --- a/cpp/CONTRIBUTING.md +++ b/cpp/CONTRIBUTING.md @@ -95,7 +95,7 @@ output block when the recipe is rendered into the cookbook. ## Referencing Arrow C++ Documentation The Arrow project has its own documentation for the C++ implementation that -is hosted at https://arrow.apache.org/docs/cpp/index.html. Fortunately, +is hosted at <https://arrow.apache.org/docs/cpp/index.html>. Fortunately, this documentation is also built with Sphinx and so we can use the extension `intersphinx` to reference sections of this documentation. 
To do so simply write a standard Sphinx reference like so: diff --git a/cpp/code/CMakeLists.txt b/cpp/code/CMakeLists.txt index 23a0619c..0353d420 100644 --- a/cpp/code/CMakeLists.txt +++ b/cpp/code/CMakeLists.txt @@ -28,15 +28,15 @@ if(DEFINED ENV{ARROW_NIGHTLY}) set(CMAKE_BUILD_TYPE Debug) set(ARROW_BUILD_SHARED True) set(ARROW_DEPENDENCY_SOURCE "AUTO") - set(ARROW_SIMD_LEVEL NONE) # macOS-specific workaround set(ARROW_ENABLE_THREADING ON) + set(ARROW_SIMD_LEVEL NONE) # macOS-specific workaround set(ARROW_ACERO ON) set(ARROW_COMPUTE ON) set(ARROW_DATASET ON) set(ARROW_FILESYSTEM ON) - set(ARROW_IPC ON) set(ARROW_FLIGHT ON) + set(ARROW_IPC ON) set(ARROW_PARQUET ON) include(FetchContent) From 9ad28cb810064fd835a45683f349dee6db0dbf51 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 27 Jan 2025 19:58:45 -0800 Subject: [PATCH 4/9] Move find_package calls --- cpp/code/CMakeLists.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/code/CMakeLists.txt b/cpp/code/CMakeLists.txt index 0353d420..4979e84b 100644 --- a/cpp/code/CMakeLists.txt +++ b/cpp/code/CMakeLists.txt @@ -54,6 +54,10 @@ else() find_package(ArrowDataset REQUIRED) find_package(ArrowFlight REQUIRED) find_package(Parquet REQUIRED) + + # Add protobuf to flight + find_package(Threads) + find_package(gRPC CONFIG REQUIRED) endif() if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") @@ -99,11 +103,6 @@ recipe(creating_arrow_objects) recipe(datasets) recipe(flight) - -# Add protobuf to flight -find_package(gRPC CONFIG REQUIRED) -find_package(Threads) - set(PROTO_FILES protos/helloworld.proto ) From 693e30793090df8d7e0d578bd9c7c0d1291d7cb6 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 27 Jan 2025 19:58:59 -0800 Subject: [PATCH 5/9] Add Linux workarounds --- cpp/code/CMakeLists.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cpp/code/CMakeLists.txt b/cpp/code/CMakeLists.txt index 4979e84b..b045da77 100644 --- a/cpp/code/CMakeLists.txt +++ 
b/cpp/code/CMakeLists.txt @@ -49,6 +49,19 @@ if(DEFINED ENV{ARROW_NIGHTLY}) ) FetchContent_MakeAvailable(Arrow) + + # These are some Linux-only things the FetchContent build needs in order + # to compile + file(INSTALL "${arrow_BINARY_DIR}/src/arrow/util/config.h" + DESTINATION "${arrow_SOURCE_DIR}/cpp/src/arrow/util") + file(INSTALL "${arrow_BINARY_DIR}/src/parquet/parquet_version.h" + DESTINATION "${arrow_SOURCE_DIR}/cpp/src/parquet") + + # NOTE: I had to add this to make headers available which I find weird + target_include_directories( + arrow_shared + INTERFACE "$<BUILD_INTERFACE:${arrow_SOURCE_DIR}/cpp/src>" + ) else() find_package(Arrow REQUIRED) find_package(ArrowDataset REQUIRED) From f910f8e967141b996df1f3f20b036809224ea32b Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 27 Jan 2025 20:39:58 -0800 Subject: [PATCH 6/9] Add -Wno-unused-parameter I was surprised to need this so it's possible something else is wrong here. --- cpp/code/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/code/CMakeLists.txt b/cpp/code/CMakeLists.txt index b045da77..a0c4858a 100644 --- a/cpp/code/CMakeLists.txt +++ b/cpp/code/CMakeLists.txt @@ -62,6 +62,8 @@ if(DEFINED ENV{ARROW_NIGHTLY}) arrow_shared INTERFACE "$<BUILD_INTERFACE:${arrow_SOURCE_DIR}/cpp/src>" ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter") + else() find_package(Arrow REQUIRED) find_package(ArrowDataset REQUIRED) From 216d5190d0e02f9d24b919e2417f093f3c27d7be Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 27 Jan 2025 20:40:35 -0800 Subject: [PATCH 7/9] Move find_package calls back --- cpp/code/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/code/CMakeLists.txt b/cpp/code/CMakeLists.txt index a0c4858a..cb833572 100644 --- a/cpp/code/CMakeLists.txt +++ b/cpp/code/CMakeLists.txt @@ -69,10 +69,6 @@ else() find_package(ArrowDataset REQUIRED) find_package(ArrowFlight REQUIRED) find_package(Parquet REQUIRED) - - # Add protobuf to flight - find_package(Threads) - find_package(gRPC CONFIG REQUIRED) endif() if 
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") @@ -118,6 +114,10 @@ recipe(creating_arrow_objects) recipe(datasets) recipe(flight) +# Add protobuf to flight +find_package(Threads) +find_package(gRPC CONFIG REQUIRED) + set(PROTO_FILES protos/helloworld.proto ) From 88ad05a6bb87a79812b596cb5b42e41ecaf859a2 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 27 Jan 2025 20:40:47 -0800 Subject: [PATCH 8/9] Remove comment --- cpp/code/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/code/CMakeLists.txt b/cpp/code/CMakeLists.txt index cb833572..7e41a23a 100644 --- a/cpp/code/CMakeLists.txt +++ b/cpp/code/CMakeLists.txt @@ -56,8 +56,6 @@ if(DEFINED ENV{ARROW_NIGHTLY}) DESTINATION "${arrow_SOURCE_DIR}/cpp/src/arrow/util") file(INSTALL "${arrow_BINARY_DIR}/src/parquet/parquet_version.h" DESTINATION "${arrow_SOURCE_DIR}/cpp/src/parquet") - - # NOTE: I had to add this to make headers available which I find weird target_include_directories( arrow_shared INTERFACE "$<BUILD_INTERFACE:${arrow_SOURCE_DIR}/cpp/src>" From 9cc87cbc02d80f49f9a20e54b94afbdc65b4f11c Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Mon, 27 Jan 2025 21:16:48 -0800 Subject: [PATCH 9/9] Add cmake to dev env file --- cpp/dev.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/dev.yml b/cpp/dev.yml index 384d87d0..91702f5f 100644 --- a/cpp/dev.yml +++ b/cpp/dev.yml @@ -20,6 +20,7 @@ channels: dependencies: - python=3.9 - compilers + - cmake - sphinx - gtest - gmock