Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NOTICE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ This product includes software from the mman-win32 project
* Copyright https://code.google.com/p/mman-win32/
* Licensed under the MIT License;

This product includes software from the Fast Static Symbol Table (FSST) project
* Copyright (c) 2018-2020 CWI, TU Munich, FSU Jena
* Licensed under the MIT License;
* https://github.com/cwida/fsst

This product includes software from the LevelDB project
* Copyright (c) 2011 The LevelDB Authors. All rights reserved.
* Use of this source code is governed by a BSD-style license that can be
Expand Down
30 changes: 30 additions & 0 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we bundle a dependency (copy to cpp/src/arrow/vendored/), we don't need to change this file.
See also:

# Source files of the third-party code kept under vendored/ (base64, datetime,
# double-conversion, musl strptime and uriparser). They are all handed to a
# single arrow_add_object_library() call below.
set(ARROW_VENDORED_SRCS
vendored/base64.cpp
vendored/datetime.cpp
vendored/double-conversion/bignum-dtoa.cc
vendored/double-conversion/bignum.cc
vendored/double-conversion/cached-powers.cc
vendored/double-conversion/double-to-string.cc
vendored/double-conversion/fast-dtoa.cc
vendored/double-conversion/fixed-dtoa.cc
vendored/double-conversion/string-to-double.cc
vendored/double-conversion/strtod.cc
vendored/musl/strptime.c
vendored/uriparser/UriCommon.c
vendored/uriparser/UriCompare.c
vendored/uriparser/UriEscape.c
vendored/uriparser/UriFile.c
vendored/uriparser/UriIp4.c
vendored/uriparser/UriIp4Base.c
vendored/uriparser/UriMemory.c
vendored/uriparser/UriNormalize.c
vendored/uriparser/UriNormalizeBase.c
vendored/uriparser/UriParse.c
vendored/uriparser/UriParseBase.c
vendored/uriparser/UriQuery.c
vendored/uriparser/UriRecompose.c
vendored/uriparser/UriResolve.c
vendored/uriparser/UriShorten.c)
# Apple platforms additionally compile vendored/datetime/ios.mm.
if(APPLE)
list(APPEND ARROW_VENDORED_SRCS vendored/datetime/ios.mm)
endif()
# Keep datetime.cpp out of unity-build batches.
set_source_files_properties(vendored/datetime.cpp PROPERTIES SKIP_UNITY_BUILD_INCLUSION
ON)
# NOTE(review): arrow_add_object_library() is defined elsewhere; presumably it
# is what populates ARROW_VENDORED_TARGETS used in the loop below - confirm.
arrow_add_object_library(ARROW_VENDORED ${ARROW_VENDORED_SRCS})
# Disable DLL exports in vendored uriparser library
foreach(ARROW_VENDORED_TARGET ${ARROW_VENDORED_TARGETS})
target_compile_definitions(${ARROW_VENDORED_TARGET} PRIVATE URI_STATIC_BUILD)
endforeach()

Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
Boost
Brotli
BZip2
fsst
c-ares
gflags
glog
Expand All @@ -72,6 +73,10 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
ZLIB
zstd)

# Default the FSST dependency source to "BUNDLED". Any other value is rejected
# with a fatal error later in this file when ARROW_WITH_FSST is enabled.
set(fsst_SOURCE
"BUNDLED"
CACHE STRING "Source of fsst dependency")

# For backward compatibility. We use "BOOST_SOURCE" if "Boost_SOURCE"
# isn't specified and "BOOST_SOURCE" is specified.
# We renamed "BOOST" dependency name to "Boost" in 3.0.0 because
Expand Down Expand Up @@ -183,6 +188,8 @@ macro(build_dependency DEPENDENCY_NAME)
build_brotli()
elseif("${DEPENDENCY_NAME}" STREQUAL "BZip2")
build_bzip2()
elseif("${DEPENDENCY_NAME}" STREQUAL "fsst")
build_fsst()
elseif("${DEPENDENCY_NAME}" STREQUAL "c-ares")
build_cares()
elseif("${DEPENDENCY_NAME}" STREQUAL "gflags")
Expand Down Expand Up @@ -382,6 +389,7 @@ endif()
# Enabling Parquet switches on the RapidJSON, Thrift and FSST dependencies.
if(ARROW_PARQUET)
set(ARROW_WITH_RAPIDJSON ON)
set(ARROW_WITH_THRIFT ON)
set(ARROW_WITH_FSST ON)
endif()

if(ARROW_WITH_THRIFT)
Expand Down Expand Up @@ -2604,6 +2612,28 @@ if(ARROW_USE_XSIMD)
endif()
endif()

# Publish the location of the vendored FSST sources.
#
# Nothing is compiled here. The function only sets, in the caller's scope:
#   ARROW_FSST_INCLUDE_DIR - the thirdparty/fsst directory
#   ARROW_FSST_SOURCES     - list of the FSST .cpp files found in that directory
#   FSST_VENDORED          - set to TRUE
function(build_fsst)
  message(STATUS "Configuring vendored FSST sources")

  # Function-local helper; it does not leak into the caller's scope.
  set(fsst_root "${ARROW_SOURCE_DIR}/thirdparty/fsst")

  set(FSST_VENDORED TRUE PARENT_SCOPE)
  set(ARROW_FSST_INCLUDE_DIR "${fsst_root}" PARENT_SCOPE)
  # Separate arguments are joined with ';', giving the same two-element list.
  set(ARROW_FSST_SOURCES
      "${fsst_root}/libfsst.cpp"
      "${fsst_root}/fsst_avx512.cpp"
      PARENT_SCOPE)
endfunction()

# FSST is only supported as bundled sources: reject every other fsst_SOURCE
# value before resolving the dependency.
if(ARROW_WITH_FSST)
  # Quote the expansion so the comparison is made on the variable's value,
  # matching the quoted STREQUAL checks used elsewhere in this file.
  if(NOT "${fsst_SOURCE}" STREQUAL "BUNDLED")
    # Report the rejected value so a misconfiguration is easy to spot.
    message(FATAL_ERROR
            "FSST must currently be built from source. Set fsst_SOURCE=BUNDLED. (Current value: \"${fsst_SOURCE}\")"
    )
  endif()
  resolve_dependency(fsst IS_RUNTIME_DEPENDENCY FALSE)
endif()

macro(build_zlib)
message(STATUS "Building ZLIB from source")

Expand Down
42 changes: 40 additions & 2 deletions cpp/src/parquet/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,25 @@ function(ADD_PARQUET_TEST REL_TEST_NAME)
${ARGN})

set(TEST_ARGUMENTS PREFIX "parquet" LABELS "parquet-tests")
set(_PARQUET_TEST_EXTRA_ARGS ${ARG_UNPARSED_ARGUMENTS})
if(PARQUET_TEST_EXTRA_INCLUDES)
list(APPEND _PARQUET_TEST_EXTRA_ARGS EXTRA_INCLUDES ${PARQUET_TEST_EXTRA_INCLUDES})
endif()

if(ARROW_TEST_LINKAGE STREQUAL "static")
add_test_case(${REL_TEST_NAME}
STATIC_LINK_LIBS
parquet_static
${PARQUET_TEST_LINK_LIBS}
${TEST_ARGUMENTS}
${ARG_UNPARSED_ARGUMENTS})
${_PARQUET_TEST_EXTRA_ARGS})
else()
add_test_case(${REL_TEST_NAME}
STATIC_LINK_LIBS
parquet_shared
${PARQUET_TEST_LINK_LIBS}
${TEST_ARGUMENTS}
${ARG_UNPARSED_ARGUMENTS})
${_PARQUET_TEST_EXTRA_ARGS})
endif()
endfunction()

Expand Down Expand Up @@ -134,6 +138,9 @@ elseif(NOT MSVC)
list(APPEND PARQUET_TEST_LINK_LIBS ${CMAKE_DL_LIBS})
endif()

# Start both include-directory lists with no value; later code in this file
# appends to them with list(APPEND).
set(PARQUET_TEST_EXTRA_INCLUDES)
set(PARQUET_PRIVATE_INCLUDE_DIRS)

#
# Generated Thrift sources
set(PARQUET_THRIFT_SOURCE_DIR "${ARROW_SOURCE_DIR}/src/generated/")
Expand Down Expand Up @@ -191,6 +198,28 @@ set(PARQUET_SRCS
stream_writer.cc
types.cc)

# Vendored FSST: make its directory visible to the Parquet library and to the
# Parquet tests.
if(DEFINED ARROW_FSST_INCLUDE_DIR)
  list(APPEND PARQUET_PRIVATE_INCLUDE_DIRS ${ARROW_FSST_INCLUDE_DIR})
  list(APPEND PARQUET_TEST_EXTRA_INCLUDES ${ARROW_FSST_INCLUDE_DIR})
endif()

# Vendored FSST: compile its sources as part of the Parquet library.
if(DEFINED ARROW_FSST_SOURCES)
  list(APPEND PARQUET_SRCS ${ARROW_FSST_SOURCES})

  # Options applied to the FSST source files only:
  #  - Clang/AppleClang: silence shorten-64-to-32 (and never treat it as an error)
  #  - Clang/AppleClang/GNU: silence missing-declarations (same)
  #  - MSVC: disable warning 4244
  #  - Windows with a non-MSVC compiler: force-include fsst_compat.h from this
  #    directory via "-include <path>"
  set_property(SOURCE ${ARROW_FSST_SOURCES}
               APPEND
               PROPERTY COMPILE_OPTIONS
                        "$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:-Wno-error=shorten-64-to-32;-Wno-shorten-64-to-32>"
                        "$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:-Wno-error=missing-declarations;-Wno-missing-declarations>"
                        "$<$<CXX_COMPILER_ID:MSVC>:/wd4244>"
                        "$<$<AND:$<PLATFORM_ID:Windows>,$<NOT:$<CXX_COMPILER_ID:MSVC>>>:-include>"
                        "$<$<AND:$<PLATFORM_ID:Windows>,$<NOT:$<CXX_COMPILER_ID:MSVC>>>:${CMAKE_CURRENT_SOURCE_DIR}/fsst_compat.h>"
  )
endif()

if(ARROW_HAVE_RUNTIME_AVX2)
# AVX2 is used as a proxy for BMI2.
list(APPEND PARQUET_SRCS level_comparison_avx2.cc level_conversion_bmi2.cc)
Expand Down Expand Up @@ -306,6 +335,15 @@ add_arrow_lib(parquet
STATIC_INSTALL_INTERFACE_LIBS
${PARQUET_STATIC_INSTALL_INTERFACE_LIBS})

# Add the private include directories (if any were collected) to whichever of
# the Parquet library targets exist in this configuration.
foreach(_parquet_lib IN ITEMS parquet_objlib parquet_shared parquet_static)
  if(PARQUET_PRIVATE_INCLUDE_DIRS AND TARGET ${_parquet_lib})
    target_include_directories(${_parquet_lib}
                               PRIVATE ${PARQUET_PRIVATE_INCLUDE_DIRS})
  endif()
endforeach()

if(WIN32 AND NOT (ARROW_TEST_LINKAGE STREQUAL "static"))
add_library(parquet_test_support STATIC
"${PARQUET_THRIFT_SOURCE_DIR}/parquet_types.cpp")
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,8 @@ class ColumnReaderImplBase {
case Encoding::RLE:
case Encoding::DELTA_BINARY_PACKED:
case Encoding::DELTA_BYTE_ARRAY:
case Encoding::DELTA_LENGTH_BYTE_ARRAY: {
case Encoding::DELTA_LENGTH_BYTE_ARRAY:
case Encoding::FSST: {
auto decoder = MakeTypedDecoder<DType>(encoding, descr_, pool_);
current_decoder_ = decoder.get();
decoders_[static_cast<int>(encoding)] = std::move(decoder);
Expand Down
Loading
Loading