From 30989c1814a1bf576666fe561c6a4a9738077324 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 27 Nov 2025 14:19:48 +0100 Subject: [PATCH 1/2] GH-48275: [C++][Dev] Allow choosing verbosity when fuzzing Introduce an environment variable `ARROW_FUZZING_VERBOSITY` that allows choosing whether fuzzing errors are logged or silenced. --- cpp/src/arrow/util/fuzz_internal.cc | 25 +++++++++++++++++++++++-- cpp/src/arrow/util/fuzz_internal.h | 2 +- cpp/src/arrow/util/io_util.cc | 6 +++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/util/fuzz_internal.cc b/cpp/src/arrow/util/fuzz_internal.cc index c4eddc51265..1a34964452b 100644 --- a/cpp/src/arrow/util/fuzz_internal.cc +++ b/cpp/src/arrow/util/fuzz_internal.cc @@ -17,9 +17,14 @@ #include "arrow/util/fuzz_internal.h" +#include +#include + #include "arrow/memory_pool.h" #include "arrow/status.h" +#include "arrow/util/io_util.h" #include "arrow/util/logging_internal.h" +#include "arrow/util/value_parsing.h" namespace arrow::internal { @@ -30,8 +35,24 @@ MemoryPool* fuzzing_memory_pool() { } void LogFuzzStatus(const Status& st, const uint8_t* data, int64_t size) { - // Most fuzz inputs will be invalid and generate errors, only log potential OOMs - if (st.IsOutOfMemory()) { + static const int kVerbosity = []() { + auto maybe_env_value = GetEnvVar("ARROW_FUZZING_VERBOSITY"); + if (maybe_env_value.status().IsKeyError()) { + return 0; + } + auto env_value = std::move(maybe_env_value).ValueOrDie(); + int32_t value; + if (!ParseValue(env_value.data(), env_value.length(), &value)) { + Status::Invalid("Invalid value for ARROW_FUZZING_VERBOSITY: '", env_value, "'") + .Abort(); + } + return value; + }(); + + if (kVerbosity >= 1) { + ARROW_LOG(WARNING) << "Fuzzing input with size=" << size + << " failed: " << st.ToString(); + } else if (st.IsOutOfMemory()) { ARROW_LOG(WARNING) << "Fuzzing input with size=" << size << " hit allocation failure: " << st.ToString(); } diff --git 
a/cpp/src/arrow/util/fuzz_internal.h b/cpp/src/arrow/util/fuzz_internal.h index f3f104eec60..5280b7ec1ff 100644 --- a/cpp/src/arrow/util/fuzz_internal.h +++ b/cpp/src/arrow/util/fuzz_internal.h @@ -31,7 +31,7 @@ constexpr int64_t kFuzzingMemoryLimit = 2200LL * 1000 * 1000; /// Return a memory pool that will not allocate more than kFuzzingMemoryLimit bytes. ARROW_EXPORT MemoryPool* fuzzing_memory_pool(); -// Optionally log the outcome of fuzzing an input +/// Optionally log the outcome of fuzzing an input ARROW_EXPORT void LogFuzzStatus(const Status&, const uint8_t* data, int64_t size); } // namespace arrow::internal diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 50f3bd9a15e..a761dce972a 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -1768,13 +1768,13 @@ Result GetEnvVar(const char* name) { if (res >= bufsize) { return Status::CapacityError("environment variable value too long"); } else if (res == 0) { - return Status::KeyError("environment variable undefined"); + return Status::KeyError("environment variable '", name, "' undefined"); } return std::string(c_str); #else char* c_str = getenv(name); if (c_str == nullptr) { - return Status::KeyError("environment variable undefined"); + return Status::KeyError("environment variable '", name, "' undefined"); } return std::string(c_str); #endif @@ -1793,7 +1793,7 @@ Result GetEnvVarNative(const std::string& name) { if (res >= bufsize) { return Status::CapacityError("environment variable value too long"); } else if (res == 0) { - return Status::KeyError("environment variable undefined"); + return Status::KeyError("environment variable '", name, "' undefined"); } return NativePathString(w_str); } From 4ef91040e3e53ec2f73d0b4e6a71b0402e4443ee Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 27 Nov 2025 14:51:00 +0100 Subject: [PATCH 2/2] Switch env var APIs to std::string_view --- cpp/src/arrow/util/io_util.cc | 35 +++++++++++------------------------ 
cpp/src/arrow/util/io_util.h | 18 ++++++------------ 2 files changed, 17 insertions(+), 36 deletions(-) diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index a761dce972a..b3ef48d2965 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -1758,13 +1758,13 @@ Status FileTruncate(int fd, const int64_t size) { // Environment variables // -Result GetEnvVar(const char* name) { +Result GetEnvVar(std::string_view name) { #ifdef _WIN32 // On Windows, getenv() reads an early copy of the process' environment // which doesn't get updated when SetEnvironmentVariable() is called. constexpr int32_t bufsize = 2000; char c_str[bufsize]; - auto res = GetEnvironmentVariableA(name, c_str, bufsize); + auto res = GetEnvironmentVariableA(std::string(name).c_str(), c_str, bufsize); if (res >= bufsize) { return Status::CapacityError("environment variable value too long"); } else if (res == 0) { @@ -1772,7 +1772,7 @@ Result GetEnvVar(const char* name) { } return std::string(c_str); #else - char* c_str = getenv(name); + char* c_str = getenv(std::string(name).c_str()); if (c_str == nullptr) { return Status::KeyError("environment variable '", name, "' undefined"); } @@ -1780,10 +1780,8 @@ Result GetEnvVar(const char* name) { #endif } -Result GetEnvVar(const std::string& name) { return GetEnvVar(name.c_str()); } - #ifdef _WIN32 -Result GetEnvVarNative(const std::string& name) { +Result GetEnvVarNative(std::string_view name) { NativePathString w_name; constexpr int32_t bufsize = 2000; wchar_t w_str[bufsize]; @@ -1798,28 +1796,23 @@ Result GetEnvVarNative(const std::string& name) { return NativePathString(w_str); } -Result GetEnvVarNative(const char* name) { - return GetEnvVarNative(std::string(name)); -} - #else -Result GetEnvVarNative(const std::string& name) { +Result GetEnvVarNative(std::string_view name) { return GetEnvVar(name); } -Result GetEnvVarNative(const char* name) { return GetEnvVar(name); } #endif -Status SetEnvVar(const char* name, const char* value) { 
+Status SetEnvVar(std::string_view name, std::string_view value) { #ifdef _WIN32 - if (SetEnvironmentVariableA(name, value)) { + if (SetEnvironmentVariableA(std::string(name).c_str(), std::string(value).c_str())) { return Status::OK(); } else { return Status::Invalid("failed setting environment variable"); } #else - if (setenv(name, value, 1) == 0) { + if (setenv(std::string(name).c_str(), std::string(value).c_str(), 1) == 0) { return Status::OK(); } else { return Status::Invalid("failed setting environment variable"); @@ -1827,19 +1820,15 @@ Status SetEnvVar(const char* name, const char* value) { #endif } -Status SetEnvVar(const std::string& name, const std::string& value) { - return SetEnvVar(name.c_str(), value.c_str()); -} - -Status DelEnvVar(const char* name) { +Status DelEnvVar(std::string_view name) { #ifdef _WIN32 - if (SetEnvironmentVariableA(name, nullptr)) { + if (SetEnvironmentVariableA(std::string(name).c_str(), nullptr)) { return Status::OK(); } else { return Status::Invalid("failed deleting environment variable"); } #else - if (unsetenv(name) == 0) { + if (unsetenv(std::string(name).c_str()) == 0) { return Status::OK(); } else { return Status::Invalid("failed deleting environment variable"); @@ -1847,8 +1836,6 @@ Status DelEnvVar(const char* name) { #endif } -Status DelEnvVar(const std::string& name) { return DelEnvVar(name.c_str()); } - // // Temporary directories // diff --git a/cpp/src/arrow/util/io_util.h b/cpp/src/arrow/util/io_util.h index e9f218b5205..56bd4eff3d6 100644 --- a/cpp/src/arrow/util/io_util.h +++ b/cpp/src/arrow/util/io_util.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -238,23 +239,16 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, ARROW_EXPORT Status MemoryAdviseWillNeed(const std::vector& regions); +// Returns KeyError if the environment variable doesn't exist ARROW_EXPORT -Result GetEnvVar(const char* name); +Result GetEnvVar(std::string_view name); ARROW_EXPORT -Result GetEnvVar(const std::string& name); -ARROW_EXPORT -Result 
GetEnvVarNative(const char* name); -ARROW_EXPORT -Result GetEnvVarNative(const std::string& name); +Result GetEnvVarNative(std::string_view name); ARROW_EXPORT -Status SetEnvVar(const char* name, const char* value); -ARROW_EXPORT -Status SetEnvVar(const std::string& name, const std::string& value); -ARROW_EXPORT -Status DelEnvVar(const char* name); +Status SetEnvVar(std::string_view name, std::string_view value); ARROW_EXPORT -Status DelEnvVar(const std::string& name); +Status DelEnvVar(std::string_view name); ARROW_EXPORT std::string ErrnoMessage(int errnum);