Skip to content

Commit 546ecad

Browse files
carlopiMytherin
authored andcommitted
Rework FileSystem abstraction, using FileOpenFlags and optional_ptr<FileOpener>
1 parent 26245e6 commit 546ecad

File tree

12 files changed

+164
-210
lines changed

12 files changed

+164
-210
lines changed

duckdb.patch

Lines changed: 70 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -1,94 +1,99 @@
11
diff --git a/CMakeLists.txt b/CMakeLists.txt
2-
index aebc060c3b..12611048b0 100644
2+
index 7b1eaa7adf..036a07a56a 100644
33
--- a/CMakeLists.txt
44
+++ b/CMakeLists.txt
5-
@@ -83,6 +83,13 @@ if (EXTENSION_STATIC_BUILD AND "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
6-
endif()
5+
@@ -25,6 +25,7 @@ project(DuckDB)
6+
find_package(Threads REQUIRED)
7+
8+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
9+
+set(DUCKDB_EXPLICIT_PLATFORM "nop")
10+
11+
set (CMAKE_CXX_STANDARD 11)
12+
13+
@@ -84,6 +85,14 @@ endif()
714

815
option(DISABLE_UNITY "Disable unity builds." FALSE)
16+
917
+option(USE_WASM_THREADS "Should threads be used" FALSE)
1018
+if (${USE_WASM_THREADS})
1119
+ set(WASM_THREAD_FLAGS
1220
+ -pthread
1321
+ -sSHARED_MEMORY=1
1422
+ )
1523
+endif()
16-
24+
+
1725
option(FORCE_COLORED_OUTPUT
1826
"Always produce ANSI-colored output (GNU/Clang only)." FALSE)
19-
@@ -805,7 +812,6 @@ function(build_loadable_extension_directory NAME OUTPUT_DIRECTORY PARAMETERS)
20-
${TARGET_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE
21-
"${CMAKE_BINARY_DIR}/${OUTPUT_DIRECTORY}")
22-
endif()
23-
-
24-
if(EMSCRIPTEN)
25-
# Copy file.duckdb_extension.wasm to file.duckdb_extension.wasm.lib
26-
add_custom_command(
27-
@@ -817,7 +823,7 @@ function(build_loadable_extension_directory NAME OUTPUT_DIRECTORY PARAMETERS)
27+
if(${FORCE_COLORED_OUTPUT})
28+
@@ -711,6 +720,7 @@ if (NOT EXTENSION_CONFIG_BUILD AND NOT ${EXTENSION_TESTS_ONLY} AND NOT CLANG_TID
29+
if (NOT Python3_FOUND)
30+
MESSAGE(STATUS "Could not find python3, create extension directory step will be skipped")
31+
else()
32+
+ message(STATUS "hi from ${CMAKE_CURRENT_BINARY_DIR}")
33+
add_custom_target(
34+
duckdb_local_extension_repo ALL
35+
COMMAND
36+
@@ -813,7 +823,7 @@ function(build_loadable_extension_directory NAME OUTPUT_DIRECTORY EXTENSION_VERS
2837
add_custom_command(
2938
TARGET ${TARGET_NAME}
3039
POST_BUILD
3140
- COMMAND emcc $<TARGET_FILE:${TARGET_NAME}>.lib -o $<TARGET_FILE:${TARGET_NAME}> -sSIDE_MODULE=1 -O3
32-
+ COMMAND emcc $<TARGET_FILE:${TARGET_NAME}>.lib -o $<TARGET_FILE:${TARGET_NAME}> -O3 -sSIDE_MODULE=2 -sEXPORTED_FUNCTIONS="_${NAME}_init,_${NAME}_version" ${WASM_THREAD_FLAGS}
41+
+ COMMAND emcc $<TARGET_FILE:${TARGET_NAME}>.lib -o $<TARGET_FILE:${TARGET_NAME}> -O3 -sSIDE_MODULE=2 -sEXPORTED_FUNCTIONS="_${NAME}_init" ${WASM_THREAD_FLAGS}
3342
)
3443
endif()
35-
endfunction()
36-
diff --git a/extension/httpfs/s3fs.cpp b/extension/httpfs/s3fs.cpp
37-
index aac3169c59..fbe59fc13c 100644
38-
--- a/extension/httpfs/s3fs.cpp
39-
+++ b/extension/httpfs/s3fs.cpp
40-
@@ -203,7 +203,7 @@ unique_ptr<S3AuthParams> S3AuthParams::ReadFromStoredCredentials(FileOpener *ope
44+
add_custom_command(
45+
@@ -1220,6 +1230,7 @@ if(BUILD_PYTHON)
46+
add_extension_dependencies(duckdb_python)
47+
endif()
4148

42-
// Return the stored credentials
43-
const auto &secret = secret_match.GetSecret();
44-
- const auto &kv_secret = dynamic_cast<const KeyValueSecret &>(secret);
45-
+ const auto &kv_secret = checked_dynamic_cast<const KeyValueSecret &>(secret);
49+
+message(STATUS "${DUCKDB_EXPLICIT_PLATFORM} is the platform")
50+
if(NOT DUCKDB_EXPLICIT_PLATFORM)
51+
set(VERSION_SOURCES tools/utils/test_platform.cpp)
4652

47-
return make_uniq<S3AuthParams>(S3SecretHelper::GetParams(kv_secret));
48-
}
49-
diff --git a/src/include/duckdb/common/exception.hpp b/src/include/duckdb/common/exception.hpp
50-
index 2e45cb92af..46967277a7 100644
51-
--- a/src/include/duckdb/common/exception.hpp
52-
+++ b/src/include/duckdb/common/exception.hpp
53-
@@ -16,6 +16,7 @@
53+
diff --git a/scripts/create_local_extension_repo.py b/scripts/create_local_extension_repo.py
54+
index 8cf71abec0..9875f5fa23 100644
55+
--- a/scripts/create_local_extension_repo.py
56+
+++ b/scripts/create_local_extension_repo.py
57+
@@ -28,6 +28,7 @@ if os.name == 'nt':
5458

55-
#include <vector>
56-
#include <stdexcept>
57-
+#include <iostream>
59+
with open(duckdb_platform_out, 'r') as f:
60+
lines = f.readlines()
61+
+ print(lines)
62+
duckdb_platform = lines[0]
5863

59-
namespace duckdb {
60-
enum class PhysicalType : uint8_t;
61-
@@ -363,4 +364,13 @@ public:
62-
DUCKDB_API explicit ParameterNotResolvedException();
63-
};
64+
# Create destination path
65+
diff --git a/src/include/duckdb/common/file_open_flags.hpp b/src/include/duckdb/common/file_open_flags.hpp
66+
index d0509a214b..f1689b2975 100644
67+
--- a/src/include/duckdb/common/file_open_flags.hpp
68+
+++ b/src/include/duckdb/common/file_open_flags.hpp
69+
@@ -100,8 +100,8 @@ public:
70+
return flags & FILE_FLAGS_PARALLEL_ACCESS;
71+
}
6472

65-
+ template <typename A, typename B>
66-
+ A&& checked_dynamic_cast (B&& target) {
67-
+ if (dynamic_cast<typename std::remove_reference<A>::type*>(&target)) {
68-
+ return dynamic_cast<A&>(target);
69-
+ }
70-
+ std::cout <<"\n"<< "checked_dynamic_cast between " << typeid(A).name() << "\tand " << typeid(B).name() << " ERRORRED\n";
71-
+ throw FatalException("Failed checked_dynamic_cast");
72-
+ }
73-
+
74-
} // namespace duckdb
73+
-private:
74+
idx_t flags = 0;
75+
+private:
76+
FileLockType lock = FileLockType::NO_LOCK;
77+
FileCompressionType compression = FileCompressionType::UNCOMPRESSED;
78+
};
7579
diff --git a/src/main/extension/extension_load.cpp b/src/main/extension/extension_load.cpp
76-
index a001f2b997..c532db1b2d 100644
80+
index 002cf4aefe..5ef4b667a9 100644
7781
--- a/src/main/extension/extension_load.cpp
7882
+++ b/src/main/extension/extension_load.cpp
79-
@@ -110,6 +110,7 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const str
83+
@@ -143,6 +143,7 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const str
8084
} else {
8185
filename = fs.ExpandPath(filename);
8286
}
8387
+#ifndef WASM_LOADABLE_EXTENSIONS
8488
if (!fs.FileExists(filename)) {
8589
string message;
8690
bool exact_match = ExtensionHelper::CreateSuggestions(extension, message);
87-
@@ -119,6 +120,181 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const str
91+
@@ -152,7 +153,183 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const str
8892
error = StringUtil::Format("Extension \"%s\" not found.\n%s", filename, message);
8993
return false;
9094
}
9195
+#endif
96+
+
9297
+#ifdef WASM_LOADABLE_EXTENSIONS
9398
+ auto basename = fs.ExtractBaseName(filename);
9499
+ char *exe = NULL;
@@ -160,7 +165,7 @@ index a001f2b997..c532db1b2d 100644
160165
+ }
161166
+
162167
+ var valid = WebAssembly.validate(uInt8Array);
163-
+ var len = uInt8Array.byteLength;
168+
+ var len = uInt8Array.byteLength;
164169
+ var fileOnWasmHeap = _malloc(len + 4);
165170
+
166171
+ var properArray = new Uint8Array(uInt8Array);
@@ -182,7 +187,7 @@ index a001f2b997..c532db1b2d 100644
182187
+ len -= LEN123[3];
183188
+ len /= 256;
184189
+ Module.HEAPU8.set(LEN123, fileOnWasmHeap);
185-
+ //FIXME: found how to expose those to the logger interface
190+
+ //FIXME: found how to expose those to the logger interface
186191
+ //console.log(LEN123);
187192
+ //console.log(properArray);
188193
+ //console.log(new Uint8Array(Module.HEAPU8, fileOnWasmHeap, len+4));
@@ -213,7 +218,7 @@ index a001f2b997..c532db1b2d 100644
213218
+ LEN *= 256;
214219
+ LEN += ((uint8_t *)exe)[1];
215220
+ LEN *= 256;
216-
+ LEN += ((uint8_t *)exe)[0];
221+
+ LEN += ((uint8_t *)exe)[0];
217222
+ auto signature_offset = LEN - signature.size();
218223
+
219224
+ const idx_t maxLenChunks = 1024ULL * 1024ULL;
@@ -244,7 +249,7 @@ index a001f2b997..c532db1b2d 100644
244249
+
245250
+ string two_level_hash;
246251
+ ComputeSHA256String(hash_concatenation, &two_level_hash);
247-
+
252+
248253
+ for (idx_t j = 0; j < signature.size(); j++) {
249254
+ signature[j] = ((uint8_t *)exe)[4 + signature_offset + j];
250255
+ }
@@ -262,17 +267,19 @@ index a001f2b997..c532db1b2d 100644
262267
+ if (exe) {
263268
+ free(exe);
264269
+ }
270+
+std::string extension_version="";
265271
+#else
266-
if (!config.options.allow_unsigned_extensions) {
267-
auto handle = fs.OpenFile(filename, FileFlags::FILE_FLAGS_READ);
272+
string metadata_segment;
273+
metadata_segment.resize(512);
268274

269-
@@ -179,26 +355,9 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const str
270-
throw IOException(config.error_manager->FormatException(ErrorType::UNSIGNED_EXTENSION, filename));
271-
}
272-
}
275+
@@ -279,27 +456,10 @@ bool ExtensionHelper::TryInitialLoad(DBConfig &config, FileSystem &fs, const str
276+
auto number_metadata_fields = 3;
277+
D_ASSERT(number_metadata_fields == 3); // Currently hardcoded value
278+
metadata_field.resize(number_metadata_fields + 1);
279+
-
273280
+#endif
274281
auto filebase = fs.ExtractBaseName(filename);
275-
-
282+
276283
#ifdef WASM_LOADABLE_EXTENSIONS
277284
- EM_ASM(
278285
- {
@@ -294,57 +301,3 @@ index a001f2b997..c532db1b2d 100644
294301
#else
295302
auto dopen_from = filename;
296303
#endif
297-
diff --git a/src/planner/operator/logical_delete.cpp b/src/planner/operator/logical_delete.cpp
298-
index a028a1ea6f..d9322a4ac7 100644
299-
--- a/src/planner/operator/logical_delete.cpp
300-
+++ b/src/planner/operator/logical_delete.cpp
301-
@@ -14,7 +14,7 @@ LogicalDelete::LogicalDelete(TableCatalogEntry &table, idx_t table_index)
302-
LogicalDelete::LogicalDelete(ClientContext &context, const unique_ptr<CreateInfo> &table_info)
303-
: LogicalOperator(LogicalOperatorType::LOGICAL_DELETE),
304-
table(Catalog::GetEntry<TableCatalogEntry>(context, table_info->catalog, table_info->schema,
305-
- dynamic_cast<CreateTableInfo &>(*table_info).table)) {
306-
+ checked_dynamic_cast<CreateTableInfo &>(*table_info).table)) {
307-
}
308-
309-
idx_t LogicalDelete::EstimateCardinality(ClientContext &context) {
310-
diff --git a/src/planner/operator/logical_insert.cpp b/src/planner/operator/logical_insert.cpp
311-
index 3846ed0097..830126809e 100644
312-
--- a/src/planner/operator/logical_insert.cpp
313-
+++ b/src/planner/operator/logical_insert.cpp
314-
@@ -14,7 +14,7 @@ LogicalInsert::LogicalInsert(TableCatalogEntry &table, idx_t table_index)
315-
LogicalInsert::LogicalInsert(ClientContext &context, const unique_ptr<CreateInfo> table_info)
316-
: LogicalOperator(LogicalOperatorType::LOGICAL_INSERT),
317-
table(Catalog::GetEntry<TableCatalogEntry>(context, table_info->catalog, table_info->schema,
318-
- dynamic_cast<CreateTableInfo &>(*table_info).table)) {
319-
+ checked_dynamic_cast<CreateTableInfo &>(*table_info).table)) {
320-
}
321-
322-
idx_t LogicalInsert::EstimateCardinality(ClientContext &context) {
323-
diff --git a/src/planner/operator/logical_update.cpp b/src/planner/operator/logical_update.cpp
324-
index e66dd36d1a..7fe2e907e1 100644
325-
--- a/src/planner/operator/logical_update.cpp
326-
+++ b/src/planner/operator/logical_update.cpp
327-
@@ -12,7 +12,7 @@ LogicalUpdate::LogicalUpdate(TableCatalogEntry &table)
328-
LogicalUpdate::LogicalUpdate(ClientContext &context, const unique_ptr<CreateInfo> &table_info)
329-
: LogicalOperator(LogicalOperatorType::LOGICAL_UPDATE),
330-
table(Catalog::GetEntry<TableCatalogEntry>(context, table_info->catalog, table_info->schema,
331-
- dynamic_cast<CreateTableInfo &>(*table_info).table)) {
332-
+ checked_dynamic_cast<CreateTableInfo &>(*table_info).table)) {
333-
}
334-
335-
idx_t LogicalUpdate::EstimateCardinality(ClientContext &context) {
336-
diff --git a/src/storage/checkpoint_manager.cpp b/src/storage/checkpoint_manager.cpp
337-
index 1f97b538ff..5e16d11840 100644
338-
--- a/src/storage/checkpoint_manager.cpp
339-
+++ b/src/storage/checkpoint_manager.cpp
340-
@@ -576,8 +576,8 @@ void CheckpointReader::ReadTableData(ClientContext &context, Deserializer &deser
341-
}
342-
343-
// FIXME: icky downcast to get the underlying MetadataReader
344-
- auto &binary_deserializer = dynamic_cast<BinaryDeserializer &>(deserializer);
345-
- auto &reader = dynamic_cast<MetadataReader &>(binary_deserializer.GetStream());
346-
+ auto &binary_deserializer = checked_dynamic_cast<BinaryDeserializer &>(deserializer);
347-
+ auto &reader = checked_dynamic_cast<MetadataReader &>(binary_deserializer.GetStream());
348-
349-
MetadataReader table_data_reader(reader.GetMetadataManager(), table_pointer);
350-
TableDataReader data_reader(table_data_reader, bound_info);

lib/include/duckdb/web/io/buffered_filesystem.h

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include "duckdb/common/constants.hpp"
88
#include "duckdb/common/file_system.hpp"
9+
#include "duckdb/common/optional_ptr.hpp"
910
#include "duckdb/common/vector.hpp"
1011
#include "duckdb/web/io/file_page_buffer.h"
1112
#include "duckdb/web/utils/parallel.h"
@@ -85,9 +86,8 @@ class BufferedFileSystem : public duckdb::FileSystem {
8586

8687
public:
8788
/// Open a file
88-
duckdb::unique_ptr<duckdb::FileHandle> OpenFile(const string &path, uint8_t flags, FileLockType lock,
89-
FileCompressionType compression,
90-
FileOpener *opener = nullptr) override;
89+
duckdb::unique_ptr<duckdb::FileHandle> OpenFile(const string &path, FileOpenFlags flags,
90+
optional_ptr<FileOpener> opener = nullptr) override;
9191
/// Read exactly nr_bytes from the specified location in the file. Fails if nr_bytes could not be read. This is
9292
/// equivalent to calling SetFilePointer(location) followed by calling Read().
9393
void Read(duckdb::FileHandle &handle, void *buffer, int64_t nr_bytes, duckdb::idx_t location) override;
@@ -111,25 +111,30 @@ class BufferedFileSystem : public duckdb::FileSystem {
111111
void Truncate(duckdb::FileHandle &handle, int64_t new_size) override;
112112

113113
/// Check if a directory exists
114-
bool DirectoryExists(const string &directory) override { return filesystem_.DirectoryExists(directory); }
114+
bool DirectoryExists(const string &directory, optional_ptr<FileOpener> opener = nullptr) override {
115+
return filesystem_.DirectoryExists(directory, opener);
116+
}
115117
/// Create a directory if it does not exist
116-
void CreateDirectory(const std::string &directory) override { return filesystem_.CreateDirectory(directory); }
118+
void CreateDirectory(const std::string &directory, optional_ptr<FileOpener> opener = nullptr) override {
119+
return filesystem_.CreateDirectory(directory, opener);
120+
}
117121
/// Recursively remove a directory and all files in it
118-
void RemoveDirectory(const std::string &directory) override;
122+
void RemoveDirectory(const std::string &directory, optional_ptr<FileOpener> opener = nullptr) override;
119123
/// List files in a directory, invoking the callback method for each one with (filename, is_dir)
120124
bool ListFiles(const std::string &directory, const std::function<void(const std::string &, bool)> &callback,
121125
FileOpener *opener = nullptr) override {
122126
return filesystem_.ListFiles(directory, callback);
123127
}
124128
/// Move a file from source path to the target, StorageManager relies on this being an atomic action for ACID
125129
/// properties
126-
void MoveFile(const std::string &source, const std::string &target) override;
130+
void MoveFile(const std::string &source, const std::string &target,
131+
optional_ptr<FileOpener> opener = nullptr) override;
127132
/// Check if a file exists
128-
bool FileExists(const std::string &filename) override {
129-
return filesystem_.FileExists(PatchFilenameOwned(filename));
133+
bool FileExists(const std::string &filename, optional_ptr<FileOpener> opener = nullptr) override {
134+
return filesystem_.FileExists(PatchFilenameOwned(filename), opener);
130135
}
131136
/// Remove a file from disk
132-
void RemoveFile(const std::string &filename) override;
137+
void RemoveFile(const std::string &filename, optional_ptr<FileOpener> opener = nullptr) override;
133138

134139
/// Runs a glob on the file system, returning a list of matching files
135140
vector<std::string> Glob(const std::string &path, FileOpener *opener = nullptr) override {

lib/include/duckdb/web/io/file_page_buffer.h

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,7 @@ class FilePageBuffer {
8484
/// This latch ensures that reads and writes are blocked during truncation.
8585
SharedMutex file_latch = {};
8686
/// The file flags
87-
uint8_t file_flags = 0;
88-
/// The file lock type
89-
duckdb::FileLockType file_lock = duckdb::FileLockType::NO_LOCK;
87+
duckdb::FileOpenFlags file_flags = 0;
9088
/// The file size
9189
uint64_t file_size = 0;
9290
/// The user references
@@ -98,8 +96,8 @@ class FilePageBuffer {
9896
std::shared_ptr<FileStatisticsCollector> file_stats = nullptr;
9997

10098
/// Constructor
101-
BufferedFile(uint16_t file_id, std::string_view path, uint8_t flags, duckdb::FileLockType lock)
102-
: file_id(file_id), path(path), file_flags(flags), file_lock(lock) {}
99+
BufferedFile(uint16_t file_id, std::string_view path, duckdb::FileOpenFlags flags)
100+
: file_id(file_id), path(path), file_flags(flags) {}
103101
/// Get the number of references
104102
auto GetReferenceCount() const { return num_users; }
105103
};
@@ -242,7 +240,7 @@ class FilePageBuffer {
242240
/// Append n bytes
243241
void Append(void* buffer, uint64_t n, UniqueFileGuard& file_guard);
244242
/// Reopen as writeable
245-
void ReOpen(uint8_t flags, duckdb::FileLockType lock_type);
243+
void ReOpen(duckdb::FileOpenFlags flags);
246244

247245
public:
248246
/// Constructor
@@ -356,8 +354,8 @@ class FilePageBuffer {
356354
void CollectFileStatistics(std::string_view path, std::shared_ptr<FileStatisticsCollector> collector);
357355

358356
/// Open a file
359-
std::unique_ptr<FileRef> OpenFile(std::string_view path, uint8_t flags,
360-
duckdb::FileLockType lock_type = duckdb::FileLockType::NO_LOCK);
357+
std::unique_ptr<FileRef> OpenFile(std::string_view path, FileOpenFlags flags,
358+
optional_ptr<FileOpener> opener = nullptr);
361359
/// Is buffered
362360
bool BuffersFile(std::string_view path);
363361
/// Flush file matching name to disk

lib/include/duckdb/web/io/ifstream.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class InputFileStreamBuffer : public std::streambuf {
6060
/// Constructor
6161
InputFileStreamBuffer(std::shared_ptr<FilePageBuffer> file_page_buffer, std::string_view path)
6262
: file_page_buffer_(std::move(file_page_buffer)),
63-
file_(file_page_buffer_->OpenFile(path, duckdb::FileFlags::FILE_FLAGS_READ, duckdb::FileLockType::NO_LOCK)),
63+
file_(file_page_buffer_->OpenFile(path, duckdb::FileFlags::FILE_FLAGS_READ)),
6464
buffer_(file_->FixPage(0, false)),
6565
data_end_(file_->GetSize()),
6666
next_page_id_(1) {
@@ -70,8 +70,7 @@ class InputFileStreamBuffer : public std::streambuf {
7070
/// Constructor
7171
InputFileStreamBuffer(const InputFileStreamBuffer& other)
7272
: file_page_buffer_(other.file_page_buffer_),
73-
file_(other.file_page_buffer_->OpenFile(other.file_->GetPath(), duckdb::FileFlags::FILE_FLAGS_READ,
74-
duckdb::FileLockType::NO_LOCK)),
73+
file_(other.file_page_buffer_->OpenFile(other.file_->GetPath(), duckdb::FileFlags::FILE_FLAGS_READ)),
7574
buffer_(other.file_->FixPage(other.next_page_id_ - 1, false)),
7675
data_end_(other.data_end_),
7776
next_page_id_(other.next_page_id_) {

0 commit comments

Comments
 (0)