Skip to content

Commit 7d8abfb

Browse files
committed
Add file descriptor support for weightless model loading
Changes:
- Add ov::FdGetterType and the ov::hint::fd_getter property
- Extend load_mmap_object() with an fd-based overload (Linux only)
- Integrate fd_getter through the NPUW deserialization flow
- The Windows implementation throws an "unsupported" exception

Signed-off-by: Anoob Anto Kodankandath <[email protected]>
1 parent e0a41fe commit 7d8abfb

File tree

9 files changed

+87
-12
lines changed

9 files changed

+87
-12
lines changed

src/common/util/include/openvino/util/mmap_object.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ class MappedMemory {
3737
*/
3838
std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::string& path);
3939

40+
/**
41+
* @brief Returns mapped memory for a file identified by the provided file descriptor.
42+
* Instead of reading files, we can map the memory via mmap for Linux
43+
* in order to avoid time-consuming reading and reduce memory consumption.
44+
*
45+
* @param fd File descriptor of an already opened file. The descriptor is used as-is; ownership is not taken.
46+
* @return MappedMemory shared pointer object which keeps the mmapped memory and controls its lifetime.
47+
*/
48+
std::shared_ptr<ov::MappedMemory> load_mmap_object(const int fd);
49+
4050
#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
4151

4252
/**

src/common/util/src/os/lin/lin_mmap_object.cpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,26 +64,33 @@ class MapHolder : public MappedMemory {
6464
MapHolder() = default;
6565

6666
void set(const std::string& path) {
67-
int prot = PROT_READ;
6867
int mode = O_RDONLY;
69-
struct stat sb = {};
7068
m_handle = HandleHolder(open(path.c_str(), mode));
7169
if (m_handle.get() == -1) {
7270
throw std::runtime_error("Can not open file " + path +
7371
" for mapping. Ensure that file exists and has appropriate permissions");
7472
}
75-
if (fstat(m_handle.get(), &sb) == -1) {
76-
throw std::runtime_error("Can not get file size for " + path);
73+
set_from_fd(m_handle.get());
74+
}
75+
76+
void set_from_fd(const int fd) {
77+
int prot = PROT_READ;
78+
struct stat sb = {};
79+
// Use the provided file descriptor directly without taking ownership
80+
if (fstat(fd, &sb) == -1) {
81+
throw std::runtime_error("Can not get file size for fd=" + std::to_string(fd));
7782
}
7883
m_size = sb.st_size;
7984
if (m_size > 0) {
80-
m_data = mmap(nullptr, m_size, prot, MAP_PRIVATE, m_handle.get(), 0);
85+
m_data = mmap(nullptr, m_size, prot, MAP_PRIVATE, fd, 0);
8186
if (m_data == MAP_FAILED) {
82-
throw std::runtime_error("Can not create file mapping for " + path + ", err=" + std::strerror(errno));
87+
throw std::runtime_error("Can not create file mapping for fd=" + std::to_string(fd) +
88+
", err=" + std::strerror(errno));
8389
}
8490
} else {
8591
m_data = MAP_FAILED;
8692
}
93+
// Don't store the handle since we don't own the fd
8794
}
8895

8996
~MapHolder() {
@@ -107,4 +114,10 @@ std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::string& path) {
107114
return holder;
108115
}
109116

117+
std::shared_ptr<ov::MappedMemory> load_mmap_object(const int fd) {
118+
auto holder = std::make_shared<MapHolder>();
119+
holder->set_from_fd(fd);
120+
return holder;
121+
}
122+
110123
} // namespace ov

src/common/util/src/os/win/win_mmap_object.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,10 @@ std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::string& path) {
134134
return holder;
135135
}
136136

137+
std::shared_ptr<ov::MappedMemory> load_mmap_object(const int fd) {
138+
OPENVINO_THROW("File descriptor-based memory mapping is not supported on Windows. Use path-based load_mmap_object instead.");
139+
}
140+
137141
#ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT
138142

139143
std::shared_ptr<ov::MappedMemory> load_mmap_object(const std::wstring& path) {

src/inference/include/openvino/runtime/properties.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,14 @@ static constexpr Property<std::string, PropertyMutability::RO> model_name{"NETWO
241241
static constexpr Property<uint32_t, PropertyMutability::RO> optimal_number_of_infer_requests{
242242
"OPTIMAL_NUMBER_OF_INFER_REQUESTS"};
243243

244+
/**
245+
* @brief Type definition for file descriptor getter callback.
246+
* Function that takes a file path string and returns a file descriptor as int.
247+
* This is useful for scenarios where file access needs to be controlled externally.
248+
* @ingroup ov_runtime_cpp_prop_api
249+
*/
250+
using FdGetterType = std::function<int(const std::string&)>;
251+
244252
/**
245253
* @brief Namespace with hint properties
246254
*/
@@ -532,6 +540,14 @@ static constexpr Property<uint32_t> num_requests{"PERFORMANCE_HINT_NUM_REQUESTS"
532540
*/
533541
static constexpr Property<std::shared_ptr<const ov::Model>> model{"MODEL_PTR"};
534542

543+
/**
544+
* @brief This key identifies a callback function that returns a file descriptor for a given file path.
545+
* The callback takes a file path string and returns a file descriptor as int.
546+
* This is useful for scenarios where file access needs to be controlled externally.
547+
* @ingroup ov_runtime_cpp_prop_api
548+
*/
549+
static constexpr Property<FdGetterType> fd_getter{"FD_GETTER"};
550+
535551
/**
536552
* @brief Special key for auto batching feature configuration. Enabled by default
537553
* @ingroup ov_runtime_cpp_prop_api

src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,11 +1141,19 @@ std::shared_ptr<ov::npuw::CompiledModel> ov::npuw::CompiledModel::deserialize(
11411141
std::shared_ptr<ov::Model> model_ptr;
11421142
// Cache model's constants
11431143
WeightsContext::ConstsCache consts_cache;
1144+
ov::FdGetterType fd_getter = nullptr;
11441145
if (is_weightless) {
11451146
if (properties.find(ov::weights_path.name()) != properties.end()) {
11461147
weights_path = properties.at(ov::weights_path.name()).as<std::string>();
11471148
NPUW_ASSERT(!weights_path.empty() &&
11481149
"Empty weights_path. Please provide WEIGHTS_PATH or MODEL_PTR in the configuration.");
1150+
1151+
// Check if fd_getter function is provided
1152+
if (const auto fd_it = properties.find(ov::hint::fd_getter.name()); fd_it != properties.end()) {
1153+
if (fd_it->second.is<ov::FdGetterType>()) {
1154+
fd_getter = fd_it->second.as<ov::FdGetterType>();
1155+
}
1156+
}
11491157
} else if (properties.find(ov::hint::model.name()) != properties.end()) {
11501158
model_ptr = std::const_pointer_cast<ov::Model>(
11511159
properties.at(ov::hint::model.name()).as<std::shared_ptr<const ov::Model>>())
@@ -1181,7 +1189,14 @@ std::shared_ptr<ov::npuw::CompiledModel> ov::npuw::CompiledModel::deserialize(
11811189
ov::npuw::s11n::WeightsPtr weights = nullptr;
11821190
if (is_weightless) {
11831191
if (!weights_path.empty()) {
1184-
auto mapped_memory = ov::load_mmap_object(weights_path);
1192+
std::shared_ptr<ov::MappedMemory> mapped_memory;
1193+
// Use fd_getter if available, otherwise use default mmap
1194+
if (fd_getter) {
1195+
int fd = fd_getter(weights_path);
1196+
mapped_memory = ov::load_mmap_object(fd);
1197+
} else {
1198+
mapped_memory = ov::load_mmap_object(weights_path);
1199+
}
11851200
weights = std::make_shared<ov::npuw::s11n::Weights>(mapped_memory->data(),
11861201
mapped_memory->size(),
11871202
mapped_memory);
@@ -1191,7 +1206,8 @@ std::shared_ptr<ov::npuw::CompiledModel> ov::npuw::CompiledModel::deserialize(
11911206
// FIXME: prolong lifetime of ov::Model for import with MODEL_PTR.
11921207
// Unclear why it's needed, but without saving consts_cache until bank evaluation,
11931208
// the memory is freed somewhere.
1194-
compiled->m_import_weights_ctx = WeightsContext(weights, weights_path, consts_cache, compiled->m_bf16_consts);
1209+
compiled->m_import_weights_ctx =
1210+
WeightsContext(weights, weights_path, consts_cache, compiled->m_bf16_consts, fd_getter);
11951211

11961212
// Deserialize compiled submodels
11971213
std::size_t subm_size = 0;

src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,14 @@ ov::Tensor Const::eval() const {
6464
if (!m_weights_path.empty()) {
6565
NPUW_ASSERT(!m_read_from_bin &&
6666
"Trying to read weight from weights file, but the weight has been already deserialized!");
67-
auto mapped_memory = ov::load_mmap_object(m_weights_path);
67+
std::shared_ptr<ov::MappedMemory> mapped_memory;
68+
// Use fd_getter if available, otherwise use default mmap
69+
if (m_fd_getter) {
70+
int fd = m_fd_getter(m_weights_path);
71+
mapped_memory = ov::load_mmap_object(fd);
72+
} else {
73+
mapped_memory = ov::load_mmap_object(m_weights_path);
74+
}
6875
m_mmaped_weights =
6976
std::make_shared<ov::npuw::s11n::Weights>(mapped_memory->data(), mapped_memory->size(), mapped_memory);
7077
return ov::Tensor(m_cached_type, m_cached_shape, m_mmaped_weights->get_ptr(m_offset));
@@ -118,6 +125,8 @@ void Const::read_weight(const ov::npuw::s11n::WeightsContext& ctx) {
118125
NPUW_ASSERT(!ctx.weights_path.empty());
119126
// Just save weights_path for the eval() to call the actual mmap.
120127
m_weights_path = ctx.weights_path;
128+
// Also save fd_getter if available
129+
m_fd_getter = ctx.fd_getter;
121130
}
122131
} else {
123132
auto it = ctx.consts_cache.find({m_offset, m_byte_size});

src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ class Const {
108108
std::size_t m_byte_size = 0;
109109
ov::Tensor m_read_from_bin;
110110
std::string m_weights_path;
111+
ov::FdGetterType m_fd_getter = nullptr;
111112
mutable ov::npuw::s11n::WeightsPtr m_mmaped_weights = nullptr;
112113
// FIXME: special case when a new Constant was added into the model,
113114
// then made into LazyTensor during folding. We need to keep a copy of it,

src/plugins/intel_npu/src/plugin/npuw/serialization.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,13 @@ ov::npuw::s11n::WeightsContext::WeightsContext(bool _is_weightless,
2828
ov::npuw::s11n::WeightsContext::WeightsContext(const ov::npuw::s11n::WeightsPtr& _weights,
2929
const std::string& _weights_path,
3030
const s11n::WeightsContext::ConstsCache& _consts_cache,
31-
const BF16Cache& _bf16_consts)
31+
const BF16Cache& _bf16_consts,
32+
const ov::FdGetterType& _fd_getter)
3233
: weights(_weights),
3334
weights_path(_weights_path),
3435
consts_cache(_consts_cache),
35-
bf16_consts(_bf16_consts) {
36+
bf16_consts(_bf16_consts),
37+
fd_getter(_fd_getter) {
3638
is_weightless = _weights || !_consts_cache.empty();
3739
}
3840

src/plugins/intel_npu/src/plugin/npuw/serialization.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <unordered_set>
1818
#include <vector>
1919

20+
#include "openvino/runtime/properties.hpp"
21+
2022
namespace ov {
2123
namespace npuw {
2224
namespace s11n {
@@ -126,7 +128,8 @@ struct WeightsContext {
126128
WeightsContext(const ov::npuw::s11n::WeightsPtr& _weights,
127129
const std::string& _weights_path,
128130
const ConstsCache& _consts_cache,
129-
const BF16Cache& _bf16_consts);
131+
const BF16Cache& _bf16_consts,
132+
const ov::FdGetterType& _fd_getter = nullptr);
130133

131134
WeightsContext& operator=(const WeightsContext& other) = default;
132135

@@ -141,6 +144,7 @@ struct WeightsContext {
141144
std::string weights_path;
142145
ConstsCache consts_cache;
143146
BF16Cache bf16_consts;
147+
ov::FdGetterType fd_getter = nullptr;
144148
};
145149

146150
BF16Cache get_bf16_consts(const std::shared_ptr<ov::Model>& model);

0 commit comments

Comments
 (0)