-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Open
Description
问题描述
针对 GPU 加载时被阻塞的情况,做了以下改动:
- 主线程启动子线程,尝试在GPU端加载ncnn模型;
- 主线程监控子线程,若超时(判定GPU加载阻塞),则降级到CPU加载模型(此分支逻辑正常,无崩溃);
- 若子线程未超时且GPU加载成功,主线程尝试重新在GPU端加载同一模型到主线程的ncnn::Net实例时,会直接崩溃;
- 主线程创建Net对象,然后在子线程完成加载(加载成功),主线程复用也崩溃,只能重新加载,但重新加载仍崩溃;
- 不加载模型,只调用 create_gpu_instance 和 destroy_gpu_instance,没有问题;
- 平台差异:Windows 下上述逻辑完全正常,崩溃发生在Android端。
问题模型
- 正常yolo11n模型
- 单层relu模型
- 单层conv模型
相关代码
class ModelLoader {
private:
// Flags stored by gpu_test_worker: load_finished is set to true when the worker ends,
// load_success holds whether its GPU load succeeded.
std::atomic<bool> load_finished{false};
std::atomic<bool> load_success{false};
int load_timeout_ms_ = 3000; // 3-second timeout threshold (milliseconds) used by try_gpu_first
// Option manager whose configure() result is assigned to the caller's net in load_model_core.
std::unique_ptr<NcnnOptionManager> ncnn_option_manager = std::make_unique<NcnnOptionManager>();
// Helper: records the GPU compatibility flag. The original author notes this is
// business logic unrelated to the crash; it is an empty stub here.
void write_gpu_compat_flag(GPU_STATUS /*status*/) {
    // Intentionally a no-op.
}
// Writes `msg` to the Android log at INFO priority under the "ModelLoader" tag.
void cardsLog(const char* msg) {
    static const char* const kLogTag = "ModelLoader";
    __android_log_print(ANDROID_LOG_INFO, kLogTag, "%s", msg);
}
// Core load routine: loads the param file and then the weights file into `_Net`.
//
// mdPath_p  path passed to ncnn::Net::load_param
// mdPath_b  path passed to ncnn::Net::load_model
// _Net      destination network; a null pointer is rejected instead of dereferenced
//
// Returns true only when both load calls return 0. Returns false when `_Net` is
// null, when either path is empty, or when either load call returns non-zero
// (load_model is not attempted if load_param fails).
//
// The paths are taken by const reference so that no std::string copy is made per call.
bool loadModel(const std::string& mdPath_p, const std::string& mdPath_b, ncnn::Net* _Net) {
    if (_Net == nullptr || mdPath_p.empty() || mdPath_b.empty()) {
        return false;
    }
    if (_Net->load_param(mdPath_p.c_str()) != 0) {
        return false;
    }
    return _Net->load_model(mdPath_b.c_str()) == 0;
}
// Core load logic shared by the CPU and GPU paths.
//
// Assigns the options returned by ncnn_option_manager->configure(!cpu_force) to
// `net` (per the original author: GPU is enabled unless CPU is forced), then
// loads the model through loadModel.
//
// Returns CHANGE_SUCCESS / CHANGE_FAILED when cpu_force is true, and
// SUCCESS / FAILED when it is false, depending on whether loadModel succeeded.
LoadStatus load_model_core(const std::string& mdPath_p,
                           const std::string& mdPath_b,
                           ncnn::Net* net,
                           bool cpu_force) {
    const bool use_gpu = !cpu_force;
    net->opt = ncnn_option_manager->configure(use_gpu);

    const bool loaded = loadModel(mdPath_p, mdPath_b, net);
    if (cpu_force) {
        return loaded ? LoadStatus::CHANGE_SUCCESS : LoadStatus::CHANGE_FAILED;
    }
    return loaded ? LoadStatus::SUCCESS : LoadStatus::FAILED;
}
// GPU加载试探子线程
void gpu_test_worker(const std::string& mdPath_p, const std::string& mdPath_b) {
load_finished.store(false, std::memory_order_release);
load_success.store(false, std::memory_order_release);
try {
std::unique_ptr<NcnnOptionManager> opt_mgr(new NcnnOptionManager());
std::unique_ptr<ncnn::Net> test_net(new ncnn::Net());
test_net->opt = opt_mgr->configure(true); // 子线程启用GPU加载
bool test_ok = loadModel(mdPath_p, mdPath_b, test_net.get());
load_success.store(test_ok, std::memory_order_release);
test_net.reset(); // 释放子线程Net
} catch (...) {
load_success.store(false, std::memory_order_release);
}
load_finished.store(true, std::memory_order_release);
}
public:
// Probes GPU model loading on a worker thread, then loads the model into `net`.
//
// mdPath_p  path of the param file
// mdPath_b  path of the weights file
// net       destination network; a null pointer yields LoadStatus::FAILED
//
// Behaviour:
//   * The probe loads the model into a throwaway ncnn::Net with GPU options.
//   * If the probe does not finish within load_timeout_ms_, GPU loading is
//     considered blocked: the probe thread is detached and the model is loaded
//     on CPU (CHANGE_SUCCESS / CHANGE_FAILED).
//   * If the probe finishes and succeeded, the model is loaded again into `net`
//     with GPU enabled (SUCCESS / FAILED).
//   * If the probe finishes but failed, the model is loaded on CPU.
//
// The probe runs in a lambda that owns copies of the paths and a shared,
// per-call result object, and never touches `this`. This is deliberate: on
// timeout the thread is detached and may outlive this ModelLoader, so running
// the member gpu_test_worker (which writes load_finished / load_success on
// `this`) could write to a destroyed object, and a stale worker from an earlier
// call could corrupt the flags of a later call.
LoadStatus try_gpu_first(const std::string& mdPath_p,
                         const std::string& mdPath_b,
                         ncnn::Net* net) {
    // Nothing to load into; bail out before spawning a thread.
    if (net == nullptr) {
        return LoadStatus::FAILED;
    }

    // Per-call probe result, kept alive by both this function and the worker.
    struct ProbeState {
        std::atomic<bool> finished{false};
        std::atomic<bool> success{false};
    };
    const auto probe = std::make_shared<ProbeState>();

    // Start the GPU probe thread.
    std::thread test_thread([probe, mdPath_p, mdPath_b]() {
        bool ok = false;
        try {
            auto opt_mgr = std::make_unique<NcnnOptionManager>();
            auto test_net = std::make_unique<ncnn::Net>();
            test_net->opt = opt_mgr->configure(true);  // probe with GPU enabled
            ok = !mdPath_p.empty() && !mdPath_b.empty() &&
                 test_net->load_param(mdPath_p.c_str()) == 0 &&
                 test_net->load_model(mdPath_b.c_str()) == 0;
            // test_net and opt_mgr are released here, before completion is reported.
        } catch (...) {
            ok = false;
        }
        probe->success.store(ok, std::memory_order_release);
        probe->finished.store(true, std::memory_order_release);
    });

    // Timeout monitoring on the calling thread. Completion is checked before the
    // deadline so a probe that finishes right at the deadline is not misclassified
    // as blocked.
    const auto deadline = std::chrono::steady_clock::now() +
                          std::chrono::milliseconds(load_timeout_ms_);
    bool timeout = false;
    while (!probe->finished.load(std::memory_order_acquire)) {
        if (std::chrono::steady_clock::now() >= deadline) {
            timeout = true;
            break;
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }

    // Act on the probe result.
    LoadStatus status = LoadStatus::FAILED;
    if (timeout) {
        // Timed out: fall back to CPU (the original author reports no crash on this path).
        write_gpu_compat_flag(GPU_STATUS::BLOCK_NO_GPU);
        // The probe is presumed stuck in the driver; it cannot be joined without
        // blocking, and it only references state it owns.
        test_thread.detach();
        cardsLog("阻塞,降级到CPU!");
        status = load_model_core(mdPath_p, mdPath_b, net, true);
    } else {
        // Not timed out: the calling thread now attempts the real load.
        cardsLog("子线程GPU加载完成,主线程开始GPU加载!");
        test_thread.join();
        const bool gpu_load_ok = probe->success.load(std::memory_order_acquire);
        if (gpu_load_ok) {
            cardsLog("GPU加载成功,主线程重新GPU加载模型");
            write_gpu_compat_flag(GPU_STATUS::UNBLOCK_GPU);
            // Crash site reported by the original author on Android:
            // load_model_core called with cpu_force=false (GPU enabled).
            status = load_model_core(mdPath_p, mdPath_b, net, false);
        } else {
            write_gpu_compat_flag(GPU_STATUS::UNBLOCK_NO_GPU);
            status = load_model_core(mdPath_p, mdPath_b, net, true);
        }
    }
    return status;
}
};
dosubot, G61be and BlueInstruction
Metadata
Metadata
Assignees
Labels
No labels