Skip to content

Commit 941c00b

Browse files
committed
Fix crash due to camera error DCAMERR_LOSTFRAME
1 parent 5d412f2 commit 941c00b

5 files changed

Lines changed: 105 additions & 33 deletions

File tree

src/device/hamamatsu/hamamatsu_dcam.cpp

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -427,7 +427,17 @@ Status DCam::WaitFrameReady(uint32_t timeout_ms)
427427
// Expected when we abort the wait when stopping acquisition
428428
return absl::CancelledError("dcamwait is aborted");
429429
}
430-
return absl::InternalError(
430+
if (err == DCAMERR_LOSTFRAME) {
431+
return absl::DataLossError(
432+
"dcamwait_start failed: DCAMERR_LOSTFRAME (frame data is "
433+
"lost)");
434+
}
435+
if (err == DCAMERR_MISSINGFRAME_TROUBLE) {
436+
return absl::DataLossError(
437+
"dcamwait_start failed: DCAMERR_MISSINGFRAME_TROUBLE "
438+
"(frame data is lost due to problems in the low level driver)");
439+
}
440+
return absl::UnknownError(
431441
fmt::format("dcamwait_start(FRAMEREADY, {}): {}", waitStart.timeout,
432442
DCAMERR_ToString(err)));
433443
}

src/experimentcontrol.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -297,9 +297,13 @@ void ExperimentControl::runMultiChannelTask(std::string ndimage_name,
297297
is_busy = false;
298298
std::string message = fmt::format("Error in task: {}", e.what());
299299
LOG_ERROR(message);
300+
// Set to Ready state for now, so that we can retry without restarting
301+
// the program
302+
// TODO check whether StopAcquisition was successful,
303+
// if not, we should set to Error state
300304
SendEvent({
301305
.type = EventType::TaskStateChanged,
302-
.value = "Error",
306+
.value = "Ready",
303307
});
304308
SendEvent({
305309
.type = EventType::TaskMessage,

src/task/live_view_task.cpp

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -27,14 +27,19 @@ void LiveViewTask::Run()
2727
try {
2828
for (;;) {
2929
StatusOr<ImageData> frame = GetFrame();
30-
if (!status.ok()) {
31-
throw std::runtime_error(status.ToString());
32-
}
33-
if (frame.value().empty()) {
30+
if (absl::IsCancelled(frame.status())) {
3431
is_running = false;
35-
exp->Images()->SetLiveViewFrame(frame.value());
32+
exp->Images()->SetLiveViewFrame(ImageData());
3633
return;
3734
}
35+
if (absl::IsDataLoss(frame.status())) {
36+
LOG_WARN("ignore frame data loss ({})",
37+
frame.status().ToString());
38+
continue;
39+
}
40+
if (!status.ok()) {
41+
throw std::runtime_error(status.ToString());
42+
}
3843
exp->Images()->SetLiveViewFrame(frame.value());
3944
}
4045
} catch (std::exception &e) {
@@ -126,10 +131,14 @@ Status LiveViewTask::StartAcquisition()
126131
StatusOr<ImageData> LiveViewTask::GetFrame()
127132
{
128133
Status status = dcam->WaitFrameReady(1000);
134+
if (absl::IsCancelled(status) || absl::IsDataLoss(status)) {
135+
return status;
136+
}
129137
if (!status.ok()) {
130-
LOG_DEBUG("[{}] WaitFrameReady returned false, which indicates ABORT",
131-
task_name);
132-
return ImageData();
138+
LOG_ERROR("[{}] WaitFrameReady failed: {}", task_name,
139+
status.ToString());
140+
return absl::InternalError("WaitFrameReady failed: " +
141+
status.ToString());
133142
}
134143

135144
// Get latest frame

src/task/multi_channel_task.cpp

Lines changed: 70 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -147,19 +147,21 @@ Status MultiChannelTask::ExposeFrame(int i_ch,
147147
return status;
148148
}
149149

150-
ImageData
150+
StatusOr<ImageData>
151151
MultiChannelTask::GetFrame(int i_ch,
152152
std::chrono::system_clock::time_point *timestamp)
153153
{
154154
Channel channel = channels[i_ch];
155155
Status status = dcam->WaitFrameReady(1000);
156156
if (!status.ok()) {
157-
// This should not happen
158-
// Log and then continue anyways to see whether it is possible to get
159-
// frame data if there is no data, throw exception there
160-
LOG_WARN(
161-
"[{}][{}] WaitFrameReady returned false, which indicates ABORT",
162-
ndimage_name, i_ch);
157+
// DCAMERR_LOSTFRAME can occur here with roughly a 1/5000 chance.
158+
// Calling WaitFrameReady again does not help; it only yields
159+
// DCAMERR_TIMEOUT.
160+
// The error cannot be recovered at this level, so return it
161+
// to the caller.
162+
LOG_ERROR("[{}][{}] WaitFrameReady failed: {}", ndimage_name, i_ch + 1,
163+
status.ToString());
164+
return status;
163165
} else {
164166
LOG_DEBUG("[{}][{}] Frame ready [{:.1f} ms after exposure end]",
165167
ndimage_name, i_ch + 1, sw_exposure_end.Milliseconds());
@@ -168,8 +170,11 @@ MultiChannelTask::GetFrame(int i_ch,
168170
utils::StopWatch sw;
169171
StatusOr<ImageData> frame = dcam->GetFrame(i_ch, timestamp);
170172
if (!frame.ok()) {
171-
throw std::runtime_error(
172-
"GetFrame returned nullptr without throwing an exception");
173+
std::string error_msg =
174+
fmt::format("[{}][{}] GetFrame failed: {}", ndimage_name, i_ch + 1,
175+
frame.status().ToString());
176+
LOG_ERROR(error_msg);
177+
return frame.status();
173178
}
174179
LOG_DEBUG("[{}][{}] Get frame [{:.1f} ms]", ndimage_name, i_ch + 1,
175180
sw.Milliseconds());
@@ -281,14 +286,22 @@ Status MultiChannelTask::Acquire(std::string ndimage_name,
281286
//
282287
utils::StopWatch sw_frame;
283288
utils::StopWatch sw_channel;
289+
int i_retry = 0;
284290
try {
285291
for (int i_ch = 0; i_ch < channels.size(); i_ch++) {
286-
status = exp->Channels()->WaitSwitchChannel();
287-
if (!status.ok()) {
288-
LOG_ERROR("[{}][{}/{}] SwitchChannel failed: {}", ndimage_name,
289-
i_ch + 1, channels.size(), status.ToString());
290-
StopAcqusition();
291-
return status;
292+
if (i_retry > 0) {
293+
LOG_INFO("[{}][{}/{}] retry {} ", ndimage_name, i_ch + 1,
294+
channels.size(), i_retry);
295+
}
296+
297+
if (i_retry == 0) {
298+
status = exp->Channels()->WaitSwitchChannel();
299+
if (!status.ok()) {
300+
LOG_ERROR("[{}][{}/{}] SwitchChannel failed: {}",
301+
ndimage_name, i_ch + 1, channels.size(),
302+
status.ToString());
303+
goto cleanup;
304+
}
292305
}
293306

294307
channel = channels[i_ch];
@@ -307,8 +320,7 @@ Status MultiChannelTask::Acquire(std::string ndimage_name,
307320
if (!status.ok()) {
308321
LOG_ERROR("[{}][{}/{}] ExposeFrame failed: {}", ndimage_name,
309322
i_ch + 1, channels.size(), status.ToString());
310-
StopAcqusition();
311-
return status;
323+
goto cleanup;
312324
}
313325
if (i_ch + 1 < channels.size()) {
314326
sw_channel.Reset();
@@ -319,7 +331,34 @@ Status MultiChannelTask::Acquire(std::string ndimage_name,
319331
}
320332

321333
std::chrono::system_clock::time_point timestamp;
322-
ImageData data = GetFrame(i_ch, &timestamp);
334+
StatusOr<ImageData> data = GetFrame(i_ch, &timestamp);
335+
status = data.status();
336+
if (!status.ok()) {
337+
if (absl::IsDataLoss(status)) {
338+
// This is a rare error (~1/5000 chance), try recovering
339+
if (i_retry < 2) {
340+
// Try recovering the error
341+
LOG_ERROR("[{}][{}/{}] [Retry {}] GetFrame failed: {}. "
342+
"Reacquire the channel after 500 ms",
343+
ndimage_name, i_ch + 1, channels.size(),
344+
i_retry, status.ToString());
345+
std::this_thread::sleep_for(
346+
std::chrono::milliseconds(500));
347+
i_retry++;
348+
i_ch--;
349+
continue;
350+
} else {
351+
LOG_ERROR("[{}][{}/{}] [Retry {}] GetFrame failed: {}."
352+
"Max retries reached. Failed",
353+
ndimage_name, i_ch + 1, channels.size(),
354+
i_retry, status.ToString());
355+
goto cleanup;
356+
}
357+
}
358+
LOG_ERROR("[{}][{}/{}] GetFrame failed: {}", ndimage_name,
359+
i_ch + 1, channels.size(), status.ToString());
360+
goto cleanup;
361+
}
323362

324363
nlohmann::ordered_json new_metadata;
325364
new_metadata["timestamp"] =
@@ -343,15 +382,25 @@ Status MultiChannelTask::Acquire(std::string ndimage_name,
343382
new_metadata["device_property"][k.ToString()] = v;
344383
}
345384

346-
exp->Images()->AddImage(ndimage_name, i_ch, i_z, i_t, data,
385+
exp->Images()->AddImage(ndimage_name, i_ch, i_z, i_t, data.value(),
347386
new_metadata);
348387
LOG_INFO("[{}][{}] Frame completed [{:.0f} ms]", ndimage_name,
349388
i_ch + 1, sw_frame.Milliseconds());
389+
i_retry = 0;
350390
}
351391
} catch (std::exception &e) {
352-
LOG_ERROR("Unexpected exception during acquisition : {}", e.what());
392+
status = absl::UnknownError(fmt::format(
393+
"Unexpected exception during acquisition: {}", e.what()));
394+
}
395+
396+
cleanup:
397+
// Clean up after failed acquisition
398+
if (!status.ok()) {
399+
LOG_ERROR("Stopping acquisition due to error: {}", status.ToString());
353400
StopAcqusition();
354-
throw;
401+
double task_elapse_ms = sw_task.Milliseconds();
402+
LOG_ERROR("[{}] Task failed: {:.0f} ms", ndimage_name, task_elapse_ms);
403+
return status;
355404
}
356405

357406
StopAcqusition();

src/task/multi_channel_task.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,8 @@ class MultiChannelTask : public EventSender {
3030
Status PrepareBuffer();
3131
Status StartAcqusition();
3232
Status ExposeFrame(int i_ch, PropertyValueMap *property_snapshot);
33-
ImageData GetFrame(int i_ch,
34-
std::chrono::system_clock::time_point *timestamp);
33+
StatusOr<ImageData>
34+
GetFrame(int i_ch, std::chrono::system_clock::time_point *timestamp);
3535
void StopAcqusition();
3636

3737
private:

0 commit comments

Comments
 (0)