Skip to content

Commit ee21595

Browse files
committed
log offending ComputeCommand in allocation panic
1 parent 3ff9564 commit ee21595

File tree

1 file changed

+105
-11
lines changed

1 file changed

+105
-11
lines changed

Diff for: src/compute/src/compute_state.rs

+105-11
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@ use std::collections::{BTreeMap, BTreeSet};
1212
use std::num::NonZeroUsize;
1313
use std::ops::DerefMut;
1414
use std::rc::Rc;
15-
use std::sync::{mpsc, Arc};
15+
use std::sync::{Arc, mpsc};
1616
use std::time::{Duration, Instant};
1717

1818
use bytesize::ByteSize;
19-
use differential_dataflow::lattice::Lattice;
20-
use differential_dataflow::trace::{Cursor, TraceReader};
2119
use differential_dataflow::Hashable;
2220
use differential_dataflow::IntoOwned;
21+
use differential_dataflow::lattice::Lattice;
22+
use differential_dataflow::trace::{Cursor, TraceReader};
2323
use mz_compute_client::logging::LoggingConfig;
2424
use mz_compute_client::protocol::command::{
2525
ComputeCommand, ComputeParameters, InstanceConfig, Peek, PeekTarget,
@@ -30,30 +30,30 @@ use mz_compute_client::protocol::response::{
3030
StatusResponse, SubscribeResponse,
3131
};
3232
use mz_compute_types::dataflows::DataflowDescription;
33-
use mz_compute_types::plan::render_plan::RenderPlan;
3433
use mz_compute_types::plan::LirId;
34+
use mz_compute_types::plan::render_plan::RenderPlan;
3535
use mz_dyncfg::ConfigSet;
36-
use mz_expr::row::RowCollection;
3736
use mz_expr::SafeMfpPlan;
37+
use mz_expr::row::RowCollection;
3838
use mz_ore::cast::CastFrom;
3939
use mz_ore::collections::CollectionExt;
4040
use mz_ore::metrics::UIntGauge;
4141
use mz_ore::now::EpochMillis;
4242
use mz_ore::task::AbortOnDropHandle;
4343
use mz_ore::tracing::{OpenTelemetryContext, TracingHandle};
44+
use mz_persist_client::Diagnostics;
4445
use mz_persist_client::cache::PersistClientCache;
4546
use mz_persist_client::cfg::USE_CRITICAL_SINCE_SNAPSHOT;
4647
use mz_persist_client::read::ReadHandle;
47-
use mz_persist_client::Diagnostics;
4848
use mz_persist_types::codec_impls::UnitSchema;
4949
use mz_repr::fixed_length::ToDatumIter;
5050
use mz_repr::{DatumVec, Diff, GlobalId, Row, RowArena, Timestamp};
5151
use mz_storage_operators::stats::StatsCursor;
52+
use mz_storage_types::StorageDiff;
5253
use mz_storage_types::controller::CollectionMetadata;
5354
use mz_storage_types::dyncfgs::ORE_OVERFLOWING_BEHAVIOR;
5455
use mz_storage_types::sources::SourceData;
5556
use mz_storage_types::time_dependence::TimeDependence;
56-
use mz_storage_types::StorageDiff;
5757
use mz_txn_wal::operator::TxnsContext;
5858
use mz_txn_wal::txn_cache::TxnsCache;
5959
use timely::communication::Allocate;
@@ -63,7 +63,7 @@ use timely::progress::frontier::Antichain;
6363
use timely::scheduling::Scheduler;
6464
use timely::worker::Worker as TimelyWorker;
6565
use tokio::sync::{oneshot, watch};
66-
use tracing::{debug, error, info, span, warn, Level};
66+
use tracing::{Level, debug, error, info, span, warn};
6767
use uuid::Uuid;
6868

6969
use crate::arrangement::manager::{TraceBundle, TraceManager};
@@ -394,7 +394,102 @@ impl<'a, A: Allocate + 'static> ActiveComputeState<'a, A> {
394394
pub fn handle_compute_command(&mut self, cmd: ComputeCommand) {
395395
use ComputeCommand::*;
396396

397-
self.compute_state.command_history.push(cmd.clone());
397+
let cmd2 = mz_ore::panic::catch_unwind_str(|| cmd.clone()).unwrap_or_else(|panic| {
398+
error!("allocation error cloning compute command: {panic}");
399+
match cmd {
400+
CreateTimely { config, epoch } => {
401+
error!(" type=CreateTimely");
402+
error!(" config={config:?}");
403+
error!(" epoch={epoch:?}");
404+
}
405+
CreateInstance(instance_config) => {
406+
error!(" type=CreateInstance");
407+
error!(" instance_config={instance_config:?}");
408+
}
409+
InitializationComplete => {
410+
error!(" type=InitializationComplete");
411+
}
412+
AllowWrites => {
413+
error!(" type=AllowWrites");
414+
}
415+
UpdateConfiguration(compute_parameters) => {
416+
error!(" type=UpdateConfiguration");
417+
let ComputeParameters {
418+
workload_class,
419+
max_result_size,
420+
tracing,
421+
grpc_client,
422+
dyncfg_updates,
423+
} = compute_parameters;
424+
error!(" workload_class={workload_class:?}");
425+
error!(" max_result_size={max_result_size:?}");
426+
error!(" tracing={tracing:?}");
427+
error!(" grpc_client={grpc_client:?}");
428+
error!(" dyncfg_updates={dyncfg_updates:?}");
429+
}
430+
CreateDataflow(dataflow_description) => {
431+
error!(" type=CreateDataflow");
432+
let DataflowDescription {
433+
source_imports,
434+
index_imports,
435+
objects_to_build,
436+
index_exports,
437+
sink_exports,
438+
as_of,
439+
until,
440+
initial_storage_as_of,
441+
refresh_schedule,
442+
debug_name,
443+
time_dependence,
444+
} = dataflow_description;
445+
error!(" source_imports={source_imports:?}");
446+
error!(" index_imports={index_imports:?}");
447+
error!(" objects_to_build={objects_to_build:?}");
448+
error!(" index_exports={index_exports:?}");
449+
error!(" sink_exports={sink_exports:?}");
450+
error!(" as_of={as_of:?}");
451+
error!(" until={until:?}");
452+
error!(" initial_storage_as_of={initial_storage_as_of:?}");
453+
error!(" refresh_schedule={refresh_schedule:?}");
454+
error!(" debug_name={debug_name:?}");
455+
error!(" time_dependence={time_dependence:?}");
456+
}
457+
Schedule(global_id) => {
458+
error!(" type=Schedule");
459+
error!(" global_id={global_id:?}")
460+
}
461+
AllowCompaction { id, frontier } => {
462+
error!(" type=AllowCompaction");
463+
error!(" id={id:?}");
464+
error!(" frontier={frontier:?}");
465+
}
466+
Peek(peek) => {
467+
error!(" type=Peek");
468+
let mz_compute_client::protocol::command::Peek {
469+
target,
470+
literal_constraints,
471+
uuid,
472+
timestamp,
473+
finishing,
474+
map_filter_project,
475+
otel_ctx,
476+
} = peek;
477+
error!(" target={target:?}");
478+
error!(" literal_constraints={literal_constraints:?}");
479+
error!(" uuid={uuid:?}");
480+
error!(" timestamp={timestamp:?}");
481+
error!(" finishing={finishing:?}");
482+
error!(" map_filter_project={map_filter_project:?}");
483+
error!(" otel_ctx={otel_ctx:?}");
484+
}
485+
CancelPeek { uuid } => {
486+
error!(" type=CancelPeek");
487+
error!(" uuid={uuid:?}");
488+
}
489+
}
490+
panic!("abort");
491+
});
492+
self.compute_state.command_history.push(cmd2);
398493

399494
// Record the command duration, per worker and command kind.
400495
let timer = self
@@ -1500,8 +1595,7 @@ impl IndexPeek {
15001595
let copies: usize = if copies.is_negative() {
15011596
return Err(format!(
15021597
"Invalid data in source, saw retractions ({}) for row that does not exist: {:?}",
1503-
-copies,
1504-
&*borrow,
1598+
-copies, &*borrow,
15051599
));
15061600
} else {
15071601
copies.into_inner().try_into().unwrap()

0 commit comments

Comments
 (0)