@@ -262,11 +262,14 @@ func buildPowerOffRule() *OperationRule {
262262 }
263263}
264264
265- // buildRestartRule creates the hardcoded default rule for restart operations
265+ // buildRestartRule creates the hardcoded default rule for graceful restart.
266+ // Each stage explicitly specifies the power operation to avoid inheriting the
267+ // composite "restart" operation from the task context (which would send
268+ // BMC GRACEFUL_RESTART — an atomic off→on — instead of separate off/on).
266269func buildRestartRule () * OperationRule {
267270 return & OperationRule {
268271 Name : "Hardcoded Default Restart" ,
269- Description : "Composite rule: power off all components then power on" ,
272+ Description : "Composite rule: graceful power off all components then power on" ,
270273 OperationType : common .TaskTypePowerControl ,
271274 OperationCode : SequenceRestart ,
272275 RuleDefinition : RuleDefinition {
@@ -276,7 +279,7 @@ func buildRestartRule() *OperationRule {
276279 {
277280 ComponentType : devicetypes .ComponentTypeCompute ,
278281 Stage : 1 ,
279- MaxParallel : 0 , // All components together (legacy behavior)
282+ MaxParallel : 0 ,
280283 Timeout : 20 * time .Minute ,
281284 RetryPolicy : & RetryPolicy {
282285 MaxAttempts : 3 ,
@@ -285,10 +288,12 @@ func buildRestartRule() *OperationRule {
285288 },
286289 MainOperation : ActionConfig {
287290 Name : ActionPowerControl ,
291+ Parameters : map [string ]any {
292+ ParamOperation : "power_off" ,
293+ },
288294 },
289295 PostOperation : []ActionConfig {
290296 {
291- // Verify power status after operation
292297 Name : ActionVerifyPowerStatus ,
293298 Timeout : 3 * time .Minute ,
294299 PollInterval : 10 * time .Second ,
@@ -301,7 +306,7 @@ func buildRestartRule() *OperationRule {
301306 {
302307 ComponentType : devicetypes .ComponentTypeNVLSwitch ,
303308 Stage : 2 ,
304- MaxParallel : 0 , // All components together (legacy behavior)
309+ MaxParallel : 0 ,
305310 Timeout : 15 * time .Minute ,
306311 RetryPolicy : & RetryPolicy {
307312 MaxAttempts : 3 ,
@@ -310,10 +315,12 @@ func buildRestartRule() *OperationRule {
310315 },
311316 MainOperation : ActionConfig {
312317 Name : ActionPowerControl ,
318+ Parameters : map [string ]any {
319+ ParamOperation : "power_off" ,
320+ },
313321 },
314322 PostOperation : []ActionConfig {
315323 {
316- // Verify power status after operation
317324 Name : ActionVerifyPowerStatus ,
318325 Timeout : 3 * time .Minute ,
319326 PollInterval : 10 * time .Second ,
@@ -326,7 +333,7 @@ func buildRestartRule() *OperationRule {
326333 {
327334 ComponentType : devicetypes .ComponentTypePowerShelf ,
328335 Stage : 3 ,
329- MaxParallel : 0 , // All components together (legacy behavior)
336+ MaxParallel : 0 ,
330337 Timeout : 10 * time .Minute ,
331338 RetryPolicy : & RetryPolicy {
332339 MaxAttempts : 3 ,
@@ -335,10 +342,12 @@ func buildRestartRule() *OperationRule {
335342 },
336343 MainOperation : ActionConfig {
337344 Name : ActionPowerControl ,
345+ Parameters : map [string ]any {
346+ ParamOperation : "power_off" ,
347+ },
338348 },
339349 PostOperation : []ActionConfig {
340350 {
341- // Verify power status after operation
342351 Name : ActionVerifyPowerStatus ,
343352 Timeout : 3 * time .Minute ,
344353 PollInterval : 10 * time .Second ,
@@ -352,7 +361,7 @@ func buildRestartRule() *OperationRule {
352361 {
353362 ComponentType : devicetypes .ComponentTypePowerShelf ,
354363 Stage : 4 ,
355- MaxParallel : 0 , // All components together (legacy behavior)
364+ MaxParallel : 0 ,
356365 Timeout : 10 * time .Minute ,
357366 RetryPolicy : & RetryPolicy {
358367 MaxAttempts : 3 ,
@@ -361,10 +370,12 @@ func buildRestartRule() *OperationRule {
361370 },
362371 MainOperation : ActionConfig {
363372 Name : ActionPowerControl ,
373+ Parameters : map [string ]any {
374+ ParamOperation : "power_on" ,
375+ },
364376 },
365377 PostOperation : []ActionConfig {
366378 {
367- // Verify power status after operation
368379 Name : ActionVerifyPowerStatus ,
369380 Timeout : 3 * time .Minute ,
370381 PollInterval : 10 * time .Second ,
@@ -373,8 +384,6 @@ func buildRestartRule() *OperationRule {
373384 },
374385 },
375386 {
376- // Wait for downstream components to become
377- // reachable
378387 Name : ActionVerifyReachability ,
379388 Timeout : 3 * time .Minute ,
380389 PollInterval : 10 * time .Second ,
@@ -390,7 +399,7 @@ func buildRestartRule() *OperationRule {
390399 {
391400 ComponentType : devicetypes .ComponentTypeNVLSwitch ,
392401 Stage : 5 ,
393- MaxParallel : 0 , // All components together (legacy behavior)
402+ MaxParallel : 0 ,
394403 Timeout : 15 * time .Minute ,
395404 RetryPolicy : & RetryPolicy {
396405 MaxAttempts : 3 ,
@@ -399,10 +408,12 @@ func buildRestartRule() *OperationRule {
399408 },
400409 MainOperation : ActionConfig {
401410 Name : ActionPowerControl ,
411+ Parameters : map [string ]any {
412+ ParamOperation : "power_on" ,
413+ },
402414 },
403415 PostOperation : []ActionConfig {
404416 {
405- // Verify power status after operation
406417 Name : ActionVerifyPowerStatus ,
407418 Timeout : 3 * time .Minute ,
408419 PollInterval : 10 * time .Second ,
@@ -415,7 +426,7 @@ func buildRestartRule() *OperationRule {
415426 {
416427 ComponentType : devicetypes .ComponentTypeCompute ,
417428 Stage : 6 ,
418- MaxParallel : 0 , // All components together (legacy behavior)
429+ MaxParallel : 0 ,
419430 Timeout : 20 * time .Minute ,
420431 RetryPolicy : & RetryPolicy {
421432 MaxAttempts : 3 ,
@@ -424,10 +435,12 @@ func buildRestartRule() *OperationRule {
424435 },
425436 MainOperation : ActionConfig {
426437 Name : ActionPowerControl ,
438+ Parameters : map [string ]any {
439+ ParamOperation : "power_on" ,
440+ },
427441 },
428442 PostOperation : []ActionConfig {
429443 {
430- // Verify power status after operation
431444 Name : ActionVerifyPowerStatus ,
432445 Timeout : 3 * time .Minute ,
433446 PollInterval : 10 * time .Second ,
@@ -987,18 +1000,21 @@ func buildIngestRule() *OperationRule {
9871000 }
9881001}
9891002
990- // buildForceRestartRule creates the hardcoded default rule for
991- // forced restart operations (no verification)
1003+ // buildForceRestartRule creates the hardcoded default rule for forced restart
1004+ // operations. Skips per-stage verification for speed but verifies the "off"
1005+ // state before proceeding to power on, ensuring a real power cycle occurs.
9921006func buildForceRestartRule () * OperationRule {
9931007 return & OperationRule {
9941008 Name : "Hardcoded Default Force Restart" ,
995- Description : "Forced restart: power off then on (no verification) " ,
1009+ Description : "Forced restart: power off, verify off, then power on " ,
9961010 OperationType : common .TaskTypePowerControl ,
9971011 OperationCode : SequenceForceRestart ,
9981012 RuleDefinition : RuleDefinition {
9991013 Version : CurrentRuleDefinitionVersion ,
10001014 Steps : []SequenceStep {
10011015 // === Power Off Sequence (Stages 1-3) ===
1016+ // Explicit force_power_off to avoid sending BMC FORCE_RESTART
1017+ // (which is an atomic off→on cycle, not just off).
10021018 {
10031019 ComponentType : devicetypes .ComponentTypeCompute ,
10041020 Stage : 1 ,
@@ -1011,6 +1027,9 @@ func buildForceRestartRule() *OperationRule {
10111027 },
10121028 MainOperation : ActionConfig {
10131029 Name : ActionPowerControl ,
1030+ Parameters : map [string ]any {
1031+ ParamOperation : "force_power_off" ,
1032+ },
10141033 },
10151034 PostOperation : []ActionConfig {
10161035 {
@@ -1033,6 +1052,9 @@ func buildForceRestartRule() *OperationRule {
10331052 },
10341053 MainOperation : ActionConfig {
10351054 Name : ActionPowerControl ,
1055+ Parameters : map [string ]any {
1056+ ParamOperation : "force_power_off" ,
1057+ },
10361058 },
10371059 PostOperation : []ActionConfig {
10381060 {
@@ -1055,22 +1077,86 @@ func buildForceRestartRule() *OperationRule {
10551077 },
10561078 MainOperation : ActionConfig {
10571079 Name : ActionPowerControl ,
1080+ Parameters : map [string ]any {
1081+ ParamOperation : "force_power_off" ,
1082+ },
10581083 },
10591084 PostOperation : []ActionConfig {
10601085 {
1061- // Brief pause between off and on
10621086 Name : ActionSleep ,
10631087 Parameters : map [string ]any {
10641088 ParamDuration : 5 * time .Second ,
10651089 },
10661090 },
10671091 },
10681092 },
1069- // === Power On Sequence (Stages 4-6) ===
1093+ // === Verify Off Stage (Stage 4) ===
1094+ // Confirm all components are actually off before powering
1095+ // back on. Without this, a silent power-off failure would
1096+ // result in a "successful restart" that never power-cycled.
10701097 {
10711098 ComponentType : devicetypes .ComponentTypePowerShelf ,
10721099 Stage : 4 ,
10731100 MaxParallel : 0 ,
1101+ Timeout : 2 * time .Minute ,
1102+ RetryPolicy : & RetryPolicy {
1103+ MaxAttempts : 2 ,
1104+ InitialInterval : 5 * time .Second ,
1105+ BackoffCoefficient : 1.5 ,
1106+ },
1107+ MainOperation : ActionConfig {
1108+ Name : ActionVerifyPowerStatus ,
1109+ Timeout : 1 * time .Minute ,
1110+ PollInterval : 5 * time .Second ,
1111+ Parameters : map [string ]any {
1112+ ParamExpectedStatus : "off" ,
1113+ },
1114+ },
1115+ },
1116+ {
1117+ ComponentType : devicetypes .ComponentTypeNVLSwitch ,
1118+ Stage : 4 , // Parallel with PowerShelf
1119+ MaxParallel : 0 ,
1120+ Timeout : 2 * time .Minute ,
1121+ RetryPolicy : & RetryPolicy {
1122+ MaxAttempts : 2 ,
1123+ InitialInterval : 5 * time .Second ,
1124+ BackoffCoefficient : 1.5 ,
1125+ },
1126+ MainOperation : ActionConfig {
1127+ Name : ActionVerifyPowerStatus ,
1128+ Timeout : 1 * time .Minute ,
1129+ PollInterval : 5 * time .Second ,
1130+ Parameters : map [string ]any {
1131+ ParamExpectedStatus : "off" ,
1132+ },
1133+ },
1134+ },
1135+ {
1136+ ComponentType : devicetypes .ComponentTypeCompute ,
1137+ Stage : 4 , // Parallel with others
1138+ MaxParallel : 0 ,
1139+ Timeout : 2 * time .Minute ,
1140+ RetryPolicy : & RetryPolicy {
1141+ MaxAttempts : 2 ,
1142+ InitialInterval : 5 * time .Second ,
1143+ BackoffCoefficient : 1.5 ,
1144+ },
1145+ MainOperation : ActionConfig {
1146+ Name : ActionVerifyPowerStatus ,
1147+ Timeout : 1 * time .Minute ,
1148+ PollInterval : 5 * time .Second ,
1149+ Parameters : map [string ]any {
1150+ ParamExpectedStatus : "off" ,
1151+ },
1152+ },
1153+ },
1154+ // === Power On Sequence (Stages 5-7) ===
1155+ // Explicit force_power_on to match the force semantics.
1156+ {
1157+ ComponentType : devicetypes .ComponentTypePowerShelf ,
1158+ Stage : 5 ,
1159+ MaxParallel : 0 ,
10741160 Timeout : 10 * time .Minute ,
10751161 RetryPolicy : & RetryPolicy {
10761162 MaxAttempts : 3 ,
@@ -1079,6 +1165,9 @@ func buildForceRestartRule() *OperationRule {
10791165 },
10801166 MainOperation : ActionConfig {
10811167 Name : ActionPowerControl ,
1168+ Parameters : map [string ]any {
1169+ ParamOperation : "force_power_on" ,
1170+ },
10821171 },
10831172 PostOperation : []ActionConfig {
10841173 {
@@ -1091,7 +1180,7 @@ func buildForceRestartRule() *OperationRule {
10911180 },
10921181 {
10931182 ComponentType : devicetypes .ComponentTypeNVLSwitch ,
1094- Stage : 5 ,
1183+ Stage : 6 ,
10951184 MaxParallel : 0 ,
10961185 Timeout : 15 * time .Minute ,
10971186 RetryPolicy : & RetryPolicy {
@@ -1101,6 +1190,9 @@ func buildForceRestartRule() *OperationRule {
11011190 },
11021191 MainOperation : ActionConfig {
11031192 Name : ActionPowerControl ,
1193+ Parameters : map [string ]any {
1194+ ParamOperation : "force_power_on" ,
1195+ },
11041196 },
11051197 PostOperation : []ActionConfig {
11061198 {
@@ -1113,7 +1205,7 @@ func buildForceRestartRule() *OperationRule {
11131205 },
11141206 {
11151207 ComponentType : devicetypes .ComponentTypeCompute ,
1116- Stage : 6 ,
1208+ Stage : 7 ,
11171209 MaxParallel : 0 ,
11181210 Timeout : 20 * time .Minute ,
11191211 RetryPolicy : & RetryPolicy {
@@ -1123,22 +1215,24 @@ func buildForceRestartRule() *OperationRule {
11231215 },
11241216 MainOperation : ActionConfig {
11251217 Name : ActionPowerControl ,
1218+ Parameters : map [string ]any {
1219+ ParamOperation : "force_power_on" ,
1220+ },
11261221 },
11271222 PostOperation : []ActionConfig {
11281223 {
1129- // Brief settle time before final verification
11301224 Name : ActionSleep ,
11311225 Parameters : map [string ]any {
11321226 ParamDuration : 10 * time .Second ,
11331227 },
11341228 },
11351229 },
11361230 },
1137- // === Final Verification Stage (Stage 7) ===
1138- // Verify all components in parallel
1231+ // === Final Verification Stage (Stage 8) ===
1232+ // Verify all components are back on
11391233 {
11401234 ComponentType : devicetypes .ComponentTypePowerShelf ,
1141- Stage : 7 ,
1235+ Stage : 8 ,
11421236 MaxParallel : 0 ,
11431237 Timeout : 2 * time .Minute ,
11441238 RetryPolicy : & RetryPolicy {
@@ -1157,7 +1251,7 @@ func buildForceRestartRule() *OperationRule {
11571251 },
11581252 {
11591253 ComponentType : devicetypes .ComponentTypeNVLSwitch ,
1160- Stage : 7 , // Parallel with PowerShelf
1254+ Stage : 8 , // Parallel with PowerShelf
11611255 MaxParallel : 0 ,
11621256 Timeout : 2 * time .Minute ,
11631257 RetryPolicy : & RetryPolicy {
@@ -1176,7 +1270,7 @@ func buildForceRestartRule() *OperationRule {
11761270 },
11771271 {
11781272 ComponentType : devicetypes .ComponentTypeCompute ,
1179- Stage : 7 , // Parallel with others
1273+ Stage : 8 , // Parallel with others
11801274 MaxParallel : 0 ,
11811275 Timeout : 2 * time .Minute ,
11821276 RetryPolicy : & RetryPolicy {
0 commit comments