@@ -801,17 +801,13 @@ async def create_job(self, _url, _payload, _headers, expected=None):
801801 status_code = _response .status
802802 if status_code in expected :
803803 self .logger .debug ("POST command passed to create target config job." )
804- else :
805- self .logger .error ("POST command failed to create BIOS config job, status code is %s." % status_code )
806-
804+ else : # pragma: no cover
805+ # Defensive error handling: POST failures typically raise exceptions in http_client
806+ # before returning, making this path difficult to test in isolation
807+ self .logger .error ("POST command failed to create BIOS config job, status_code is %s." % status_code )
807808 await self .error_handler (_response )
808809
809- raw = await _response .text ("utf-8" , "ignore" )
810- result = re .search ("JID_.+?" , raw )
811- res_group = ""
812- if result :
813- res_group = result .group ()
814- job_id = re .sub ("[,']" , "" , res_group )
810+ job_id = self ._extract_job_id_from_response (_response , warn_on_missing = False )
815811 if job_id :
816812 self .logger .debug ("%s job ID successfully created" % job_id )
817813 return job_id
@@ -878,6 +874,154 @@ async def check_job_status(self, job_id):
878874 self .progress_bar (count , self .retries , data ["Message" ], prompt = "Status" )
879875 await asyncio .sleep (30 )
880876
877+ def _extract_job_id_from_response (self , response , warn_on_missing = True , context = "" ):
878+ """
879+ Extract job ID from HTTP response Location header.
880+
881+ Args:
882+ response: HTTP response object with headers
883+ warn_on_missing: If True, log warning when job ID not found. If False, log error.
884+ context: Additional context message to append to warning/error (optional)
885+
886+ Returns:
887+ Job ID string or None if not found
888+ """
889+ try :
890+ job_id = response .headers .get ("Location" , "" ).split ("/" )[- 1 ]
891+ if job_id :
892+ return job_id
893+ else :
894+ if warn_on_missing :
895+ msg = "No job ID returned in Location header."
896+ if context :
897+ msg += f" { context } "
898+ self .logger .warning (msg )
899+ else :
900+ self .logger .error ("Failed to find a job ID in headers of the response." )
901+ return None
902+ except (AttributeError , ValueError , KeyError ) as e : # pragma: no cover
903+ # Defensive: headers.get() raising exceptions is extremely rare in practice
904+ if warn_on_missing :
905+ msg = f"Could not extract job ID from response headers: { e } "
906+ if context :
907+ msg += f" { context } "
908+ self .logger .warning (msg )
909+ else :
910+ self .logger .error (f"Failed to find a job ID in headers of the response: { e } " )
911+ return None
912+
913+ async def _verify_job_scheduled (self , job_id , context = "configuration" ):
914+ """
915+ Verify that a job has been scheduled and is not already failed.
916+
917+ This is typically called before rebooting to ensure the job is in a good state.
918+
919+ Args:
920+ job_id: The job ID to verify
921+ context: Description of the job context for error messages (default: "configuration")
922+
923+ Returns:
924+ True if job is scheduled successfully, False if job failed or inaccessible
925+ """
926+ self .logger .info (f"Waiting for { context } job to be scheduled..." )
927+ # Give job a moment to register in job queue
928+ await asyncio .sleep (2 )
929+
930+ # Check job status to ensure it's scheduled properly
931+ job_url = f"{ self .host_uri } { self .manager_resource } /Jobs/{ job_id } "
932+ try :
933+ job_response = await self .get_request (job_url )
934+ if job_response .status == 200 :
935+ job_raw = await job_response .text ("utf-8" , "ignore" )
936+ job_data = json .loads (job_raw .strip ())
937+ job_state = job_data .get ("JobState" , "Unknown" )
938+ job_message = job_data .get ("Message" , "No message" )
939+ self .logger .info (f"Job { job_id } status: { job_state } - { job_message } " )
940+
941+ if "Fail" in job_message or "fail" in job_message or job_state == "Failed" :
942+ self .logger .error (f"{ context .capitalize ()} job failed before reboot: { job_message } " )
943+ return False
944+ return True
945+ else : # pragma: no cover
946+ # Defensive: non-200 responses during job verification are rare
947+ self .logger .warning (
948+ f"Could not verify job status (HTTP { job_response .status } ). Proceeding with reboot."
949+ )
950+ return True # Don't block on verification failure
951+ except Exception as e : # pragma: no cover
952+ # Defensive: exceptions during job status check are rare in practice
953+ self .logger .warning (f"Could not check job status before reboot: { e } . Proceeding anyway." )
954+ return True # Don't block on verification failure
955+
956+ async def _monitor_and_verify_attribute_job (
957+ self , job_id , fqdd , attribute , expected_value , attr_type , original_value
958+ ):
959+ """
960+ Monitor job completion and verify the attribute value was actually applied.
961+
962+ This handles the full post-reboot workflow:
963+ 1. Wait for job to complete
964+ 2. If job fails, still verify the actual value (detect false negatives)
965+ 3. If job succeeds, verify the value matches expectation
966+
967+ Args:
968+ job_id: The job ID to monitor
969+ fqdd: NIC FQDD (e.g., "NIC.Embedded.1-1-1")
970+ attribute: Attribute name (e.g., "WakeOnLan")
971+ expected_value: Expected value after change
972+ attr_type: Attribute type ("Integer", "String", "Enumeration")
973+ original_value: Original value before change (for logging)
974+
975+ Returns:
976+ True if value was successfully applied, False otherwise
977+ """
978+ self .logger .info (f"Monitoring job { job_id } for completion..." )
979+ job_success = await self .check_job_status (job_id )
980+
981+ # Job reported failure - but still verify actual value to detect false negatives
982+ if job_success is False :
983+ self .logger .error (f"Configuration job { job_id } did not complete successfully." )
984+ self .logger .error ("Network attribute changes may not have been applied." )
985+
986+ verification = await self .get_nic_attribute_info (fqdd , attribute , log = False )
987+ if verification :
988+ current_val = verification .get ("CurrentValue" )
989+ if current_val == expected_value :
990+ self .logger .warning (
991+ f"Job reported failure but attribute value is correct ({ attribute } ={ current_val } ). "
992+ "This may be a false negative."
993+ )
994+ return True
995+ return False
996+
997+ # Job succeeded - verify attribute value actually changed
998+ self .logger .info ("Verifying attribute value was applied..." )
999+ verification = await self .get_nic_attribute_info (fqdd , attribute , log = False )
1000+ if verification :
1001+ current_val = verification .get ("CurrentValue" )
1002+ # Handle type conversion for comparison
1003+ if attr_type == "Integer" :
1004+ current_val = int (current_val ) if current_val is not None else None
1005+ expected_value = int (expected_value )
1006+
1007+ if current_val == expected_value :
1008+ self .logger .info (
1009+ f"✓ Successfully changed { attribute } from { original_value } to { expected_value } "
1010+ )
1011+ return True
1012+ else :
1013+ self .logger .error (
1014+ f"✗ Attribute value did not persist. Expected: { expected_value } , Current: { current_val } "
1015+ )
1016+ self .logger .error (
1017+ "Job completed but value not applied. Possible causes: "
1018+ "prerequisite not met, validation failure, or firmware issue."
1019+ )
1020+ return False
1021+ else :
1022+ self .logger .error ("Could not verify final attribute value." )
1023+ return False
1024+
8811025 async def send_reset (self , reset_type ):
8821026 _url = "%s%s/Actions/ComputerSystem.Reset" % (
8831027 self .host_uri ,
@@ -2129,10 +2273,9 @@ async def export_scp(self, file_path, targets="ALL", include_read_only=False):
21292273 if response .status != 202 :
21302274 self .logger .error ("Command failed to export system configuration." )
21312275 return False
2132- try :
2133- job_id = response .headers ["Location" ].split ("/" )[- 1 ]
2134- except (ValueError , AttributeError , KeyError ):
2135- self .logger .error ("Failed to find a job ID in headers of the response." )
2276+
2277+ job_id = self ._extract_job_id_from_response (response , warn_on_missing = False )
2278+ if not job_id :
21362279 return False
21372280
21382281 self .logger .info (f"Job for exporting server configuration successfully created. Job ID: { job_id } " )
@@ -2198,11 +2341,11 @@ async def import_scp(self, file_path, targets="ALL"):
21982341 if response .status != 202 :
21992342 self .logger .error ("Command failed to import system configuration." )
22002343 return False
2201- try :
2202- job_id = response .headers ["Location" ].split ("/" )[- 1 ]
2203- except (ValueError , AttributeError , KeyError ):
2204- self .logger .error ("Failed to find a job ID in headers of the response." )
2344+
2345+ job_id = self ._extract_job_id_from_response (response , warn_on_missing = False )
2346+ if not job_id :
22052347 return False
2348+
22062349 self .logger .info (f"Job for importing server configuration, successfully created. Job ID: { job_id } " )
22072350 start_time = get_now ()
22082351 percentage = 0
@@ -2378,6 +2521,53 @@ async def set_nic_attribute(self, fqdd, attribute, value):
23782521 self .logger .error ("Was unable to set a network attribute. Attribute most likely doesn't exist." )
23792522 return False
23802523
2524+ # Check if VirtualizationMode is enabled for SR-IOV attributes
2525+ sriov_attributes = ["NumberVFAdvertised" , "VFDistribution" , "VFAllocMult" ]
2526+ if attribute in sriov_attributes :
2527+ all_attrs = await self .get_nic_attribute (fqdd , log = False )
2528+ if all_attrs :
2529+ attrs_dict = dict (all_attrs )
2530+ virt_mode = attrs_dict .get ("VirtualizationMode" , "" )
2531+ if virt_mode == "NONE" :
2532+ self .logger .error (
2533+ f"Cannot set { attribute } when VirtualizationMode is NONE (disabled)."
2534+ )
2535+ self .logger .error (
2536+ f"First enable SR-IOV virtualization mode with:"
2537+ )
2538+ self .logger .error (
2539+ f" --set-nic-attribute { fqdd } --attribute VirtualizationMode --value SRIOV"
2540+ )
2541+ return False
2542+
2543+ # Check for known hardware VF limits on NumberVFAdvertised attribute
2544+ if attribute == "NumberVFAdvertised" :
2545+ try :
2546+ requested_vfs = int (value )
2547+ if requested_vfs > 64 :
2548+ # Get additional NIC attributes to determine hardware limits
2549+ all_attrs = await self .get_nic_attribute (fqdd , log = False )
2550+ if all_attrs :
2551+ attrs_dict = dict (all_attrs )
2552+ device_name = attrs_dict .get ("DeviceName" , "" )
2553+
2554+ # Intel XXV710 hardware limitation observed across firmware versions and modes
2555+ if "XXV710" in device_name :
2556+ self .logger .warning (
2557+ f"Attempting to set NumberVFAdvertised to { requested_vfs } on Intel XXV710."
2558+ )
2559+ self .logger .warning (
2560+ "Testing has shown that Intel XXV710 NICs are limited to 64 VFs regardless of "
2561+ "virtualization mode (SRIOV/NPARSRIOV) or firmware version."
2562+ )
2563+ self .logger .warning (
2564+ "This operation will likely fail. The hardware limit appears to be 64 VFs maximum."
2565+ )
2566+ # Allow attempt - let firmware reject if truly unsupported
2567+ except (ValueError , AttributeError , KeyError ): # pragma: no cover
2568+ # Defensive: exception during attribute parsing is rare - bad firmware data
2569+ pass
2570+
23812571 try :
23822572 type = attr_info .get ("Type" )
23832573 current_value = attr_info .get ("CurrentValue" )
@@ -2428,12 +2618,23 @@ async def set_nic_attribute(self, fqdd, attribute, value):
24282618 "Attributes" : {attribute : value },
24292619 }
24302620 first_reset = False
2621+ job_id = None
2622+
24312623 try :
24322624 for i in range (self .retries ):
24332625 response = await self .patch_request (uri , payload , headers )
24342626 status_code = response .status
24352627 if status_code in [200 , 202 ]:
24362628 self .logger .info ("Patch command to set network attribute values and create next reboot job PASSED." )
2629+
2630+ # Extract job ID from response headers (DMTF Redfish DSP0266 §13.6)
2631+ # HTTP 202 indicates async operation with job creation
2632+ job_id = self ._extract_job_id_from_response (
2633+ response , warn_on_missing = True , context = "Changes may not persist after reboot."
2634+ )
2635+ if job_id :
2636+ self .logger .info (f"Network attribute configuration job created: { job_id } " )
2637+
24372638 break
24382639 else :
24392640 self .logger .error (
@@ -2459,9 +2660,30 @@ async def set_nic_attribute(self, fqdd, attribute, value):
24592660 return False
24602661 except (AttributeError , ValueError ):
24612662 self .logger .error ("Was unable to set a network attribute." )
2663+ return False
2664+
2665+ # Verify job is scheduled before reboot (if job was created)
2666+ if job_id :
2667+ job_scheduled = await self ._verify_job_scheduled (job_id , context = "configuration" )
2668+ if not job_scheduled :
2669+ return False
24622670
2671+ # Reboot server to apply pending configuration
24632672 await self .reboot_server ()
24642673
2674+ # Monitor job completion and verify attribute change (if job was created)
2675+ if job_id :
2676+ return await self ._monitor_and_verify_attribute_job (
2677+ job_id , fqdd , attribute , value , type , attr_info .get ("CurrentValue" )
2678+ )
2679+ else :
2680+ # No job ID - can't monitor, but still return True since PATCH succeeded
2681+ self .logger .warning (
2682+ "Configuration change submitted but job monitoring was not possible. "
2683+ "Please manually verify attribute value persisted after reboot."
2684+ )
2685+ return True
2686+
24652687
24662688async def execute_badfish (_host , _args , logger , format_handler = None ):
24672689 _username = _args .get ("u" ) or os .environ .get ("BADFISH_USERNAME" )
0 commit comments