diff --git a/build/build-variables.yml b/build/build-variables.yml
index 75699622d0..8e4c41360b 100644
--- a/build/build-variables.yml
+++ b/build/build-variables.yml
@@ -16,6 +16,14 @@ variables:
DeploymentEnvironmentNameR4BSql: '$(DeploymentEnvironmentNameR4B)-sql'
DeploymentEnvironmentNameR5: '$(DeploymentEnvironmentName)-r5'
DeploymentEnvironmentNameR5Sql: '$(DeploymentEnvironmentNameR5)-sql'
+ # Key Vault names (shorter due to 24 character limit)
+ KeyVaultNameSql: '$(KeyVaultBaseName)-sql'
+ KeyVaultNameR4: '$(KeyVaultBaseName)-r4'
+ KeyVaultNameR4Sql: '$(KeyVaultNameR4)-sql'
+ KeyVaultNameR4B: '$(KeyVaultBaseName)-r4b'
+ KeyVaultNameR4BSql: '$(KeyVaultNameR4B)-sql'
+ KeyVaultNameR5: '$(KeyVaultBaseName)-r5'
+ KeyVaultNameR5Sql: '$(KeyVaultNameR5)-sql'
TestEnvironmentUrl: 'https://$(DeploymentEnvironmentName).azurewebsites.net'
# These variables are not used in the deployment scripts, but are used in the E2E tests files.
TestEnvironmentUrl_Sql: 'https://$(DeploymentEnvironmentName)-sql.azurewebsites.net'
diff --git a/build/ci-variables.yml b/build/ci-variables.yml
index 6afcea9248..467c10e304 100644
--- a/build/ci-variables.yml
+++ b/build/ci-variables.yml
@@ -4,6 +4,8 @@ variables:
resourceGroupRoot: 'msh-fhir-ci4'
appServicePlanName: '$(resourceGroupRoot)-linux'
DeploymentEnvironmentName: '$(resourceGroupRoot)'
+ # CI uses the same name for Key Vaults as the deployment environment (no BuildId suffix needed)
+ KeyVaultBaseName: '$(DeploymentEnvironmentName)'
ResourceGroupName: '$(resourceGroupRoot)'
CrucibleEnvironmentUrl: 'https://crucible.mshapis.com/'
TestEnvironmentName: 'OSS CI'
diff --git a/build/jobs/add-aad-test-environment.yml b/build/jobs/add-aad-test-environment.yml
index dbb8c5a70e..dcc035582e 100644
--- a/build/jobs/add-aad-test-environment.yml
+++ b/build/jobs/add-aad-test-environment.yml
@@ -32,22 +32,10 @@ steps:
$password = ConvertTo-SecureString -AsPlainText $password_raw -Force
$clientSecretCredential = New-Object -TypeName System.Management.Automation.PSCredential -ArgumentList $clientId, $password
- # If a deleted keyvault with purge protection exists, try to restore it.
$environmentName = "$(DeploymentEnvironmentName)".ToLower() -replace "\.", ""
+ $keyVaultName = "$(KeyVaultBaseName)-ts".ToLower() -replace "\.", ""
Write-Host "Installed module and set variables"
- $vaultName = "${environmentName}-ts"
- $vaultLocation = "westus"
- $vaultResourceGroupName = "$(ResourceGroupName)"
- if (Get-AzKeyVault -VaultName $vaultName -Location $vaultLocation -InRemovedState)
- {
- Write-Host "Attempting to restore vault ${vaultName}"
-
- Undo-AzKeyVaultRemoval -VaultName $vaultName -ResourceGroupName $vaultResourceGroupName -Location $vaultLocation
- Write-Host "KeyVault $vaultName is restored"
- }
- Write-Host "Restored keyvaults"
-
# Connect to Microsoft Graph using client credentials
Write-Host "Connecting to Microsoft Graph"
Connect-MgGraph -TenantId $tenantId -ClientSecretCredential $clientSecretCredential
@@ -57,4 +45,4 @@ steps:
Import-Module $(System.DefaultWorkingDirectory)/release/scripts/PowerShell/FhirServerRelease/FhirServerRelease.psd1
Write-Host "Imported modules"
- $output = Add-AadTestAuthEnvironment -TestAuthEnvironmentPath $(System.DefaultWorkingDirectory)/testauthenvironment.json -EnvironmentName $environmentName -ResourceGroupName $(ResourceGroupName) -TenantAdminCredential $clientSecretCredential -TenantId $tenantId -ClientId $clientId -ClientSecret $password
+ $output = Add-AadTestAuthEnvironment -TestAuthEnvironmentPath $(System.DefaultWorkingDirectory)/testauthenvironment.json -EnvironmentName $environmentName -KeyVaultName $keyVaultName -ResourceGroupName $(UniqueResourceGroupName) -EnvironmentLocation $(ResourceGroupRegion) -TenantAdminCredential $clientSecretCredential -TenantId $tenantId -ClientId $clientId -ClientSecret $password
diff --git a/build/jobs/cleanup.yml b/build/jobs/cleanup.yml
index 62d395e514..b8edd363aa 100644
--- a/build/jobs/cleanup.yml
+++ b/build/jobs/cleanup.yml
@@ -12,12 +12,71 @@ jobs:
azurePowerShellVersion: latestVersion
ScriptType: InlineScript
Inline: |
- Get-AzResourceGroup -Name $(ResourceGroupName) -ErrorVariable notPresent -ErrorAction SilentlyContinue
+ $deploymentEnvName = "$(DeploymentEnvironmentName)"
+ $keyVaultBaseName = "$(KeyVaultBaseName)"
+
+ Get-AzResourceGroup -Name $(UniqueResourceGroupName) -ErrorVariable notPresent -ErrorAction SilentlyContinue
if ($notPresent) {
- Write-Host "Resource group $(ResourceGroupName) not found"
+ Write-Host "Resource group $(UniqueResourceGroupName) not found"
} else {
- Write-Host "Deleting resource group $(ResourceGroupName)"
- Remove-AzResourceGroup -Name $(ResourceGroupName) -Force -Verbose
+ Write-Host "Deleting resource group $(UniqueResourceGroupName)"
+ $maxRetries = 3
+ $retryCount = 0
+ $deleted = $false
+
+ while (-not $deleted -and $retryCount -lt $maxRetries) {
+ try {
+ Remove-AzResourceGroup -Name $(UniqueResourceGroupName) -Force -Verbose -ErrorAction Stop
+ $deleted = $true
+ Write-Host "Successfully deleted resource group $(UniqueResourceGroupName)"
+ }
+ catch {
+ $retryCount++
+ if ($retryCount -lt $maxRetries) {
+ Write-Warning "Failed to delete resource group (attempt $retryCount of $maxRetries). Retrying in 30 seconds... Error: $($_.Exception.Message)"
+ Start-Sleep -Seconds 30
+ } else {
+ Write-Warning "Failed to delete resource group after $maxRetries attempts. Error: $($_.Exception.Message)"
+ }
+ }
+ }
+ }
+
+ # Purge any soft-deleted Key Vaults from this build
+ # Vault names use KeyVaultBaseName (e.g., f20585pr5240-ts, f20585pr5240-r4, etc.)
+ $vaultPattern = "$keyVaultBaseName*"
+ Write-Host "Checking for soft-deleted Key Vaults matching pattern: $vaultPattern"
+ $softDeletedVaults = Get-AzKeyVault -InRemovedState -ErrorAction SilentlyContinue | Where-Object {
+ $_.VaultName -like $vaultPattern
+ }
+
+ if ($softDeletedVaults.Count -eq 0) {
+ Write-Host "No soft-deleted vaults found to purge"
+ } else {
+ Write-Host "Found $($softDeletedVaults.Count) soft-deleted vault(s) to purge"
+
+ $results = $softDeletedVaults | ForEach-Object -Parallel {
+ $vaultName = $_.VaultName
+ $vaultLocation = $_.Location
+ try {
+ Remove-AzKeyVault -VaultName $vaultName -Location $vaultLocation -InRemovedState -Force -ErrorAction Stop
+ [PSCustomObject]@{ Vault = $vaultName; Status = "Success" }
+ }
+ catch {
+ [PSCustomObject]@{ Vault = $vaultName; Status = "Failed: $($_.Exception.Message)" }
+ }
+ } -ThrottleLimit 10
+
+ $results | ForEach-Object {
+ if ($_.Status -eq "Success") {
+ Write-Host "Successfully purged vault: $($_.Vault)"
+ } else {
+ Write-Warning "Failed to purge vault $($_.Vault). $($_.Status)"
+ }
+ }
+
+ $successCount = ($results | Where-Object { $_.Status -eq "Success" }).Count
+ Write-Host "Completed purging: $successCount of $($softDeletedVaults.Count) vaults"
}
diff --git a/build/jobs/e2e-setup.yml b/build/jobs/e2e-setup.yml
index 33890f84d2..2ca2fd2bfc 100644
--- a/build/jobs/e2e-setup.yml
+++ b/build/jobs/e2e-setup.yml
@@ -17,7 +17,7 @@ steps:
KeyVaultName: 'resolute-oss-tenant-info'
- task: AzureKeyVault@1
- displayName: 'Azure Key Vault: $(DeploymentEnvironmentName)-ts'
+ displayName: 'Azure Key Vault: $(KeyVaultBaseName)-ts'
inputs:
azureSubscription: $(ConnectedServiceName)
- KeyVaultName: '$(DeploymentEnvironmentName)-ts'
+ KeyVaultName: '$(KeyVaultBaseName)-ts'
diff --git a/build/jobs/e2e-tests.yml b/build/jobs/e2e-tests.yml
index a23f74b228..40f455c58c 100644
--- a/build/jobs/e2e-tests.yml
+++ b/build/jobs/e2e-tests.yml
@@ -5,92 +5,29 @@ parameters:
type: string
- name: appServiceType
type: string
+- name: categoryFilter
+ type: string
+ default: 'Category!=ExportLongRunning'
+- name: testRunTitleSuffix
+ type: string
+ default: ''
steps:
- template: e2e-tests-extract.yml
parameters:
version: ${{parameters.version}}
- - task: AzurePowerShell@5
- displayName: 'Set Variables'
- inputs:
- azureSubscription: $(ConnectedServiceName)
- azurePowerShellVersion: latestVersion
- ScriptType: inlineScript
- Inline: |
- $keyVault = "$(DeploymentEnvironmentName)-ts"
- $secrets = Get-AzKeyVaultSecret -VaultName $keyVault
-
- foreach($secret in $secrets)
- {
- $environmentVariableName = $secret.Name.Replace("--","_")
-
- $secretValue = Get-AzKeyVaultSecret -VaultName $keyVault -Name $secret.Name
- # Replace with -AsPlainText flag when v5.3 of the Az Module is supported
- $plainValue = ([System.Net.NetworkCredential]::new("", $secretValue.SecretValue).Password).ToString()
- if([string]::IsNullOrEmpty($plainValue))
- {
- throw "$($secret.Name) is empty"
- }
- Write-Host "##vso[task.setvariable variable=$($environmentVariableName)]$($plainValue)"
- }
-
- $appServiceName = "${{ parameters.appServiceName }}"
- $appSettings = (Get-AzWebApp -ResourceGroupName $(ResourceGroupName) -Name $appServiceName).SiteConfig.AppSettings
- $acrSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__ConvertData__ContainerRegistryServers__0"}
- $acrLoginServer = $acrSettings[0].Value
- $acrAccountName = ($acrLoginServer -split '\.')[0]
-
- ## This needs to be moved to MI, WI #125246
- # $acrPassword = (Get-AzContainerRegistryCredential -ResourceGroupName $(ResourceGroupName) -Name $acrAccountName).Password
- # Write-Host "##vso[task.setvariable variable=TestContainerRegistryServer]$($acrLoginServer)"
- # Write-Host "##vso[task.setvariable variable=TestContainerRegistryPassword]$($acrPassword)"
-
- $exportStoreSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__Export__StorageAccountUri"}
- $exportStoreUri = $exportStoreSettings[0].Value
- Write-Host "$exportStoreUri"
- $exportStoreAccountName = [System.Uri]::new("$exportStoreUri").Host.Split('.')[0]
- $exportStoreKey = Get-AzStorageAccountKey -ResourceGroupName $(ResourceGroupName) -Name "$exportStoreAccountName" | Where-Object {$_.KeyName -eq "key1"}
-
- Write-Host "##vso[task.setvariable variable=TestExportStoreUri]$($exportStoreUri)"
-
- $integrationStoreSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__IntegrationDataStore__StorageAccountUri"}
- $integrationStoreUri = $integrationStoreSettings[0].Value
- Write-Host "$integrationStoreUri"
- $integrationStoreAccountName = [System.Uri]::new("$integrationStoreUri").Host.Split('.')[0]
- Write-Host "##vso[task.setvariable variable=TestIntegrationStoreUri]$($integrationStoreUri)"
-
- Write-Host "##vso[task.setvariable variable=Resource]$(TestApplicationResource)"
-
- $secrets = Get-AzKeyVaultSecret -VaultName resolute-oss-tenant-info
- foreach($secret in $secrets)
- {
- $environmentVariableName = $secret.Name.Replace("--","_")
-
- $secretValue = Get-AzKeyVaultSecret -VaultName resolute-oss-tenant-info -Name $secret.Name
- # Replace with -AsPlainText flag when v5.3 of the Az Module is supported
- $plainValue = ([System.Net.NetworkCredential]::new("", $secretValue.SecretValue).Password).ToString()
- if([string]::IsNullOrEmpty($plainValue))
- {
- throw "$($secret.Name) is empty"
- }
- Write-Host "##vso[task.setvariable variable=$($environmentVariableName)]$($plainValue)"
- }
- # ----------------------------------------
-
- dotnet dev-certs https
-
- Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_CLIENT_ID]$env:AZURESUBSCRIPTION_CLIENT_ID"
- Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_TENANT_ID]$env:AZURESUBSCRIPTION_TENANT_ID"
- Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_SERVICE_CONNECTION_ID]$env:AZURESUBSCRIPTION_SERVICE_CONNECTION_ID"
+ - template: ../tasks/e2e-set-variables.yml
+ parameters:
+ appServiceName: ${{ parameters.appServiceName }}
- task: DotNetCoreCLI@2
- displayName: 'E2E ${{ parameters.version }} ${{parameters.appServiceType}}'
+ displayName: 'E2E ${{ parameters.version }} ${{parameters.appServiceType}}${{ parameters.testRunTitleSuffix }}'
inputs:
command: test
- arguments: '"$(Agent.TempDirectory)/E2ETests/**/*${{ parameters.version }}.Tests.E2E*.dll" --blame-hang-timeout 7m --filter "FullyQualifiedName~${{parameters.appServiceType}}&Category!=ExportLongRunning"'
+ arguments: '"$(Agent.TempDirectory)/E2ETests/**/*${{ parameters.version }}.Tests.E2E*.dll" --blame-hang-timeout 15m --filter "FullyQualifiedName~${{parameters.appServiceType}}&${{ parameters.categoryFilter }}"'
workingDirectory: "$(System.ArtifactsDirectory)"
- testRunTitle: '${{ parameters.version }} ${{parameters.appServiceType}}'
+ testRunTitle: '${{ parameters.version }} ${{parameters.appServiceType}}${{ parameters.testRunTitleSuffix }}'
env:
'TestEnvironmentUrl': $(TestEnvironmentUrl)
'TestEnvironmentUrl_${{ parameters.version }}': $(TestEnvironmentUrl_${{ parameters.version }})
diff --git a/build/jobs/provision-deploy.yml b/build/jobs/provision-deploy.yml
index 9c81e99080..ebc47d22b1 100644
--- a/build/jobs/provision-deploy.yml
+++ b/build/jobs/provision-deploy.yml
@@ -30,6 +30,9 @@ parameters:
- name: reindexEnabled
type: boolean
default: true
+- name: keyVaultName
+ type: string
+ default: ''
jobs:
- job: provisionEnvironment
@@ -73,6 +76,7 @@ jobs:
appServicePlanSku = "P2V3"
numberOfInstances = 2
serviceName = $webAppName
+ keyVaultName = "${{ parameters.keyVaultName }}".ToLower()
securityAuthenticationAuthority = "https://login.microsoftonline.com/$(tenant-id)"
securityAuthenticationAudience = "${{ parameters.testEnvironmentUrl }}"
additionalFhirServerConfigProperties = $additionalProperties
@@ -99,13 +103,6 @@ jobs:
$deploymentName = $webAppName
$resourceGroupName = "${{ parameters.resourceGroup }}"
- Write-Host "Check for keyvaults in removed state..."
- if (Get-AzKeyVault -VaultName $webAppName -Location $(ResourceGroupRegion) -InRemovedState)
- {
- Undo-AzKeyVaultRemoval -VaultName $webAppName -ResourceGroupName $resourceGroupName -Location $(ResourceGroupRegion)
- Write-Host "KeyVault $webAppName is restored"
- }
-
Write-Host "Provisioning Resource Group"
Write-Host "ResourceGroupName: ${{ parameters.resourceGroup }}"
diff --git a/build/jobs/provision-sqlServer.yml b/build/jobs/provision-sqlServer.yml
index 42590d6af9..a228129e8d 100644
--- a/build/jobs/provision-sqlServer.yml
+++ b/build/jobs/provision-sqlServer.yml
@@ -114,13 +114,34 @@ jobs:
'nspProfile' = 'default'
}
- # Deploy using external template
- $nspDeploymentResult = New-AzResourceGroupDeployment `
- -ResourceGroupName $resourceGroup `
- -Name "$sqlServerName-nsp-association" `
- -TemplateFile $(System.DefaultWorkingDirectory)/samples/templates/nsp-resource-association.json `
- -TemplateParameterObject $templateParameters `
- -Verbose
+ # Deploy using external template with retry logic for transient conflicts
+ $maxRetries = 3
+ $retryDelaySeconds = 15
+ $attempt = 0
+ $success = $false
+
+ do {
+ try {
+ $attempt++
+ Write-Host "Attempt $attempt of $maxRetries to create NSP association..."
+
+ $nspDeploymentResult = New-AzResourceGroupDeployment `
+ -ResourceGroupName $resourceGroup `
+ -Name "$sqlServerName-nsp-association" `
+ -TemplateFile $(System.DefaultWorkingDirectory)/samples/templates/nsp-resource-association.json `
+ -TemplateParameterObject $templateParameters `
+ -Verbose
+
+ $success = $true
+ } catch {
+ if ($_.Exception.Message -like "*Conflict*" -and $attempt -lt $maxRetries) {
+ Write-Host "Conflict detected. Retrying in $retryDelaySeconds seconds..."
+ Start-Sleep -Seconds $retryDelaySeconds
+ } else {
+ throw
+ }
+ }
+ } while (-not $success -and $attempt -lt $maxRetries)
Write-Host "Successfully associated SQL Server with NSP using external ARM template"
Write-Host "Association Resource ID: $($nspDeploymentResult.Outputs.associationResourceId.Value)"
diff --git a/build/jobs/run-export-tests.yml b/build/jobs/run-export-tests.yml
index 91b64c3de8..1717f7fea0 100644
--- a/build/jobs/run-export-tests.yml
+++ b/build/jobs/run-export-tests.yml
@@ -23,7 +23,7 @@ jobs:
azurePowerShellVersion: latestVersion
ScriptType: inlineScript
Inline: |
- $keyVault = "$(DeploymentEnvironmentName)-ts"
+ $keyVault = "$(KeyVaultBaseName)-ts"
$secrets = Get-AzKeyVaultSecret -VaultName $keyVault
foreach($secret in $secrets)
@@ -116,7 +116,7 @@ jobs:
azurePowerShellVersion: latestVersion
ScriptType: inlineScript
Inline: |
- $keyVault = "$(DeploymentEnvironmentName)-ts"
+ $keyVault = "$(KeyVaultBaseName)-ts"
$secrets = Get-AzKeyVaultSecret -VaultName $keyVault
foreach($secret in $secrets)
diff --git a/build/jobs/run-tests.yml b/build/jobs/run-tests.yml
index a9c3d29a3c..6e5e602acb 100644
--- a/build/jobs/run-tests.yml
+++ b/build/jobs/run-tests.yml
@@ -40,7 +40,7 @@ jobs:
Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_SERVICE_CONNECTION_ID]$env:AZURESUBSCRIPTION_SERVICE_CONNECTION_ID"
$appServiceName = '${{ parameters.appServiceName }}'
- $appSettings = (Get-AzWebApp -ResourceGroupName $(ResourceGroupName) -Name $appServiceName).SiteConfig.AppSettings
+ $appSettings = (Get-AzWebApp -ResourceGroupName $(UniqueResourceGroupName) -Name $appServiceName).SiteConfig.AppSettings
$dataStoreResourceId = $appSettings | where {$_.Name -eq "FhirServer__ResourceManager__DataStoreResourceId"}
$dataStoreResourceId = $dataStoreResourceId[0].Value
Write-Host "$dataStoreResourceId"
@@ -180,6 +180,22 @@ jobs:
version: ${{ parameters.version }}
appServiceName: ${{ parameters.appServiceName }}
appServiceType: 'CosmosDb'
+ categoryFilter: 'Category!=ExportLongRunning&Category!=IndexAndReindex'
+
+- job: 'cosmosE2eTests_Reindex'
+ dependsOn: []
+ pool:
+ name: '$(SharedLinuxPool)'
+ vmImage: '$(LinuxVmImage)'
+ steps:
+ - template: e2e-setup.yml
+ - template: e2e-tests.yml
+ parameters:
+ version: ${{ parameters.version }}
+ appServiceName: ${{ parameters.appServiceName }}
+ appServiceType: 'CosmosDb'
+ categoryFilter: 'Category=IndexAndReindex'
+ testRunTitleSuffix: ' Reindex'
- job: 'sqlE2eTests'
dependsOn: []
@@ -193,3 +209,34 @@ jobs:
version: ${{ parameters.version }}
appServiceName: '${{ parameters.appServiceName }}-sql'
appServiceType: 'SqlServer'
+ categoryFilter: 'Category!=ExportLongRunning&Category!=IndexAndReindex&Category!=BulkUpdate'
+
+- job: 'sqlE2eTests_Reindex'
+ dependsOn: []
+ pool:
+ name: '$(SharedLinuxPool)'
+ vmImage: '$(LinuxVmImage)'
+ steps:
+ - template: e2e-setup.yml
+ - template: e2e-tests.yml
+ parameters:
+ version: ${{ parameters.version }}
+ appServiceName: '${{ parameters.appServiceName }}-sql'
+ appServiceType: 'SqlServer'
+ categoryFilter: 'Category=IndexAndReindex'
+ testRunTitleSuffix: ' Reindex'
+
+- job: 'sqlE2eTests_BulkUpdate'
+ dependsOn: []
+ pool:
+ name: '$(SharedLinuxPool)'
+ vmImage: '$(LinuxVmImage)'
+ steps:
+ - template: e2e-setup.yml
+ - template: e2e-tests.yml
+ parameters:
+ version: ${{ parameters.version }}
+ appServiceName: '${{ parameters.appServiceName }}-sql'
+ appServiceType: 'SqlServer'
+ categoryFilter: 'Category=BulkUpdate'
+ testRunTitleSuffix: ' BulkUpdate'
diff --git a/build/pr-pipeline.yml b/build/pr-pipeline.yml
index c1792d561f..1daf06f5b5 100644
--- a/build/pr-pipeline.yml
+++ b/build/pr-pipeline.yml
@@ -107,6 +107,19 @@ stages:
tag: $(ImageTag)
buildPlatform: $(testDockerImagePlatforms)
+- stage: cleanupOldResources
+ displayName: 'Cleanup Old PR Resources'
+ dependsOn:
+ - setupEnvironment
+ jobs:
+ - job: deleteOldResourceGroups
+ displayName: 'Delete Old Resource Groups'
+ pool:
+ name: '$(DefaultLinuxPool)'
+ vmImage: '$(LinuxVmImage)'
+ steps:
+ - template: ./tasks/delete-resource-groups.yml
+
- stage: provisionEnvironment
displayName: Provision Environment
dependsOn: []
@@ -124,17 +137,20 @@ stages:
azurePowerShellVersion: latestVersion
ScriptType: inlineScript
Inline: |
+ # Handle re-run scenario: remove existing RG for this build if it exists
try
{
- Get-AzResourceGroup -Name $(ResourceGroupName) | Remove-AzResourceGroup -Verbose -Force
+ Get-AzResourceGroup -Name $(UniqueResourceGroupName) | Remove-AzResourceGroup -Verbose -Force
}
catch
{}
- New-AzResourceGroup -Name "$(ResourceGroupName)" -Location "$(ResourceGroupRegion)" -Force
+ # Create new resource group with unique name
+ New-AzResourceGroup -Name "$(UniqueResourceGroupName)" -Location "$(ResourceGroupRegion)" -Force
+ Write-Host "Created resource group: $(UniqueResourceGroupName)"
- template: ./jobs/add-resource-group-role-assignments.yml
parameters:
azureSubscription: $(ConnectedServiceName)
- resourceGroupName: $(ResourceGroupName)
+ resourceGroupName: $(UniqueResourceGroupName)
- stage: setupEnvironment
displayName: Setup Test Environment
@@ -143,10 +159,18 @@ stages:
- provisionEnvironment
jobs:
- template: ./jobs/cleanup-aad.yml
+ - job: deleteKeyVaults
+ displayName: 'Delete and Purge Key Vaults'
+ pool:
+ name: '$(DefaultLinuxPool)'
+ vmImage: '$(LinuxVmImage)'
+ steps:
+ - template: ./tasks/delete-keyvaults.yml
- job: setup
displayName: 'Setup AAD'
dependsOn:
- cleanupAad
+ - deleteKeyVaults
pool:
vmImage: '$(WindowsVmImage)'
steps:
@@ -159,8 +183,8 @@ stages:
jobs:
- template: ./jobs/provision-nsp.yml
parameters:
- resourceGroup: $(ResourceGroupName)
- nspName: 'nsp-$(ResourceGroupName)'
+ resourceGroup: $(UniqueResourceGroupName)
+ nspName: 'nsp-$(UniqueResourceGroupName)'
nspProfile: 'default'
- stage: associateKeyVaultNsp
@@ -171,9 +195,9 @@ stages:
jobs:
- template: ./jobs/associate-keyvault-nsp.yml
parameters:
- resourceGroup: $(ResourceGroupName)
- keyVaultName: '$(DeploymentEnvironmentName)-ts'
- nspName: 'nsp-$(ResourceGroupName)'
+ resourceGroup: $(UniqueResourceGroupName)
+ keyVaultName: '$(KeyVaultBaseName)-ts'
+ nspName: 'nsp-$(UniqueResourceGroupName)'
nspProfile: 'default'
- stage: deploySqlServer
@@ -184,19 +208,19 @@ stages:
jobs:
- template: ./jobs/provision-sqlServer.yml
parameters:
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
sqlServerName: $(DeploymentEnvironmentName)
adminType: 'userAssignedManagedIdentity'
adminUserAssignedManagedIdentityName: "$(DeploymentEnvironmentName)-uami"
deploymentName: "AppServer"
- nspName: 'nsp-$(ResourceGroupName)'
+ nspName: 'nsp-$(UniqueResourceGroupName)'
- template: ./jobs/provision-sqlServer.yml
parameters:
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
sqlServerName: $(DeploymentEnvironmentName)inttest
adminType: 'federatedServiceConnection'
deploymentName: "IntegrationTests"
- nspName: 'nsp-$(ResourceGroupName)'
+ nspName: 'nsp-$(UniqueResourceGroupName)'
- stage: deployStu3
displayName: 'Deploy STU3 CosmosDB Site'
@@ -209,10 +233,11 @@ stages:
parameters:
version: Stu3
webAppName: $(DeploymentEnvironmentName)
+ keyVaultName: $(KeyVaultBaseName)
appServicePlanName: '$(appServicePlanName)-cosmos'
appServicePlanResourceGroup: $(appServicePlanResourceGroup)
subscription: $(ConnectedServiceName)
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
testEnvironmentUrl: $(TestApplicationResource)
imageTag: $(ImageTag)
reindexEnabled: true
@@ -229,10 +254,11 @@ stages:
version: Stu3
sql: true
webAppName: $(DeploymentEnvironmentNameSql)
+ keyVaultName: $(KeyVaultNameSql)
appServicePlanName: '$(appServicePlanName)-sql'
appServicePlanResourceGroup: $(appServicePlanResourceGroup)
subscription: $(ConnectedServiceName)
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
testEnvironmentUrl: $(TestApplicationResource)
imageTag: $(ImageTag)
schemaAutomaticUpdatesEnabled: 'auto'
@@ -251,10 +277,11 @@ stages:
parameters:
version: R4
webAppName: $(DeploymentEnvironmentNameR4)
+ keyVaultName: $(KeyVaultNameR4)
appServicePlanName: '$(appServicePlanName)-cosmos'
appServicePlanResourceGroup: $(appServicePlanResourceGroup)
subscription: $(ConnectedServiceName)
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
testEnvironmentUrl: $(TestApplicationResource)
imageTag: $(ImageTag)
reindexEnabled: true
@@ -271,10 +298,11 @@ stages:
version: R4
sql: true
webAppName: $(DeploymentEnvironmentNameR4Sql)
+ keyVaultName: $(KeyVaultNameR4Sql)
appServicePlanName: '$(appServicePlanName)-sql'
appServicePlanResourceGroup: $(appServicePlanResourceGroup)
subscription: $(ConnectedServiceName)
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
testEnvironmentUrl: $(TestApplicationResource)
imageTag: $(ImageTag)
schemaAutomaticUpdatesEnabled: 'auto'
@@ -293,10 +321,11 @@ stages:
parameters:
version: R4B
webAppName: $(DeploymentEnvironmentNameR4B)
+ keyVaultName: $(KeyVaultNameR4B)
appServicePlanName: '$(appServicePlanName)-cosmos'
appServicePlanResourceGroup: $(appServicePlanResourceGroup)
subscription: $(ConnectedServiceName)
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
testEnvironmentUrl: $(TestApplicationResource)
imageTag: $(ImageTag)
reindexEnabled: true
@@ -313,10 +342,11 @@ stages:
version: R4B
sql: true
webAppName: $(DeploymentEnvironmentNameR4BSql)
+ keyVaultName: $(KeyVaultNameR4BSql)
appServicePlanName: '$(appServicePlanName)-sql'
appServicePlanResourceGroup: $(appServicePlanResourceGroup)
subscription: $(ConnectedServiceName)
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
testEnvironmentUrl: $(TestApplicationResource)
imageTag: $(ImageTag)
schemaAutomaticUpdatesEnabled: 'auto'
@@ -335,10 +365,11 @@ stages:
parameters:
version: R5
webAppName: $(DeploymentEnvironmentNameR5)
+ keyVaultName: $(KeyVaultNameR5)
appServicePlanName: '$(appServicePlanName)-cosmos'
appServicePlanResourceGroup: $(appServicePlanResourceGroup)
subscription: $(ConnectedServiceName)
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
testEnvironmentUrl: $(TestApplicationResource)
imageTag: $(ImageTag)
reindexEnabled: true
@@ -355,10 +386,11 @@ stages:
version: R5
sql: true
webAppName: $(DeploymentEnvironmentNameR5Sql)
+ keyVaultName: $(KeyVaultNameR5Sql)
appServicePlanName: '$(appServicePlanName)-sql'
appServicePlanResourceGroup: $(appServicePlanResourceGroup)
subscription: $(ConnectedServiceName)
- resourceGroup: $(ResourceGroupName)
+ resourceGroup: $(UniqueResourceGroupName)
testEnvironmentUrl: $(TestApplicationResource)
imageTag: $(ImageTag)
schemaAutomaticUpdatesEnabled: 'auto'
@@ -377,7 +409,7 @@ stages:
- template: ./jobs/run-tests.yml
parameters:
version: Stu3
- keyVaultName: $(DeploymentEnvironmentName)
+ keyVaultName: $(KeyVaultBaseName)
appServiceName: $(DeploymentEnvironmentName)
integrationSqlServerName: $(DeploymentEnvironmentName)inttest
@@ -392,7 +424,7 @@ stages:
- template: ./jobs/run-tests.yml
parameters:
version: R4
- keyVaultName: $(DeploymentEnvironmentNameR4)
+ keyVaultName: $(KeyVaultNameR4)
appServiceName: $(DeploymentEnvironmentNameR4)
integrationSqlServerName: $(DeploymentEnvironmentName)inttest
@@ -407,7 +439,7 @@ stages:
- template: ./jobs/run-tests.yml
parameters:
version: R4B
- keyVaultName: $(DeploymentEnvironmentNameR4B)
+ keyVaultName: $(KeyVaultNameR4B)
appServiceName: $(DeploymentEnvironmentNameR4B)
integrationSqlServerName: $(DeploymentEnvironmentName)inttest
@@ -422,7 +454,7 @@ stages:
- template: ./jobs/run-tests.yml
parameters:
version: R5
- keyVaultName: $(DeploymentEnvironmentNameR5)
+ keyVaultName: $(KeyVaultNameR5)
appServiceName: $(DeploymentEnvironmentNameR5)
integrationSqlServerName: $(DeploymentEnvironmentName)inttest
diff --git a/build/pr-variables.yml b/build/pr-variables.yml
index 8507e12d31..f2c2ac69b1 100644
--- a/build/pr-variables.yml
+++ b/build/pr-variables.yml
@@ -3,7 +3,12 @@ variables:
resourceGroupRoot: 'msh-fhir-pr'
appServicePlanName: '$(resourceGroupRoot)-$(prName)-asp'
ResourceGroupName: '$(resourceGroupRoot)-$(prName)'
- DeploymentEnvironmentName: 'f$(build.BuildNumber)'
+ # Unique resource group name with Build.BuildId suffix to avoid waiting for deletion of existing RG
+ UniqueResourceGroupName: '$(resourceGroupRoot)-$(prName)-$(Build.BuildId)'
+ # Base name for Key Vaults (kept short due to 24 char limit)
+ KeyVaultBaseName: 'f$(build.BuildNumber)'
+ # Deployment environment name includes BuildId for uniqueness across builds
+ DeploymentEnvironmentName: 'f$(build.BuildNumber)-$(Build.BuildId)'
TestEnvironmentName: 'OSS PR$(prName)'
ImageTag: '$(build.BuildNumber)'
diff --git a/build/tasks/delete-keyvaults.yml b/build/tasks/delete-keyvaults.yml
new file mode 100644
index 0000000000..95f08d75e5
--- /dev/null
+++ b/build/tasks/delete-keyvaults.yml
@@ -0,0 +1,97 @@
+# Task template to delete Key Vaults from old resource groups and purge them
+# This prevents soft-delete conflicts when deploying new vaults with the same name
+
+steps:
+- task: AzurePowerShell@5
+ displayName: 'Delete and Purge Key Vaults'
+ inputs:
+ azureSubscription: $(ConnectedServiceName)
+ azurePowerShellVersion: latestVersion
+ ScriptType: inlineScript
+ Inline: |
+ $keyVaultBaseName = "$(KeyVaultBaseName)"
+ $currentRgName = "$(UniqueResourceGroupName)"
+ $region = "$(ResourceGroupRegion)"
+ $pattern = "$keyVaultBaseName*"
+
+ Write-Host "Looking for Key Vaults matching pattern: $pattern in region: $region"
+ Write-Host "Current resource group (excluded): $currentRgName"
+
+ # Get all key vaults matching the pattern that are NOT in the current RG
+ $vaults = Get-AzKeyVault | Where-Object {
+ $_.VaultName -like $pattern -and
+ $_.Location -eq $region -and
+ $_.ResourceGroupName -ne $currentRgName
+ }
+
+ if ($null -eq $vaults -or $vaults.Count -eq 0) {
+ Write-Host "No Key Vaults found to delete"
+ } else {
+ Write-Host "Found $($vaults.Count) Key Vault(s) to delete"
+
+ # Filter out vaults with resource locks
+ $vaultsToDelete = $vaults | Where-Object {
+ $locks = Get-AzResourceLock -ResourceGroupName $_.ResourceGroupName -ResourceName $_.VaultName -ResourceType "Microsoft.KeyVault/vaults" -ErrorAction SilentlyContinue
+ if ($locks) {
+ Write-Warning "Vault '$($_.VaultName)' has resource locks - skipping"
+ return $false
+ }
+ return $true
+ }
+
+ if ($vaultsToDelete.Count -eq 0) {
+ Write-Host "No vaults eligible for deletion after lock check"
+ } else {
+ Write-Host "Deleting $($vaultsToDelete.Count) vault(s) in parallel..."
+
+ # Step 1: Delete vaults (moves them to soft-deleted state)
+ $deleteResults = $vaultsToDelete | ForEach-Object -Parallel {
+ $vaultName = $_.VaultName
+ $sourceRg = $_.ResourceGroupName
+ try {
+ Remove-AzKeyVault -VaultName $vaultName -ResourceGroupName $sourceRg -Force -ErrorAction Stop
+ [PSCustomObject]@{ Vault = $vaultName; Location = $_.Location; Status = "Deleted" }
+ }
+ catch {
+ [PSCustomObject]@{ Vault = $vaultName; Location = $_.Location; Status = "DeleteFailed: $($_.Exception.Message)" }
+ }
+ } -ThrottleLimit 10
+
+ $deleteResults | ForEach-Object {
+ if ($_.Status -eq "Deleted") {
+ Write-Host "Deleted vault: $($_.Vault)"
+ } else {
+ Write-Warning "$($_.Vault): $($_.Status)"
+ }
+ }
+
+ # Step 2: Purge the deleted vaults
+ $deletedVaults = $deleteResults | Where-Object { $_.Status -eq "Deleted" }
+ if ($deletedVaults.Count -gt 0) {
+ Write-Host "Purging $($deletedVaults.Count) soft-deleted vault(s)..."
+
+ $purgeResults = $deletedVaults | ForEach-Object -Parallel {
+ $vaultName = $_.Vault
+ $location = $_.Location
+ try {
+ Remove-AzKeyVault -VaultName $vaultName -Location $location -InRemovedState -Force -ErrorAction Stop
+ [PSCustomObject]@{ Vault = $vaultName; Status = "Purged" }
+ }
+ catch {
+ [PSCustomObject]@{ Vault = $vaultName; Status = "PurgeFailed: $($_.Exception.Message)" }
+ }
+ } -ThrottleLimit 10
+
+ $purgeResults | ForEach-Object {
+ if ($_.Status -eq "Purged") {
+ Write-Host "Purged vault: $($_.Vault)"
+ } else {
+ Write-Warning "$($_.Vault): $($_.Status)"
+ }
+ }
+
+ $purgedCount = ($purgeResults | Where-Object { $_.Status -eq "Purged" }).Count
+ Write-Host "Completed: $purgedCount of $($deletedVaults.Count) vaults purged"
+ }
+ }
+ }
diff --git a/build/tasks/delete-resource-groups.yml b/build/tasks/delete-resource-groups.yml
new file mode 100644
index 0000000000..91f6fe581a
--- /dev/null
+++ b/build/tasks/delete-resource-groups.yml
@@ -0,0 +1,64 @@
+# Task template to delete old resource groups for a PR
+# Runs after Key Vaults have been moved out to prevent soft-delete conflicts
+
+steps:
+- task: AzurePowerShell@5
+ displayName: 'Delete Old Resource Groups'
+ inputs:
+ azureSubscription: $(ConnectedServiceName)
+ azurePowerShellVersion: latestVersion
+ ScriptType: inlineScript
+ Inline: |
+ $prNumber = "$(prName)"
+ $resourceGroupRoot = "$(resourceGroupRoot)"
+ $currentRgName = "$(UniqueResourceGroupName)"
+ $pattern = "$resourceGroupRoot-$prNumber*"
+
+ Write-Host "Looking for old resource groups matching pattern: $pattern"
+ Write-Host "Excluding current RG: $currentRgName"
+
+ $oldRGs = Get-AzResourceGroup | Where-Object {
+ $_.ResourceGroupName -like $pattern -and
+ $_.ResourceGroupName -ne $currentRgName
+ }
+
+ if ($oldRGs.Count -eq 0) {
+ Write-Host "No old resource groups found to cleanup"
+ } else {
+ Write-Host "Found $($oldRGs.Count) old resource group(s) to cleanup"
+
+ # Delete all RGs in parallel with retry logic
+ $results = $oldRGs | ForEach-Object -Parallel {
+ $rgName = $_.ResourceGroupName
+ $maxRetries = 3
+ $retryCount = 0
+ $deleted = $false
+
+ while (-not $deleted -and $retryCount -lt $maxRetries) {
+ try {
+ Remove-AzResourceGroup -Name $rgName -Force -ErrorAction Stop
+ $deleted = $true
+ [PSCustomObject]@{ RG = $rgName; Status = "Success" }
+ }
+ catch {
+ $retryCount++
+ if ($retryCount -lt $maxRetries) {
+ Start-Sleep -Seconds 30
+ } else {
+ [PSCustomObject]@{ RG = $rgName; Status = "Failed: $($_.Exception.Message)" }
+ }
+ }
+ }
+ } -ThrottleLimit 10
+
+ $results | ForEach-Object {
+ if ($_.Status -eq "Success") {
+ Write-Host "Successfully removed resource group: $($_.RG)"
+ } else {
+ Write-Warning "Failed to remove resource group $($_.RG). $($_.Status)"
+ }
+ }
+
+ $successCount = ($results | Where-Object { $_.Status -eq "Success" }).Count
+ Write-Host "Completed removal: $successCount of $($oldRGs.Count) resource group(s)"
+ }
diff --git a/build/tasks/e2e-set-variables.yml b/build/tasks/e2e-set-variables.yml
new file mode 100644
index 0000000000..35d15f646d
--- /dev/null
+++ b/build/tasks/e2e-set-variables.yml
@@ -0,0 +1,77 @@
+parameters:
+- name: appServiceName
+ type: string
+
+steps:
+- task: AzurePowerShell@5
+ displayName: 'Set Variables'
+ inputs:
+ azureSubscription: $(ConnectedServiceName)
+ azurePowerShellVersion: LatestVersion
+ ScriptType: InlineScript
+ Inline: |
+ $keyVault = "$(KeyVaultBaseName)-ts"
+ $secrets = Get-AzKeyVaultSecret -VaultName $keyVault
+
+ foreach($secret in $secrets)
+ {
+ $environmentVariableName = $secret.Name.Replace("--","_")
+
+ $secretValue = Get-AzKeyVaultSecret -VaultName $keyVault -Name $secret.Name
+ # Replace with -AsPlainText flag when v5.3 of the Az Module is supported
+ $plainValue = ([System.Net.NetworkCredential]::new("", $secretValue.SecretValue).Password).ToString()
+ if([string]::IsNullOrEmpty($plainValue))
+ {
+ throw "$($secret.Name) is empty"
+ }
+ Write-Host "##vso[task.setvariable variable=$($environmentVariableName)]$($plainValue)"
+ }
+
+ $appServiceName = "${{ parameters.appServiceName }}"
+ $appSettings = (Get-AzWebApp -ResourceGroupName $(UniqueResourceGroupName) -Name $appServiceName).SiteConfig.AppSettings
+ $acrSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__ConvertData__ContainerRegistryServers__0"}
+ $acrLoginServer = $acrSettings[0].Value
+ $acrAccountName = ($acrLoginServer -split '\.')[0]
+
+ ## This needs to be moved to MI, WI #125246
+ # $acrPassword = (Get-AzContainerRegistryCredential -ResourceGroupName $(ResourceGroupName) -Name $acrAccountName).Password
+ # Write-Host "##vso[task.setvariable variable=TestContainerRegistryServer]$($acrLoginServer)"
+ # Write-Host "##vso[task.setvariable variable=TestContainerRegistryPassword]$($acrPassword)"
+
+ $exportStoreSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__Export__StorageAccountUri"}
+ $exportStoreUri = $exportStoreSettings[0].Value
+ Write-Host "$exportStoreUri"
+ $exportStoreAccountName = [System.Uri]::new("$exportStoreUri").Host.Split('.')[0]
+ $exportStoreKey = Get-AzStorageAccountKey -ResourceGroupName $(UniqueResourceGroupName) -Name "$exportStoreAccountName" | Where-Object {$_.KeyName -eq "key1"}
+
+ Write-Host "##vso[task.setvariable variable=TestExportStoreUri]$($exportStoreUri)"
+
+ $integrationStoreSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__IntegrationDataStore__StorageAccountUri"}
+ $integrationStoreUri = $integrationStoreSettings[0].Value
+ Write-Host "$integrationStoreUri"
+ $integrationStoreAccountName = [System.Uri]::new("$integrationStoreUri").Host.Split('.')[0]
+ Write-Host "##vso[task.setvariable variable=TestIntegrationStoreUri]$($integrationStoreUri)"
+
+ Write-Host "##vso[task.setvariable variable=Resource]$(TestApplicationResource)"
+
+ $secrets = Get-AzKeyVaultSecret -VaultName resolute-oss-tenant-info
+ foreach($secret in $secrets)
+ {
+ $environmentVariableName = $secret.Name.Replace("--","_")
+
+ $secretValue = Get-AzKeyVaultSecret -VaultName resolute-oss-tenant-info -Name $secret.Name
+ # Replace with -AsPlainText flag when v5.3 of the Az Module is supported
+ $plainValue = ([System.Net.NetworkCredential]::new("", $secretValue.SecretValue).Password).ToString()
+ if([string]::IsNullOrEmpty($plainValue))
+ {
+ throw "$($secret.Name) is empty"
+ }
+ Write-Host "##vso[task.setvariable variable=$($environmentVariableName)]$($plainValue)"
+ }
+ # ----------------------------------------
+
+ dotnet dev-certs https
+
+ Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_CLIENT_ID]$env:AZURESUBSCRIPTION_CLIENT_ID"
+ Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_TENANT_ID]$env:AZURESUBSCRIPTION_TENANT_ID"
+ Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_SERVICE_CONNECTION_ID]$env:AZURESUBSCRIPTION_SERVICE_CONNECTION_ID"
diff --git a/samples/templates/default-azuredeploy-docker.json b/samples/templates/default-azuredeploy-docker.json
index 4187e4b81b..1238646dd7 100644
--- a/samples/templates/default-azuredeploy-docker.json
+++ b/samples/templates/default-azuredeploy-docker.json
@@ -5,11 +5,18 @@
"serviceName": {
"type": "string",
"minLength": 3,
- "maxLength": 24,
"metadata": {
"description": "Name of the FHIR service Web App."
}
},
+ "keyVaultName": {
+ "type": "string",
+ "defaultValue": "",
+ "maxLength": 24,
+ "metadata": {
+ "description": "Name of the Key Vault. If empty, serviceName is used. Must be 24 characters or less."
+ }
+ },
"appServicePlanResourceGroup": {
"type": "string",
"defaultValue": "",
@@ -258,7 +265,8 @@
"variables": {
"isMAG": "[or(contains(resourceGroup().location,'usgov'),contains(resourceGroup().location,'usdod'))]",
"serviceName": "[toLower(parameters('serviceName'))]",
- "keyvaultEndpoint": "[if(variables('isMAG'), concat('https://', variables('serviceName'), '.vault.usgovcloudapi.net/'), concat('https://', variables('serviceName'), '.vault.azure.net/'))]",
+ "keyVaultName": "[if(empty(parameters('keyVaultName')), variables('serviceName'), toLower(parameters('keyVaultName')))]",
+ "keyvaultEndpoint": "[if(variables('isMAG'), concat('https://', variables('keyVaultName'), '.vault.usgovcloudapi.net/'), concat('https://', variables('keyVaultName'), '.vault.azure.net/'))]",
"appServicePlanResourceGroup": "[if(empty(parameters('appServicePlanResourceGroup')), resourceGroup().name, parameters('appServicePlanResourceGroup'))]",
"appServicePlanName": "[if(empty(parameters('appServicePlanName')),concat(variables('serviceName'),'-asp'),parameters('appServicePlanName'))]",
"appServiceResourceId": "[resourceId('Microsoft.Web/sites', variables('serviceName'))]",
@@ -310,7 +318,7 @@
"sqlSkuFamily": "[if(variables('isSqlHyperscaleTier'), 'Gen5', '')]",
"sqlSkuName": "[if(variables('isSqlHyperscaleTier'), 'HS_Gen5', 'Standard')]",
"sqlSkuTier": "[if(variables('isSqlHyperscaleTier'), 'Hyperscale', 'Standard')]",
- "keyVaultRoleName": "[concat('Key Vault Secrets Officer-', variables('serviceName'))]",
+ "keyVaultRoleName": "[concat('Key Vault Secrets Officer-', variables('keyVaultName'))]",
"imageRepositoryName": "[if(contains(parameters('registryName'),'mcr.'), concat(toLower(parameters('fhirVersion')), '-fhir-server'), concat(toLower(parameters('fhirVersion')), '_fhir-server'))]",
"networkSecurityPerimeterName": "[parameters('networkSecurityPerimeterName')]"
},
@@ -386,7 +394,7 @@
"dependsOn": [
"[resourceId('Microsoft.Web/serverfarms', variables('appServicePlanName'))]",
"[resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', variables('managedIdentityName'))]",
- "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('serviceName')))]"
+ "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName')))]"
],
"resources": [
{
@@ -395,8 +403,8 @@
"type": "config",
"dependsOn": [
"[variables('appServiceResourceId')]",
- "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('serviceName')))]",
- "[if(equals(parameters('solutionType'),'FhirServerCosmosDB'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('serviceName'), 'CosmosDb--Host'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('serviceName'), 'SqlServer--ConnectionString'))]"
+ "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName')))]",
+ "[if(equals(parameters('solutionType'),'FhirServerCosmosDB'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('keyVaultName'), 'CosmosDb--Host'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('keyVaultName'), 'SqlServer--ConnectionString'))]"
],
"properties": "[union(variables('combinedFhirServerConfigProperties'), json(concat('{ \"FhirServer__ResourceManager__DataStoreResourceId\": \"', if(equals(parameters('solutionType'),'FhirServerCosmosDB'), resourceId('Microsoft.DocumentDb/databaseAccounts', variables('serviceName')), resourceId('Microsoft.Sql/servers/', variables('sqlServerDerivedName'))), '\", ', if(variables('deployAppInsights'), concat('\"Telemetry__Provider\": \"', parameters('telemetryProviderType'), '\",', '\"Telemetry__InstrumentationKey\": \"', reference(resourceId('Microsoft.Insights/components', variables('appInsightsName'))).InstrumentationKey, '\",', '\"Telemetry__ConnectionString\": \"', reference(resourceId('Microsoft.Insights/components', variables('appInsightsName'))).ConnectionString, '\"'), ''), '}')))]"
},
@@ -407,7 +415,7 @@
"dependsOn": [
"appsettings",
"[variables('appServiceResourceId')]",
- "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('serviceName')))]"
+ "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName')))]"
],
"properties": {
"linuxFxVersion": "[concat('DOCKER|', parameters('registryName'), '/', variables('imageRepositoryName'),':', parameters('imageTag'))]",
@@ -641,7 +649,7 @@
},
{
"type": "Microsoft.KeyVault/vaults",
- "name": "[variables('serviceName')]",
+ "name": "[variables('keyVaultName')]",
"apiVersion": "2022-07-01",
"location": "[resourceGroup().location]",
"tags": {
@@ -676,9 +684,9 @@
{
"type": "Microsoft.KeyVault/vaults/providers/roleAssignments",
"apiVersion": "2022-04-01",
- "name": "[concat(variables('serviceName'), '/Microsoft.Authorization/', guid(uniqueString(variables('keyVaultRoleName'), parameters('fhirVersion'), variables('serviceName'))))]",
+ "name": "[concat(variables('keyVaultName'), '/Microsoft.Authorization/', guid(uniqueString(variables('keyVaultRoleName'), parameters('fhirVersion'), variables('keyVaultName'))))]",
"dependsOn": [
- "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]"
+ "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]"
],
"properties": {
"roleDefinitionId": "[concat(subscription().Id, '/providers/Microsoft.Authorization/roleDefinitions/', 'b86a8fe4-44ce-4948-aee5-eccb2c155cd7')]", // Role definition ID for "Key Vault Secrets Officer"
@@ -688,9 +696,9 @@
{
"type": "Microsoft.KeyVault/vaults/providers/roleAssignments",
"apiVersion": "2022-04-01",
- "name": "[concat(variables('serviceName'), '/Microsoft.Authorization/', guid(uniqueString('Reader', parameters('fhirVersion'), variables('serviceName'))))]",
+ "name": "[concat(variables('keyVaultName'), '/Microsoft.Authorization/', guid(uniqueString('Reader', parameters('fhirVersion'), variables('keyVaultName'))))]",
"dependsOn": [
- "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]"
+ "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]"
],
"properties": {
"roleDefinitionId": "[concat(subscription().Id, '/providers/Microsoft.Authorization/roleDefinitions/', 'b86a8fe4-44ce-4948-aee5-eccb2c155cd7')]", // Role definition ID for "Key Vault Secrets Officer"
@@ -700,28 +708,28 @@
{
"condition": "[equals(parameters('solutionType'),'FhirServerCosmosDB')]",
"type": "Microsoft.KeyVault/vaults/secrets",
- "name": "[concat(variables('serviceName'), '/CosmosDb--Host')]",
+ "name": "[concat(variables('keyVaultName'), '/CosmosDb--Host')]",
"apiVersion": "2015-06-01",
"properties": {
"contentType": "text/plain",
"value": "[if(equals(parameters('solutionType'),'FhirServerCosmosDB'), reference(concat('Microsoft.DocumentDb/databaseAccounts/', variables('serviceName'))).documentEndpoint, '')]"
},
"dependsOn": [
- "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]",
+ "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]",
"[resourceId('Microsoft.DocumentDb/databaseAccounts', variables('serviceName'))]"
]
},
{
"condition": "[equals(parameters('solutionType'),'FhirServerSqlServer')]",
"type": "Microsoft.KeyVault/vaults/secrets",
- "name": "[concat(variables('serviceName'), '/SqlServer--ConnectionString')]",
+ "name": "[concat(variables('keyVaultName'), '/SqlServer--ConnectionString')]",
"apiVersion": "2015-06-01",
"properties": {
"contentType": "text/plain",
"value": "[concat('Server=tcp:', if(equals(parameters('solutionType'),'FhirServerSqlServer'), reference(variables('computedSqlServerReference'), '2015-05-01-preview').fullyQualifiedDomainName, ''),',1433;Initial Catalog=',variables('sqlDatabaseName'),';Persist Security Info=False;Authentication=ActiveDirectoryMSI;MultipleActiveResultSets=False;Encrypt=True;TrustServerCertificate=False;Connection Timeout=30;User Id=', reference(resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', variables('managedIdentityName')), '2018-11-30').clientId, ';')]"
},
"dependsOn": [
- "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]",
+ "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]",
"[resourceId('Microsoft.Sql/servers', variables('sqlServerDerivedName'))]"
]
},
@@ -817,15 +825,15 @@
{
"type": "Microsoft.Network/networkSecurityPerimeters/resourceAssociations",
"apiVersion": "2023-07-01-preview",
- "name": "[concat(variables('networkSecurityPerimeterName'), '/keyvault-', uniqueString(variables('serviceName')))]",
+ "name": "[concat(variables('networkSecurityPerimeterName'), '/keyvault-', uniqueString(variables('keyVaultName')))]",
"dependsOn": [
- "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]",
+ "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]",
"[resourceId('Microsoft.Network/networkSecurityPerimeters', variables('networkSecurityPerimeterName'))]",
"[resourceId('Microsoft.Network/networkSecurityPerimeters/profiles', variables('networkSecurityPerimeterName'), parameters('networkSecurityPerimeterProfileName'))]"
],
"properties": {
"privateLinkResource": {
- "id": "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]"
+ "id": "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]"
},
"profile": {
"id": "[resourceId('Microsoft.Network/networkSecurityPerimeters/profiles', variables('networkSecurityPerimeterName'), parameters('networkSecurityPerimeterProfileName'))]"
diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryFactAttribute.cs b/src/Microsoft.Health.Extensions.Xunit/RetryFactAttribute.cs
new file mode 100644
index 0000000000..60f6e32b2d
--- /dev/null
+++ b/src/Microsoft.Health.Extensions.Xunit/RetryFactAttribute.cs
@@ -0,0 +1,37 @@
+// -------------------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
+// -------------------------------------------------------------------------------------------------
+
+using System;
+using Xunit;
+using Xunit.Sdk;
+
+namespace Microsoft.Health.Extensions.Xunit
+{
+ /// <summary>
+ /// Attribute that marks a test method to be retried a specified number of times if it fails.
+ /// Useful for handling transient failures in integration and end-to-end tests.
+ /// </summary>
+ [XunitTestCaseDiscoverer("Microsoft.Health.Extensions.Xunit.RetryFactDiscoverer", "Microsoft.Health.Extensions.Xunit")]
+ [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)]
+ public sealed class RetryFactAttribute : FactAttribute
+ {
+ /// <summary>
+ /// Gets or sets the maximum number of retry attempts (default is 3).
+ /// </summary>
+ public int MaxRetries { get; set; } = 3;
+
+ /// <summary>
+ /// Gets or sets the delay in milliseconds between retry attempts (default is 5000ms).
+ /// </summary>
+ public int DelayBetweenRetriesMs { get; set; } = 5000;
+
+ /// <summary>
+ /// Gets or sets whether to retry on assertion failures (XunitException).
+ /// Default is false - assertion failures usually indicate test bugs, not transient issues.
+ /// Set to true for tests that validate eventually-consistent systems (e.g., cache refresh, reindex operations).
+ /// </summary>
+ public bool RetryOnAssertionFailure { get; set; } = false;
+ }
+}
diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryFactDiscoverer.cs b/src/Microsoft.Health.Extensions.Xunit/RetryFactDiscoverer.cs
new file mode 100644
index 0000000000..f45ab9690f
--- /dev/null
+++ b/src/Microsoft.Health.Extensions.Xunit/RetryFactDiscoverer.cs
@@ -0,0 +1,54 @@
+// -------------------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
+// -------------------------------------------------------------------------------------------------
+
+using System.Collections.Generic;
+using Xunit.Abstractions;
+using Xunit.Sdk;
+
+namespace Microsoft.Health.Extensions.Xunit
+{
+ /// <summary>
+ /// Test case discoverer for <see cref="RetryFactAttribute"/>.
+ /// </summary>
+ public class RetryFactDiscoverer : IXunitTestCaseDiscoverer
+ {
+ private readonly IMessageSink _diagnosticMessageSink;
+
+ public RetryFactDiscoverer(IMessageSink diagnosticMessageSink)
+ {
+ _diagnosticMessageSink = diagnosticMessageSink;
+ }
+
+ public IEnumerable<IXunitTestCase> Discover(
+ ITestFrameworkDiscoveryOptions discoveryOptions,
+ ITestMethod testMethod,
+ IAttributeInfo factAttribute)
+ {
+ var maxRetries = factAttribute.GetNamedArgument<int>(nameof(RetryFactAttribute.MaxRetries));
+ var delayMs = factAttribute.GetNamedArgument<int>(nameof(RetryFactAttribute.DelayBetweenRetriesMs));
+ var retryOnAssertionFailure = factAttribute.GetNamedArgument<bool>(nameof(RetryFactAttribute.RetryOnAssertionFailure));
+
+ // Use default values if not specified
+ if (maxRetries == 0)
+ {
+ maxRetries = 3;
+ }
+
+ if (delayMs == 0)
+ {
+ delayMs = 5000;
+ }
+
+ yield return new RetryTestCase(
+ _diagnosticMessageSink,
+ discoveryOptions.MethodDisplayOrDefault(),
+ discoveryOptions.MethodDisplayOptionsOrDefault(),
+ testMethod,
+ maxRetries,
+ delayMs,
+ retryOnAssertionFailure);
+ }
+ }
+}
diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryTestCase.cs b/src/Microsoft.Health.Extensions.Xunit/RetryTestCase.cs
new file mode 100644
index 0000000000..1afc54c07f
--- /dev/null
+++ b/src/Microsoft.Health.Extensions.Xunit/RetryTestCase.cs
@@ -0,0 +1,316 @@
+// -------------------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
+// -------------------------------------------------------------------------------------------------
+
+using System;
+using System.ComponentModel;
+using System.Diagnostics;
+using System.Threading;
+using System.Threading.Tasks;
+using Xunit.Abstractions;
+using Xunit.Sdk;
+
+namespace Microsoft.Health.Extensions.Xunit
+{
+ /// <summary>
+ /// Test case that implements retry logic.
+ /// </summary>
+ public class RetryTestCase : XunitTestCase
+ {
+ private int _maxRetries;
+ private int _delayMs;
+ private bool _retryOnAssertionFailure;
+
+ [EditorBrowsable(EditorBrowsableState.Never)]
+ [Obsolete("Called by the de-serializer; should only be called by deriving classes for de-serialization purposes")]
+ public RetryTestCase()
+ {
+ }
+
+ public RetryTestCase(
+ IMessageSink diagnosticMessageSink,
+ TestMethodDisplay defaultMethodDisplay,
+ TestMethodDisplayOptions defaultMethodDisplayOptions,
+ ITestMethod testMethod,
+ int maxRetries,
+ int delayMs,
+ bool retryOnAssertionFailure = false,
+ object[] testMethodArguments = null)
+ : base(diagnosticMessageSink, defaultMethodDisplay, defaultMethodDisplayOptions, testMethod, testMethodArguments)
+ {
+ _maxRetries = maxRetries;
+ _delayMs = delayMs;
+ _retryOnAssertionFailure = retryOnAssertionFailure;
+ }
+
+ public override async Task<RunSummary> RunAsync(
+ IMessageSink diagnosticMessageSink,
+ IMessageBus messageBus,
+ object[] constructorArguments,
+ ExceptionAggregator aggregator,
+ CancellationTokenSource cancellationTokenSource)
+ {
+ // Use System.Diagnostics.Trace for ADO visibility
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact starting test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' with MaxRetries={_maxRetries}, DelayMs={_delayMs}, RetryOnAssertionFailure={_retryOnAssertionFailure}");
+
+ var runSummary = new RunSummary { Total = 1 };
+ Exception lastException = null;
+
+ for (int attempt = 1; attempt <= _maxRetries; attempt++)
+ {
+ var isLastAttempt = attempt == _maxRetries;
+
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact attempt {attempt}/{_maxRetries} for test '{TestMethod.Method.Name}'");
+
+ // Create a fresh aggregator for each attempt
+ var attemptAggregator = new ExceptionAggregator();
+
+ // Only intercept failure messages on non-final attempts
+ // On the final attempt, let everything go through (both success and failure)
+ IMessageBus busToUse;
+ FailureInterceptingMessageBus interceptingBus = null;
+
+ if (isLastAttempt)
+ {
+ busToUse = messageBus;
+ }
+ else
+ {
+ interceptingBus = new FailureInterceptingMessageBus(messageBus);
+ busToUse = interceptingBus;
+ }
+
+ try
+ {
+ var summary = await base.RunAsync(
+ diagnosticMessageSink,
+ busToUse,
+ constructorArguments,
+ attemptAggregator,
+ cancellationTokenSource);
+
+ runSummary.Time = summary.Time;
+
+ if (summary.Failed == 0)
+ {
+ // Test passed - success message already went through to Test Explorer
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' passed on attempt {attempt}/{_maxRetries}");
+ diagnosticMessageSink.OnMessage(
+ new DiagnosticMessage($"[RetryFact] Test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' passed on attempt {attempt}/{_maxRetries}"));
+
+ runSummary.Failed = 0;
+ return runSummary;
+ }
+
+ // Test failed on this attempt
+ lastException = attemptAggregator.ToException();
+
+ // If no exception was captured but test failed, create an exception using captured failure details
+ if (lastException == null && summary.Failed > 0)
+ {
+ string failureMsg = interceptingBus?.LastFailureMessage ?? "Test failed but no exception was captured.";
+ string stackTrace = interceptingBus?.LastFailureStackTrace;
+ bool isAssertionFailure = interceptingBus?.IsAssertionFailure ?? false;
+
+ string fullMessage = failureMsg +
+ (stackTrace != null ? Environment.NewLine + "Stack Trace:" + Environment.NewLine + stackTrace : string.Empty);
+
+ // If this is an assertion failure (based on exception types), create an XunitException
+ // so that RetryOnAssertionFailure logic works correctly
+ if (isAssertionFailure)
+ {
+ lastException = new XunitException(fullMessage);
+ }
+ else
+ {
+ lastException = new InvalidOperationException(fullMessage);
+ }
+
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact: Test failed but exception is null, created placeholder exception (IsAssertion={isAssertionFailure})");
+ }
+
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact test failed on attempt {attempt} with exception type: {lastException?.GetType().FullName ?? "null"}, Message: {lastException?.Message ?? "null"}");
+
+ if (!isLastAttempt)
+ {
+ // Check if we should retry this exception (now handles null)
+ var shouldRetry = ShouldRetry(lastException);
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact ShouldRetry={shouldRetry} for exception type {lastException?.GetType().FullName ?? "null"}");
+
+ if (!shouldRetry)
+ {
+ Trace.WriteLine($"##vso[task.logissue type=warning]Test '{TestMethod.Method.Name}' failed with non-retriable exception. Skipping retries.");
+ diagnosticMessageSink.OnMessage(
+ new DiagnosticMessage($"[RetryFact] Test '{TestMethod.Method.Name}' failed with non-retriable exception. Skipping retries."));
+
+ if (lastException != null)
+ {
+ aggregator.Add(lastException);
+ }
+
+ runSummary.Failed = 1;
+ return runSummary;
+ }
+
+ // Not the last attempt - the failure was intercepted, so retry
+ Trace.WriteLine($"##vso[task.logissue type=warning]Test '{TestMethod.Method.Name}' failed on attempt {attempt}/{_maxRetries}, will retry after {_delayMs}ms");
+ diagnosticMessageSink.OnMessage(
+ new DiagnosticMessage($"[RetryFact] Test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' failed on attempt {attempt}/{_maxRetries}. Retrying after {_delayMs}ms delay. Error: {lastException?.Message ?? "No exception message"}"));
+
+ await Task.Delay(_delayMs);
+ }
+ else
+ {
+ // Last attempt - failure message already went through to Test Explorer
+ Trace.WriteLine($"##vso[task.logissue type=error]Test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' failed after {_maxRetries} attempts. Final error: {lastException?.Message ?? "No exception captured"}");
+ diagnosticMessageSink.OnMessage(
+ new DiagnosticMessage($"[RetryFact] Test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' failed after {_maxRetries} attempts. Last exception: {lastException?.Message ?? "No exception message"}"));
+
+ if (lastException != null)
+ {
+ aggregator.Add(lastException);
+ }
+ else if (summary.Failed > 0)
+ {
+ // Add an exception with captured failure details if test failed but no exception was captured
+ aggregator.Add(new InvalidOperationException($"Test failed after {_maxRetries} attempts but no exception was captured"));
+ }
+
+ runSummary.Failed = 1;
+ return runSummary;
+ }
+ }
+ catch (Exception ex)
+ {
+ Trace.WriteLine($"##vso[task.logissue type=error]RetryFact unexpected exception: {ex.GetType().FullName}: {ex.Message}");
+ throw;
+ }
+ finally
+ {
+ // Dispose the intercepting bus if we created one
+ interceptingBus?.Dispose();
+ }
+ }
+
+ // Should never reach here
+ Trace.WriteLine($"##vso[task.logissue type=error]RetryFact WARNING: Reached end of retry loop unexpectedly");
+ runSummary.Failed = 1;
+ return runSummary;
+ }
+
+ public override void Serialize(IXunitSerializationInfo data)
+ {
+ base.Serialize(data);
+ data.AddValue(nameof(_maxRetries), _maxRetries);
+ data.AddValue(nameof(_delayMs), _delayMs);
+ data.AddValue(nameof(_retryOnAssertionFailure), _retryOnAssertionFailure);
+ }
+
+ public override void Deserialize(IXunitSerializationInfo data)
+ {
+ base.Deserialize(data);
+ _maxRetries = data.GetValue<int>(nameof(_maxRetries));
+ _delayMs = data.GetValue<int>(nameof(_delayMs));
+ _retryOnAssertionFailure = data.GetValue<bool>(nameof(_retryOnAssertionFailure));
+ }
+
+ /// <summary>
+ /// Determines if an exception should trigger a retry.
+ /// </summary>
+ private bool ShouldRetry(Exception ex)
+ {
+ // If exception is null, we should retry (something went wrong with exception capture)
+ if (ex == null)
+ {
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact: Exception is null, will retry");
+ return true; // Retry when we can't determine the exception type
+ }
+
+ // Unwrap aggregate exceptions
+ while (ex is AggregateException aggEx && aggEx.InnerExceptions.Count == 1)
+ {
+ ex = aggEx.InnerException;
+ }
+
+ // Don't retry assertion failures unless explicitly configured
+ if (ex is XunitException)
+ {
+ if (!_retryOnAssertionFailure)
+ {
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact: Not retrying XunitException because _retryOnAssertionFailure is false");
+ return false;
+ }
+ else
+ {
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact: Retrying XunitException because _retryOnAssertionFailure is true");
+ return true;
+ }
+ }
+
+ // Retry everything else (network, timeout, SQL transient, etc.)
+ Trace.WriteLine($"##vso[task.logdetail]RetryFact: Retrying non-assertion exception of type {ex.GetType().FullName}");
+ return true;
+ }
+
+ /// <summary>
+ /// Message bus that intercepts ONLY failure messages (ITestFailed).
+ /// Used on non-final retry attempts to suppress intermediate failures.
+ /// Success messages and all other messages always pass through.
+ /// Also captures failure details (messages and stack traces) for diagnostic purposes.
+ /// </summary>
+ private class FailureInterceptingMessageBus : IMessageBus
+ {
+ private readonly IMessageBus _innerBus;
+
+ public FailureInterceptingMessageBus(IMessageBus innerBus)
+ {
+ _innerBus = innerBus;
+ }
+
+ public string LastFailureMessage { get; private set; }
+
+ public string LastFailureStackTrace { get; private set; }
+
+ public bool IsAssertionFailure { get; private set; }
+
+ public bool QueueMessage(IMessageSinkMessage message)
+ {
+ // Intercept ONLY failure messages - suppress them for non-final attempts
+ if (message is ITestFailed failed)
+ {
+ // Capture failure details for diagnostics
+ LastFailureMessage = failed.Messages != null && failed.Messages.Length > 0
+ ? string.Join(Environment.NewLine, failed.Messages)
+ : failed.ExceptionTypes != null && failed.ExceptionTypes.Length > 0
+ ? string.Join(", ", failed.ExceptionTypes)
+ : "Unknown failure";
+
+ LastFailureStackTrace = failed.StackTraces != null && failed.StackTraces.Length > 0
+ ? string.Join(Environment.NewLine, failed.StackTraces)
+ : null;
+
+ // Detect if this is an assertion failure by checking exception types
+ // XUnit assertion exceptions typically have types containing "Xunit" or "Assert"
+ IsAssertionFailure = failed.ExceptionTypes != null &&
+ failed.ExceptionTypes.Length > 0 &&
+ (failed.ExceptionTypes[0].Contains("Xunit", StringComparison.Ordinal) ||
+ failed.ExceptionTypes[0].Contains("Assert", StringComparison.Ordinal) ||
+ failed.ExceptionTypes[0].Contains("EqualException", StringComparison.Ordinal) ||
+ failed.ExceptionTypes[0].Contains("TrueException", StringComparison.Ordinal) ||
+ failed.ExceptionTypes[0].Contains("FalseException", StringComparison.Ordinal));
+
+ return true; // Swallow the failure - we're going to retry
+ }
+
+ // All other messages (ITestPassed, ITestStarting, ITestFinished, etc.) pass through
+ return _innerBus.QueueMessage(message);
+ }
+
+ public void Dispose()
+ {
+ // Don't dispose the inner bus - it's owned by the caller
+ }
+ }
+ }
+}
diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryTheoryAttribute.cs b/src/Microsoft.Health.Extensions.Xunit/RetryTheoryAttribute.cs
new file mode 100644
index 0000000000..88a65b6b54
--- /dev/null
+++ b/src/Microsoft.Health.Extensions.Xunit/RetryTheoryAttribute.cs
@@ -0,0 +1,37 @@
+// -------------------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
+// -------------------------------------------------------------------------------------------------
+
+using System;
+using Xunit;
+using Xunit.Sdk;
+
+namespace Microsoft.Health.Extensions.Xunit
+{
+ /// <summary>
+ /// Attribute that marks a theory method to be retried a specified number of times if it fails.
+ /// Useful for handling transient failures in integration and end-to-end tests with parameterized data.
+ /// </summary>
+ [XunitTestCaseDiscoverer("Microsoft.Health.Extensions.Xunit.RetryTheoryDiscoverer", "Microsoft.Health.Extensions.Xunit")]
+ [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)]
+ public sealed class RetryTheoryAttribute : TheoryAttribute
+ {
+ /// <summary>
+ /// Gets or sets the maximum number of retry attempts (default is 3).
+ /// </summary>
+ public int MaxRetries { get; set; } = 3;
+
+ /// <summary>
+ /// Gets or sets the delay in milliseconds between retry attempts (default is 5000ms).
+ /// </summary>
+ public int DelayBetweenRetriesMs { get; set; } = 5000;
+
+ /// <summary>
+ /// Gets or sets whether to retry on assertion failures (XunitException).
+ /// Default is false - assertion failures usually indicate test bugs, not transient issues.
+ /// Set to true for tests that validate eventually-consistent systems (e.g., cache refresh, reindex operations).
+ /// </summary>
+ public bool RetryOnAssertionFailure { get; set; } = false;
+ }
+}
diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryTheoryDiscoverer.cs b/src/Microsoft.Health.Extensions.Xunit/RetryTheoryDiscoverer.cs
new file mode 100644
index 0000000000..9539e3c1db
--- /dev/null
+++ b/src/Microsoft.Health.Extensions.Xunit/RetryTheoryDiscoverer.cs
@@ -0,0 +1,91 @@
+// -------------------------------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
+// -------------------------------------------------------------------------------------------------
+
+using System.Collections.Generic;
+using System.Linq;
+using Xunit.Abstractions;
+using Xunit.Sdk;
+
+namespace Microsoft.Health.Extensions.Xunit
+{
+ /// <summary>
+ /// Test case discoverer for <see cref="RetryTheoryAttribute"/>.
+ /// For Theory tests, we need to let xUnit discover the data-driven test cases first,
+ /// then wrap each one with retry logic.
+ /// </summary>
+ public class RetryTheoryDiscoverer : TheoryDiscoverer
+ {
+ public RetryTheoryDiscoverer(IMessageSink diagnosticMessageSink)
+ : base(diagnosticMessageSink)
+ {
+ }
+
+ protected override IEnumerable<IXunitTestCase> CreateTestCasesForDataRow(
+ ITestFrameworkDiscoveryOptions discoveryOptions,
+ ITestMethod testMethod,
+ IAttributeInfo theoryAttribute,
+ object[] dataRow)
+ {
+ var maxRetries = theoryAttribute.GetNamedArgument<int>(nameof(RetryTheoryAttribute.MaxRetries));
+ var delayMs = theoryAttribute.GetNamedArgument<int>(nameof(RetryTheoryAttribute.DelayBetweenRetriesMs));
+ var retryOnAssertionFailure = theoryAttribute.GetNamedArgument<bool>(nameof(RetryTheoryAttribute.RetryOnAssertionFailure));
+
+ // Use default values if not specified
+ if (maxRetries == 0)
+ {
+ maxRetries = 3;
+ }
+
+ if (delayMs == 0)
+ {
+ delayMs = 5000;
+ }
+
+ // Create a RetryTestCase for each data row
+ yield return new RetryTestCase(
+ DiagnosticMessageSink,
+ discoveryOptions.MethodDisplayOrDefault(),
+ discoveryOptions.MethodDisplayOptionsOrDefault(),
+ testMethod,
+ maxRetries,
+ delayMs,
+ retryOnAssertionFailure,
+ dataRow);
+ }
+
+ protected override IEnumerable<IXunitTestCase> CreateTestCasesForTheory(
+ ITestFrameworkDiscoveryOptions discoveryOptions,
+ ITestMethod testMethod,
+ IAttributeInfo theoryAttribute)
+ {
+ var maxRetries = theoryAttribute.GetNamedArgument<int>(nameof(RetryTheoryAttribute.MaxRetries));
+ var delayMs = theoryAttribute.GetNamedArgument<int>(nameof(RetryTheoryAttribute.DelayBetweenRetriesMs));
+ var retryOnAssertionFailure = theoryAttribute.GetNamedArgument<bool>(nameof(RetryTheoryAttribute.RetryOnAssertionFailure));
+
+ // Use default values if not specified
+ if (maxRetries == 0)
+ {
+ maxRetries = 3;
+ }
+
+ if (delayMs == 0)
+ {
+ delayMs = 5000;
+ }
+
+ // For theories without data (will be skipped), wrap in retry
+ return base.CreateTestCasesForTheory(discoveryOptions, testMethod, theoryAttribute)
+ .Select(testCase => new RetryTestCase(
+ DiagnosticMessageSink,
+ discoveryOptions.MethodDisplayOrDefault(),
+ discoveryOptions.MethodDisplayOptionsOrDefault(),
+ testMethod,
+ maxRetries,
+ delayMs,
+ retryOnAssertionFailure,
+ testCase.TestMethodArguments));
+ }
+ }
+}
diff --git a/src/Microsoft.Health.Fhir.Core.UnitTests/Features/Search/Registry/SearchParameterCacheRefreshBackgroundServiceTests.cs b/src/Microsoft.Health.Fhir.Core.UnitTests/Features/Search/Registry/SearchParameterCacheRefreshBackgroundServiceTests.cs
index 709e9b47a3..6c273679a6 100644
--- a/src/Microsoft.Health.Fhir.Core.UnitTests/Features/Search/Registry/SearchParameterCacheRefreshBackgroundServiceTests.cs
+++ b/src/Microsoft.Health.Fhir.Core.UnitTests/Features/Search/Registry/SearchParameterCacheRefreshBackgroundServiceTests.cs
@@ -36,7 +36,8 @@ public SearchParameterCacheRefreshBackgroundServiceTests()
_coreFeatureConfiguration = Substitute.For>();
_coreFeatureConfiguration.Value.Returns(new CoreFeatureConfiguration
{
- SearchParameterCacheRefreshIntervalSeconds = 60,
+ SearchParameterCacheRefreshIntervalSeconds = 1,
+ SearchParameterCacheRefreshMaxInitialDelaySeconds = 0, // No delay for tests
});
_service = new SearchParameterCacheRefreshBackgroundService(
@@ -320,8 +321,9 @@ public async Task OnRefreshTimer_WhenOperationCanceled_ShouldHandleGracefully()
// Act - Initialize and let timer run
await service.Handle(new SearchParametersInitializedNotification(), CancellationToken.None);
- // Wait for timer to fire and handle the exception
- await Task.Delay(200);
+ // Wait longer for timer to fire and handle the exception - give it up to 2 seconds
+ // The timer starts immediately (TimeSpan.Zero) when Handle is called
+ await Task.Delay(2000);
// Assert - Verify that OperationCanceledException was handled and logged appropriately
mockLogger.Received().Log(
diff --git a/src/Microsoft.Health.Fhir.Core/Configs/CoreFeatureConfiguration.cs b/src/Microsoft.Health.Fhir.Core/Configs/CoreFeatureConfiguration.cs
index 581f29761c..e67091f37a 100644
--- a/src/Microsoft.Health.Fhir.Core/Configs/CoreFeatureConfiguration.cs
+++ b/src/Microsoft.Health.Fhir.Core/Configs/CoreFeatureConfiguration.cs
@@ -106,5 +106,12 @@ public class CoreFeatureConfiguration
/// SearchParameter cache synchronized across instances. Default is 60 seconds if not specified.
///
public int SearchParameterCacheRefreshIntervalSeconds { get; set; } = 60;
+
+ /// <summary>
+ /// Gets or sets the maximum initial delay in seconds for the SearchParameter cache background service timer.
+ /// This random delay (0 to this value) staggers timer startup across instances to prevent thundering herd.
+ /// Default is 15 seconds. Set to 0 to disable the delay (useful for testing).
+ /// </summary>
+ public int SearchParameterCacheRefreshMaxInitialDelaySeconds { get; set; } = 15;
}
}
diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/FhirOperationDataStoreBase.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/FhirOperationDataStoreBase.cs
index 45d812f85c..c84a99f4fd 100644
--- a/src/Microsoft.Health.Fhir.Core/Features/Operations/FhirOperationDataStoreBase.cs
+++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/FhirOperationDataStoreBase.cs
@@ -376,7 +376,7 @@ record = JsonConvert.DeserializeObject(jobInfo.Definition);
errorMessage += error.Diagnostics + " ";
}
- if (error.Severity == OperationOutcomeConstants.IssueSeverity.Error && !inFlightJobsExist)
+ if (error.Severity == OperationOutcomeConstants.IssueSeverity.Error && !inFlightJobsExist && !cancelledJobsExist)
{
status = JobStatus.Failed;
}
@@ -393,6 +393,12 @@ record = JsonConvert.DeserializeObject(jobInfo.Definition);
record.FailureDetails = new JobFailureDetails(Core.Resources.ReindexFailedWithUnknownError, HttpStatusCode.InternalServerError);
}
+ // To prevent sending the consumer a wrong status when there are still running processing jobs that were enqueued during a race condition.
+ if (inFlightJobsExist && status != JobStatus.Running)
+ {
+ status = JobStatus.Running;
+ }
+
switch (status)
{
case JobStatus.Created:
diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/GetReindexRequestHandler.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/GetReindexRequestHandler.cs
index 43569f7a35..86693e5635 100644
--- a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/GetReindexRequestHandler.cs
+++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/GetReindexRequestHandler.cs
@@ -47,10 +47,22 @@ public async Task Handle(GetReindexRequest request, Cancella
}
else
{
- return await GetListOfReindexJobs();
+ return await GetActiveReindexJob(cancellationToken);
}
}
+ private async Task GetActiveReindexJob(CancellationToken cancellationToken)
+ {
+ var reindexJobId = await _fhirOperationDataStore.CheckActiveReindexJobsAsync(cancellationToken);
+
+ if (reindexJobId.found && reindexJobId.id != null)
+ {
+ return await GetSingleReindexJobAsync(reindexJobId.id, cancellationToken);
+ }
+
+ return await GetListOfReindexJobs();
+ }
+
private static Task GetListOfReindexJobs()
{
// TODO: build list of reindex jobs
diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/Models/ReindexJobRecord.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/Models/ReindexJobRecord.cs
index 555e085e72..79d1b17a1f 100644
--- a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/Models/ReindexJobRecord.cs
+++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/Models/ReindexJobRecord.cs
@@ -107,7 +107,7 @@ protected ReindexJobRecord()
public long Progress { get; set; }
[JsonProperty(JobRecordProperties.ResourceTypeSearchParameterHashMap)]
- public IReadOnlyDictionary ResourceTypeSearchParameterHashMap { get; private set; }
+ public IReadOnlyDictionary ResourceTypeSearchParameterHashMap { get; internal set; }
[JsonProperty(JobRecordProperties.LastModified)]
public DateTimeOffset LastModified { get; set; }
diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexOrchestratorJob.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexOrchestratorJob.cs
index f7d0f4171f..a3417382af 100644
--- a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexOrchestratorJob.cs
+++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexOrchestratorJob.cs
@@ -11,6 +11,7 @@
using System.Threading;
using System.Threading.Tasks;
using EnsureThat;
+using Hl7.Fhir.Model;
using Microsoft.AspNetCore.JsonPatch.Internal;
using Microsoft.Data.SqlClient;
using Microsoft.Extensions.Logging;
@@ -111,25 +112,32 @@ public async Task ExecuteAsync(JobInfo jobInfo, CancellationToken cancel
try
{
- // Wait for the configured SearchParameterCacheRefreshIntervalSeconds before processing
- var delaySeconds = Math.Max(1, _coreFeatureConfiguration.SearchParameterCacheRefreshIntervalSeconds);
- var delayMultiplier = Math.Max(1, _operationsConfiguration.Reindex.ReindexDelayMultiplier);
- _logger.LogInformation("Reindex job with Id: {Id} waiting for {DelaySeconds} second(s) before processing as configured by SearchParameterCacheRefreshIntervalSeconds and ReindexDelayMultiplier.", _jobInfo.Id, delaySeconds * delayMultiplier);
+ await Task.Delay(1000, cancellationToken);
- await Task.Delay(TimeSpan.FromSeconds(delaySeconds) * delayMultiplier, cancellationToken);
+ if (cancellationToken.IsCancellationRequested || _jobInfo.CancelRequested)
+ {
+ throw new OperationCanceledException("Reindex operation cancelled by customer.");
+ }
+
+ // Attempt to get and apply the latest search parameter updates
+ await RefreshSearchParameterCache(cancellationToken);
_reindexJobRecord.Status = OperationStatus.Running;
_jobInfo.Status = JobStatus.Running;
_logger.LogInformation("Reindex job with Id: {Id} has been started. Status: {Status}.", _jobInfo.Id, _reindexJobRecord.Status);
- await CreateReindexProcessingJobsAsync(cancellationToken);
-
var jobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, _jobInfo.GroupId, true, cancellationToken);
+ var queryReindexProcessingJobs = jobs.Where(j => j.Id != _jobInfo.GroupId).ToList();
- // Get only ProcessingJobs.
- var queryProcessingJobs = jobs.Where(j => j.Id != _jobInfo.GroupId).ToList();
+ // Only create jobs if we don't have any at this point.
+ if (!queryReindexProcessingJobs.Any())
+ {
+ await CreateReindexProcessingJobsAsync(cancellationToken);
+ jobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, _jobInfo.GroupId, true, cancellationToken);
+ queryReindexProcessingJobs = jobs.Where(j => j.Id != _jobInfo.GroupId).ToList();
+ }
- if (!queryProcessingJobs.Any())
+ if (!queryReindexProcessingJobs.Any())
{
// Nothing to process so we are done.
AddErrorResult(OperationOutcomeConstants.IssueSeverity.Information, OperationOutcomeConstants.IssueType.Informational, Core.Resources.ReindexingNothingToProcess);
@@ -137,12 +145,11 @@ public async Task ExecuteAsync(JobInfo jobInfo, CancellationToken cancel
return JsonConvert.SerializeObject(_currentResult);
}
- _currentResult.CreatedJobs = queryProcessingJobs.Count;
+ _currentResult.CreatedJobs = queryReindexProcessingJobs.Count;
- if (queryProcessingJobs.Any())
- {
- await CheckForCompletionAsync(queryProcessingJobs, cancellationToken);
- }
+ await CheckForCompletionAsync(queryReindexProcessingJobs, cancellationToken);
+
+ await RefreshSearchParameterCache(cancellationToken);
}
catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
{
@@ -162,6 +169,30 @@ public async Task ExecuteAsync(JobInfo jobInfo, CancellationToken cancel
return JsonConvert.SerializeObject(_currentResult);
}
+ private async Task RefreshSearchParameterCache(CancellationToken cancellationToken)
+ {
+ try
+ {
+ _logger.LogJobInformation(_jobInfo, "Performing full SearchParameter database refresh and hash recalculation for reindex job.");
+
+ // Use the enhanced method with forceFullRefresh flag
+ await _searchParameterOperations.GetAndApplySearchParameterUpdates(cancellationToken, forceFullRefresh: true);
+
+ // Update the reindex job record with the latest hash map
+ _reindexJobRecord.ResourceTypeSearchParameterHashMap = _searchParameterDefinitionManager.SearchParameterHashMap;
+
+ _logger.LogJobInformation(
+ _jobInfo,
+ "Completed full SearchParameter refresh. Hash map updated with {ResourceTypeCount} resource types.",
+ _reindexJobRecord.ResourceTypeSearchParameterHashMap.Count);
+ }
+ catch (Exception ex)
+ {
+ _logger.LogJobError(ex, _jobInfo, "Failed to refresh SearchParameter cache.");
+ throw;
+ }
+ }
+
private async Task> CreateReindexProcessingJobsAsync(CancellationToken cancellationToken)
{
// Build queries based on new search params
@@ -169,16 +200,12 @@ private async Task> CreateReindexProcessingJobsAsync(Cancell
List validStatus = new List() { SearchParameterStatus.Supported, SearchParameterStatus.PendingDelete, SearchParameterStatus.PendingDisable };
_initialSearchParamStatusCollection = await _searchParameterStatusManager.GetAllSearchParameterStatus(cancellationToken);
- // Create a dictionary for efficient O(1) lookups by URI
- var searchParamStatusByUri = _initialSearchParamStatusCollection.ToDictionary(
- s => s.Uri.ToString(),
- s => s.Status,
- StringComparer.OrdinalIgnoreCase);
-
- var validUris = searchParamStatusByUri
- .Where(s => validStatus.Contains(s.Value))
- .Select(s => s.Key)
- .ToHashSet(StringComparer.OrdinalIgnoreCase);
+ // Get all URIs that have at least one entry with a valid status
+ // This handles case-variant duplicates naturally
+ var validUris = _initialSearchParamStatusCollection
+ .Where(s => validStatus.Contains(s.Status))
+ .Select(s => s.Uri.ToString())
+ .ToHashSet();
// Filter to only those search parameters with valid status
var possibleNotYetIndexedParams = _searchParameterDefinitionManager.AllSearchParameters
@@ -252,6 +279,42 @@ private async Task> CreateReindexProcessingJobsAsync(Cancell
.Select(kvp => kvp.Key)
.ToList();
+ // Confirm counts for the range by not ignoring the hash this time, in case it's 0
+ // Because we ignore hash to get full range set for initial count, we need to double-check counts here
+ foreach (var resourceCount in _reindexJobRecord.ResourceCounts)
+ {
+ var resourceType = resourceCount.Key;
+ var resourceCountValue = resourceCount.Value;
+ var startResourceSurrogateId = resourceCountValue.StartResourceSurrogateId;
+ var endResourceSurrogateId = resourceCountValue.EndResourceSurrogateId;
+ var count = resourceCountValue.Count;
+
+ var queryForCount = new ReindexJobQueryStatus(resourceType, continuationToken: null)
+ {
+ LastModified = Clock.UtcNow,
+ Status = OperationStatus.Queued,
+ StartResourceSurrogateId = startResourceSurrogateId,
+ EndResourceSurrogateId = endResourceSurrogateId,
+ };
+
+ SearchResult countOnlyResults = await GetResourceCountForQueryAsync(queryForCount, countOnly: true, false, _cancellationToken);
+
+ // Check if the result has no records and add to zero-count list
+ if (countOnlyResults?.TotalCount == 0)
+ {
+ if (!resourceTypesWithZeroCount.Contains(resourceType))
+ {
+ resourceTypesWithZeroCount.Add(resourceType);
+
+ // subtract this count from JobRecordCount
+ _reindexJobRecord.Count -= resourceCountValue.Count;
+
+ // Update the ResourceCounts entry to reflect zero count
+ resourceCountValue.Count = 0;
+ }
+ }
+ }
+
if (resourceTypesWithZeroCount.Any())
{
_logger.LogJobInformation(
@@ -285,6 +348,9 @@ private async Task> CreateReindexProcessingJobsAsync(Cancell
// Update the SearchParameterStatus to Enabled so they can be used once data is loaded
await UpdateSearchParameterStatus(null, zeroCountParams.Select(p => p.Url.ToString()).ToList(), cancellationToken);
+ // Attempt to get and apply the latest search parameter updates
+ await RefreshSearchParameterCache(cancellationToken);
+
_logger.LogJobInformation(
_jobInfo,
"Successfully updated status for {Count} search parameter(s) with zero-count resource types",
@@ -340,6 +406,11 @@ private void AddErrorResult(string severity, string issueType, string message)
private async Task> EnqueueQueryProcessingJobsAsync(CancellationToken cancellationToken)
{
+ if (cancellationToken.IsCancellationRequested || _jobInfo.CancelRequested)
+ {
+ throw new OperationCanceledException("Reindex operation cancelled by customer.");
+ }
+
var resourcesPerJob = (int)_reindexJobRecord.MaximumNumberOfResourcesPerQuery;
var definitions = new List();
@@ -545,7 +616,7 @@ private async Task GetResourceCountForQueryAsync(ReindexJobQuerySt
}
string searchParameterHash = string.Empty;
- _reindexJobRecord.ResourceTypeSearchParameterHashMap.TryGetValue(queryStatus.ResourceType, out searchParameterHash);
+ searchParameterHash = GetHashMapByResourceType(queryStatus.ResourceType);
// Ensure searchParameterHash is never null - for Cosmos DB scenarios, this will be empty string
searchParameterHash ??= string.Empty;
@@ -672,7 +743,7 @@ private string GetHashMapByResourceType(string resourceType)
private bool CheckJobRecordForAnyWork()
{
- return _reindexJobRecord.Count > 0 || _reindexJobRecord.ResourceCounts.Any(e => e.Value.Count <= 0 && e.Value.StartResourceSurrogateId > 0);
+ return _reindexJobRecord.Count > 0 || _reindexJobRecord.ResourceCounts.Any(e => e.Value.Count > 0 && e.Value.StartResourceSurrogateId > 0);
}
private void LogReindexJobRecordErrorMessage()
@@ -792,7 +863,15 @@ private async Task CheckForCompletionAsync(List jobInfos, CancellationT
// Send heartbeat less frequently when stable
if (unchangedCount <= MAX_UNCHANGED_CYCLES)
{
- await _queueClient.PutJobHeartbeatAsync(_jobInfo, cancellationToken);
+ try
+ {
+ await _queueClient.PutJobHeartbeatAsync(_jobInfo, cancellationToken);
+ }
+ catch (JobConflictException ex)
+ {
+ // Log but don't fail - heartbeat conflicts are acceptable
+ _logger.LogJobWarning(ex, _jobInfo, "Heartbeat conflict - another worker updated the job");
+ }
}
}
catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested)
diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexProcessingJob.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexProcessingJob.cs
index a4a17d1f4a..da48e2c9b0 100644
--- a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexProcessingJob.cs
+++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexProcessingJob.cs
@@ -4,10 +4,12 @@
// -------------------------------------------------------------------------------------------------
using System;
+using System.ClientModel;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Security.Cryptography;
+using System.Text;
using System.Threading;
using System.Threading.Tasks;
using EnsureThat;
@@ -16,6 +18,7 @@
using Microsoft.Health.Extensions.DependencyInjection;
using Microsoft.Health.Fhir.Core.Exceptions;
using Microsoft.Health.Fhir.Core.Extensions;
+using Microsoft.Health.Fhir.Core.Features.Operations.Reindex.Models;
using Microsoft.Health.Fhir.Core.Features.Persistence;
using Microsoft.Health.Fhir.Core.Features.Search;
using Microsoft.Health.Fhir.Core.Features.Search.Parameters;
@@ -71,14 +74,14 @@ public async Task ExecuteAsync(JobInfo jobInfo, CancellationToken cancel
_reindexProcessingJobDefinition = DeserializeJobDefinition(jobInfo);
_reindexProcessingJobResult = new ReindexProcessingJobResult();
- ValidateSearchParametersHash(_jobInfo, _reindexProcessingJobDefinition, cancellationToken);
+ await ValidateSearchParametersHashAsync(_jobInfo, _reindexProcessingJobDefinition, cancellationToken);
await ProcessQueryAsync(cancellationToken);
return JsonConvert.SerializeObject(_reindexProcessingJobResult);
}
- private void ValidateSearchParametersHash(JobInfo jobInfo, ReindexProcessingJobDefinition jobDefinition, CancellationToken cancellationToken)
+ private async Task ValidateSearchParametersHashAsync(JobInfo jobInfo, ReindexProcessingJobDefinition jobDefinition, CancellationToken cancellationToken)
{
string currentResourceTypeHash = _searchParameterOperations.GetResourceTypeSearchParameterHashMap(jobDefinition.ResourceType);
@@ -87,34 +90,62 @@ private void ValidateSearchParametersHash(JobInfo jobInfo, ReindexProcessingJobD
if (string.IsNullOrEmpty(currentResourceTypeHash) || !string.Equals(currentResourceTypeHash, jobDefinition.ResourceTypeSearchParameterHashMap, StringComparison.Ordinal))
{
- _logger.LogJobError(
+ _logger.LogJobWarning(
jobInfo,
- "Search parameters for resource type {ResourceType} have changed since the Reindex Job was created. Job definition hash: '{CurrentHash}', In-memory hash: '{JobHash}'.",
+ "Search parameters for resource type {ResourceType} have changed since the Reindex Job was created. Job definition hash: '{JobHash}', In-memory hash: '{CurrentHash}'. Attempting to refresh search parameters.",
jobDefinition.ResourceType,
jobDefinition.ResourceTypeSearchParameterHashMap,
currentResourceTypeHash);
- string message = "Search Parameter hash does not match. Resubmit reindex job to try again.";
+ // Attempt to get and apply the latest search parameter updates
+ await _searchParameterOperations.GetAndApplySearchParameterUpdates(cancellationToken, forceFullRefresh: true);
- // Create error object to provide structured error information
- var errorObject = new
+ // Re-check the hash after updating
+ currentResourceTypeHash = _searchParameterOperations.GetResourceTypeSearchParameterHashMap(jobDefinition.ResourceType);
+
+ // If the hash still doesn't match after the update, fail the job
+ if (string.IsNullOrEmpty(currentResourceTypeHash) || !string.Equals(currentResourceTypeHash, jobDefinition.ResourceTypeSearchParameterHashMap, StringComparison.Ordinal))
{
- message = message,
- resourceType = jobDefinition.ResourceType,
- jobDefinitionHash = jobDefinition.ResourceTypeSearchParameterHashMap,
- currentHash = currentResourceTypeHash,
- jobId = jobInfo.Id,
- groupId = jobInfo.GroupId,
- };
+ _logger.LogJobError(
+ jobInfo,
+ "Search parameters for resource type {ResourceType} still do not match after refresh. Job definition hash: '{JobHash}', In-memory hash: '{CurrentHash}'.",
+ jobDefinition.ResourceType,
+ jobDefinition.ResourceTypeSearchParameterHashMap,
+ currentResourceTypeHash);
+
+ string message = "Search Parameter hash does not match. Resubmit reindex job to try again.";
+
+ // Create error object to provide structured error information
+ var errorObject = new
+ {
+ message = message,
+ resourceType = jobDefinition.ResourceType,
+ jobDefinitionHash = jobDefinition.ResourceTypeSearchParameterHashMap,
+ currentHash = currentResourceTypeHash,
+ jobId = jobInfo.Id,
+ groupId = jobInfo.GroupId,
+ };
- _reindexProcessingJobResult.FailedResourceCount = jobDefinition.ResourceCount.Count;
+ _reindexProcessingJobResult.FailedResourceCount = jobDefinition.ResourceCount.Count;
- throw new ReindexProcessingJobSoftException(message, errorObject, isCustomerCaused: true);
+ throw new ReindexProcessingJobSoftException(message, errorObject, isCustomerCaused: true);
+ }
+ else
+ {
+ _logger.LogJobInformation(
+ jobInfo,
+ "Search parameters for resource type {ResourceType} successfully refreshed. Hash now matches: '{CurrentHash}'.",
+ jobDefinition.ResourceType,
+ currentResourceTypeHash);
+ }
}
}
private async Task GetResourcesToReindexAsync(SearchResultReindex searchResultReindex, CancellationToken cancellationToken)
{
+ string searchParameterHash = _reindexProcessingJobDefinition.ResourceTypeSearchParameterHashMap;
+ searchParameterHash ??= string.Empty;
+
var queryParametersList = new List>()
{
Tuple.Create(KnownQueryParameterNames.Type, _reindexProcessingJobDefinition.ResourceType),
@@ -122,30 +153,36 @@ private async Task GetResourcesToReindexAsync(SearchResultReindex
if (searchResultReindex != null)
{
- // Always use the StartResourceSurrogateId for the start of the range
- // and the ResourceCount.EndResourceSurrogateId for the end. The sql will determine
- // how many resources to actually return based on the configured maximumNumberOfResourcesPerQuery.
- // When this function returns, it knows what the next starting value to use in
- // searching for the next block of results and will use that as the queryStatus starting point
- queryParametersList.AddRange(new[]
+ // If we have SurrogateId range, we simply use those and ignore search parameter hash
+ // Otherwise, it's cosmos DB and we must use it and ensure we pass MaximumNumberOfResourcesPerQuery so we get expected count returned.
+ if (searchResultReindex.StartResourceSurrogateId > 0 && searchResultReindex.EndResourceSurrogateId > 0)
{
- // This EndResourceSurrogateId is only needed because of the way the sql is written. It is not accurate initially.
- Tuple.Create(KnownQueryParameterNames.EndSurrogateId, searchResultReindex.EndResourceSurrogateId.ToString()),
- Tuple.Create(KnownQueryParameterNames.StartSurrogateId, searchResultReindex.StartResourceSurrogateId.ToString()),
- Tuple.Create(KnownQueryParameterNames.GlobalEndSurrogateId, "0"),
- });
-
- queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.IgnoreSearchParamHash, "true"));
- }
+ queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.IgnoreSearchParamHash, "true"));
+
+ // Always use the StartResourceSurrogateId for the start of the range
+ // and the ResourceCount.EndResourceSurrogateId for the end. The sql will determine
+ // how many resources to actually return based on the configured maximumNumberOfResourcesPerQuery.
+ // When this function returns, it knows what the next starting value to use in
+ // searching for the next block of results and will use that as the queryStatus starting point
+ queryParametersList.AddRange(new[]
+ {
+ // This EndResourceSurrogateId is only needed because of the way the sql is written. It is not accurate initially.
+ Tuple.Create(KnownQueryParameterNames.StartSurrogateId, searchResultReindex.StartResourceSurrogateId.ToString()),
+ Tuple.Create(KnownQueryParameterNames.EndSurrogateId, searchResultReindex.EndResourceSurrogateId.ToString()),
+ Tuple.Create(KnownQueryParameterNames.GlobalEndSurrogateId, "0"),
+ });
+ }
+ else
+ {
+ queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.Count, _reindexProcessingJobDefinition.MaximumNumberOfResourcesPerQuery.ToString()));
+ }
- if (_reindexProcessingJobDefinition.ResourceCount.ContinuationToken != null)
- {
- queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.ContinuationToken, _reindexProcessingJobDefinition.ResourceCount.ContinuationToken));
+ if (searchResultReindex.ContinuationToken != null)
+ {
+ queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.ContinuationToken, searchResultReindex.ContinuationToken));
+ }
}
- string searchParameterHash = _reindexProcessingJobDefinition.ResourceTypeSearchParameterHashMap;
- searchParameterHash ??= string.Empty;
-
using (IScoped searchService = _searchServiceFactory())
{
try
@@ -168,7 +205,7 @@ private void LogReindexProcessingJobErrorMessage()
{
var ser = JsonConvert.SerializeObject(_reindexProcessingJobDefinition);
var result = JsonConvert.SerializeObject(_reindexProcessingJobResult);
- _logger.LogJobInformation(_jobInfo, $"ReindexProcessingJob Error: Current ReindexJobRecord: {ser}. ReindexProcessing Job Result: {result}.");
+ _logger.LogJobInformation(_jobInfo, "ReindexProcessingJob Error: Current ReindexJobRecord: {JobDefinition}. ReindexProcessing Job Result: {JobResult}.", ser, result);
}
private async Task ProcessQueryAsync(CancellationToken cancellationToken)
@@ -178,6 +215,11 @@ private async Task ProcessQueryAsync(CancellationToken cancellationToken)
throw new InvalidOperationException("_reindexProcessingJobDefinition cannot be null during processing.");
}
+ if (cancellationToken.IsCancellationRequested)
+ {
+ return;
+ }
+
long resourceCount = 0;
try
{
@@ -187,9 +229,9 @@ private async Task ProcessQueryAsync(CancellationToken cancellationToken)
throw new OperationFailedException($"Search service returned null search result.", HttpStatusCode.InternalServerError);
}
- resourceCount = result.TotalCount ?? 0;
- _reindexProcessingJobResult.SearchParameterUrls = _reindexProcessingJobDefinition.SearchParameterUrls;
+ resourceCount = result.TotalCount ?? result.Results?.Count() ?? 0;
_jobInfo.Data = resourceCount;
+ _reindexProcessingJobResult.SearchParameterUrls = _reindexProcessingJobDefinition.SearchParameterUrls;
if (resourceCount > _reindexProcessingJobDefinition.MaximumNumberOfResourcesPerQuery)
{
@@ -205,12 +247,52 @@ private async Task ProcessQueryAsync(CancellationToken cancellationToken)
{ _reindexProcessingJobDefinition.ResourceType, _reindexProcessingJobDefinition.ResourceTypeSearchParameterHashMap },
};
- await _timeoutRetries.ExecuteAsync(async () => await ProcessSearchResultsAsync(result, dictionary, (int)_reindexProcessingJobDefinition.MaximumNumberOfResourcesPerWrite, cancellationToken));
+ // Process results in a loop to handle continuation tokens
+ do
+ {
+ await _timeoutRetries.ExecuteAsync(async () => await ProcessSearchResultsAsync(result, dictionary, (int)_reindexProcessingJobDefinition.MaximumNumberOfResourcesPerWrite, cancellationToken));
+
+ resourceCount += result.TotalCount ?? result.Results?.Count() ?? 0;
+ _jobInfo.Data = resourceCount;
+ _reindexProcessingJobResult.SucceededResourceCount += (long)result?.Results.Count();
+ _jobInfo.Data = _reindexProcessingJobResult.SucceededResourceCount;
+ _logger.LogJobInformation(_jobInfo, "Reindex processing batch complete. Current number of resources indexed by this job: {Progress}.", _reindexProcessingJobResult.SucceededResourceCount);
+
+ // Check if there's a continuation token to fetch more results
+ if (!string.IsNullOrEmpty(result.ContinuationToken) && !cancellationToken.IsCancellationRequested)
+ {
+ _logger.LogJobInformation(_jobInfo, "Continuation token found. Fetching next batch of resources for reindexing.");
+
+ // Clear the previous continuation token first to avoid conflicts
+ _reindexProcessingJobDefinition.ResourceCount.ContinuationToken = null;
+
+ // Create a new SearchResultReindex with the continuation token for the next query
+ var nextSearchResultReindex = new SearchResultReindex(_reindexProcessingJobDefinition.ResourceCount.Count)
+ {
+ StartResourceSurrogateId = _reindexProcessingJobDefinition.ResourceCount.StartResourceSurrogateId,
+ EndResourceSurrogateId = _reindexProcessingJobDefinition.ResourceCount.EndResourceSurrogateId,
+ ContinuationToken = Convert.ToBase64String(Encoding.UTF8.GetBytes(result.ContinuationToken)),
+ };
+
+ // Fetch the next batch of results
+ result = await _timeoutRetries.ExecuteAsync(async () => await GetResourcesToReindexAsync(nextSearchResultReindex, cancellationToken));
+
+ if (result == null)
+ {
+ throw new OperationFailedException($"Search service returned null search result during continuation.", HttpStatusCode.InternalServerError);
+ }
+ }
+ else
+ {
+ // No more continuation token, exit the loop
+ result = null;
+ }
+ }
+ while (result != null && !cancellationToken.IsCancellationRequested);
if (!cancellationToken.IsCancellationRequested)
{
- _reindexProcessingJobResult.SucceededResourceCount += (long)result?.Results.Count();
- _logger.LogJobInformation(_jobInfo, "Reindex processing job complete. Current number of resources indexed by this job: {Progress}.", _reindexProcessingJobResult.SucceededResourceCount);
+ _logger.LogJobInformation(_jobInfo, "Reindex processing job complete. Total number of resources indexed by this job: {Progress}.", _reindexProcessingJobResult.SucceededResourceCount);
}
}
catch (SqlException sqlEx)
@@ -253,14 +335,14 @@ private async Task ProcessQueryAsync(CancellationToken cancellationToken)
_logger.LogJobError(ex, _jobInfo, "Reindex processing job error occurred. Is FhirException: 'true'.");
LogReindexProcessingJobErrorMessage();
_reindexProcessingJobResult.Error = ex.Message;
- _reindexProcessingJobResult.FailedResourceCount = resourceCount;
+ _reindexProcessingJobResult.FailedResourceCount = _reindexProcessingJobDefinition.ResourceCount.Count - _reindexProcessingJobResult.SucceededResourceCount;
}
catch (Exception ex)
{
_logger.LogJobError(ex, _jobInfo, "Reindex processing job error occurred. Is FhirException: 'false'.");
LogReindexProcessingJobErrorMessage();
_reindexProcessingJobResult.Error = ex.Message;
- _reindexProcessingJobResult.FailedResourceCount = resourceCount;
+ _reindexProcessingJobResult.FailedResourceCount = _reindexProcessingJobDefinition.ResourceCount.Count - _reindexProcessingJobResult.SucceededResourceCount;
}
}
diff --git a/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/ISearchParameterOperations.cs b/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/ISearchParameterOperations.cs
index 4c450db375..bb614bb229 100644
--- a/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/ISearchParameterOperations.cs
+++ b/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/ISearchParameterOperations.cs
@@ -24,8 +24,9 @@ public interface ISearchParameterOperations
/// It should also be called when a user starts a reindex job
///
/// <param name="cancellationToken">Cancellation token</param>
+ /// <param name="forceFullRefresh">When true, forces a full refresh from database instead of incremental updates</param>
/// <returns>A task.</returns>
- Task GetAndApplySearchParameterUpdates(CancellationToken cancellationToken);
+ Task GetAndApplySearchParameterUpdates(CancellationToken cancellationToken, bool forceFullRefresh = false);
string GetResourceTypeSearchParameterHashMap(string resourceType);
}
diff --git a/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/SearchParameterOperations.cs b/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/SearchParameterOperations.cs
index 196dbeb518..84af81b317 100644
--- a/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/SearchParameterOperations.cs
+++ b/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/SearchParameterOperations.cs
@@ -285,10 +285,22 @@ await SearchParameterConcurrencyManager.ExecuteWithLockAsync(
/// It should also be called when a user starts a reindex job
///
/// <param name="cancellationToken">Cancellation token</param>
+ /// <param name="forceFullRefresh">When true, forces a full refresh from database instead of incremental updates</param>
/// <returns>A task.</returns>
- public async Task GetAndApplySearchParameterUpdates(CancellationToken cancellationToken = default)
+ public async Task GetAndApplySearchParameterUpdates(CancellationToken cancellationToken = default, bool forceFullRefresh = false)
{
- var updatedSearchParameterStatus = await _searchParameterStatusManager.GetSearchParameterStatusUpdates(cancellationToken);
+ IReadOnlyCollection updatedSearchParameterStatus;
+
+ if (forceFullRefresh)
+ {
+ _logger.LogInformation("Performing full SearchParameter database refresh.");
+ updatedSearchParameterStatus = await _searchParameterStatusManager.GetAllSearchParameterStatus(cancellationToken);
+ _logger.LogInformation("Retrieved {Count} search parameters from database for full refresh.", updatedSearchParameterStatus.Count);
+ }
+ else
+ {
+ updatedSearchParameterStatus = await _searchParameterStatusManager.GetSearchParameterStatusUpdates(cancellationToken);
+ }
// First process any deletes or disables, then we will do any adds or updates
// this way any deleted or params which might have the same code or name as a new
@@ -303,12 +315,28 @@ public async Task GetAndApplySearchParameterUpdates(CancellationToken cancellati
_searchParameterDefinitionManager.UpdateSearchParameterStatus(searchParam.Uri.OriginalString, SearchParameterStatus.PendingDelete);
}
+ // Get all URLs that need to be fetched
+ var urlsToFetch = updatedSearchParameterStatus
+ .Where(p => p.Status == SearchParameterStatus.Enabled || p.Status == SearchParameterStatus.Supported)
+ .Select(p => p.Uri.OriginalString)
+ .ToList();
+
+ if (!urlsToFetch.Any())
+ {
+ // No parameters to add, but still apply status updates
+ await _searchParameterStatusManager.ApplySearchParameterStatus(
+ updatedSearchParameterStatus,
+ cancellationToken);
+ return;
+ }
+
+ // Batch fetch all SearchParameter resources in one call
+ var searchParamResources = await GetSearchParametersByUrls(urlsToFetch, cancellationToken);
+
var paramsToAdd = new List();
foreach (var searchParam in updatedSearchParameterStatus.Where(p => p.Status == SearchParameterStatus.Enabled || p.Status == SearchParameterStatus.Supported))
{
- var searchParamResource = await GetSearchParameterByUrl(searchParam.Uri.OriginalString, cancellationToken);
-
- if (searchParamResource == null)
+ if (!searchParamResources.TryGetValue(searchParam.Uri.OriginalString, out var searchParamResource))
{
_logger.LogInformation(
"Updated SearchParameter status found for SearchParameter: {Url}, but did not find any SearchParameter resources when querying for this url.",
@@ -332,7 +360,6 @@ public async Task GetAndApplySearchParameterUpdates(CancellationToken cancellati
}
// Once added to the definition manager we can update their status
-
await _searchParameterStatusManager.ApplySearchParameterStatus(
updatedSearchParameterStatus,
cancellationToken);
@@ -371,5 +398,53 @@ private async Task GetSearchParameterByUrl(string url, Cancellati
return null;
}
+
+ private async Task> GetSearchParametersByUrls(List urls, CancellationToken cancellationToken)
+ {
+ if (!urls.Any())
+ {
+ return new Dictionary();
+ }
+
+ const int chunkSize = 1500;
+ var searchParametersByUrl = new Dictionary();
+
+ // Process URLs in chunks to avoid SQL query limitations
+ for (int i = 0; i < urls.Count; i += chunkSize)
+ {
+ var urlChunk = urls.Skip(i).Take(chunkSize).ToList();
+
+ using IScoped search = _searchServiceFactory.Invoke();
+
+ // Build a query like: url=url1,url2,url3
+ var urlQueryValue = string.Join(",", urlChunk);
+ var queryParams = new List>
+ {
+ new Tuple("url", urlQueryValue),
+ };
+
+ var result = await search.Value.SearchAsync(KnownResourceTypes.SearchParameter, queryParams, cancellationToken);
+
+ foreach (var searchResultEntry in result.Results)
+ {
+ var typedElement = searchResultEntry.Resource.RawResource.ToITypedElement(_modelInfoProvider);
+ var url = typedElement.GetStringScalar("url");
+
+ if (!string.IsNullOrEmpty(url))
+ {
+ if (!searchParametersByUrl.ContainsKey(url))
+ {
+ searchParametersByUrl[url] = typedElement;
+ }
+ else
+ {
+ _logger.LogWarning("More than one SearchParameter found with url {Url}. Using the first one found.", url);
+ }
+ }
+ }
+ }
+
+ return searchParametersByUrl;
+ }
}
}
diff --git a/src/Microsoft.Health.Fhir.Core/Features/Search/Registry/SearchParameterCacheRefreshBackgroundService.cs b/src/Microsoft.Health.Fhir.Core/Features/Search/Registry/SearchParameterCacheRefreshBackgroundService.cs
index 403f9adcb7..8e501e14a1 100644
--- a/src/Microsoft.Health.Fhir.Core/Features/Search/Registry/SearchParameterCacheRefreshBackgroundService.cs
+++ b/src/Microsoft.Health.Fhir.Core/Features/Search/Registry/SearchParameterCacheRefreshBackgroundService.cs
@@ -4,6 +4,7 @@
// -------------------------------------------------------------------------------------------------
using System;
+using System.Security.Cryptography;
using System.Threading;
using System.Threading.Tasks;
using EnsureThat;
@@ -32,6 +33,7 @@ public class SearchParameterCacheRefreshBackgroundService : BackgroundService, I
private readonly SemaphoreSlim _refreshSemaphore;
private bool _isInitialized;
private CancellationToken _stoppingToken;
+ private DateTime _lastForceRefreshTime;
public SearchParameterCacheRefreshBackgroundService(
ISearchParameterStatusManager searchParameterStatusManager,
@@ -55,6 +57,12 @@ public SearchParameterCacheRefreshBackgroundService(
// Create semaphore to prevent concurrent refresh operations (max 1 concurrent operation)
_refreshSemaphore = new SemaphoreSlim(1, 1);
+
+ // Initialize last force refresh time to now with random offset (0-5 minutes) to stagger force refreshes across instances
+ // We use UtcNow instead of MinValue because SearchParameters are already loaded during initialization,
+ // so there's no need to do a full refresh on first timer execution
+ var randomOffsetMinutes = RandomNumberGenerator.GetInt32(0, 6); // 0-5 minutes
+ _lastForceRefreshTime = DateTime.UtcNow.AddMinutes(randomOffsetMinutes);
}
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
@@ -116,34 +124,67 @@ private async void OnRefreshTimer(object state)
try
{
- _logger.LogDebug("Starting SearchParameter cache freshness check.");
-
- // First check if cache is stale using efficient database query
- bool cacheIsStale = await _searchParameterStatusManager.EnsureCacheFreshnessAsync(_stoppingToken);
+ var timeSinceLastForceRefresh = DateTime.UtcNow - _lastForceRefreshTime;
+ var shouldForceRefresh = timeSinceLastForceRefresh >= TimeSpan.FromHours(1);
- // Check again if shutdown was requested after the async call
- if (_stoppingToken.IsCancellationRequested)
+ if (shouldForceRefresh)
{
- _logger.LogDebug("SearchParameter cache refresh was cancelled during freshness check.");
- return;
- }
+ _logger.LogInformation("Performing full SearchParameter database refresh (last force refresh: {TimeSinceLastRefresh} ago).", timeSinceLastForceRefresh);
- if (cacheIsStale)
- {
- _logger.LogInformation("SearchParameter cache is stale. Performing full SearchParameter synchronization.");
+ // Get ALL search parameters from database to ensure complete synchronization
+ var allSearchParameterStatus = await _searchParameterStatusManager.GetAllSearchParameterStatus(_stoppingToken);
+
+ // Check if shutdown was requested after the async call
+ if (_stoppingToken.IsCancellationRequested)
+ {
+ _logger.LogDebug("SearchParameter cache refresh was cancelled during database fetch.");
+ return;
+ }
+
+ _logger.LogInformation("Retrieved {Count} search parameters from database for full refresh.", allSearchParameterStatus.Count);
- // Cache is stale - perform full SearchParameter lifecycle management
- await _searchParameterOperations.GetAndApplySearchParameterUpdates(_stoppingToken);
+ // Apply all search parameters (this will recalculate the hash)
+ await _searchParameterStatusManager.ApplySearchParameterStatus(allSearchParameterStatus, _stoppingToken);
+
+ _lastForceRefreshTime = DateTime.UtcNow;
// Check one more time if shutdown was requested after the async call
if (!_stoppingToken.IsCancellationRequested)
{
- _logger.LogInformation("SearchParameter cache refresh completed successfully.");
+ _logger.LogInformation("SearchParameter full database refresh completed successfully.");
}
}
else
{
- _logger.LogDebug("SearchParameter cache is up to date. No refresh needed.");
+ _logger.LogDebug("Starting SearchParameter cache freshness check (last force refresh: {TimeSinceLastRefresh} ago).", timeSinceLastForceRefresh);
+
+ // Check if cache is stale using efficient database query
+ bool cacheIsStale = await _searchParameterStatusManager.EnsureCacheFreshnessAsync(_stoppingToken);
+
+ // Check again if shutdown was requested after the async call
+ if (_stoppingToken.IsCancellationRequested)
+ {
+ _logger.LogDebug("SearchParameter cache refresh was cancelled during freshness check.");
+ return;
+ }
+
+ if (cacheIsStale)
+ {
+ _logger.LogInformation("SearchParameter cache is stale. Performing incremental SearchParameter synchronization.");
+
+ // Cache is stale - perform incremental SearchParameter lifecycle management
+ await _searchParameterOperations.GetAndApplySearchParameterUpdates(_stoppingToken);
+
+ // Check one more time if shutdown was requested after the async call
+ if (!_stoppingToken.IsCancellationRequested)
+ {
+ _logger.LogInformation("SearchParameter incremental refresh completed successfully.");
+ }
+ }
+ else
+ {
+ _logger.LogDebug("SearchParameter cache is up to date. No refresh needed.");
+ }
}
}
catch (OperationCanceledException)
@@ -176,8 +217,25 @@ public async Task Handle(SearchParametersInitializedNotification notification, C
// Only start the timer if the service hasn't been cancelled
if (!_stoppingToken.IsCancellationRequested)
{
- // Start the timer now that search parameters are initialized
- _refreshTimer.Change(TimeSpan.Zero, _refreshInterval);
+ // Add random initial delay to stagger first refresh across instances
+ // This prevents thundering herd problem when multiple pods start simultaneously
+ var maxInitialDelaySeconds = Math.Max(0, _coreFeatureConfiguration.Value.SearchParameterCacheRefreshMaxInitialDelaySeconds);
+ var randomInitialDelaySeconds = maxInitialDelaySeconds > 0
+ ? RandomNumberGenerator.GetInt32(0, maxInitialDelaySeconds + 1)
+ : 0;
+ var initialDelay = TimeSpan.FromSeconds(randomInitialDelaySeconds);
+
+ if (randomInitialDelaySeconds > 0)
+ {
+ _logger.LogInformation("Starting cache refresh timer with {InitialDelay} initial delay to stagger instance startup.", initialDelay);
+ }
+ else
+ {
+ _logger.LogInformation("Starting cache refresh timer immediately (no initial delay configured).");
+ }
+
+ // Start the timer with random initial delay, then use regular refresh interval
+ _refreshTimer.Change(initialDelay, _refreshInterval);
}
await Task.CompletedTask;
diff --git a/src/Microsoft.Health.Fhir.Core/Features/Search/SearchResultReindex.cs b/src/Microsoft.Health.Fhir.Core/Features/Search/SearchResultReindex.cs
index 97ce29bbf0..8a901bbba6 100644
--- a/src/Microsoft.Health.Fhir.Core/Features/Search/SearchResultReindex.cs
+++ b/src/Microsoft.Health.Fhir.Core/Features/Search/SearchResultReindex.cs
@@ -39,7 +39,6 @@ public SearchResultReindex(long count)
/// <summary>
/// The continuation token for the search results
/// </summary>
- [JsonIgnore]
public string ContinuationToken { get; set; }
///
diff --git a/src/Microsoft.Health.Fhir.CosmosDb/Features/Search/Queries/QueryBuilder.cs b/src/Microsoft.Health.Fhir.CosmosDb/Features/Search/Queries/QueryBuilder.cs
index 07d7b97359..064f9da5cc 100644
--- a/src/Microsoft.Health.Fhir.CosmosDb/Features/Search/Queries/QueryBuilder.cs
+++ b/src/Microsoft.Health.Fhir.CosmosDb/Features/Search/Queries/QueryBuilder.cs
@@ -162,6 +162,11 @@ public QueryDefinition GenerateReindexSql(SearchOptions searchOptions, string se
searchOptions.Expression.AcceptVisitor(expressionQueryBuilder);
}
+ AppendFilterCondition(
+ "AND",
+ true,
+ (KnownResourceWrapperProperties.IsHistory, false));
+
AppendFilterCondition(
"AND",
true,
diff --git a/src/Microsoft.Health.Fhir.CosmosDb/Features/Storage/Queues/CosmosQueueClient.cs b/src/Microsoft.Health.Fhir.CosmosDb/Features/Storage/Queues/CosmosQueueClient.cs
index dbcc053328..15d96ea18a 100644
--- a/src/Microsoft.Health.Fhir.CosmosDb/Features/Storage/Queues/CosmosQueueClient.cs
+++ b/src/Microsoft.Health.Fhir.CosmosDb/Features/Storage/Queues/CosmosQueueClient.cs
@@ -63,6 +63,29 @@ public async Task> EnqueueAsync(
{
EnsureArg.IsNotNull(definitions, nameof(definitions));
+ // Check if the orchestrator job (where JobId == GroupId) has been cancelled
+ if (groupId.HasValue)
+ {
+ QueryDefinition orchestratorJobSpec = new QueryDefinition(@"SELECT VALUE c FROM root c
+ JOIN d in c.definitions
+ WHERE c.queueType = @queueType
+ AND c.groupId = @groupId
+ AND d.jobId = @groupId
+ AND d.cancelRequested = true")
+ .WithParameter("@queueType", queueType)
+ .WithParameter("@groupId", groupId.Value.ToString());
+
+ IReadOnlyList cancelledOrchestratorJobs = await ExecuteQueryAsync(orchestratorJobSpec, 1, queueType, cancellationToken);
+
+ if (cancelledOrchestratorJobs.Any())
+ {
+ _logger.LogWarning(
+ "Attempting to enqueue jobs for group {GroupId} but orchestrator job has been cancelled.",
+ groupId.Value);
+ throw new OperationCanceledException($"Job group {groupId.Value} has been cancelled.");
+ }
+ }
+
var id = GetLongId();
var jobInfos = new List();
diff --git a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/ConvertData/ConvertDataRequestHandlerTests.cs b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/ConvertData/ConvertDataRequestHandlerTests.cs
index 5fac204917..d80b16da17 100644
--- a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/ConvertData/ConvertDataRequestHandlerTests.cs
+++ b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/ConvertData/ConvertDataRequestHandlerTests.cs
@@ -10,6 +10,7 @@
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using Microsoft.Health.Core.Features.Security.Authorization;
+using Microsoft.Health.Extensions.Xunit;
using Microsoft.Health.Fhir.Core.Configs;
using Microsoft.Health.Fhir.Core.Features.Operations.ConvertData;
using Microsoft.Health.Fhir.Core.Features.Security;
@@ -29,7 +30,7 @@ namespace Microsoft.Health.Fhir.Shared.Core.UnitTests.Features.Operations.Conver
[Trait(Traits.Category, Categories.Operations)]
public class ConvertDataRequestHandlerTests
{
- [Fact]
+ [RetryFact(MaxRetries = 3, DelayBetweenRetriesMs = 5000)]
public async Task GivenAHl7v2ConvertRequest_WhenConvertData_CorrectResponseShouldReturn()
{
var convertDataRequestHandler = GetRequestHandler();
diff --git a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Import/ImportResourceParserTests.cs b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Import/ImportResourceParserTests.cs
index 403a087a5e..4b480d5c20 100644
--- a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Import/ImportResourceParserTests.cs
+++ b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Import/ImportResourceParserTests.cs
@@ -8,6 +8,7 @@
using Hl7.Fhir.Serialization;
using Microsoft.Health.Core.Features.Context;
using Microsoft.Health.Core.Features.Security;
+using Microsoft.Health.Extensions.Xunit;
using Microsoft.Health.Fhir.Core.Features.Compartment;
using Microsoft.Health.Fhir.Core.Features.Context;
using Microsoft.Health.Fhir.Core.Features.Definition;
@@ -54,7 +55,7 @@ public ImportResourceParserTests()
_importResourceParser = new(_jsonParser, _wrapperFactory);
}
- [Fact]
+ [RetryFact(MaxRetries = 3, DelayBetweenRetriesMs = 5000)]
public void GivenImportWithSoftDeletedFile_WhenParsed_DeletedExtensionShouldBeRemoved()
{
Patient patient = new Patient();
diff --git a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexOrchestratorJobTests.cs b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexOrchestratorJobTests.cs
index a01e2b04ee..1430bad5e7 100644
--- a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexOrchestratorJobTests.cs
+++ b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexOrchestratorJobTests.cs
@@ -1,4 +1,4 @@
-// -------------------------------------------------------------------------------------------------
+// -------------------------------------------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
// -------------------------------------------------------------------------------------------------
@@ -8,10 +8,12 @@
using System.Collections.ObjectModel;
using System.Linq;
using System.Threading;
-using MediatR;
+using System.Threading.Tasks;
+using Hl7.Fhir.Rest;
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using Microsoft.Health.Fhir.Core.Configs;
+using Microsoft.Health.Fhir.Core.Features;
using Microsoft.Health.Fhir.Core.Features.Definition;
using Microsoft.Health.Fhir.Core.Features.Operations;
using Microsoft.Health.Fhir.Core.Features.Operations.Reindex;
@@ -23,132 +25,315 @@
using Microsoft.Health.Fhir.Core.Models;
using Microsoft.Health.Fhir.Core.Registration;
using Microsoft.Health.Fhir.Core.UnitTests.Extensions;
-using Microsoft.Health.Fhir.Core.UnitTests.Features.Search;
using Microsoft.Health.Fhir.Tests.Common;
-using Microsoft.Health.Fhir.Tests.Common.FixtureParameters;
using Microsoft.Health.JobManagement;
using Microsoft.Health.JobManagement.UnitTests;
using Microsoft.Health.Test.Utilities;
using Newtonsoft.Json;
using NSubstitute;
using Xunit;
-using Task = System.Threading.Tasks.Task;
namespace Microsoft.Health.Fhir.Core.UnitTests.Features.Operations.Reindex
{
[CollectionDefinition("ReindexOrchestratorJobTests")]
[Trait(Traits.OwningTeam, OwningTeam.Fhir)]
[Trait(Traits.Category, Categories.IndexAndReindex)]
- public class ReindexOrchestratorJobTests : IClassFixture
+ public class ReindexOrchestratorJobTests
{
- private const int _mockedSearchCount = 5;
-
private readonly ISearchService _searchService = Substitute.For();
private readonly CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource();
- private readonly IMediator _mediator = Substitute.For();
- private ISearchParameterStatusManager _searchParameterStatusmanager;
- private Func _reindexOrchestratorJobTaskFactory;
- private Func _reindexProcessingJobTaskFactory;
+ private readonly ISearchParameterStatusManager _searchParameterStatusManager;
+ private readonly ISearchParameterDefinitionManager _searchDefinitionManager;
private readonly ISearchParameterOperations _searchParameterOperations;
- private readonly IQueueClient _queueClient = new TestQueueClient();
-
- private ISearchParameterDefinitionManager _searchDefinitionManager;
- private CancellationToken _cancellationToken;
+ private IQueueClient _queueClient;
+ private readonly CancellationToken _cancellationToken;
public ReindexOrchestratorJobTests()
{
+ // Configure cancellation token source with 1 minute timeout
+ _cancellationTokenSource.CancelAfter(TimeSpan.FromMinutes(1));
_cancellationToken = _cancellationTokenSource.Token;
- _searchDefinitionManager = Substitute.For();
- _searchParameterStatusmanager = Substitute.For();
+ _searchDefinitionManager = Substitute.For();
+ _searchParameterStatusManager = Substitute.For();
_searchParameterOperations = Substitute.For();
- var job = CreateReindexJobRecord();
- List searchParameterInfos = new List()
+ // Initialize a fresh queue client for each test
+ _queueClient = new TestQueueClient();
+ }
+
+ private void Dispose()
+ {
+ // Clean up the queue client if it's a TestQueueClient
+ if (_queueClient is TestQueueClient testQueueClient)
{
- new SearchParameterInfo(
- "Account",
- "status",
- ValueSets.SearchParamType.Token,
- url: new Uri("http://hl7.org/fhir/SearchParameter/Account-status"),
- baseResourceTypes: new List()
- {
- "Account",
- "_count",
- "_type",
- })
- {
- IsSearchable = true,
- SearchParameterStatus = SearchParameterStatus.Enabled,
- },
- };
- ReadOnlyCollection status = new ReadOnlyCollection(new List()
+ testQueueClient.ClearJobs();
+ }
+
+ _cancellationTokenSource?.Dispose();
+ }
+
+ private ReindexOrchestratorJob CreateReindexOrchestratorJob(
+ IFhirRuntimeConfiguration runtimeConfig = null,
+ int searchParameterCacheRefreshIntervalSeconds = 1,
+ int reindexDelayMultiplier = 1)
+ {
+ runtimeConfig ??= new AzureHealthDataServicesRuntimeConfiguration();
+
+ var coreFeatureConfig = Substitute.For>();
+ coreFeatureConfig.Value.Returns(new CoreFeatureConfiguration
+ {
+ SearchParameterCacheRefreshIntervalSeconds = searchParameterCacheRefreshIntervalSeconds,
+ });
+
+ var operationsConfig = Substitute.For>();
+ operationsConfig.Value.Returns(new OperationsConfiguration
{
- new ResourceSearchParameterStatus()
+ Reindex = new ReindexJobConfiguration
{
- LastUpdated = DateTime.UtcNow,
- Uri = new Uri("http://hl7.org/fhir/SearchParameter/Account-status"),
- Status = SearchParameterStatus.Enabled,
+ ReindexDelayMultiplier = reindexDelayMultiplier,
},
});
- // Fix: Properly set up the mock with explicit CancellationToken argument matching
- _searchParameterStatusmanager
- .GetAllSearchParameterStatus(_cancellationToken)
- .Returns(status);
+ return new ReindexOrchestratorJob(
+ _queueClient,
+ () => _searchService.CreateMockScope(),
+ _searchDefinitionManager,
+ ModelInfoProvider.Instance,
+ _searchParameterStatusManager,
+ _searchParameterOperations,
+ runtimeConfig,
+ NullLoggerFactory.Instance,
+ coreFeatureConfig,
+ operationsConfig);
+ }
+
+ private async Task CreateReindexJobRecord(
+ uint maxResourcePerQuery = 100,
+ IReadOnlyDictionary paramHashMap = null,
+ List targetResourceTypes = null)
+ {
+ paramHashMap ??= new Dictionary { { "Patient", "patientHash" } };
+ targetResourceTypes ??= new List();
+
+ var jobRecord = new ReindexJobRecord(
+ paramHashMap,
+ targetResourceTypes,
+ new List(),
+ new List(),
+ maxResourcePerQuery);
+
+ // Enqueue the orchestrator job through the queue client
+ var orchestratorJobs = await _queueClient.EnqueueAsync(
+ (byte)QueueType.Reindex,
+ new[] { JsonConvert.SerializeObject(jobRecord) },
+ groupId: 0,
+ forceOneActiveJobGroup: false,
+ _cancellationToken);
+
+ // Return the enqueued job with Running status
+ var jobInfo = orchestratorJobs.First();
+ jobInfo.Status = JobStatus.Running;
+
+ return jobInfo;
+ }
+
+ private SearchResult CreateSearchResult(
+ int resourceCount = 1,
+ string continuationToken = null,
+ long startSurrogateId = 1,
+ long endSurrogateId = 1000,
+ string resourceType = "Patient")
+ {
+ var resultList = new List();
- _searchDefinitionManager.AllSearchParameters.Returns(searchParameterInfos);
+ for (var i = 0; i < resourceCount; i++)
+ {
+ var wrapper = Substitute.For();
+ var propertyInfo = wrapper.GetType().GetProperty("ResourceTypeName");
+ propertyInfo?.SetValue(wrapper, resourceType);
- IFhirRuntimeConfiguration fhirRuntimeConfiguration = new AzureHealthDataServicesRuntimeConfiguration();
+ var entry = new SearchResultEntry(wrapper);
+ resultList.Add(entry);
+ }
- _reindexOrchestratorJobTaskFactory = () =>
+ var searchResult = new SearchResult(resultList, continuationToken, null, new List>());
+ searchResult.MaxResourceSurrogateId = endSurrogateId;
+ searchResult.TotalCount = resourceCount;
+ searchResult.ReindexResult = new SearchResultReindex
{
- // Create a mock CoreFeatureConfiguration for the test
- var coreFeatureConfig = Substitute.For>();
- coreFeatureConfig.Value.Returns(new CoreFeatureConfiguration
- {
- SearchParameterCacheRefreshIntervalSeconds = 1, // Use a short interval for tests
- });
+ Count = resourceCount,
+ StartResourceSurrogateId = startSurrogateId,
+ EndResourceSurrogateId = endSurrogateId,
+ };
+ return searchResult;
+ }
- // Create a mock OperationsConfiguration for the test
- var operationsConfig = Substitute.For>();
- operationsConfig.Value.Returns(new OperationsConfiguration
- {
- Reindex = new ReindexJobConfiguration
- {
- ReindexDelayMultiplier = 1, // Use a short multiplier for tests
- },
- });
+ private SearchParameterInfo CreateSearchParameterInfo(
+ string url = "http://hl7.org/fhir/SearchParameter/Patient-name",
+ List baseResourceTypes = null,
+ string resourceType = "Patient")
+ {
+ baseResourceTypes ??= new List { resourceType };
- return new ReindexOrchestratorJob(
- _queueClient,
- () => _searchService.CreateMockScope(),
- _searchDefinitionManager,
- ModelInfoProvider.Instance,
- _searchParameterStatusmanager,
- _searchParameterOperations,
- fhirRuntimeConfiguration,
- NullLoggerFactory.Instance,
- coreFeatureConfig,
- operationsConfig);
+ return new SearchParameterInfo(
+ resourceType,
+ "name",
+ ValueSets.SearchParamType.String,
+ url: new Uri(url),
+ baseResourceTypes: baseResourceTypes)
+ {
+ IsSearchable = true,
+ SearchParameterStatus = SearchParameterStatus.Supported,
};
}
- [Theory(Skip = "Causing random timeouts")]
- [InlineData(DataStore.SqlServer)]
- [InlineData(DataStore.CosmosDb)]
- public async Task GivenSupportedParams_WhenExecuted_ThenCorrectSearchIsPerformed(DataStore dataStore)
+ private async Task> WaitForJobsAsync(long groupId, TimeSpan timeout, int expectedMinimumJobs = 1)
+ {
+ var startTime = DateTime.UtcNow;
+ var pollInterval = TimeSpan.FromMilliseconds(100);
+
+ while (DateTime.UtcNow - startTime < timeout)
+ {
+ var jobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, groupId, true, _cancellationToken);
+ var processingJobs = jobs.Where(j => j.Id != groupId).ToList();
+
+ if (processingJobs.Count >= expectedMinimumJobs)
+ {
+ return processingJobs;
+ }
+
+ await Task.Delay(pollInterval, _cancellationToken);
+ }
+
+ // Return whatever jobs we have after timeout
+ var finalJobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, groupId, true, _cancellationToken);
+ return finalJobs.Where(j => j.Id != groupId).ToList();
+ }
+
+ [Fact]
+ public async Task ExecuteAsync_WhenCancellationRequested_ReturnsJobCancelledResult()
{
- const int maxNumberOfResourcesPerQuery = 100;
- const int startResourceSurrogateId = 1;
- const int endResourceSurrogateId = 1000;
- const string resourceTypeHash = "accountHash";
+ // Arrange
+ var cancellationTokenSource = new CancellationTokenSource();
+ cancellationTokenSource.CancelAfter(1); // Cancel after short delay
- _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(resourceTypeHash);
+ var jobInfo = await CreateReindexJobRecord();
+ var orchestrator = CreateReindexOrchestratorJob();
- IFhirRuntimeConfiguration fhirRuntimeConfiguration = dataStore == DataStore.SqlServer ?
- new AzureHealthDataServicesRuntimeConfiguration() :
- new AzureApiForFhirRuntimeConfiguration();
+ // Act
+ var result = await orchestrator.ExecuteAsync(jobInfo, cancellationTokenSource.Token);
+ var jobResult = JsonConvert.DeserializeObject(result);
+
+ // Assert
+ Assert.NotNull(jobResult);
+ Assert.NotNull(jobResult.Error);
+ Assert.Contains(jobResult.Error, e => e.Diagnostics.Contains("cancelled"));
+ }
+
+ [Fact]
+ public async Task ExecuteAsync_WithExceptionInProcessing_ReturnsErrorResult()
+ {
+ // Arrange
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(Task.FromException>(
+ new InvalidOperationException("Database error")));
+
+ var jobInfo = await CreateReindexJobRecord();
+ var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0);
+
+ // Act
+ var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken);
+ var jobResult = JsonConvert.DeserializeObject(result);
+
+ // Assert
+ Assert.NotNull(jobResult);
+ Assert.NotNull(jobResult.Error);
+ Assert.Contains(jobResult.Error, e => e.Severity == OperationOutcomeConstants.IssueSeverity.Error);
+ }
+
+ [Fact]
+ public async Task AddErrorResult_AddsErrorToExistingErrors()
+ {
+ // Arrange
+ var initialError = new OperationOutcomeIssue(
+ OperationOutcomeConstants.IssueSeverity.Information,
+ OperationOutcomeConstants.IssueType.Informational,
+ "Initial error");
+
+ var emptyStatus = new ReadOnlyCollection(
+ new List());
+
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(emptyStatus);
+
+ _searchDefinitionManager.AllSearchParameters
+ .Returns(new List());
+
+ var jobInfo = await CreateReindexJobRecord();
+
+ jobInfo.Result = JsonConvert.SerializeObject(new ReindexOrchestratorJobResult
+ {
+ Error = new List { initialError },
+ });
+
+ var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0);
+
+ // Act
+ var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken);
+ var jobResult = JsonConvert.DeserializeObject(result);
+
+ // Assert
+ Assert.NotNull(jobResult);
+ Assert.True(jobResult.Error.Count >= 2, "Should have at least 2 errors (initial + new)");
+ }
+
+ [Fact]
+ public async Task GetDerivedResourceTypes_WithResourceBaseType_ReturnsAllResourceTypes()
+ {
+ // Arrange - Create a search parameter that applies to the base Resource type
+ var searchParam = CreateSearchParameterInfo(
+ url: "http://hl7.org/fhir/SearchParameter/Resource-id",
+ baseResourceTypes: new List { KnownResourceTypes.Resource },
+ resourceType: KnownResourceTypes.Resource);
+
+ var searchParamStatus = new ResourceSearchParameterStatus
+ {
+ LastUpdated = DateTime.UtcNow,
+ Uri = new Uri(searchParam.Url.OriginalString),
+ Status = SearchParameterStatus.Supported,
+ };
+
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(new List { searchParamStatus });
+
+ _searchDefinitionManager.AllSearchParameters
+ .Returns(new List { searchParam });
+
+ _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any())
+ .Returns("hash");
+
+ // Get all resource types from the model info provider
+ var allResourceTypes = ModelInfoProvider.Instance.GetResourceTypeNames().ToList();
+
+ // Set up search results to return data for all resource types
+ _searchService.SearchForReindexAsync(
+ Arg.Any>>(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any())
+ .Returns(x =>
+ {
+ // Parse the resource type from the query parameters
+ var queryParams = x.ArgAt>>(0);
+ var resourceTypeParam = queryParams.FirstOrDefault(p => p.Item1 == KnownQueryParameterNames.Type);
+ var resourceType = resourceTypeParam?.Item2 ?? "Patient";
+
+ return CreateSearchResult(
+ resourceCount: 10,
+ resourceType: resourceType);
+ });
_searchService.GetSurrogateIdRanges(
Arg.Any(),
@@ -158,262 +343,1692 @@ public async Task GivenSupportedParams_WhenExecuted_ThenCorrectSearchIsPerformed
Arg.Any(),
Arg.Any(),
Arg.Any(),
- Arg.Any())
- .Returns(new List<(long, long)>
- {
- (startResourceSurrogateId, endResourceSurrogateId),
- });
+ Arg.Any())
+ .Returns(Task.FromResult>(
+ new List<(long StartId, long EndId)> { (1, 10) }));
- // Get one search parameter and configure it such that it needs to be reindexed
- var param = new SearchParameterInfo(
- "Account",
- "status",
- ValueSets.SearchParamType.Token,
- url: new Uri("http://hl7.org/fhir/SearchParameter/Account-status"),
- baseResourceTypes: new List() { "Account" }) // Fix: Only include valid resource types
- {
- IsSearchable = true,
- SearchParameterStatus = SearchParameterStatus.Supported, // This parameter needs reindexing
- };
+ var jobInfo = await CreateReindexJobRecord();
+ var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0);
- // Create search parameter status that indicates it needs reindexing
- ReadOnlyCollection status = new ReadOnlyCollection(new List()
- {
- new ResourceSearchParameterStatus()
- {
- LastUpdated = DateTime.UtcNow,
- Uri = new Uri("http://hl7.org/fhir/SearchParameter/Account-status"),
- Status = SearchParameterStatus.Supported,
- },
- });
- var searchParameterStatusmanager = Substitute.For();
- searchParameterStatusmanager.GetAllSearchParameterStatus(Arg.Any()).Returns(status);
+ // Act
+ _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken);
- // Set up the search definition manager properly
- var searchDefinitionManager = Substitute.For();
- searchDefinitionManager.AllSearchParameters.Returns(new List { param });
+ // Wait for processing jobs to be created - expect many jobs since all resource types should be included
+ var processingJobs = await WaitForJobsAsync(
+ jobInfo.GroupId,
+ TimeSpan.FromSeconds(60),
+ expectedMinimumJobs: Math.Min(10, allResourceTypes.Count)); // Expect at least 10 different resource types
- // Mock GetSearchParameters to return the Account-status parameter for "Account" resource type
- searchDefinitionManager.GetSearchParameters("Account").Returns(new List { param });
+ // Assert
+ Assert.True(processingJobs.Count > 0, "Processing jobs should have been created");
- // Mock GetSearchParameter to return the specific parameter by URL
- searchDefinitionManager.GetSearchParameter("http://hl7.org/fhir/SearchParameter/Account-status").Returns(param);
+ // Get unique resource types from the processing jobs
+ var resourceTypesInJobs = processingJobs
+ .Select(j => JsonConvert.DeserializeObject(j.Definition))
+ .Select(def => def.ResourceType)
+ .Distinct()
+ .ToList();
- // Create mock dependencies for ReindexProcessingJob
- var fhirDataStore = Substitute.For();
- var resourceWrapperFactory = Substitute.For();
+ // Verify that a significant number of resource types were included
+ // Since Resource is the base type, we should have jobs for MANY resource types
+ Assert.True(
+ resourceTypesInJobs.Count >= 10,
+ $"Should have jobs for at least 10 different resource types when base type is Resource. Found {resourceTypesInJobs.Count}: {string.Join(", ", resourceTypesInJobs)}");
- // Set up the factory for ReindexProcessingJob
- _reindexProcessingJobTaskFactory = () => new ReindexProcessingJob(
- () => _searchService.CreateMockScope(),
- () => fhirDataStore.CreateMockScope(),
- resourceWrapperFactory,
- _searchParameterOperations,
- NullLogger.Instance);
+ // Verify that both DomainResource types AND non-DomainResource types are included
+ Assert.Contains("Patient", resourceTypesInJobs); // DomainResource descendant
+ Assert.Contains("Observation", resourceTypesInJobs); // DomainResource descendant
+
+ // These should be included since Resource is the base type (unlike DomainResource which excludes them)
+ var hasAnyNonDomainResource = resourceTypesInJobs.Any(rt =>
+ rt == "Binary" || rt == "Bundle" || rt == "Parameters");
+
+ Assert.True(
+ hasAnyNonDomainResource,
+ $"When base type is Resource, should include non-DomainResource types (Binary, Bundle, Parameters). Found: {string.Join(", ", resourceTypesInJobs)}");
+
+ // Log the actual resource types found
+ var expectedResourceTypes = allResourceTypes.Count;
+ var foundResourceTypes = resourceTypesInJobs.Count;
+ }
- var reindexOrchestratorJobTaskFactory = () =>
+ [Fact]
+ public async Task GetDerivedResourceTypes_WithDomainResourceBaseType_ReturnsApplicableResourceTypes()
+ {
+ // Arrange - Create a search parameter that applies to DomainResource
+ var searchParam = CreateSearchParameterInfo(
+ url: "http://hl7.org/fhir/SearchParameter/DomainResource-text",
+ baseResourceTypes: new List { KnownResourceTypes.DomainResource },
+ resourceType: KnownResourceTypes.DomainResource);
+
+ var searchParamStatus = new ResourceSearchParameterStatus
{
- // Create a mock CoreFeatureConfiguration for the test
- var coreFeatureConfig = Substitute.For>();
- coreFeatureConfig.Value.Returns(new CoreFeatureConfiguration
- {
- SearchParameterCacheRefreshIntervalSeconds = 1, // Use a short interval for tests
- });
+ LastUpdated = DateTime.UtcNow,
+ Uri = new Uri(searchParam.Url.OriginalString),
+ Status = SearchParameterStatus.Supported,
+ };
+
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(new List { searchParamStatus });
+
+ _searchDefinitionManager.AllSearchParameters
+ .Returns(new List { searchParam });
+
+ // Mock SearchParameterHashMap for RefreshSearchParameterCache
+ var paramHashMap = new Dictionary { { "Patient", "patientHash" } };
+ _searchDefinitionManager.SearchParameterHashMap
+ .Returns(paramHashMap);
- // Create a mock OperationsConfiguration for the test
- var operationsConfig = Substitute.For>();
- operationsConfig.Value.Returns(new OperationsConfiguration
+ _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any())
+ .Returns("hash");
+
+ // Set up search results for various resource types
+ _searchService.SearchForReindexAsync(
+ Arg.Any>>(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any())
+ .Returns(x =>
{
- Reindex = new ReindexJobConfiguration
- {
- ReindexDelayMultiplier = 1, // Use a short multiplier for tests
- },
+ // Parse the resource type from the query parameters
+ var queryParams = x.ArgAt>>(0);
+ var resourceTypeParam = queryParams.FirstOrDefault(p => p.Item1 == KnownQueryParameterNames.Type);
+ var resourceType = resourceTypeParam?.Item2 ?? "Patient";
+
+ // Return count based on whether it's a DomainResource type
+ var isDomainResourceType = resourceType != "Binary" &&
+ resourceType != "Bundle" &&
+ resourceType != "Parameters";
+
+ return CreateSearchResult(
+ resourceCount: isDomainResourceType ? 10 : 0,
+ resourceType: resourceType);
});
- return new ReindexOrchestratorJob(
- _queueClient,
- () => _searchService.CreateMockScope(),
- searchDefinitionManager,
- ModelInfoProvider.Instance,
- searchParameterStatusmanager,
- _searchParameterOperations,
- fhirRuntimeConfiguration,
- NullLoggerFactory.Instance,
- coreFeatureConfig,
- operationsConfig);
- };
+ _searchService.GetSurrogateIdRanges(
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any())
+ .Returns(Task.FromResult>(
+ new List<(long StartId, long EndId)> { (1, 10) }));
+
+ var jobInfo = await CreateReindexJobRecord();
+ var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0);
+
+ // Act
+ _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken);
+
+ // Wait for processing jobs to be created
+ var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(60), expectedMinimumJobs: 1);
+
+ // Assert
+ Assert.True(processingJobs.Count > 0, "Processing jobs should have been created");
+
+ // Get unique resource types from the processing jobs
+ var resourceTypesInJobs = processingJobs
+ .Select(j => JsonConvert.DeserializeObject(j.Definition))
+ .Select(def => def.ResourceType)
+ .Distinct()
+ .ToList();
- var expectedResourceType = "Account"; // Fix: Use the actual resource type
+ // Verify that multiple resource types were included
+ Assert.True(resourceTypesInJobs.Count >= 3, $"Should have jobs for multiple resource types derived from DomainResource. Found: {string.Join(", ", resourceTypesInJobs)}");
- // Fix: Create a proper ReindexJobRecord (this was the main issue)
- ReindexJobRecord job = new ReindexJobRecord(
- new Dictionary() { { "Account", resourceTypeHash } }, // Resource type hash map
- new List(), // No specific target resource types (will process all applicable)
- new List(), // No specific target search parameter types
- new List(), // No specific search parameter resource types
- maxNumberOfResourcesPerQuery); // Max resources per query
+ // Verify that non-DomainResource types were excluded
+ Assert.DoesNotContain("Binary", resourceTypesInJobs);
+ Assert.DoesNotContain("Bundle", resourceTypesInJobs);
+ Assert.DoesNotContain("Parameters", resourceTypesInJobs);
+ }
- JobInfo jobInfo = new JobInfo()
+ [Fact]
+ public async Task GetResourceCountForQueryAsync_WithValidQuery_ReturnsSearchResult()
+ {
+ // Arrange
+ var searchParam = CreateSearchParameterInfo();
+ var searchParamStatus = new ResourceSearchParameterStatus
{
- Id = 1,
- Definition = JsonConvert.SerializeObject(job),
- QueueType = (byte)QueueType.Reindex,
- GroupId = 1,
- CreateDate = DateTime.UtcNow,
- Status = JobStatus.Running,
+ LastUpdated = DateTime.UtcNow,
+ Uri = new Uri(searchParam.Url.OriginalString),
+ Status = SearchParameterStatus.Supported,
};
- var mockSearchResult = new SearchResult(_mockedSearchCount, new List>());
- mockSearchResult.ReindexResult = new SearchResultReindex()
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(new List { searchParamStatus });
+
+ _searchDefinitionManager.AllSearchParameters
+ .Returns(new List { searchParam });
+
+ _searchDefinitionManager.GetSearchParameters("Patient")
+ .Returns(new List { searchParam });
+
+ _searchDefinitionManager.GetSearchParameter(searchParam.Url.OriginalString)
+ .Returns(searchParam);
+
+ _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any())
+ .Returns("hash");
+
+ // Mock SearchParameterHashMap for RefreshSearchParameterCache
+ // IMPORTANT: Return a concrete Dictionary instance, not a mock
+ var paramHashMap = new Dictionary
{
- Count = _mockedSearchCount,
- StartResourceSurrogateId = startResourceSurrogateId,
- EndResourceSurrogateId = endResourceSurrogateId,
+ { "Patient", "patientHash" },
+ { "Observation", "observationHash" },
+ { "Condition", "conditionHash" },
};
- // Setup search result for count queries
+ // Return the concrete dictionary as IReadOnlyDictionary
+ _searchDefinitionManager.SearchParameterHashMap
+ .Returns(paramHashMap);
+
+ _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any())
+ .Returns("hash");
+
+ // Create a search result with 100 resources
+ var searchResult = CreateSearchResult(
+ resourceCount: 100,
+ startSurrogateId: 1,
+ endSurrogateId: 100,
+ resourceType: "Patient");
+
_searchService.SearchForReindexAsync(
- Arg.Is>>(l => l.Any(t => t.Item1 == "_count") && l.Any(t => t.Item1 == "_type" && t.Item2 == expectedResourceType)),
+ Arg.Any>>(),
Arg.Any(),
- true,
+ Arg.Any(),
Arg.Any(),
- true).
- Returns(mockSearchResult);
+ Arg.Any())
+ .Returns(searchResult);
- var defaultSearchResult = new SearchResult(0, new List>());
- _searchService.SearchForReindexAsync(
- Arg.Is>>(l => l.Any(t => t.Item1 == "_count") && l.Any(t => t.Item1 == "_type" && t.Item2 != expectedResourceType)),
- Arg.Any(),
- true,
+ _searchService.GetSurrogateIdRanges(
+ "Patient",
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
Arg.Any(),
- true).
- Returns(defaultSearchResult);
+ Arg.Any())
+ .Returns(Task.FromResult>(
+ new List<(long StartId, long EndId)> { (1, 100) }));
- // Setup search result for processing queries (non-count)
- _searchService.SearchForReindexAsync(
+ var jobInfo = await CreateReindexJobRecord();
+ var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0);
+
+ // Act
+ _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken);
+
+ // Wait for processing jobs to be created
+ var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 1);
+
+ // Assert - Verify that SearchForReindexAsync was called
+ await _searchService.Received().SearchForReindexAsync(
Arg.Any>>(),
Arg.Any(),
- false,
+ Arg.Any(),
Arg.Any(),
- true).
- Returns(CreateSearchResult(resourceType: resourceTypeHash));
+ Arg.Any());
- // Execute the orchestrator job
- var orchestratorTask = reindexOrchestratorJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken);
+ // Assert - Verify processing jobs were created
+ Assert.True(processingJobs.Count > 0, "Processing jobs should have been created based on search results");
- // Simulate processing job execution by running them in parallel
- _ = Task.Run(
- async () =>
- {
- await Task.Delay(5000, _cancellationToken); // Give orchestrator time to create jobs
+ // Assert - Verify the job definitions contain the correct resource counts
+ var firstJob = processingJobs.First();
+ var jobDef = JsonConvert.DeserializeObject(firstJob.Definition);
- // Get all processing jobs created by the orchestrator
- var processingJobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, jobInfo.GroupId, true, _cancellationToken);
- var childJobs = processingJobs.Where(j => j.Id != jobInfo.Id).ToList();
+ Assert.NotNull(jobDef);
+ Assert.Equal("Patient", jobDef.ResourceType);
+ Assert.NotNull(jobDef.ResourceCount);
+ Assert.True(jobDef.ResourceCount.Count > 0, "Resource count should be greater than 0");
+ Assert.Equal(1, jobDef.ResourceCount.StartResourceSurrogateId);
+ Assert.Equal(100, jobDef.ResourceCount.EndResourceSurrogateId);
- // Execute each processing job
- foreach (var childJob in childJobs)
- {
- try
- {
- childJob.Status = JobStatus.Running;
- var result = await _reindexProcessingJobTaskFactory().ExecuteAsync(childJob, _cancellationToken);
- childJob.Status = JobStatus.Completed;
- childJob.Result = result;
- await _queueClient.CompleteJobAsync(childJob, false, _cancellationToken);
- }
- catch (Exception ex)
- {
- childJob.Status = JobStatus.Failed;
- childJob.Result = JsonConvert.SerializeObject(new { Error = ex.Message });
- await _queueClient.CompleteJobAsync(childJob, false, _cancellationToken);
- }
- }
- },
- _cancellationToken);
+ // Assert - Verify search parameter hash is set
+ Assert.NotNull(jobDef.ResourceTypeSearchParameterHashMap);
+ Assert.Equal("patientHash", jobDef.ResourceTypeSearchParameterHashMap);
- // Wait for orchestrator job to complete
- var jobResult = JsonConvert.DeserializeObject(await orchestratorTask);
+ // Assert - Verify search parameter URLs are included
+ Assert.NotNull(jobDef.SearchParameterUrls);
+ Assert.Contains(searchParam.Url.OriginalString, jobDef.SearchParameterUrls);
+ }
- // Verify search for count was called
- await _searchService.Received().SearchForReindexAsync(
+ [Fact]
+ public async Task EnqueueQueryProcessingJobsAsync_WithValidSearchParameters_CreatesProcessingJobs()
+ {
+ var searchParam = CreateSearchParameterInfo();
+ var searchParamStatus = new ResourceSearchParameterStatus
+ {
+ LastUpdated = DateTime.UtcNow,
+ Uri = new Uri(searchParam.Url.OriginalString),
+ Status = SearchParameterStatus.Supported,
+ };
+
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(new List { searchParamStatus });
+
+ _searchDefinitionManager.AllSearchParameters
+ .Returns(new List { searchParam });
+
+ _searchDefinitionManager.GetSearchParameters("Patient")
+ .Returns(new List { searchParam });
+
+ _searchDefinitionManager.GetSearchParameter(searchParam.Url.OriginalString)
+ .Returns(searchParam);
+
+ _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any())
+ .Returns("hash");
+
+ var searchResultWithData = CreateSearchResult(resourceCount: 250);
+ _searchService.SearchForReindexAsync(
Arg.Any>>(),
Arg.Any(),
- Arg.Is(true),
+ Arg.Any(),
Arg.Any(),
- true);
+ Arg.Any())
+ .Returns(searchResultWithData);
- // Verify specific search for Account resource type
- await _searchService.Received().SearchForReindexAsync(
- Arg.Is>>(l => l.Any(t => t.Item1 == "_count") && l.Any(t => t.Item1 == "_type" && t.Item2 == expectedResourceType)),
+ var ranges = new List<(long, long)>
+ {
+ (1, 100),
+ (101, 200),
+ (201, 250),
+ };
+ _searchService.GetSurrogateIdRanges(
Arg.Any(),
- true,
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
Arg.Any(),
- true);
+ Arg.Any())
+ .Returns(ranges);
- var reindexJobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, 1, true, _cancellationToken);
- var processingJob = reindexJobs.FirstOrDefault(j => j.Id != jobInfo.Id);
+ var jobInfo = await CreateReindexJobRecord();
+ var orchestrator = CreateReindexOrchestratorJob(
+ new AzureHealthDataServicesRuntimeConfiguration(),
+ searchParameterCacheRefreshIntervalSeconds: 0);
- Assert.NotNull(processingJob);
- Assert.Equal(JobStatus.Completed, processingJob.Status);
+ // Act: Fire off execute asynchronously without awaiting
+ var executeTask = orchestrator.ExecuteAsync(jobInfo, _cancellationToken);
- var processingJobDefinition = JsonConvert.DeserializeObject(processingJob.Definition);
- Assert.Equal(_mockedSearchCount, processingJobDefinition.ResourceCount.Count);
- Assert.Equal(expectedResourceType, processingJobDefinition.ResourceType);
- Assert.Contains(param.Url.ToString(), processingJobDefinition.SearchParameterUrls);
+ // Assert: Check that processing jobs were created
+ var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 1);
+ Assert.True(processingJobs.Count > 0, "Processing jobs should have been enqueued");
+
+ // Cancel the orchestrator job to stop it from waiting for completion
+ await _queueClient.CancelJobByGroupIdAsync((byte)QueueType.Reindex, jobInfo.GroupId, _cancellationToken);
+
+ // Now safely await the result
+ var result = await executeTask;
+ var jobResult = JsonConvert.DeserializeObject(result);
+
+ Assert.NotNull(jobResult);
}
- [Fact(Skip = "Causing random timeouts")]
- public async Task GivenNoSupportedParams_WhenExecuted_ThenJobCompletesWithNoWork()
+ [Fact]
+ public async Task UpdateSearchParameterStatus_WithPendingDisableStatus_UpdatesToDisabled()
{
- var job = CreateReindexJobRecord();
+ // Arrange
+ var searchParam = CreateSearchParameterInfo();
+ var searchParamStatus = new ResourceSearchParameterStatus
+ {
+ LastUpdated = DateTime.UtcNow,
+ Uri = new Uri(searchParam.Url.OriginalString),
+ Status = SearchParameterStatus.PendingDisable,
+ };
+
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(new List { searchParamStatus });
+
+ _searchDefinitionManager.AllSearchParameters
+ .Returns(new List { searchParam });
+
+ _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any())
+ .Returns("hash");
+
+ var emptySearchResult = new SearchResult(0, new List>());
+ _searchService.SearchForReindexAsync(
+ Arg.Any>>(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any())
+ .Returns(emptySearchResult);
- _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(job.ResourceTypeSearchParameterHashMap.First().Value);
+ var jobInfo = await CreateReindexJobRecord();
+ var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0);
- JobInfo jobInfo = new JobInfo()
+ // Act
+ var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken);
+ var jobResult = JsonConvert.DeserializeObject(result);
+
+ // Assert
+ Assert.NotNull(jobResult);
+ await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync(
+ Arg.Is>(l => l.Contains(searchParam.Url.ToString())),
+ SearchParameterStatus.Disabled,
+ Arg.Any());
+ }
+
+ [Fact]
+ public async Task GivenNoSupportedParams_WhenExecuted_ThenJobCompletesWithNoWork()
+ {
+ // Arrange
+ var searchParam = CreateSearchParameterInfo();
+ var searchParamStatus = new ResourceSearchParameterStatus
{
- Id = 3,
- Definition = JsonConvert.SerializeObject(job),
- QueueType = (byte)QueueType.Reindex,
- GroupId = 3,
- CreateDate = DateTime.UtcNow,
- Status = JobStatus.Running,
+ LastUpdated = DateTime.UtcNow,
+ Uri = new Uri(searchParam.Url.OriginalString),
+ Status = SearchParameterStatus.PendingDelete,
};
- var result = JsonConvert.DeserializeObject(await _reindexOrchestratorJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken));
- Assert.Equal("Nothing to process. Reindex complete.", result.Error.First().Diagnostics);
- var jobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, jobInfo.GroupId, false, _cancellationToken);
- Assert.False(jobs.Any());
+
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(new List { searchParamStatus });
+
+ _searchDefinitionManager.AllSearchParameters
+ .Returns(new List { searchParam });
+
+ _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any())
+ .Returns("hash");
+
+ var emptySearchResult = new SearchResult(0, new List>());
+ _searchService.SearchForReindexAsync(
+ Arg.Any>>(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any(),
+ Arg.Any())
+ .Returns(emptySearchResult);
+
+ var jobInfo = await CreateReindexJobRecord();
+ var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0);
+
+ // Act
+ var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken);
+ var jobResult = JsonConvert.DeserializeObject(result);
+
+ // Assert
+ Assert.NotNull(jobResult);
+ await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync(
+ Arg.Is>(l => l.Contains(searchParam.Url.ToString())),
+ SearchParameterStatus.Deleted,
+ Arg.Any());
}
- private SearchResult CreateSearchResult(string resourceType, string continuationToken = null, int resourceCount = 1)
+ [Fact]
+ public async Task UpdateSearchParameterStatus_WithNullReadySearchParameters_ReturnsEarly()
{
- var resultList = new List();
+ // Arrange
+ var emptyStatus = new ReadOnlyCollection(
+ new List());
- for (var i = 0; i < resourceCount; i++)
- {
- var wrapper = Substitute.For();
- var propertyInfo = wrapper.GetType().GetProperty("ResourceTypeName");
- propertyInfo.SetValue(wrapper, resourceType);
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(emptyStatus);
- var entry = new SearchResultEntry(wrapper);
+ _searchDefinitionManager.AllSearchParameters
+ .Returns(new List());
- resultList.Add(entry);
- }
+ var jobInfo = await CreateReindexJobRecord();
+ var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0);
- var searchResult = new SearchResult(resultList, continuationToken, null, new List>());
- searchResult.MaxResourceSurrogateId = 1;
- searchResult.TotalCount = resultList.Count;
- return searchResult;
+ // Act
+ var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken);
+ var jobResult = JsonConvert.DeserializeObject(result);
+
+ // Assert
+ Assert.NotNull(jobResult);
+ await _searchParameterStatusManager.DidNotReceive().UpdateSearchParameterStatusAsync(
+ Arg.Any>(),
+ Arg.Any(),
+ Arg.Any());
}
- private ReindexJobRecord CreateReindexJobRecord(uint maxResourcePerQuery = 100, IReadOnlyDictionary paramHashMap = null)
+ [Fact]
+ public async Task GetValidSearchParameterUrlsForResourceType_WithValidParameters_ReturnsFilteredUrls()
{
- if (paramHashMap == null)
+ // Arrange
+ var patientNameParam = CreateSearchParameterInfo(
+ url: "http://hl7.org/fhir/SearchParameter/Patient-name",
+ baseResourceTypes: new List { "Patient" },
+ resourceType: "Patient");
+
+ var patientBirthdateParam = CreateSearchParameterInfo(
+ url: "http://hl7.org/fhir/SearchParameter/Patient-birthdate",
+ baseResourceTypes: new List { "Patient" },
+ resourceType: "Patient");
+
+ // Create a third parameter that should be filtered OUT
+ // (not valid for Patient even though it's in the reindex job)
+ var observationCodeParam = CreateSearchParameterInfo(
+ url: "http://hl7.org/fhir/SearchParameter/Observation-code",
+ baseResourceTypes: new List { "Observation" },
+ resourceType: "Observation");
+
+ var searchParamStatus1 = new ResourceSearchParameterStatus
{
- paramHashMap = new Dictionary() { { "Patient", "patientHash" } };
+ LastUpdated = DateTime.UtcNow,
+ Uri = new Uri(patientNameParam.Url.OriginalString),
+ Status = SearchParameterStatus.Supported,
+ };
+
+ var searchParamStatus2 = new ResourceSearchParameterStatus
+ {
+ LastUpdated = DateTime.UtcNow,
+ Uri = new Uri(patientBirthdateParam.Url.OriginalString),
+ Status = SearchParameterStatus.Supported,
+ };
+
+ var searchParamStatus3 = new ResourceSearchParameterStatus
+ {
+ LastUpdated = DateTime.UtcNow,
+ Uri = new Uri(observationCodeParam.Url.OriginalString),
+ Status = SearchParameterStatus.Supported,
+ };
+
+ _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken)
+ .Returns(new List { searchParamStatus1, searchParamStatus2, searchParamStatus3 });
+
+ // ALL three parameters are in the reindex job
+ _searchDefinitionManager.AllSearchParameters
+ .Returns(new List { patientNameParam, patientBirthdateParam, observationCodeParam });
+
+ // GetSearchParameters("Patient") returns ONLY the Patient parameters
+ // This is the first level of filtering
+ _searchDefinitionManager.GetSearchParameters("Patient")
+ .Returns(new List { patientNameParam, patientBirthdateParam });
+
+ _searchDefinitionManager.GetSearchParameter(Arg.Any())
+ .Returns(x => x[0] switch
+ {
+ "http://hl7.org/fhir/SearchParameter/Patient-name" => patientNameParam,
+ "http://hl7.org/fhir/SearchParameter/Patient-birthdate" => patientBirthdateParam,
+ "http://hl7.org/fhir/SearchParameter/Observation-code" => observationCodeParam,
+ _ => throw new SearchParameterNotSupportedException("Not found"),
+ });
+
+ _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any())
+ .Returns("hash");
+
+ var searchResult = CreateSearchResult(resourceCount: 10);
+ _searchService.SearchForReindexAsync(
+ Arg.Any>>(),
+ Arg.Any(),
+ Arg.Any