diff --git a/build/build-variables.yml b/build/build-variables.yml index 75699622d0..8e4c41360b 100644 --- a/build/build-variables.yml +++ b/build/build-variables.yml @@ -16,6 +16,14 @@ variables: DeploymentEnvironmentNameR4BSql: '$(DeploymentEnvironmentNameR4B)-sql' DeploymentEnvironmentNameR5: '$(DeploymentEnvironmentName)-r5' DeploymentEnvironmentNameR5Sql: '$(DeploymentEnvironmentNameR5)-sql' + # Key Vault names (shorter due to 24 character limit) + KeyVaultNameSql: '$(KeyVaultBaseName)-sql' + KeyVaultNameR4: '$(KeyVaultBaseName)-r4' + KeyVaultNameR4Sql: '$(KeyVaultNameR4)-sql' + KeyVaultNameR4B: '$(KeyVaultBaseName)-r4b' + KeyVaultNameR4BSql: '$(KeyVaultNameR4B)-sql' + KeyVaultNameR5: '$(KeyVaultBaseName)-r5' + KeyVaultNameR5Sql: '$(KeyVaultNameR5)-sql' TestEnvironmentUrl: 'https://$(DeploymentEnvironmentName).azurewebsites.net' # These variables are not used in the deployment scripts, but are used in the E2E tests files. TestEnvironmentUrl_Sql: 'https://$(DeploymentEnvironmentName)-sql.azurewebsites.net' diff --git a/build/ci-variables.yml b/build/ci-variables.yml index 6afcea9248..467c10e304 100644 --- a/build/ci-variables.yml +++ b/build/ci-variables.yml @@ -4,6 +4,8 @@ variables: resourceGroupRoot: 'msh-fhir-ci4' appServicePlanName: '$(resourceGroupRoot)-linux' DeploymentEnvironmentName: '$(resourceGroupRoot)' + # CI uses the same name for Key Vaults as the deployment environment (no BuildId suffix needed) + KeyVaultBaseName: '$(DeploymentEnvironmentName)' ResourceGroupName: '$(resourceGroupRoot)' CrucibleEnvironmentUrl: 'https://crucible.mshapis.com/' TestEnvironmentName: 'OSS CI' diff --git a/build/jobs/add-aad-test-environment.yml b/build/jobs/add-aad-test-environment.yml index dbb8c5a70e..dcc035582e 100644 --- a/build/jobs/add-aad-test-environment.yml +++ b/build/jobs/add-aad-test-environment.yml @@ -32,22 +32,10 @@ steps: $password = ConvertTo-SecureString -AsPlainText $password_raw -Force $clientSecretCredential = New-Object -TypeName 
System.Management.Automation.PSCredential -ArgumentList $clientId, $password - # If a deleted keyvault with purge protection exists, try to restore it. $environmentName = "$(DeploymentEnvironmentName)".ToLower() -replace "\.", "" + $keyVaultName = "$(KeyVaultBaseName)-ts".ToLower() -replace "\.", "" Write-Host "Installed module and set variables" - $vaultName = "${environmentName}-ts" - $vaultLocation = "westus" - $vaultResourceGroupName = "$(ResourceGroupName)" - if (Get-AzKeyVault -VaultName $vaultName -Location $vaultLocation -InRemovedState) - { - Write-Host "Attempting to restore vault ${vaultName}" - - Undo-AzKeyVaultRemoval -VaultName $vaultName -ResourceGroupName $vaultResourceGroupName -Location $vaultLocation - Write-Host "KeyVault $vaultName is restored" - } - Write-Host "Restored keyvaults" - # Connect to Microsoft Graph using client credentials Write-Host "Connecting to Microsoft Graph" Connect-MgGraph -TenantId $tenantId -ClientSecretCredential $clientSecretCredential @@ -57,4 +45,4 @@ steps: Import-Module $(System.DefaultWorkingDirectory)/release/scripts/PowerShell/FhirServerRelease/FhirServerRelease.psd1 Write-Host "Imported modules" - $output = Add-AadTestAuthEnvironment -TestAuthEnvironmentPath $(System.DefaultWorkingDirectory)/testauthenvironment.json -EnvironmentName $environmentName -ResourceGroupName $(ResourceGroupName) -TenantAdminCredential $clientSecretCredential -TenantId $tenantId -ClientId $clientId -ClientSecret $password + $output = Add-AadTestAuthEnvironment -TestAuthEnvironmentPath $(System.DefaultWorkingDirectory)/testauthenvironment.json -EnvironmentName $environmentName -KeyVaultName $keyVaultName -ResourceGroupName $(UniqueResourceGroupName) -EnvironmentLocation $(ResourceGroupRegion) -TenantAdminCredential $clientSecretCredential -TenantId $tenantId -ClientId $clientId -ClientSecret $password diff --git a/build/jobs/cleanup.yml b/build/jobs/cleanup.yml index 62d395e514..b8edd363aa 100644 --- a/build/jobs/cleanup.yml +++ 
b/build/jobs/cleanup.yml @@ -12,12 +12,71 @@ jobs: azurePowerShellVersion: latestVersion ScriptType: InlineScript Inline: | - Get-AzResourceGroup -Name $(ResourceGroupName) -ErrorVariable notPresent -ErrorAction SilentlyContinue + $deploymentEnvName = "$(DeploymentEnvironmentName)" + $keyVaultBaseName = "$(KeyVaultBaseName)" + + Get-AzResourceGroup -Name $(UniqueResourceGroupName) -ErrorVariable notPresent -ErrorAction SilentlyContinue if ($notPresent) { - Write-Host "Resource group $(ResourceGroupName) not found" + Write-Host "Resource group $(UniqueResourceGroupName) not found" } else { - Write-Host "Deleting resource group $(ResourceGroupName)" - Remove-AzResourceGroup -Name $(ResourceGroupName) -Force -Verbose + Write-Host "Deleting resource group $(UniqueResourceGroupName)" + $maxRetries = 3 + $retryCount = 0 + $deleted = $false + + while (-not $deleted -and $retryCount -lt $maxRetries) { + try { + Remove-AzResourceGroup -Name $(UniqueResourceGroupName) -Force -Verbose -ErrorAction Stop + $deleted = $true + Write-Host "Successfully deleted resource group $(UniqueResourceGroupName)" + } + catch { + $retryCount++ + if ($retryCount -lt $maxRetries) { + Write-Warning "Failed to delete resource group (attempt $retryCount of $maxRetries). Retrying in 30 seconds... Error: $($_.Exception.Message)" + Start-Sleep -Seconds 30 + } else { + Write-Warning "Failed to delete resource group after $maxRetries attempts. Error: $($_.Exception.Message)" + } + } + } + } + + # Purge any soft-deleted Key Vaults from this build + # Vault names use KeyVaultBaseName (e.g., f20585pr5240-ts, f20585pr5240-r4, etc.) 
+ $vaultPattern = "$keyVaultBaseName*" + Write-Host "Checking for soft-deleted Key Vaults matching pattern: $vaultPattern" + $softDeletedVaults = Get-AzKeyVault -InRemovedState -ErrorAction SilentlyContinue | Where-Object { + $_.VaultName -like $vaultPattern + } + + if ($softDeletedVaults.Count -eq 0) { + Write-Host "No soft-deleted vaults found to purge" + } else { + Write-Host "Found $($softDeletedVaults.Count) soft-deleted vault(s) to purge" + + $results = $softDeletedVaults | ForEach-Object -Parallel { + $vaultName = $_.VaultName + $vaultLocation = $_.Location + try { + Remove-AzKeyVault -VaultName $vaultName -Location $vaultLocation -InRemovedState -Force -ErrorAction Stop + [PSCustomObject]@{ Vault = $vaultName; Status = "Success" } + } + catch { + [PSCustomObject]@{ Vault = $vaultName; Status = "Failed: $($_.Exception.Message)" } + } + } -ThrottleLimit 10 + + $results | ForEach-Object { + if ($_.Status -eq "Success") { + Write-Host "Successfully purged vault: $($_.Vault)" + } else { + Write-Warning "Failed to purge vault $($_.Vault). 
$($_.Status)" + } + } + + $successCount = ($results | Where-Object { $_.Status -eq "Success" }).Count + Write-Host "Completed purging: $successCount of $($softDeletedVaults.Count) vaults" } diff --git a/build/jobs/e2e-setup.yml b/build/jobs/e2e-setup.yml index 33890f84d2..2ca2fd2bfc 100644 --- a/build/jobs/e2e-setup.yml +++ b/build/jobs/e2e-setup.yml @@ -17,7 +17,7 @@ steps: KeyVaultName: 'resolute-oss-tenant-info' - task: AzureKeyVault@1 - displayName: 'Azure Key Vault: $(DeploymentEnvironmentName)-ts' + displayName: 'Azure Key Vault: $(KeyVaultBaseName)-ts' inputs: azureSubscription: $(ConnectedServiceName) - KeyVaultName: '$(DeploymentEnvironmentName)-ts' + KeyVaultName: '$(KeyVaultBaseName)-ts' diff --git a/build/jobs/e2e-tests.yml b/build/jobs/e2e-tests.yml index a23f74b228..40f455c58c 100644 --- a/build/jobs/e2e-tests.yml +++ b/build/jobs/e2e-tests.yml @@ -5,92 +5,29 @@ parameters: type: string - name: appServiceType type: string +- name: categoryFilter + type: string + default: 'Category!=ExportLongRunning' +- name: testRunTitleSuffix + type: string + default: '' steps: - template: e2e-tests-extract.yml parameters: version: ${{parameters.version}} - - task: AzurePowerShell@5 - displayName: 'Set Variables' - inputs: - azureSubscription: $(ConnectedServiceName) - azurePowerShellVersion: latestVersion - ScriptType: inlineScript - Inline: | - $keyVault = "$(DeploymentEnvironmentName)-ts" - $secrets = Get-AzKeyVaultSecret -VaultName $keyVault - - foreach($secret in $secrets) - { - $environmentVariableName = $secret.Name.Replace("--","_") - - $secretValue = Get-AzKeyVaultSecret -VaultName $keyVault -Name $secret.Name - # Replace with -AsPlainText flag when v5.3 of the Az Module is supported - $plainValue = ([System.Net.NetworkCredential]::new("", $secretValue.SecretValue).Password).ToString() - if([string]::IsNullOrEmpty($plainValue)) - { - throw "$($secret.Name) is empty" - } - Write-Host "##vso[task.setvariable 
variable=$($environmentVariableName)]$($plainValue)" - } - - $appServiceName = "${{ parameters.appServiceName }}" - $appSettings = (Get-AzWebApp -ResourceGroupName $(ResourceGroupName) -Name $appServiceName).SiteConfig.AppSettings - $acrSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__ConvertData__ContainerRegistryServers__0"} - $acrLoginServer = $acrSettings[0].Value - $acrAccountName = ($acrLoginServer -split '\.')[0] - - ## This needs to be moved to MI, WI #125246 - # $acrPassword = (Get-AzContainerRegistryCredential -ResourceGroupName $(ResourceGroupName) -Name $acrAccountName).Password - # Write-Host "##vso[task.setvariable variable=TestContainerRegistryServer]$($acrLoginServer)" - # Write-Host "##vso[task.setvariable variable=TestContainerRegistryPassword]$($acrPassword)" - - $exportStoreSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__Export__StorageAccountUri"} - $exportStoreUri = $exportStoreSettings[0].Value - Write-Host "$exportStoreUri" - $exportStoreAccountName = [System.Uri]::new("$exportStoreUri").Host.Split('.')[0] - $exportStoreKey = Get-AzStorageAccountKey -ResourceGroupName $(ResourceGroupName) -Name "$exportStoreAccountName" | Where-Object {$_.KeyName -eq "key1"} - - Write-Host "##vso[task.setvariable variable=TestExportStoreUri]$($exportStoreUri)" - - $integrationStoreSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__IntegrationDataStore__StorageAccountUri"} - $integrationStoreUri = $integrationStoreSettings[0].Value - Write-Host "$integrationStoreUri" - $integrationStoreAccountName = [System.Uri]::new("$integrationStoreUri").Host.Split('.')[0] - Write-Host "##vso[task.setvariable variable=TestIntegrationStoreUri]$($integrationStoreUri)" - - Write-Host "##vso[task.setvariable variable=Resource]$(TestApplicationResource)" - - $secrets = Get-AzKeyVaultSecret -VaultName resolute-oss-tenant-info - foreach($secret in $secrets) - { - $environmentVariableName = 
$secret.Name.Replace("--","_") - - $secretValue = Get-AzKeyVaultSecret -VaultName resolute-oss-tenant-info -Name $secret.Name - # Replace with -AsPlainText flag when v5.3 of the Az Module is supported - $plainValue = ([System.Net.NetworkCredential]::new("", $secretValue.SecretValue).Password).ToString() - if([string]::IsNullOrEmpty($plainValue)) - { - throw "$($secret.Name) is empty" - } - Write-Host "##vso[task.setvariable variable=$($environmentVariableName)]$($plainValue)" - } - # ---------------------------------------- - - dotnet dev-certs https - - Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_CLIENT_ID]$env:AZURESUBSCRIPTION_CLIENT_ID" - Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_TENANT_ID]$env:AZURESUBSCRIPTION_TENANT_ID" - Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_SERVICE_CONNECTION_ID]$env:AZURESUBSCRIPTION_SERVICE_CONNECTION_ID" + - template: ../tasks/e2e-set-variables.yml + parameters: + appServiceName: ${{ parameters.appServiceName }} - task: DotNetCoreCLI@2 - displayName: 'E2E ${{ parameters.version }} ${{parameters.appServiceType}}' + displayName: 'E2E ${{ parameters.version }} ${{parameters.appServiceType}}${{ parameters.testRunTitleSuffix }}' inputs: command: test - arguments: '"$(Agent.TempDirectory)/E2ETests/**/*${{ parameters.version }}.Tests.E2E*.dll" --blame-hang-timeout 7m --filter "FullyQualifiedName~${{parameters.appServiceType}}&Category!=ExportLongRunning"' + arguments: '"$(Agent.TempDirectory)/E2ETests/**/*${{ parameters.version }}.Tests.E2E*.dll" --blame-hang-timeout 15m --filter "FullyQualifiedName~${{parameters.appServiceType}}&${{ parameters.categoryFilter }}"' workingDirectory: "$(System.ArtifactsDirectory)" - testRunTitle: '${{ parameters.version }} ${{parameters.appServiceType}}' + testRunTitle: '${{ parameters.version }} ${{parameters.appServiceType}}${{ parameters.testRunTitleSuffix }}' env: 'TestEnvironmentUrl': $(TestEnvironmentUrl) 'TestEnvironmentUrl_${{ 
parameters.version }}': $(TestEnvironmentUrl_${{ parameters.version }}) diff --git a/build/jobs/provision-deploy.yml b/build/jobs/provision-deploy.yml index 9c81e99080..ebc47d22b1 100644 --- a/build/jobs/provision-deploy.yml +++ b/build/jobs/provision-deploy.yml @@ -30,6 +30,9 @@ parameters: - name: reindexEnabled type: boolean default: true +- name: keyVaultName + type: string + default: '' jobs: - job: provisionEnvironment @@ -73,6 +76,7 @@ jobs: appServicePlanSku = "P2V3" numberOfInstances = 2 serviceName = $webAppName + keyVaultName = "${{ parameters.keyVaultName }}".ToLower() securityAuthenticationAuthority = "https://login.microsoftonline.com/$(tenant-id)" securityAuthenticationAudience = "${{ parameters.testEnvironmentUrl }}" additionalFhirServerConfigProperties = $additionalProperties @@ -99,13 +103,6 @@ jobs: $deploymentName = $webAppName $resourceGroupName = "${{ parameters.resourceGroup }}" - Write-Host "Check for keyvaults in removed state..." - if (Get-AzKeyVault -VaultName $webAppName -Location $(ResourceGroupRegion) -InRemovedState) - { - Undo-AzKeyVaultRemoval -VaultName $webAppName -ResourceGroupName $resourceGroupName -Location $(ResourceGroupRegion) - Write-Host "KeyVault $webAppName is restored" - } - Write-Host "Provisioning Resource Group" Write-Host "ResourceGroupName: ${{ parameters.resourceGroup }}" diff --git a/build/jobs/provision-sqlServer.yml b/build/jobs/provision-sqlServer.yml index 42590d6af9..a228129e8d 100644 --- a/build/jobs/provision-sqlServer.yml +++ b/build/jobs/provision-sqlServer.yml @@ -114,13 +114,34 @@ jobs: 'nspProfile' = 'default' } - # Deploy using external template - $nspDeploymentResult = New-AzResourceGroupDeployment ` - -ResourceGroupName $resourceGroup ` - -Name "$sqlServerName-nsp-association" ` - -TemplateFile $(System.DefaultWorkingDirectory)/samples/templates/nsp-resource-association.json ` - -TemplateParameterObject $templateParameters ` - -Verbose + # Deploy using external template with retry logic for 
transient conflicts + $maxRetries = 3 + $retryDelaySeconds = 15 + $attempt = 0 + $success = $false + + do { + try { + $attempt++ + Write-Host "Attempt $attempt of $maxRetries to create NSP association..." + + $nspDeploymentResult = New-AzResourceGroupDeployment ` + -ResourceGroupName $resourceGroup ` + -Name "$sqlServerName-nsp-association" ` + -TemplateFile $(System.DefaultWorkingDirectory)/samples/templates/nsp-resource-association.json ` + -TemplateParameterObject $templateParameters ` + -Verbose + + $success = $true + } catch { + if ($_.Exception.Message -like "*Conflict*" -and $attempt -lt $maxRetries) { + Write-Host "Conflict detected. Retrying in $retryDelaySeconds seconds..." + Start-Sleep -Seconds $retryDelaySeconds + } else { + throw + } + } + } while (-not $success -and $attempt -lt $maxRetries) Write-Host "Successfully associated SQL Server with NSP using external ARM template" Write-Host "Association Resource ID: $($nspDeploymentResult.Outputs.associationResourceId.Value)" diff --git a/build/jobs/run-export-tests.yml b/build/jobs/run-export-tests.yml index 91b64c3de8..1717f7fea0 100644 --- a/build/jobs/run-export-tests.yml +++ b/build/jobs/run-export-tests.yml @@ -23,7 +23,7 @@ jobs: azurePowerShellVersion: latestVersion ScriptType: inlineScript Inline: | - $keyVault = "$(DeploymentEnvironmentName)-ts" + $keyVault = "$(KeyVaultBaseName)-ts" $secrets = Get-AzKeyVaultSecret -VaultName $keyVault foreach($secret in $secrets) @@ -116,7 +116,7 @@ jobs: azurePowerShellVersion: latestVersion ScriptType: inlineScript Inline: | - $keyVault = "$(DeploymentEnvironmentName)-ts" + $keyVault = "$(KeyVaultBaseName)-ts" $secrets = Get-AzKeyVaultSecret -VaultName $keyVault foreach($secret in $secrets) diff --git a/build/jobs/run-tests.yml b/build/jobs/run-tests.yml index a9c3d29a3c..6e5e602acb 100644 --- a/build/jobs/run-tests.yml +++ b/build/jobs/run-tests.yml @@ -40,7 +40,7 @@ jobs: Write-Host "##vso[task.setvariable 
variable=AZURESUBSCRIPTION_SERVICE_CONNECTION_ID]$env:AZURESUBSCRIPTION_SERVICE_CONNECTION_ID" $appServiceName = '${{ parameters.appServiceName }}' - $appSettings = (Get-AzWebApp -ResourceGroupName $(ResourceGroupName) -Name $appServiceName).SiteConfig.AppSettings + $appSettings = (Get-AzWebApp -ResourceGroupName $(UniqueResourceGroupName) -Name $appServiceName).SiteConfig.AppSettings $dataStoreResourceId = $appSettings | where {$_.Name -eq "FhirServer__ResourceManager__DataStoreResourceId"} $dataStoreResourceId = $dataStoreResourceId[0].Value Write-Host "$dataStoreResourceId" @@ -180,6 +180,22 @@ jobs: version: ${{ parameters.version }} appServiceName: ${{ parameters.appServiceName }} appServiceType: 'CosmosDb' + categoryFilter: 'Category!=ExportLongRunning&Category!=IndexAndReindex' + +- job: 'cosmosE2eTests_Reindex' + dependsOn: [] + pool: + name: '$(SharedLinuxPool)' + vmImage: '$(LinuxVmImage)' + steps: + - template: e2e-setup.yml + - template: e2e-tests.yml + parameters: + version: ${{ parameters.version }} + appServiceName: ${{ parameters.appServiceName }} + appServiceType: 'CosmosDb' + categoryFilter: 'Category=IndexAndReindex' + testRunTitleSuffix: ' Reindex' - job: 'sqlE2eTests' dependsOn: [] @@ -193,3 +209,34 @@ jobs: version: ${{ parameters.version }} appServiceName: '${{ parameters.appServiceName }}-sql' appServiceType: 'SqlServer' + categoryFilter: 'Category!=ExportLongRunning&Category!=IndexAndReindex&Category!=BulkUpdate' + +- job: 'sqlE2eTests_Reindex' + dependsOn: [] + pool: + name: '$(SharedLinuxPool)' + vmImage: '$(LinuxVmImage)' + steps: + - template: e2e-setup.yml + - template: e2e-tests.yml + parameters: + version: ${{ parameters.version }} + appServiceName: '${{ parameters.appServiceName }}-sql' + appServiceType: 'SqlServer' + categoryFilter: 'Category=IndexAndReindex' + testRunTitleSuffix: ' Reindex' + +- job: 'sqlE2eTests_BulkUpdate' + dependsOn: [] + pool: + name: '$(SharedLinuxPool)' + vmImage: '$(LinuxVmImage)' + steps: + - template: 
e2e-setup.yml + - template: e2e-tests.yml + parameters: + version: ${{ parameters.version }} + appServiceName: '${{ parameters.appServiceName }}-sql' + appServiceType: 'SqlServer' + categoryFilter: 'Category=BulkUpdate' + testRunTitleSuffix: ' BulkUpdate' diff --git a/build/pr-pipeline.yml b/build/pr-pipeline.yml index c1792d561f..1daf06f5b5 100644 --- a/build/pr-pipeline.yml +++ b/build/pr-pipeline.yml @@ -107,6 +107,19 @@ stages: tag: $(ImageTag) buildPlatform: $(testDockerImagePlatforms) +- stage: cleanupOldResources + displayName: 'Cleanup Old PR Resources' + dependsOn: + - setupEnvironment + jobs: + - job: deleteOldResourceGroups + displayName: 'Delete Old Resource Groups' + pool: + name: '$(DefaultLinuxPool)' + vmImage: '$(LinuxVmImage)' + steps: + - template: ./tasks/delete-resource-groups.yml + - stage: provisionEnvironment displayName: Provision Environment dependsOn: [] @@ -124,17 +137,20 @@ stages: azurePowerShellVersion: latestVersion ScriptType: inlineScript Inline: | + # Handle re-run scenario: remove existing RG for this build if it exists try { - Get-AzResourceGroup -Name $(ResourceGroupName) | Remove-AzResourceGroup -Verbose -Force + Get-AzResourceGroup -Name $(UniqueResourceGroupName) | Remove-AzResourceGroup -Verbose -Force } catch {} - New-AzResourceGroup -Name "$(ResourceGroupName)" -Location "$(ResourceGroupRegion)" -Force + # Create new resource group with unique name + New-AzResourceGroup -Name "$(UniqueResourceGroupName)" -Location "$(ResourceGroupRegion)" -Force + Write-Host "Created resource group: $(UniqueResourceGroupName)" - template: ./jobs/add-resource-group-role-assignments.yml parameters: azureSubscription: $(ConnectedServiceName) - resourceGroupName: $(ResourceGroupName) + resourceGroupName: $(UniqueResourceGroupName) - stage: setupEnvironment displayName: Setup Test Environment @@ -143,10 +159,18 @@ stages: - provisionEnvironment jobs: - template: ./jobs/cleanup-aad.yml + - job: deleteKeyVaults + displayName: 'Delete and Purge 
Key Vaults' + pool: + name: '$(DefaultLinuxPool)' + vmImage: '$(LinuxVmImage)' + steps: + - template: ./tasks/delete-keyvaults.yml - job: setup displayName: 'Setup AAD' dependsOn: - cleanupAad + - deleteKeyVaults pool: vmImage: '$(WindowsVmImage)' steps: @@ -159,8 +183,8 @@ stages: jobs: - template: ./jobs/provision-nsp.yml parameters: - resourceGroup: $(ResourceGroupName) - nspName: 'nsp-$(ResourceGroupName)' + resourceGroup: $(UniqueResourceGroupName) + nspName: 'nsp-$(UniqueResourceGroupName)' nspProfile: 'default' - stage: associateKeyVaultNsp @@ -171,9 +195,9 @@ stages: jobs: - template: ./jobs/associate-keyvault-nsp.yml parameters: - resourceGroup: $(ResourceGroupName) - keyVaultName: '$(DeploymentEnvironmentName)-ts' - nspName: 'nsp-$(ResourceGroupName)' + resourceGroup: $(UniqueResourceGroupName) + keyVaultName: '$(KeyVaultBaseName)-ts' + nspName: 'nsp-$(UniqueResourceGroupName)' nspProfile: 'default' - stage: deploySqlServer @@ -184,19 +208,19 @@ stages: jobs: - template: ./jobs/provision-sqlServer.yml parameters: - resourceGroup: $(ResourceGroupName) + resourceGroup: $(UniqueResourceGroupName) sqlServerName: $(DeploymentEnvironmentName) adminType: 'userAssignedManagedIdentity' adminUserAssignedManagedIdentityName: "$(DeploymentEnvironmentName)-uami" deploymentName: "AppServer" - nspName: 'nsp-$(ResourceGroupName)' + nspName: 'nsp-$(UniqueResourceGroupName)' - template: ./jobs/provision-sqlServer.yml parameters: - resourceGroup: $(ResourceGroupName) + resourceGroup: $(UniqueResourceGroupName) sqlServerName: $(DeploymentEnvironmentName)inttest adminType: 'federatedServiceConnection' deploymentName: "IntegrationTests" - nspName: 'nsp-$(ResourceGroupName)' + nspName: 'nsp-$(UniqueResourceGroupName)' - stage: deployStu3 displayName: 'Deploy STU3 CosmosDB Site' @@ -209,10 +233,11 @@ stages: parameters: version: Stu3 webAppName: $(DeploymentEnvironmentName) + keyVaultName: $(KeyVaultBaseName) appServicePlanName: '$(appServicePlanName)-cosmos' 
appServicePlanResourceGroup: $(appServicePlanResourceGroup) subscription: $(ConnectedServiceName) - resourceGroup: $(ResourceGroupName) + resourceGroup: $(UniqueResourceGroupName) testEnvironmentUrl: $(TestApplicationResource) imageTag: $(ImageTag) reindexEnabled: true @@ -229,10 +254,11 @@ stages: version: Stu3 sql: true webAppName: $(DeploymentEnvironmentNameSql) + keyVaultName: $(KeyVaultNameSql) appServicePlanName: '$(appServicePlanName)-sql' appServicePlanResourceGroup: $(appServicePlanResourceGroup) subscription: $(ConnectedServiceName) - resourceGroup: $(ResourceGroupName) + resourceGroup: $(UniqueResourceGroupName) testEnvironmentUrl: $(TestApplicationResource) imageTag: $(ImageTag) schemaAutomaticUpdatesEnabled: 'auto' @@ -251,10 +277,11 @@ stages: parameters: version: R4 webAppName: $(DeploymentEnvironmentNameR4) + keyVaultName: $(KeyVaultNameR4) appServicePlanName: '$(appServicePlanName)-cosmos' appServicePlanResourceGroup: $(appServicePlanResourceGroup) subscription: $(ConnectedServiceName) - resourceGroup: $(ResourceGroupName) + resourceGroup: $(UniqueResourceGroupName) testEnvironmentUrl: $(TestApplicationResource) imageTag: $(ImageTag) reindexEnabled: true @@ -271,10 +298,11 @@ stages: version: R4 sql: true webAppName: $(DeploymentEnvironmentNameR4Sql) + keyVaultName: $(KeyVaultNameR4Sql) appServicePlanName: '$(appServicePlanName)-sql' appServicePlanResourceGroup: $(appServicePlanResourceGroup) subscription: $(ConnectedServiceName) - resourceGroup: $(ResourceGroupName) + resourceGroup: $(UniqueResourceGroupName) testEnvironmentUrl: $(TestApplicationResource) imageTag: $(ImageTag) schemaAutomaticUpdatesEnabled: 'auto' @@ -293,10 +321,11 @@ stages: parameters: version: R4B webAppName: $(DeploymentEnvironmentNameR4B) + keyVaultName: $(KeyVaultNameR4B) appServicePlanName: '$(appServicePlanName)-cosmos' appServicePlanResourceGroup: $(appServicePlanResourceGroup) subscription: $(ConnectedServiceName) - resourceGroup: $(ResourceGroupName) + resourceGroup: 
$(UniqueResourceGroupName) testEnvironmentUrl: $(TestApplicationResource) imageTag: $(ImageTag) reindexEnabled: true @@ -313,10 +342,11 @@ stages: version: R4B sql: true webAppName: $(DeploymentEnvironmentNameR4BSql) + keyVaultName: $(KeyVaultNameR4BSql) appServicePlanName: '$(appServicePlanName)-sql' appServicePlanResourceGroup: $(appServicePlanResourceGroup) subscription: $(ConnectedServiceName) - resourceGroup: $(ResourceGroupName) + resourceGroup: $(UniqueResourceGroupName) testEnvironmentUrl: $(TestApplicationResource) imageTag: $(ImageTag) schemaAutomaticUpdatesEnabled: 'auto' @@ -335,10 +365,11 @@ stages: parameters: version: R5 webAppName: $(DeploymentEnvironmentNameR5) + keyVaultName: $(KeyVaultNameR5) appServicePlanName: '$(appServicePlanName)-cosmos' appServicePlanResourceGroup: $(appServicePlanResourceGroup) subscription: $(ConnectedServiceName) - resourceGroup: $(ResourceGroupName) + resourceGroup: $(UniqueResourceGroupName) testEnvironmentUrl: $(TestApplicationResource) imageTag: $(ImageTag) reindexEnabled: true @@ -355,10 +386,11 @@ stages: version: R5 sql: true webAppName: $(DeploymentEnvironmentNameR5Sql) + keyVaultName: $(KeyVaultNameR5Sql) appServicePlanName: '$(appServicePlanName)-sql' appServicePlanResourceGroup: $(appServicePlanResourceGroup) subscription: $(ConnectedServiceName) - resourceGroup: $(ResourceGroupName) + resourceGroup: $(UniqueResourceGroupName) testEnvironmentUrl: $(TestApplicationResource) imageTag: $(ImageTag) schemaAutomaticUpdatesEnabled: 'auto' @@ -377,7 +409,7 @@ stages: - template: ./jobs/run-tests.yml parameters: version: Stu3 - keyVaultName: $(DeploymentEnvironmentName) + keyVaultName: $(KeyVaultBaseName) appServiceName: $(DeploymentEnvironmentName) integrationSqlServerName: $(DeploymentEnvironmentName)inttest @@ -392,7 +424,7 @@ stages: - template: ./jobs/run-tests.yml parameters: version: R4 - keyVaultName: $(DeploymentEnvironmentNameR4) + keyVaultName: $(KeyVaultNameR4) appServiceName: $(DeploymentEnvironmentNameR4) 
integrationSqlServerName: $(DeploymentEnvironmentName)inttest @@ -407,7 +439,7 @@ stages: - template: ./jobs/run-tests.yml parameters: version: R4B - keyVaultName: $(DeploymentEnvironmentNameR4B) + keyVaultName: $(KeyVaultNameR4B) appServiceName: $(DeploymentEnvironmentNameR4B) integrationSqlServerName: $(DeploymentEnvironmentName)inttest @@ -422,7 +454,7 @@ stages: - template: ./jobs/run-tests.yml parameters: version: R5 - keyVaultName: $(DeploymentEnvironmentNameR5) + keyVaultName: $(KeyVaultNameR5) appServiceName: $(DeploymentEnvironmentNameR5) integrationSqlServerName: $(DeploymentEnvironmentName)inttest diff --git a/build/pr-variables.yml b/build/pr-variables.yml index 8507e12d31..f2c2ac69b1 100644 --- a/build/pr-variables.yml +++ b/build/pr-variables.yml @@ -3,7 +3,12 @@ variables: resourceGroupRoot: 'msh-fhir-pr' appServicePlanName: '$(resourceGroupRoot)-$(prName)-asp' ResourceGroupName: '$(resourceGroupRoot)-$(prName)' - DeploymentEnvironmentName: 'f$(build.BuildNumber)' + # Unique resource group name with Build.BuildId suffix to avoid waiting for deletion of existing RG + UniqueResourceGroupName: '$(resourceGroupRoot)-$(prName)-$(Build.BuildId)' + # Base name for Key Vaults (kept short due to 24 char limit) + KeyVaultBaseName: 'f$(build.BuildNumber)' + # Deployment environment name includes BuildId for uniqueness across builds + DeploymentEnvironmentName: 'f$(build.BuildNumber)-$(Build.BuildId)' TestEnvironmentName: 'OSS PR$(prName)' ImageTag: '$(build.BuildNumber)' diff --git a/build/tasks/delete-keyvaults.yml b/build/tasks/delete-keyvaults.yml new file mode 100644 index 0000000000..95f08d75e5 --- /dev/null +++ b/build/tasks/delete-keyvaults.yml @@ -0,0 +1,97 @@ +# Task template to delete Key Vaults from old resource groups and purge them +# This prevents soft-delete conflicts when deploying new vaults with the same name + +steps: +- task: AzurePowerShell@5 + displayName: 'Delete and Purge Key Vaults' + inputs: + azureSubscription: $(ConnectedServiceName) 
+ azurePowerShellVersion: latestVersion + ScriptType: inlineScript + Inline: | + $keyVaultBaseName = "$(KeyVaultBaseName)" + $currentRgName = "$(UniqueResourceGroupName)" + $region = "$(ResourceGroupRegion)" + $pattern = "$keyVaultBaseName*" + + Write-Host "Looking for Key Vaults matching pattern: $pattern in region: $region" + Write-Host "Current resource group (excluded): $currentRgName" + + # Get all key vaults matching the pattern that are NOT in the current RG + $vaults = Get-AzKeyVault | Where-Object { + $_.VaultName -like $pattern -and + $_.Location -eq $region -and + $_.ResourceGroupName -ne $currentRgName + } + + if ($null -eq $vaults -or $vaults.Count -eq 0) { + Write-Host "No Key Vaults found to delete" + } else { + Write-Host "Found $($vaults.Count) Key Vault(s) to delete" + + # Filter out vaults with resource locks + $vaultsToDelete = $vaults | Where-Object { + $locks = Get-AzResourceLock -ResourceGroupName $_.ResourceGroupName -ResourceName $_.VaultName -ResourceType "Microsoft.KeyVault/vaults" -ErrorAction SilentlyContinue + if ($locks) { + Write-Warning "Vault '$($_.VaultName)' has resource locks - skipping" + return $false + } + return $true + } + + if ($vaultsToDelete.Count -eq 0) { + Write-Host "No vaults eligible for deletion after lock check" + } else { + Write-Host "Deleting $($vaultsToDelete.Count) vault(s) in parallel..." 
+ + # Step 1: Delete vaults (moves them to soft-deleted state) + $deleteResults = $vaultsToDelete | ForEach-Object -Parallel { + $vaultName = $_.VaultName + $sourceRg = $_.ResourceGroupName + try { + Remove-AzKeyVault -VaultName $vaultName -ResourceGroupName $sourceRg -Force -ErrorAction Stop + [PSCustomObject]@{ Vault = $vaultName; Location = $_.Location; Status = "Deleted" } + } + catch { + [PSCustomObject]@{ Vault = $vaultName; Location = $_.Location; Status = "DeleteFailed: $($_.Exception.Message)" } + } + } -ThrottleLimit 10 + + $deleteResults | ForEach-Object { + if ($_.Status -eq "Deleted") { + Write-Host "Deleted vault: $($_.Vault)" + } else { + Write-Warning "$($_.Vault): $($_.Status)" + } + } + + # Step 2: Purge the deleted vaults + $deletedVaults = $deleteResults | Where-Object { $_.Status -eq "Deleted" } + if ($deletedVaults.Count -gt 0) { + Write-Host "Purging $($deletedVaults.Count) soft-deleted vault(s)..." + + $purgeResults = $deletedVaults | ForEach-Object -Parallel { + $vaultName = $_.Vault + $location = $_.Location + try { + Remove-AzKeyVault -VaultName $vaultName -Location $location -InRemovedState -Force -ErrorAction Stop + [PSCustomObject]@{ Vault = $vaultName; Status = "Purged" } + } + catch { + [PSCustomObject]@{ Vault = $vaultName; Status = "PurgeFailed: $($_.Exception.Message)" } + } + } -ThrottleLimit 10 + + $purgeResults | ForEach-Object { + if ($_.Status -eq "Purged") { + Write-Host "Purged vault: $($_.Vault)" + } else { + Write-Warning "$($_.Vault): $($_.Status)" + } + } + + $purgedCount = ($purgeResults | Where-Object { $_.Status -eq "Purged" }).Count + Write-Host "Completed: $purgedCount of $($deletedVaults.Count) vaults purged" + } + } + } diff --git a/build/tasks/delete-resource-groups.yml b/build/tasks/delete-resource-groups.yml new file mode 100644 index 0000000000..91f6fe581a --- /dev/null +++ b/build/tasks/delete-resource-groups.yml @@ -0,0 +1,64 @@ +# Task template to delete old resource groups for a PR +# Runs after Key 
Vaults have been moved out to prevent soft-delete conflicts + +steps: +- task: AzurePowerShell@5 + displayName: 'Delete Old Resource Groups' + inputs: + azureSubscription: $(ConnectedServiceName) + azurePowerShellVersion: latestVersion + ScriptType: inlineScript + Inline: | + $prNumber = "$(prName)" + $resourceGroupRoot = "$(resourceGroupRoot)" + $currentRgName = "$(UniqueResourceGroupName)" + $pattern = "$resourceGroupRoot-$prNumber*" + + Write-Host "Looking for old resource groups matching pattern: $pattern" + Write-Host "Excluding current RG: $currentRgName" + + $oldRGs = Get-AzResourceGroup | Where-Object { + $_.ResourceGroupName -like $pattern -and + $_.ResourceGroupName -ne $currentRgName + } + + if ($oldRGs.Count -eq 0) { + Write-Host "No old resource groups found to cleanup" + } else { + Write-Host "Found $($oldRGs.Count) old resource group(s) to cleanup" + + # Delete all RGs in parallel with retry logic + $results = $oldRGs | ForEach-Object -Parallel { + $rgName = $_.ResourceGroupName + $maxRetries = 3 + $retryCount = 0 + $deleted = $false + + while (-not $deleted -and $retryCount -lt $maxRetries) { + try { + Remove-AzResourceGroup -Name $rgName -Force -ErrorAction Stop + $deleted = $true + [PSCustomObject]@{ RG = $rgName; Status = "Success" } + } + catch { + $retryCount++ + if ($retryCount -lt $maxRetries) { + Start-Sleep -Seconds 30 + } else { + [PSCustomObject]@{ RG = $rgName; Status = "Failed: $($_.Exception.Message)" } + } + } + } + } -ThrottleLimit 10 + + $results | ForEach-Object { + if ($_.Status -eq "Success") { + Write-Host "Successfully removed resource group: $($_.RG)" + } else { + Write-Warning "Failed to remove resource group $($_.RG). 
$($_.Status)" + } + } + + $successCount = ($results | Where-Object { $_.Status -eq "Success" }).Count + Write-Host "Completed removal: $successCount of $($oldRGs.Count) resource group(s)" + } diff --git a/build/tasks/e2e-set-variables.yml b/build/tasks/e2e-set-variables.yml new file mode 100644 index 0000000000..35d15f646d --- /dev/null +++ b/build/tasks/e2e-set-variables.yml @@ -0,0 +1,77 @@ +parameters: +- name: appServiceName + type: string + +steps: +- task: AzurePowerShell@5 + displayName: 'Set Variables' + inputs: + azureSubscription: $(ConnectedServiceName) + azurePowerShellVersion: LatestVersion + ScriptType: InlineScript + Inline: | + $keyVault = "$(KeyVaultBaseName)-ts" + $secrets = Get-AzKeyVaultSecret -VaultName $keyVault + + foreach($secret in $secrets) + { + $environmentVariableName = $secret.Name.Replace("--","_") + + $secretValue = Get-AzKeyVaultSecret -VaultName $keyVault -Name $secret.Name + # Replace with -AsPlainText flag when v5.3 of the Az Module is supported + $plainValue = ([System.Net.NetworkCredential]::new("", $secretValue.SecretValue).Password).ToString() + if([string]::IsNullOrEmpty($plainValue)) + { + throw "$($secret.Name) is empty" + } + Write-Host "##vso[task.setvariable variable=$($environmentVariableName)]$($plainValue)" + } + + $appServiceName = "${{ parameters.appServiceName }}" + $appSettings = (Get-AzWebApp -ResourceGroupName $(UniqueResourceGroupName) -Name $appServiceName).SiteConfig.AppSettings + $acrSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__ConvertData__ContainerRegistryServers__0"} + $acrLoginServer = $acrSettings[0].Value + $acrAccountName = ($acrLoginServer -split '\.')[0] + + ## This needs to be moved to MI, WI #125246 + # $acrPassword = (Get-AzContainerRegistryCredential -ResourceGroupName $(ResourceGroupName) -Name $acrAccountName).Password + # Write-Host "##vso[task.setvariable variable=TestContainerRegistryServer]$($acrLoginServer)" + # Write-Host "##vso[task.setvariable 
variable=TestContainerRegistryPassword]$($acrPassword)" + + $exportStoreSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__Export__StorageAccountUri"} + $exportStoreUri = $exportStoreSettings[0].Value + Write-Host "$exportStoreUri" + $exportStoreAccountName = [System.Uri]::new("$exportStoreUri").Host.Split('.')[0] + $exportStoreKey = Get-AzStorageAccountKey -ResourceGroupName $(UniqueResourceGroupName) -Name "$exportStoreAccountName" | Where-Object {$_.KeyName -eq "key1"} + + Write-Host "##vso[task.setvariable variable=TestExportStoreUri]$($exportStoreUri)" + + $integrationStoreSettings = $appSettings | where {$_.Name -eq "FhirServer__Operations__IntegrationDataStore__StorageAccountUri"} + $integrationStoreUri = $integrationStoreSettings[0].Value + Write-Host "$integrationStoreUri" + $integrationStoreAccountName = [System.Uri]::new("$integrationStoreUri").Host.Split('.')[0] + Write-Host "##vso[task.setvariable variable=TestIntegrationStoreUri]$($integrationStoreUri)" + + Write-Host "##vso[task.setvariable variable=Resource]$(TestApplicationResource)" + + $secrets = Get-AzKeyVaultSecret -VaultName resolute-oss-tenant-info + foreach($secret in $secrets) + { + $environmentVariableName = $secret.Name.Replace("--","_") + + $secretValue = Get-AzKeyVaultSecret -VaultName resolute-oss-tenant-info -Name $secret.Name + # Replace with -AsPlainText flag when v5.3 of the Az Module is supported + $plainValue = ([System.Net.NetworkCredential]::new("", $secretValue.SecretValue).Password).ToString() + if([string]::IsNullOrEmpty($plainValue)) + { + throw "$($secret.Name) is empty" + } + Write-Host "##vso[task.setvariable variable=$($environmentVariableName)]$($plainValue)" + } + # ---------------------------------------- + + dotnet dev-certs https + + Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_CLIENT_ID]$env:AZURESUBSCRIPTION_CLIENT_ID" + Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_TENANT_ID]$env:AZURESUBSCRIPTION_TENANT_ID" + 
Write-Host "##vso[task.setvariable variable=AZURESUBSCRIPTION_SERVICE_CONNECTION_ID]$env:AZURESUBSCRIPTION_SERVICE_CONNECTION_ID" diff --git a/samples/templates/default-azuredeploy-docker.json b/samples/templates/default-azuredeploy-docker.json index 4187e4b81b..1238646dd7 100644 --- a/samples/templates/default-azuredeploy-docker.json +++ b/samples/templates/default-azuredeploy-docker.json @@ -5,11 +5,18 @@ "serviceName": { "type": "string", "minLength": 3, - "maxLength": 24, "metadata": { "description": "Name of the FHIR service Web App." } }, + "keyVaultName": { + "type": "string", + "defaultValue": "", + "maxLength": 24, + "metadata": { + "description": "Name of the Key Vault. If empty, serviceName is used. Must be 24 characters or less." + } + }, "appServicePlanResourceGroup": { "type": "string", "defaultValue": "", @@ -258,7 +265,8 @@ "variables": { "isMAG": "[or(contains(resourceGroup().location,'usgov'),contains(resourceGroup().location,'usdod'))]", "serviceName": "[toLower(parameters('serviceName'))]", - "keyvaultEndpoint": "[if(variables('isMAG'), concat('https://', variables('serviceName'), '.vault.usgovcloudapi.net/'), concat('https://', variables('serviceName'), '.vault.azure.net/'))]", + "keyVaultName": "[if(empty(parameters('keyVaultName')), variables('serviceName'), toLower(parameters('keyVaultName')))]", + "keyvaultEndpoint": "[if(variables('isMAG'), concat('https://', variables('keyVaultName'), '.vault.usgovcloudapi.net/'), concat('https://', variables('keyVaultName'), '.vault.azure.net/'))]", "appServicePlanResourceGroup": "[if(empty(parameters('appServicePlanResourceGroup')), resourceGroup().name, parameters('appServicePlanResourceGroup'))]", "appServicePlanName": "[if(empty(parameters('appServicePlanName')),concat(variables('serviceName'),'-asp'),parameters('appServicePlanName'))]", "appServiceResourceId": "[resourceId('Microsoft.Web/sites', variables('serviceName'))]", @@ -310,7 +318,7 @@ "sqlSkuFamily": "[if(variables('isSqlHyperscaleTier'), 
'Gen5', '')]", "sqlSkuName": "[if(variables('isSqlHyperscaleTier'), 'HS_Gen5', 'Standard')]", "sqlSkuTier": "[if(variables('isSqlHyperscaleTier'), 'Hyperscale', 'Standard')]", - "keyVaultRoleName": "[concat('Key Vault Secrets Officer-', variables('serviceName'))]", + "keyVaultRoleName": "[concat('Key Vault Secrets Officer-', variables('keyVaultName'))]", "imageRepositoryName": "[if(contains(parameters('registryName'),'mcr.'), concat(toLower(parameters('fhirVersion')), '-fhir-server'), concat(toLower(parameters('fhirVersion')), '_fhir-server'))]", "networkSecurityPerimeterName": "[parameters('networkSecurityPerimeterName')]" }, @@ -386,7 +394,7 @@ "dependsOn": [ "[resourceId('Microsoft.Web/serverfarms', variables('appServicePlanName'))]", "[resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', variables('managedIdentityName'))]", - "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('serviceName')))]" + "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName')))]" ], "resources": [ { @@ -395,8 +403,8 @@ "type": "config", "dependsOn": [ "[variables('appServiceResourceId')]", - "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('serviceName')))]", - "[if(equals(parameters('solutionType'),'FhirServerCosmosDB'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('serviceName'), 'CosmosDb--Host'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('serviceName'), 'SqlServer--ConnectionString'))]" + "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName')))]", + 
"[if(equals(parameters('solutionType'),'FhirServerCosmosDB'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('keyVaultName'), 'CosmosDb--Host'), resourceId('Microsoft.KeyVault/vaults/secrets', variables('keyVaultName'), 'SqlServer--ConnectionString'))]" ], "properties": "[union(variables('combinedFhirServerConfigProperties'), json(concat('{ \"FhirServer__ResourceManager__DataStoreResourceId\": \"', if(equals(parameters('solutionType'),'FhirServerCosmosDB'), resourceId('Microsoft.DocumentDb/databaseAccounts', variables('serviceName')), resourceId('Microsoft.Sql/servers/', variables('sqlServerDerivedName'))), '\", ', if(variables('deployAppInsights'), concat('\"Telemetry__Provider\": \"', parameters('telemetryProviderType'), '\",', '\"Telemetry__InstrumentationKey\": \"', reference(resourceId('Microsoft.Insights/components', variables('appInsightsName'))).InstrumentationKey, '\",', '\"Telemetry__ConnectionString\": \"', reference(resourceId('Microsoft.Insights/components', variables('appInsightsName'))).ConnectionString, '\"'), ''), '}')))]" }, @@ -407,7 +415,7 @@ "dependsOn": [ "appsettings", "[variables('appServiceResourceId')]", - "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('serviceName')))]" + "[if(variables('deployAppInsights'),concat('Microsoft.Insights/components/', variables('appInsightsName')),resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName')))]" ], "properties": { "linuxFxVersion": "[concat('DOCKER|', parameters('registryName'), '/', variables('imageRepositoryName'),':', parameters('imageTag'))]", @@ -641,7 +649,7 @@ }, { "type": "Microsoft.KeyVault/vaults", - "name": "[variables('serviceName')]", + "name": "[variables('keyVaultName')]", "apiVersion": "2022-07-01", "location": "[resourceGroup().location]", "tags": { @@ -676,9 +684,9 @@ { "type": "Microsoft.KeyVault/vaults/providers/roleAssignments", "apiVersion": 
"2022-04-01", - "name": "[concat(variables('serviceName'), '/Microsoft.Authorization/', guid(uniqueString(variables('keyVaultRoleName'), parameters('fhirVersion'), variables('serviceName'))))]", + "name": "[concat(variables('keyVaultName'), '/Microsoft.Authorization/', guid(uniqueString(variables('keyVaultRoleName'), parameters('fhirVersion'), variables('keyVaultName'))))]", "dependsOn": [ - "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]" + "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]" ], "properties": { "roleDefinitionId": "[concat(subscription().Id, '/providers/Microsoft.Authorization/roleDefinitions/', 'b86a8fe4-44ce-4948-aee5-eccb2c155cd7')]", // Role definition ID for "Key Vault Secrets Officer" @@ -688,9 +696,9 @@ { "type": "Microsoft.KeyVault/vaults/providers/roleAssignments", "apiVersion": "2022-04-01", - "name": "[concat(variables('serviceName'), '/Microsoft.Authorization/', guid(uniqueString('Reader', parameters('fhirVersion'), variables('serviceName'))))]", + "name": "[concat(variables('keyVaultName'), '/Microsoft.Authorization/', guid(uniqueString('Reader', parameters('fhirVersion'), variables('keyVaultName'))))]", "dependsOn": [ - "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]" + "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]" ], "properties": { "roleDefinitionId": "[concat(subscription().Id, '/providers/Microsoft.Authorization/roleDefinitions/', 'b86a8fe4-44ce-4948-aee5-eccb2c155cd7')]", // Role definition ID for "Key Vault Secrets Officer" @@ -700,28 +708,28 @@ { "condition": "[equals(parameters('solutionType'),'FhirServerCosmosDB')]", "type": "Microsoft.KeyVault/vaults/secrets", - "name": "[concat(variables('serviceName'), '/CosmosDb--Host')]", + "name": "[concat(variables('keyVaultName'), '/CosmosDb--Host')]", "apiVersion": "2015-06-01", "properties": { "contentType": "text/plain", "value": "[if(equals(parameters('solutionType'),'FhirServerCosmosDB'), 
reference(concat('Microsoft.DocumentDb/databaseAccounts/', variables('serviceName'))).documentEndpoint, '')]" }, "dependsOn": [ - "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]", + "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]", "[resourceId('Microsoft.DocumentDb/databaseAccounts', variables('serviceName'))]" ] }, { "condition": "[equals(parameters('solutionType'),'FhirServerSqlServer')]", "type": "Microsoft.KeyVault/vaults/secrets", - "name": "[concat(variables('serviceName'), '/SqlServer--ConnectionString')]", + "name": "[concat(variables('keyVaultName'), '/SqlServer--ConnectionString')]", "apiVersion": "2015-06-01", "properties": { "contentType": "text/plain", "value": "[concat('Server=tcp:', if(equals(parameters('solutionType'),'FhirServerSqlServer'), reference(variables('computedSqlServerReference'), '2015-05-01-preview').fullyQualifiedDomainName, ''),',1433;Initial Catalog=',variables('sqlDatabaseName'),';Persist Security Info=False;Authentication=ActiveDirectoryMSI;MultipleActiveResultSets=False;Encrypt=True;TrustServerCertificate=False;Connection Timeout=30;User Id=', reference(resourceId('Microsoft.ManagedIdentity/userAssignedIdentities', variables('managedIdentityName')), '2018-11-30').clientId, ';')]" }, "dependsOn": [ - "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]", + "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]", "[resourceId('Microsoft.Sql/servers', variables('sqlServerDerivedName'))]" ] }, @@ -817,15 +825,15 @@ { "type": "Microsoft.Network/networkSecurityPerimeters/resourceAssociations", "apiVersion": "2023-07-01-preview", - "name": "[concat(variables('networkSecurityPerimeterName'), '/keyvault-', uniqueString(variables('serviceName')))]", + "name": "[concat(variables('networkSecurityPerimeterName'), '/keyvault-', uniqueString(variables('keyVaultName')))]", "dependsOn": [ - "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]", + 
"[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]", "[resourceId('Microsoft.Network/networkSecurityPerimeters', variables('networkSecurityPerimeterName'))]", "[resourceId('Microsoft.Network/networkSecurityPerimeters/profiles', variables('networkSecurityPerimeterName'), parameters('networkSecurityPerimeterProfileName'))]" ], "properties": { "privateLinkResource": { - "id": "[resourceId('Microsoft.KeyVault/vaults', variables('serviceName'))]" + "id": "[resourceId('Microsoft.KeyVault/vaults', variables('keyVaultName'))]" }, "profile": { "id": "[resourceId('Microsoft.Network/networkSecurityPerimeters/profiles', variables('networkSecurityPerimeterName'), parameters('networkSecurityPerimeterProfileName'))]" diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryFactAttribute.cs b/src/Microsoft.Health.Extensions.Xunit/RetryFactAttribute.cs new file mode 100644 index 0000000000..60f6e32b2d --- /dev/null +++ b/src/Microsoft.Health.Extensions.Xunit/RetryFactAttribute.cs @@ -0,0 +1,37 @@ +// ------------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. +// ------------------------------------------------------------------------------------------------- + +using System; +using Xunit; +using Xunit.Sdk; + +namespace Microsoft.Health.Extensions.Xunit +{ + /// + /// Attribute that marks a test method to be retried a specified number of times if it fails. + /// Useful for handling transient failures in integration and end-to-end tests. + /// + [XunitTestCaseDiscoverer("Microsoft.Health.Extensions.Xunit.RetryFactDiscoverer", "Microsoft.Health.Extensions.Xunit")] + [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)] + public sealed class RetryFactAttribute : FactAttribute + { + /// + /// Gets or sets the maximum number of retry attempts (default is 3). 
+ /// + public int MaxRetries { get; set; } = 3; + + /// + /// Gets or sets the delay in milliseconds between retry attempts (default is 5000ms). + /// + public int DelayBetweenRetriesMs { get; set; } = 5000; + + /// + /// Gets or sets whether to retry on assertion failures (XunitException). + /// Default is false - assertion failures usually indicate test bugs, not transient issues. + /// Set to true for tests that validate eventually-consistent systems (e.g., cache refresh, reindex operations). + /// + public bool RetryOnAssertionFailure { get; set; } = false; + } +} diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryFactDiscoverer.cs b/src/Microsoft.Health.Extensions.Xunit/RetryFactDiscoverer.cs new file mode 100644 index 0000000000..f45ab9690f --- /dev/null +++ b/src/Microsoft.Health.Extensions.Xunit/RetryFactDiscoverer.cs @@ -0,0 +1,54 @@ +// ------------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. +// ------------------------------------------------------------------------------------------------- + +using System.Collections.Generic; +using Xunit.Abstractions; +using Xunit.Sdk; + +namespace Microsoft.Health.Extensions.Xunit +{ + /// + /// Test case discoverer for . 
+ /// + public class RetryFactDiscoverer : IXunitTestCaseDiscoverer + { + private readonly IMessageSink _diagnosticMessageSink; + + public RetryFactDiscoverer(IMessageSink diagnosticMessageSink) + { + _diagnosticMessageSink = diagnosticMessageSink; + } + + public IEnumerable<IXunitTestCase> Discover( + ITestFrameworkDiscoveryOptions discoveryOptions, + ITestMethod testMethod, + IAttributeInfo factAttribute) + { + var maxRetries = factAttribute.GetNamedArgument<int>(nameof(RetryFactAttribute.MaxRetries)); + var delayMs = factAttribute.GetNamedArgument<int>(nameof(RetryFactAttribute.DelayBetweenRetriesMs)); + var retryOnAssertionFailure = factAttribute.GetNamedArgument<bool>(nameof(RetryFactAttribute.RetryOnAssertionFailure)); + + // Use default values if not specified + if (maxRetries == 0) + { + maxRetries = 3; + } + + if (delayMs == 0) + { + delayMs = 5000; + } + + yield return new RetryTestCase( + _diagnosticMessageSink, + discoveryOptions.MethodDisplayOrDefault(), + discoveryOptions.MethodDisplayOptionsOrDefault(), + testMethod, + maxRetries, + delayMs, + retryOnAssertionFailure); + } + } +} diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryTestCase.cs b/src/Microsoft.Health.Extensions.Xunit/RetryTestCase.cs new file mode 100644 index 0000000000..1afc54c07f --- /dev/null +++ b/src/Microsoft.Health.Extensions.Xunit/RetryTestCase.cs @@ -0,0 +1,316 @@ +// ------------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. +// ------------------------------------------------------------------------------------------------- + +using System; +using System.ComponentModel; +using System.Diagnostics; +using System.Threading; +using System.Threading.Tasks; +using Xunit.Abstractions; +using Xunit.Sdk; + +namespace Microsoft.Health.Extensions.Xunit +{ + /// + /// Test case that implements retry logic. 
+ /// + public class RetryTestCase : XunitTestCase + { + private int _maxRetries; + private int _delayMs; + private bool _retryOnAssertionFailure; + + [EditorBrowsable(EditorBrowsableState.Never)] + [Obsolete("Called by the de-serializer; should only be called by deriving classes for de-serialization purposes")] + public RetryTestCase() + { + } + + public RetryTestCase( + IMessageSink diagnosticMessageSink, + TestMethodDisplay defaultMethodDisplay, + TestMethodDisplayOptions defaultMethodDisplayOptions, + ITestMethod testMethod, + int maxRetries, + int delayMs, + bool retryOnAssertionFailure = false, + object[] testMethodArguments = null) + : base(diagnosticMessageSink, defaultMethodDisplay, defaultMethodDisplayOptions, testMethod, testMethodArguments) + { + _maxRetries = maxRetries; + _delayMs = delayMs; + _retryOnAssertionFailure = retryOnAssertionFailure; + } + + public override async Task RunAsync( + IMessageSink diagnosticMessageSink, + IMessageBus messageBus, + object[] constructorArguments, + ExceptionAggregator aggregator, + CancellationTokenSource cancellationTokenSource) + { + // Use System.Diagnostics.Trace for ADO visibility + Trace.WriteLine($"##vso[task.logdetail]RetryFact starting test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' with MaxRetries={_maxRetries}, DelayMs={_delayMs}, RetryOnAssertionFailure={_retryOnAssertionFailure}"); + + var runSummary = new RunSummary { Total = 1 }; + Exception lastException = null; + + for (int attempt = 1; attempt <= _maxRetries; attempt++) + { + var isLastAttempt = attempt == _maxRetries; + + Trace.WriteLine($"##vso[task.logdetail]RetryFact attempt {attempt}/{_maxRetries} for test '{TestMethod.Method.Name}'"); + + // Create a fresh aggregator for each attempt + var attemptAggregator = new ExceptionAggregator(); + + // Only intercept failure messages on non-final attempts + // On the final attempt, let everything go through (both success and failure) + IMessageBus busToUse; + 
FailureInterceptingMessageBus interceptingBus = null; + + if (isLastAttempt) + { + busToUse = messageBus; + } + else + { + interceptingBus = new FailureInterceptingMessageBus(messageBus); + busToUse = interceptingBus; + } + + try + { + var summary = await base.RunAsync( + diagnosticMessageSink, + busToUse, + constructorArguments, + attemptAggregator, + cancellationTokenSource); + + runSummary.Time = summary.Time; + + if (summary.Failed == 0) + { + // Test passed - success message already went through to Test Explorer + Trace.WriteLine($"##vso[task.logdetail]RetryFact test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' passed on attempt {attempt}/{_maxRetries}"); + diagnosticMessageSink.OnMessage( + new DiagnosticMessage($"[RetryFact] Test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' passed on attempt {attempt}/{_maxRetries}")); + + runSummary.Failed = 0; + return runSummary; + } + + // Test failed on this attempt + lastException = attemptAggregator.ToException(); + + // If no exception was captured but test failed, create an exception using captured failure details + if (lastException == null && summary.Failed > 0) + { + string failureMsg = interceptingBus?.LastFailureMessage ?? "Test failed but no exception was captured."; + string stackTrace = interceptingBus?.LastFailureStackTrace; + bool isAssertionFailure = interceptingBus?.IsAssertionFailure ?? false; + + string fullMessage = failureMsg + + (stackTrace != null ? 
Environment.NewLine + "Stack Trace:" + Environment.NewLine + stackTrace : string.Empty); + + // If this is an assertion failure (based on exception types), create an XunitException + // so that RetryOnAssertionFailure logic works correctly + if (isAssertionFailure) + { + lastException = new XunitException(fullMessage); + } + else + { + lastException = new InvalidOperationException(fullMessage); + } + + Trace.WriteLine($"##vso[task.logdetail]RetryFact: Test failed but exception is null, created placeholder exception (IsAssertion={isAssertionFailure})"); + } + + Trace.WriteLine($"##vso[task.logdetail]RetryFact test failed on attempt {attempt} with exception type: {lastException?.GetType().FullName ?? "null"}, Message: {lastException?.Message ?? "null"}"); + + if (!isLastAttempt) + { + // Check if we should retry this exception (now handles null) + var shouldRetry = ShouldRetry(lastException); + Trace.WriteLine($"##vso[task.logdetail]RetryFact ShouldRetry={shouldRetry} for exception type {lastException?.GetType().FullName ?? "null"}"); + + if (!shouldRetry) + { + Trace.WriteLine($"##vso[task.logissue type=warning]Test '{TestMethod.Method.Name}' failed with non-retriable exception. Skipping retries."); + diagnosticMessageSink.OnMessage( + new DiagnosticMessage($"[RetryFact] Test '{TestMethod.Method.Name}' failed with non-retriable exception. Skipping retries.")); + + if (lastException != null) + { + aggregator.Add(lastException); + } + + runSummary.Failed = 1; + return runSummary; + } + + // Not the last attempt - the failure was intercepted, so retry + Trace.WriteLine($"##vso[task.logissue type=warning]Test '{TestMethod.Method.Name}' failed on attempt {attempt}/{_maxRetries}, will retry after {_delayMs}ms"); + diagnosticMessageSink.OnMessage( + new DiagnosticMessage($"[RetryFact] Test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' failed on attempt {attempt}/{_maxRetries}. Retrying after {_delayMs}ms delay. Error: {lastException?.Message ?? 
"No exception message"}")); + + await Task.Delay(_delayMs); + } + else + { + // Last attempt - failure message already went through to Test Explorer + Trace.WriteLine($"##vso[task.logissue type=error]Test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' failed after {_maxRetries} attempts. Final error: {lastException?.Message ?? "No exception captured"}"); + diagnosticMessageSink.OnMessage( + new DiagnosticMessage($"[RetryFact] Test '{TestMethod.TestClass.Class.Name}.{TestMethod.Method.Name}' failed after {_maxRetries} attempts. Last exception: {lastException?.Message ?? "No exception message"}")); + + if (lastException != null) + { + aggregator.Add(lastException); + } + else if (summary.Failed > 0) + { + // Add an exception with captured failure details if test failed but no exception was captured + aggregator.Add(new InvalidOperationException($"Test failed after {_maxRetries} attempts but no exception was captured")); + } + + runSummary.Failed = 1; + return runSummary; + } + } + catch (Exception ex) + { + Trace.WriteLine($"##vso[task.logissue type=error]RetryFact unexpected exception: {ex.GetType().FullName}: {ex.Message}"); + throw; + } + finally + { + // Dispose the intercepting bus if we created one + interceptingBus?.Dispose(); + } + } + + // Should never reach here + Trace.WriteLine($"##vso[task.logissue type=error]RetryFact WARNING: Reached end of retry loop unexpectedly"); + runSummary.Failed = 1; + return runSummary; + } + + public override void Serialize(IXunitSerializationInfo data) + { + base.Serialize(data); + data.AddValue(nameof(_maxRetries), _maxRetries); + data.AddValue(nameof(_delayMs), _delayMs); + data.AddValue(nameof(_retryOnAssertionFailure), _retryOnAssertionFailure); + } + + public override void Deserialize(IXunitSerializationInfo data) + { + base.Deserialize(data); + _maxRetries = data.GetValue(nameof(_maxRetries)); + _delayMs = data.GetValue(nameof(_delayMs)); + _retryOnAssertionFailure = 
data.GetValue<bool>(nameof(_retryOnAssertionFailure)); + } + + /// + /// Determines if an exception should trigger a retry. + /// + private bool ShouldRetry(Exception ex) + { + // If exception is null, we should retry (something went wrong with exception capture) + if (ex == null) + { + Trace.WriteLine($"##vso[task.logdetail]RetryFact: Exception is null, will retry"); + return true; // Retry when we can't determine the exception type + } + + // Unwrap aggregate exceptions + while (ex is AggregateException aggEx && aggEx.InnerExceptions.Count == 1) + { + ex = aggEx.InnerException; + } + + // Don't retry assertion failures unless explicitly configured + if (ex is XunitException) + { + if (!_retryOnAssertionFailure) + { + Trace.WriteLine($"##vso[task.logdetail]RetryFact: Not retrying XunitException because _retryOnAssertionFailure is false"); + return false; + } + else + { + Trace.WriteLine($"##vso[task.logdetail]RetryFact: Retrying XunitException because _retryOnAssertionFailure is true"); + return true; + } + } + + // Retry everything else (network, timeout, SQL transient, etc.) + Trace.WriteLine($"##vso[task.logdetail]RetryFact: Retrying non-assertion exception of type {ex.GetType().FullName}"); + return true; + } + + /// + /// Message bus that intercepts ONLY failure messages (ITestFailed). + /// Used on non-final retry attempts to suppress intermediate failures. + /// Success messages and all other messages always pass through. + /// Also captures failure details (messages and stack traces) for diagnostic purposes. 
+ /// + private class FailureInterceptingMessageBus : IMessageBus + { + private readonly IMessageBus _innerBus; + + public FailureInterceptingMessageBus(IMessageBus innerBus) + { + _innerBus = innerBus; + } + + public string LastFailureMessage { get; private set; } + + public string LastFailureStackTrace { get; private set; } + + public bool IsAssertionFailure { get; private set; } + + public bool QueueMessage(IMessageSinkMessage message) + { + // Intercept ONLY failure messages - suppress them for non-final attempts + if (message is ITestFailed failed) + { + // Capture failure details for diagnostics + LastFailureMessage = failed.Messages != null && failed.Messages.Length > 0 + ? string.Join(Environment.NewLine, failed.Messages) + : failed.ExceptionTypes != null && failed.ExceptionTypes.Length > 0 + ? string.Join(", ", failed.ExceptionTypes) + : "Unknown failure"; + + LastFailureStackTrace = failed.StackTraces != null && failed.StackTraces.Length > 0 + ? string.Join(Environment.NewLine, failed.StackTraces) + : null; + + // Detect if this is an assertion failure by checking exception types + // XUnit assertion exceptions typically have types containing "Xunit" or "Assert" + IsAssertionFailure = failed.ExceptionTypes != null && + failed.ExceptionTypes.Length > 0 && + (failed.ExceptionTypes[0].Contains("Xunit", StringComparison.Ordinal) || + failed.ExceptionTypes[0].Contains("Assert", StringComparison.Ordinal) || + failed.ExceptionTypes[0].Contains("EqualException", StringComparison.Ordinal) || + failed.ExceptionTypes[0].Contains("TrueException", StringComparison.Ordinal) || + failed.ExceptionTypes[0].Contains("FalseException", StringComparison.Ordinal)); + + return true; // Swallow the failure - we're going to retry + } + + // All other messages (ITestPassed, ITestStarting, ITestFinished, etc.) 
pass through + return _innerBus.QueueMessage(message); + } + + public void Dispose() + { + // Don't dispose the inner bus - it's owned by the caller + } + } + } +} diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryTheoryAttribute.cs b/src/Microsoft.Health.Extensions.Xunit/RetryTheoryAttribute.cs new file mode 100644 index 0000000000..88a65b6b54 --- /dev/null +++ b/src/Microsoft.Health.Extensions.Xunit/RetryTheoryAttribute.cs @@ -0,0 +1,37 @@ +// ------------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. +// ------------------------------------------------------------------------------------------------- + +using System; +using Xunit; +using Xunit.Sdk; + +namespace Microsoft.Health.Extensions.Xunit +{ + /// + /// Attribute that marks a theory method to be retried a specified number of times if it fails. + /// Useful for handling transient failures in integration and end-to-end tests with parameterized data. + /// + [XunitTestCaseDiscoverer("Microsoft.Health.Extensions.Xunit.RetryTheoryDiscoverer", "Microsoft.Health.Extensions.Xunit")] + [AttributeUsage(AttributeTargets.Method, AllowMultiple = false)] + public sealed class RetryTheoryAttribute : TheoryAttribute + { + /// + /// Gets or sets the maximum number of retry attempts (default is 3). + /// + public int MaxRetries { get; set; } = 3; + + /// + /// Gets or sets the delay in milliseconds between retry attempts (default is 5000ms). + /// + public int DelayBetweenRetriesMs { get; set; } = 5000; + + /// + /// Gets or sets whether to retry on assertion failures (XunitException). + /// Default is false - assertion failures usually indicate test bugs, not transient issues. + /// Set to true for tests that validate eventually-consistent systems (e.g., cache refresh, reindex operations). 
+ /// + public bool RetryOnAssertionFailure { get; set; } = false; + } +} diff --git a/src/Microsoft.Health.Extensions.Xunit/RetryTheoryDiscoverer.cs b/src/Microsoft.Health.Extensions.Xunit/RetryTheoryDiscoverer.cs new file mode 100644 index 0000000000..9539e3c1db --- /dev/null +++ b/src/Microsoft.Health.Extensions.Xunit/RetryTheoryDiscoverer.cs @@ -0,0 +1,91 @@ +// ------------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. +// ------------------------------------------------------------------------------------------------- + +using System.Collections.Generic; +using System.Linq; +using Xunit.Abstractions; +using Xunit.Sdk; + +namespace Microsoft.Health.Extensions.Xunit +{ + /// + /// Test case discoverer for . + /// For Theory tests, we need to let xUnit discover the data-driven test cases first, + /// then wrap each one with retry logic. 
+ /// + public class RetryTheoryDiscoverer : TheoryDiscoverer + { + public RetryTheoryDiscoverer(IMessageSink diagnosticMessageSink) + : base(diagnosticMessageSink) + { + } + + protected override IEnumerable CreateTestCasesForDataRow( + ITestFrameworkDiscoveryOptions discoveryOptions, + ITestMethod testMethod, + IAttributeInfo theoryAttribute, + object[] dataRow) + { + var maxRetries = theoryAttribute.GetNamedArgument(nameof(RetryTheoryAttribute.MaxRetries)); + var delayMs = theoryAttribute.GetNamedArgument(nameof(RetryTheoryAttribute.DelayBetweenRetriesMs)); + var retryOnAssertionFailure = theoryAttribute.GetNamedArgument(nameof(RetryTheoryAttribute.RetryOnAssertionFailure)); + + // Use default values if not specified + if (maxRetries == 0) + { + maxRetries = 3; + } + + if (delayMs == 0) + { + delayMs = 5000; + } + + // Create a RetryTestCase for each data row + yield return new RetryTestCase( + DiagnosticMessageSink, + discoveryOptions.MethodDisplayOrDefault(), + discoveryOptions.MethodDisplayOptionsOrDefault(), + testMethod, + maxRetries, + delayMs, + retryOnAssertionFailure, + dataRow); + } + + protected override IEnumerable CreateTestCasesForTheory( + ITestFrameworkDiscoveryOptions discoveryOptions, + ITestMethod testMethod, + IAttributeInfo theoryAttribute) + { + var maxRetries = theoryAttribute.GetNamedArgument(nameof(RetryTheoryAttribute.MaxRetries)); + var delayMs = theoryAttribute.GetNamedArgument(nameof(RetryTheoryAttribute.DelayBetweenRetriesMs)); + var retryOnAssertionFailure = theoryAttribute.GetNamedArgument(nameof(RetryTheoryAttribute.RetryOnAssertionFailure)); + + // Use default values if not specified + if (maxRetries == 0) + { + maxRetries = 3; + } + + if (delayMs == 0) + { + delayMs = 5000; + } + + // For theories without data (will be skipped), wrap in retry + return base.CreateTestCasesForTheory(discoveryOptions, testMethod, theoryAttribute) + .Select(testCase => new RetryTestCase( + DiagnosticMessageSink, + 
discoveryOptions.MethodDisplayOrDefault(), + discoveryOptions.MethodDisplayOptionsOrDefault(), + testMethod, + maxRetries, + delayMs, + retryOnAssertionFailure, + testCase.TestMethodArguments)); + } + } +} diff --git a/src/Microsoft.Health.Fhir.Core.UnitTests/Features/Search/Registry/SearchParameterCacheRefreshBackgroundServiceTests.cs b/src/Microsoft.Health.Fhir.Core.UnitTests/Features/Search/Registry/SearchParameterCacheRefreshBackgroundServiceTests.cs index 709e9b47a3..6c273679a6 100644 --- a/src/Microsoft.Health.Fhir.Core.UnitTests/Features/Search/Registry/SearchParameterCacheRefreshBackgroundServiceTests.cs +++ b/src/Microsoft.Health.Fhir.Core.UnitTests/Features/Search/Registry/SearchParameterCacheRefreshBackgroundServiceTests.cs @@ -36,7 +36,8 @@ public SearchParameterCacheRefreshBackgroundServiceTests() _coreFeatureConfiguration = Substitute.For>(); _coreFeatureConfiguration.Value.Returns(new CoreFeatureConfiguration { - SearchParameterCacheRefreshIntervalSeconds = 60, + SearchParameterCacheRefreshIntervalSeconds = 1, + SearchParameterCacheRefreshMaxInitialDelaySeconds = 0, // No delay for tests }); _service = new SearchParameterCacheRefreshBackgroundService( @@ -320,8 +321,9 @@ public async Task OnRefreshTimer_WhenOperationCanceled_ShouldHandleGracefully() // Act - Initialize and let timer run await service.Handle(new SearchParametersInitializedNotification(), CancellationToken.None); - // Wait for timer to fire and handle the exception - await Task.Delay(200); + // Wait longer for timer to fire and handle the exception - give it up to 2 seconds + // The timer starts immediately (TimeSpan.Zero) when Handle is called + await Task.Delay(2000); // Assert - Verify that OperationCanceledException was handled and logged appropriately mockLogger.Received().Log( diff --git a/src/Microsoft.Health.Fhir.Core/Configs/CoreFeatureConfiguration.cs b/src/Microsoft.Health.Fhir.Core/Configs/CoreFeatureConfiguration.cs index 581f29761c..e67091f37a 100644 --- 
a/src/Microsoft.Health.Fhir.Core/Configs/CoreFeatureConfiguration.cs +++ b/src/Microsoft.Health.Fhir.Core/Configs/CoreFeatureConfiguration.cs @@ -106,5 +106,12 @@ public class CoreFeatureConfiguration /// SearchParameter cache synchronized across instances. Default is 60 seconds if not specified. /// public int SearchParameterCacheRefreshIntervalSeconds { get; set; } = 60; + + /// + /// Gets or sets the maximum initial delay in seconds for the SearchParameter cache background service timer. + /// This random delay (0 to this value) staggers timer startup across instances to prevent thundering herd. + /// Default is 15 seconds. Set to 0 to disable the delay (useful for testing). + /// + public int SearchParameterCacheRefreshMaxInitialDelaySeconds { get; set; } = 15; } } diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/FhirOperationDataStoreBase.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/FhirOperationDataStoreBase.cs index 45d812f85c..c84a99f4fd 100644 --- a/src/Microsoft.Health.Fhir.Core/Features/Operations/FhirOperationDataStoreBase.cs +++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/FhirOperationDataStoreBase.cs @@ -376,7 +376,7 @@ record = JsonConvert.DeserializeObject(jobInfo.Definition); errorMessage += error.Diagnostics + " "; } - if (error.Severity == OperationOutcomeConstants.IssueSeverity.Error && !inFlightJobsExist) + if (error.Severity == OperationOutcomeConstants.IssueSeverity.Error && !inFlightJobsExist && !cancelledJobsExist) { status = JobStatus.Failed; } @@ -393,6 +393,12 @@ record = JsonConvert.DeserializeObject(jobInfo.Definition); record.FailureDetails = new JobFailureDetails(Core.Resources.ReindexFailedWithUnknownError, HttpStatusCode.InternalServerError); } + // To prevent sending consumer status that is wrong if there are still running prcessing jobs that were enqueued during race condition. 
+ if (inFlightJobsExist && status != JobStatus.Running) + { + status = JobStatus.Running; + } + switch (status) { case JobStatus.Created: diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/GetReindexRequestHandler.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/GetReindexRequestHandler.cs index 43569f7a35..86693e5635 100644 --- a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/GetReindexRequestHandler.cs +++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/GetReindexRequestHandler.cs @@ -47,10 +47,22 @@ public async Task Handle(GetReindexRequest request, Cancella } else { - return await GetListOfReindexJobs(); + return await GetActiveReindexJob(cancellationToken); } } + private async Task GetActiveReindexJob(CancellationToken cancellationToken) + { + var reindexJobId = await _fhirOperationDataStore.CheckActiveReindexJobsAsync(cancellationToken); + + if (reindexJobId.found && reindexJobId.id != null) + { + return await GetSingleReindexJobAsync(reindexJobId.id, cancellationToken); + } + + return await GetListOfReindexJobs(); + } + private static Task GetListOfReindexJobs() { // TODO: build list of reindex jobs diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/Models/ReindexJobRecord.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/Models/ReindexJobRecord.cs index 555e085e72..79d1b17a1f 100644 --- a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/Models/ReindexJobRecord.cs +++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/Models/ReindexJobRecord.cs @@ -107,7 +107,7 @@ protected ReindexJobRecord() public long Progress { get; set; } [JsonProperty(JobRecordProperties.ResourceTypeSearchParameterHashMap)] - public IReadOnlyDictionary ResourceTypeSearchParameterHashMap { get; private set; } + public IReadOnlyDictionary ResourceTypeSearchParameterHashMap { get; internal set; } [JsonProperty(JobRecordProperties.LastModified)] public DateTimeOffset 
LastModified { get; set; } diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexOrchestratorJob.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexOrchestratorJob.cs index f7d0f4171f..a3417382af 100644 --- a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexOrchestratorJob.cs +++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexOrchestratorJob.cs @@ -11,6 +11,7 @@ using System.Threading; using System.Threading.Tasks; using EnsureThat; +using Hl7.Fhir.Model; using Microsoft.AspNetCore.JsonPatch.Internal; using Microsoft.Data.SqlClient; using Microsoft.Extensions.Logging; @@ -111,25 +112,32 @@ public async Task ExecuteAsync(JobInfo jobInfo, CancellationToken cancel try { - // Wait for the configured SearchParameterCacheRefreshIntervalSeconds before processing - var delaySeconds = Math.Max(1, _coreFeatureConfiguration.SearchParameterCacheRefreshIntervalSeconds); - var delayMultiplier = Math.Max(1, _operationsConfiguration.Reindex.ReindexDelayMultiplier); - _logger.LogInformation("Reindex job with Id: {Id} waiting for {DelaySeconds} second(s) before processing as configured by SearchParameterCacheRefreshIntervalSeconds and ReindexDelayMultiplier.", _jobInfo.Id, delaySeconds * delayMultiplier); + await Task.Delay(1000, cancellationToken); - await Task.Delay(TimeSpan.FromSeconds(delaySeconds) * delayMultiplier, cancellationToken); + if (cancellationToken.IsCancellationRequested || _jobInfo.CancelRequested) + { + throw new OperationCanceledException("Reindex operation cancelled by customer."); + } + + // Attempt to get and apply the latest search parameter updates + await RefreshSearchParameterCache(cancellationToken); _reindexJobRecord.Status = OperationStatus.Running; _jobInfo.Status = JobStatus.Running; _logger.LogInformation("Reindex job with Id: {Id} has been started. 
Status: {Status}.", _jobInfo.Id, _reindexJobRecord.Status); - await CreateReindexProcessingJobsAsync(cancellationToken); - var jobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, _jobInfo.GroupId, true, cancellationToken); + var queryReindexProcessingJobs = jobs.Where(j => j.Id != _jobInfo.GroupId).ToList(); - // Get only ProcessingJobs. - var queryProcessingJobs = jobs.Where(j => j.Id != _jobInfo.GroupId).ToList(); + // Only create jobs if we don't have any at this point. + if (!queryReindexProcessingJobs.Any()) + { + await CreateReindexProcessingJobsAsync(cancellationToken); + jobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, _jobInfo.GroupId, true, cancellationToken); + queryReindexProcessingJobs = jobs.Where(j => j.Id != _jobInfo.GroupId).ToList(); + } - if (!queryProcessingJobs.Any()) + if (!queryReindexProcessingJobs.Any()) { // Nothing to process so we are done. AddErrorResult(OperationOutcomeConstants.IssueSeverity.Information, OperationOutcomeConstants.IssueType.Informational, Core.Resources.ReindexingNothingToProcess); @@ -137,12 +145,11 @@ public async Task ExecuteAsync(JobInfo jobInfo, CancellationToken cancel return JsonConvert.SerializeObject(_currentResult); } - _currentResult.CreatedJobs = queryProcessingJobs.Count; + _currentResult.CreatedJobs = queryReindexProcessingJobs.Count; - if (queryProcessingJobs.Any()) - { - await CheckForCompletionAsync(queryProcessingJobs, cancellationToken); - } + await CheckForCompletionAsync(queryReindexProcessingJobs, cancellationToken); + + await RefreshSearchParameterCache(cancellationToken); } catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) { @@ -162,6 +169,30 @@ public async Task ExecuteAsync(JobInfo jobInfo, CancellationToken cancel return JsonConvert.SerializeObject(_currentResult); } + private async Task RefreshSearchParameterCache(CancellationToken cancellationToken) + { + try + { + _logger.LogJobInformation(_jobInfo, "Performing 
full SearchParameter database refresh and hash recalculation for reindex job."); + + // Use the enhanced method with forceFullRefresh flag + await _searchParameterOperations.GetAndApplySearchParameterUpdates(cancellationToken, forceFullRefresh: true); + + // Update the reindex job record with the latest hash map + _reindexJobRecord.ResourceTypeSearchParameterHashMap = _searchParameterDefinitionManager.SearchParameterHashMap; + + _logger.LogJobInformation( + _jobInfo, + "Completed full SearchParameter refresh. Hash map updated with {ResourceTypeCount} resource types.", + _reindexJobRecord.ResourceTypeSearchParameterHashMap.Count); + } + catch (Exception ex) + { + _logger.LogJobError(ex, _jobInfo, "Failed to refresh SearchParameter cache."); + throw; + } + } + private async Task> CreateReindexProcessingJobsAsync(CancellationToken cancellationToken) { // Build queries based on new search params @@ -169,16 +200,12 @@ private async Task> CreateReindexProcessingJobsAsync(Cancell List validStatus = new List() { SearchParameterStatus.Supported, SearchParameterStatus.PendingDelete, SearchParameterStatus.PendingDisable }; _initialSearchParamStatusCollection = await _searchParameterStatusManager.GetAllSearchParameterStatus(cancellationToken); - // Create a dictionary for efficient O(1) lookups by URI - var searchParamStatusByUri = _initialSearchParamStatusCollection.ToDictionary( - s => s.Uri.ToString(), - s => s.Status, - StringComparer.OrdinalIgnoreCase); - - var validUris = searchParamStatusByUri - .Where(s => validStatus.Contains(s.Value)) - .Select(s => s.Key) - .ToHashSet(StringComparer.OrdinalIgnoreCase); + // Get all URIs that have at least one entry with a valid status + // This handles case-variant duplicates naturally + var validUris = _initialSearchParamStatusCollection + .Where(s => validStatus.Contains(s.Status)) + .Select(s => s.Uri.ToString()) + .ToHashSet(); // Filter to only those search parameters with valid status var possibleNotYetIndexedParams = 
_searchParameterDefinitionManager.AllSearchParameters @@ -252,6 +279,42 @@ private async Task> CreateReindexProcessingJobsAsync(Cancell .Select(kvp => kvp.Key) .ToList(); + // Confirm counts for range by not ignoring hash this time incase it's 0 + // Because we ignore hash to get full range set for initial count, we need to double-check counts here + foreach (var resourceCount in _reindexJobRecord.ResourceCounts) + { + var resourceType = resourceCount.Key; + var resourceCountValue = resourceCount.Value; + var startResourceSurrogateId = resourceCountValue.StartResourceSurrogateId; + var endResourceSurrogateId = resourceCountValue.EndResourceSurrogateId; + var count = resourceCountValue.Count; + + var queryForCount = new ReindexJobQueryStatus(resourceType, continuationToken: null) + { + LastModified = Clock.UtcNow, + Status = OperationStatus.Queued, + StartResourceSurrogateId = startResourceSurrogateId, + EndResourceSurrogateId = endResourceSurrogateId, + }; + + SearchResult countOnlyResults = await GetResourceCountForQueryAsync(queryForCount, countOnly: true, false, _cancellationToken); + + // Check if the result has no records and add to zero-count list + if (countOnlyResults?.TotalCount == 0) + { + if (!resourceTypesWithZeroCount.Contains(resourceType)) + { + resourceTypesWithZeroCount.Add(resourceType); + + // subtract this count from JobRecordCount + _reindexJobRecord.Count -= resourceCountValue.Count; + + // Update the ResourceCounts entry to reflect zero count + resourceCountValue.Count = 0; + } + } + } + if (resourceTypesWithZeroCount.Any()) { _logger.LogJobInformation( @@ -285,6 +348,9 @@ private async Task> CreateReindexProcessingJobsAsync(Cancell // Update the SearchParameterStatus to Enabled so they can be used once data is loaded await UpdateSearchParameterStatus(null, zeroCountParams.Select(p => p.Url.ToString()).ToList(), cancellationToken); + // Attempt to get and apply the latest search parameter updates + await 
RefreshSearchParameterCache(cancellationToken); + _logger.LogJobInformation( _jobInfo, "Successfully updated status for {Count} search parameter(s) with zero-count resource types", @@ -340,6 +406,11 @@ private void AddErrorResult(string severity, string issueType, string message) private async Task> EnqueueQueryProcessingJobsAsync(CancellationToken cancellationToken) { + if (cancellationToken.IsCancellationRequested || _jobInfo.CancelRequested) + { + throw new OperationCanceledException("Reindex operation cancelled by customer."); + } + var resourcesPerJob = (int)_reindexJobRecord.MaximumNumberOfResourcesPerQuery; var definitions = new List(); @@ -545,7 +616,7 @@ private async Task GetResourceCountForQueryAsync(ReindexJobQuerySt } string searchParameterHash = string.Empty; - _reindexJobRecord.ResourceTypeSearchParameterHashMap.TryGetValue(queryStatus.ResourceType, out searchParameterHash); + searchParameterHash = GetHashMapByResourceType(queryStatus.ResourceType); // Ensure searchParameterHash is never null - for Cosmos DB scenarios, this will be empty string searchParameterHash ??= string.Empty; @@ -672,7 +743,7 @@ private string GetHashMapByResourceType(string resourceType) private bool CheckJobRecordForAnyWork() { - return _reindexJobRecord.Count > 0 || _reindexJobRecord.ResourceCounts.Any(e => e.Value.Count <= 0 && e.Value.StartResourceSurrogateId > 0); + return _reindexJobRecord.Count > 0 || _reindexJobRecord.ResourceCounts.Any(e => e.Value.Count > 0 && e.Value.StartResourceSurrogateId > 0); } private void LogReindexJobRecordErrorMessage() @@ -792,7 +863,15 @@ private async Task CheckForCompletionAsync(List jobInfos, CancellationT // Send heartbeat less frequently when stable if (unchangedCount <= MAX_UNCHANGED_CYCLES) { - await _queueClient.PutJobHeartbeatAsync(_jobInfo, cancellationToken); + try + { + await _queueClient.PutJobHeartbeatAsync(_jobInfo, cancellationToken); + } + catch (JobConflictException ex) + { + // Log but don't fail - heartbeat conflicts 
are acceptable + _logger.LogJobWarning(ex, _jobInfo, "Heartbeat conflict - another worker updated the job"); + } } } catch (OperationCanceledException) when (!cancellationToken.IsCancellationRequested) diff --git a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexProcessingJob.cs b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexProcessingJob.cs index a4a17d1f4a..da48e2c9b0 100644 --- a/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexProcessingJob.cs +++ b/src/Microsoft.Health.Fhir.Core/Features/Operations/Reindex/ReindexProcessingJob.cs @@ -4,10 +4,12 @@ // ------------------------------------------------------------------------------------------------- using System; +using System.ClientModel; using System.Collections.Generic; using System.Linq; using System.Net; using System.Security.Cryptography; +using System.Text; using System.Threading; using System.Threading.Tasks; using EnsureThat; @@ -16,6 +18,7 @@ using Microsoft.Health.Extensions.DependencyInjection; using Microsoft.Health.Fhir.Core.Exceptions; using Microsoft.Health.Fhir.Core.Extensions; +using Microsoft.Health.Fhir.Core.Features.Operations.Reindex.Models; using Microsoft.Health.Fhir.Core.Features.Persistence; using Microsoft.Health.Fhir.Core.Features.Search; using Microsoft.Health.Fhir.Core.Features.Search.Parameters; @@ -71,14 +74,14 @@ public async Task ExecuteAsync(JobInfo jobInfo, CancellationToken cancel _reindexProcessingJobDefinition = DeserializeJobDefinition(jobInfo); _reindexProcessingJobResult = new ReindexProcessingJobResult(); - ValidateSearchParametersHash(_jobInfo, _reindexProcessingJobDefinition, cancellationToken); + await ValidateSearchParametersHashAsync(_jobInfo, _reindexProcessingJobDefinition, cancellationToken); await ProcessQueryAsync(cancellationToken); return JsonConvert.SerializeObject(_reindexProcessingJobResult); } - private void ValidateSearchParametersHash(JobInfo jobInfo, ReindexProcessingJobDefinition jobDefinition, 
CancellationToken cancellationToken) + private async Task ValidateSearchParametersHashAsync(JobInfo jobInfo, ReindexProcessingJobDefinition jobDefinition, CancellationToken cancellationToken) { string currentResourceTypeHash = _searchParameterOperations.GetResourceTypeSearchParameterHashMap(jobDefinition.ResourceType); @@ -87,34 +90,62 @@ private void ValidateSearchParametersHash(JobInfo jobInfo, ReindexProcessingJobD if (string.IsNullOrEmpty(currentResourceTypeHash) || !string.Equals(currentResourceTypeHash, jobDefinition.ResourceTypeSearchParameterHashMap, StringComparison.Ordinal)) { - _logger.LogJobError( + _logger.LogJobWarning( jobInfo, - "Search parameters for resource type {ResourceType} have changed since the Reindex Job was created. Job definition hash: '{CurrentHash}', In-memory hash: '{JobHash}'.", + "Search parameters for resource type {ResourceType} have changed since the Reindex Job was created. Job definition hash: '{JobHash}', In-memory hash: '{CurrentHash}'. Attempting to refresh search parameters.", jobDefinition.ResourceType, jobDefinition.ResourceTypeSearchParameterHashMap, currentResourceTypeHash); - string message = "Search Parameter hash does not match. 
Resubmit reindex job to try again."; + // Attempt to get and apply the latest search parameter updates + await _searchParameterOperations.GetAndApplySearchParameterUpdates(cancellationToken, forceFullRefresh: true); - // Create error object to provide structured error information - var errorObject = new + // Re-check the hash after updating + currentResourceTypeHash = _searchParameterOperations.GetResourceTypeSearchParameterHashMap(jobDefinition.ResourceType); + + // If the hash still doesn't match after the update, fail the job + if (string.IsNullOrEmpty(currentResourceTypeHash) || !string.Equals(currentResourceTypeHash, jobDefinition.ResourceTypeSearchParameterHashMap, StringComparison.Ordinal)) { - message = message, - resourceType = jobDefinition.ResourceType, - jobDefinitionHash = jobDefinition.ResourceTypeSearchParameterHashMap, - currentHash = currentResourceTypeHash, - jobId = jobInfo.Id, - groupId = jobInfo.GroupId, - }; + _logger.LogJobError( + jobInfo, + "Search parameters for resource type {ResourceType} still do not match after refresh. Job definition hash: '{JobHash}', In-memory hash: '{CurrentHash}'.", + jobDefinition.ResourceType, + jobDefinition.ResourceTypeSearchParameterHashMap, + currentResourceTypeHash); + + string message = "Search Parameter hash does not match. 
Resubmit reindex job to try again."; + + // Create error object to provide structured error information + var errorObject = new + { + message = message, + resourceType = jobDefinition.ResourceType, + jobDefinitionHash = jobDefinition.ResourceTypeSearchParameterHashMap, + currentHash = currentResourceTypeHash, + jobId = jobInfo.Id, + groupId = jobInfo.GroupId, + }; - _reindexProcessingJobResult.FailedResourceCount = jobDefinition.ResourceCount.Count; + _reindexProcessingJobResult.FailedResourceCount = jobDefinition.ResourceCount.Count; - throw new ReindexProcessingJobSoftException(message, errorObject, isCustomerCaused: true); + throw new ReindexProcessingJobSoftException(message, errorObject, isCustomerCaused: true); + } + else + { + _logger.LogJobInformation( + jobInfo, + "Search parameters for resource type {ResourceType} successfully refreshed. Hash now matches: '{CurrentHash}'.", + jobDefinition.ResourceType, + currentResourceTypeHash); + } } } private async Task GetResourcesToReindexAsync(SearchResultReindex searchResultReindex, CancellationToken cancellationToken) { + string searchParameterHash = _reindexProcessingJobDefinition.ResourceTypeSearchParameterHashMap; + searchParameterHash ??= string.Empty; + var queryParametersList = new List>() { Tuple.Create(KnownQueryParameterNames.Type, _reindexProcessingJobDefinition.ResourceType), @@ -122,30 +153,36 @@ private async Task GetResourcesToReindexAsync(SearchResultReindex if (searchResultReindex != null) { - // Always use the StartResourceSurrogateId for the start of the range - // and the ResourceCount.EndResourceSurrogateId for the end. The sql will determine - // how many resources to actually return based on the configured maximumNumberOfResourcesPerQuery. 
- // When this function returns, it knows what the next starting value to use in - // searching for the next block of results and will use that as the queryStatus starting point - queryParametersList.AddRange(new[] + // If we have SurrogateId range, we simply use those and ignore search parameter hash + // Otherwise, it's cosmos DB and we must use it and ensure we pass MaximumNumberOfResourcesPerQuery so we get expected count returned. + if (searchResultReindex.StartResourceSurrogateId > 0 && searchResultReindex.EndResourceSurrogateId > 0) { - // This EndResourceSurrogateId is only needed because of the way the sql is written. It is not accurate initially. - Tuple.Create(KnownQueryParameterNames.EndSurrogateId, searchResultReindex.EndResourceSurrogateId.ToString()), - Tuple.Create(KnownQueryParameterNames.StartSurrogateId, searchResultReindex.StartResourceSurrogateId.ToString()), - Tuple.Create(KnownQueryParameterNames.GlobalEndSurrogateId, "0"), - }); - - queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.IgnoreSearchParamHash, "true")); - } + queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.IgnoreSearchParamHash, "true")); + + // Always use the StartResourceSurrogateId for the start of the range + // and the ResourceCount.EndResourceSurrogateId for the end. The sql will determine + // how many resources to actually return based on the configured maximumNumberOfResourcesPerQuery. + // When this function returns, it knows what the next starting value to use in + // searching for the next block of results and will use that as the queryStatus starting point + queryParametersList.AddRange(new[] + { + // This EndResourceSurrogateId is only needed because of the way the sql is written. It is not accurate initially. 
+ Tuple.Create(KnownQueryParameterNames.StartSurrogateId, searchResultReindex.StartResourceSurrogateId.ToString()), + Tuple.Create(KnownQueryParameterNames.EndSurrogateId, searchResultReindex.EndResourceSurrogateId.ToString()), + Tuple.Create(KnownQueryParameterNames.GlobalEndSurrogateId, "0"), + }); + } + else + { + queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.Count, _reindexProcessingJobDefinition.MaximumNumberOfResourcesPerQuery.ToString())); + } - if (_reindexProcessingJobDefinition.ResourceCount.ContinuationToken != null) - { - queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.ContinuationToken, _reindexProcessingJobDefinition.ResourceCount.ContinuationToken)); + if (searchResultReindex.ContinuationToken != null) + { + queryParametersList.Add(Tuple.Create(KnownQueryParameterNames.ContinuationToken, searchResultReindex.ContinuationToken)); + } } - string searchParameterHash = _reindexProcessingJobDefinition.ResourceTypeSearchParameterHashMap; - searchParameterHash ??= string.Empty; - using (IScoped searchService = _searchServiceFactory()) { try @@ -168,7 +205,7 @@ private void LogReindexProcessingJobErrorMessage() { var ser = JsonConvert.SerializeObject(_reindexProcessingJobDefinition); var result = JsonConvert.SerializeObject(_reindexProcessingJobResult); - _logger.LogJobInformation(_jobInfo, $"ReindexProcessingJob Error: Current ReindexJobRecord: {ser}. ReindexProcessing Job Result: {result}."); + _logger.LogJobInformation(_jobInfo, "ReindexProcessingJob Error: Current ReindexJobRecord: {JobDefinition}. 
ReindexProcessing Job Result: {JobResult}.", ser, result); } private async Task ProcessQueryAsync(CancellationToken cancellationToken) @@ -178,6 +215,11 @@ private async Task ProcessQueryAsync(CancellationToken cancellationToken) throw new InvalidOperationException("_reindexProcessingJobDefinition cannot be null during processing."); } + if (cancellationToken.IsCancellationRequested) + { + return; + } + long resourceCount = 0; try { @@ -187,9 +229,9 @@ private async Task ProcessQueryAsync(CancellationToken cancellationToken) throw new OperationFailedException($"Search service returned null search result.", HttpStatusCode.InternalServerError); } - resourceCount = result.TotalCount ?? 0; - _reindexProcessingJobResult.SearchParameterUrls = _reindexProcessingJobDefinition.SearchParameterUrls; + resourceCount = result.TotalCount ?? result.Results?.Count() ?? 0; _jobInfo.Data = resourceCount; + _reindexProcessingJobResult.SearchParameterUrls = _reindexProcessingJobDefinition.SearchParameterUrls; if (resourceCount > _reindexProcessingJobDefinition.MaximumNumberOfResourcesPerQuery) { @@ -205,12 +247,52 @@ private async Task ProcessQueryAsync(CancellationToken cancellationToken) { _reindexProcessingJobDefinition.ResourceType, _reindexProcessingJobDefinition.ResourceTypeSearchParameterHashMap }, }; - await _timeoutRetries.ExecuteAsync(async () => await ProcessSearchResultsAsync(result, dictionary, (int)_reindexProcessingJobDefinition.MaximumNumberOfResourcesPerWrite, cancellationToken)); + // Process results in a loop to handle continuation tokens + do + { + await _timeoutRetries.ExecuteAsync(async () => await ProcessSearchResultsAsync(result, dictionary, (int)_reindexProcessingJobDefinition.MaximumNumberOfResourcesPerWrite, cancellationToken)); + + resourceCount += result.TotalCount ?? result.Results?.Count() ?? 
0; + _jobInfo.Data = resourceCount; + _reindexProcessingJobResult.SucceededResourceCount += (long)result?.Results.Count(); + _jobInfo.Data = _reindexProcessingJobResult.SucceededResourceCount; + _logger.LogJobInformation(_jobInfo, "Reindex processing batch complete. Current number of resources indexed by this job: {Progress}.", _reindexProcessingJobResult.SucceededResourceCount); + + // Check if there's a continuation token to fetch more results + if (!string.IsNullOrEmpty(result.ContinuationToken) && !cancellationToken.IsCancellationRequested) + { + _logger.LogJobInformation(_jobInfo, "Continuation token found. Fetching next batch of resources for reindexing."); + + // Clear the previous continuation token first to avoid conflicts + _reindexProcessingJobDefinition.ResourceCount.ContinuationToken = null; + + // Create a new SearchResultReindex with the continuation token for the next query + var nextSearchResultReindex = new SearchResultReindex(_reindexProcessingJobDefinition.ResourceCount.Count) + { + StartResourceSurrogateId = _reindexProcessingJobDefinition.ResourceCount.StartResourceSurrogateId, + EndResourceSurrogateId = _reindexProcessingJobDefinition.ResourceCount.EndResourceSurrogateId, + ContinuationToken = Convert.ToBase64String(Encoding.UTF8.GetBytes(result.ContinuationToken)), + }; + + // Fetch the next batch of results + result = await _timeoutRetries.ExecuteAsync(async () => await GetResourcesToReindexAsync(nextSearchResultReindex, cancellationToken)); + + if (result == null) + { + throw new OperationFailedException($"Search service returned null search result during continuation.", HttpStatusCode.InternalServerError); + } + } + else + { + // No more continuation token, exit the loop + result = null; + } + } + while (result != null && !cancellationToken.IsCancellationRequested); if (!cancellationToken.IsCancellationRequested) { - _reindexProcessingJobResult.SucceededResourceCount += (long)result?.Results.Count(); - _logger.LogJobInformation(_jobInfo, 
"Reindex processing job complete. Current number of resources indexed by this job: {Progress}.", _reindexProcessingJobResult.SucceededResourceCount); + _logger.LogJobInformation(_jobInfo, "Reindex processing job complete. Total number of resources indexed by this job: {Progress}.", _reindexProcessingJobResult.SucceededResourceCount); } } catch (SqlException sqlEx) @@ -253,14 +335,14 @@ private async Task ProcessQueryAsync(CancellationToken cancellationToken) _logger.LogJobError(ex, _jobInfo, "Reindex processing job error occurred. Is FhirException: 'true'."); LogReindexProcessingJobErrorMessage(); _reindexProcessingJobResult.Error = ex.Message; - _reindexProcessingJobResult.FailedResourceCount = resourceCount; + _reindexProcessingJobResult.FailedResourceCount = _reindexProcessingJobDefinition.ResourceCount.Count - _reindexProcessingJobResult.SucceededResourceCount; } catch (Exception ex) { _logger.LogJobError(ex, _jobInfo, "Reindex processing job error occurred. Is FhirException: 'false'."); LogReindexProcessingJobErrorMessage(); _reindexProcessingJobResult.Error = ex.Message; - _reindexProcessingJobResult.FailedResourceCount = resourceCount; + _reindexProcessingJobResult.FailedResourceCount = _reindexProcessingJobDefinition.ResourceCount.Count - _reindexProcessingJobResult.SucceededResourceCount; } } diff --git a/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/ISearchParameterOperations.cs b/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/ISearchParameterOperations.cs index 4c450db375..bb614bb229 100644 --- a/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/ISearchParameterOperations.cs +++ b/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/ISearchParameterOperations.cs @@ -24,8 +24,9 @@ public interface ISearchParameterOperations /// It should also be called when a user starts a reindex job /// /// Cancellation token + /// When true, forces a full refresh from database instead of incremental updates /// A task. 
- Task GetAndApplySearchParameterUpdates(CancellationToken cancellationToken); + Task GetAndApplySearchParameterUpdates(CancellationToken cancellationToken, bool forceFullRefresh = false); string GetResourceTypeSearchParameterHashMap(string resourceType); } diff --git a/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/SearchParameterOperations.cs b/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/SearchParameterOperations.cs index 196dbeb518..84af81b317 100644 --- a/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/SearchParameterOperations.cs +++ b/src/Microsoft.Health.Fhir.Core/Features/Search/Parameters/SearchParameterOperations.cs @@ -285,10 +285,22 @@ await SearchParameterConcurrencyManager.ExecuteWithLockAsync( /// It should also be called when a user starts a reindex job /// /// Cancellation token + /// When true, forces a full refresh from database instead of incremental updates /// A task. - public async Task GetAndApplySearchParameterUpdates(CancellationToken cancellationToken = default) + public async Task GetAndApplySearchParameterUpdates(CancellationToken cancellationToken = default, bool forceFullRefresh = false) { - var updatedSearchParameterStatus = await _searchParameterStatusManager.GetSearchParameterStatusUpdates(cancellationToken); + IReadOnlyCollection updatedSearchParameterStatus; + + if (forceFullRefresh) + { + _logger.LogInformation("Performing full SearchParameter database refresh."); + updatedSearchParameterStatus = await _searchParameterStatusManager.GetAllSearchParameterStatus(cancellationToken); + _logger.LogInformation("Retrieved {Count} search parameters from database for full refresh.", updatedSearchParameterStatus.Count); + } + else + { + updatedSearchParameterStatus = await _searchParameterStatusManager.GetSearchParameterStatusUpdates(cancellationToken); + } // First process any deletes or disables, then we will do any adds or updates // this way any deleted or params which might have the same code or name 
as a new @@ -303,12 +315,28 @@ public async Task GetAndApplySearchParameterUpdates(CancellationToken cancellati _searchParameterDefinitionManager.UpdateSearchParameterStatus(searchParam.Uri.OriginalString, SearchParameterStatus.PendingDelete); } + // Get all URLs that need to be fetched + var urlsToFetch = updatedSearchParameterStatus + .Where(p => p.Status == SearchParameterStatus.Enabled || p.Status == SearchParameterStatus.Supported) + .Select(p => p.Uri.OriginalString) + .ToList(); + + if (!urlsToFetch.Any()) + { + // No parameters to add, but still apply status updates + await _searchParameterStatusManager.ApplySearchParameterStatus( + updatedSearchParameterStatus, + cancellationToken); + return; + } + + // Batch fetch all SearchParameter resources in one call + var searchParamResources = await GetSearchParametersByUrls(urlsToFetch, cancellationToken); + var paramsToAdd = new List(); foreach (var searchParam in updatedSearchParameterStatus.Where(p => p.Status == SearchParameterStatus.Enabled || p.Status == SearchParameterStatus.Supported)) { - var searchParamResource = await GetSearchParameterByUrl(searchParam.Uri.OriginalString, cancellationToken); - - if (searchParamResource == null) + if (!searchParamResources.TryGetValue(searchParam.Uri.OriginalString, out var searchParamResource)) { _logger.LogInformation( "Updated SearchParameter status found for SearchParameter: {Url}, but did not find any SearchParameter resources when querying for this url.", @@ -332,7 +360,6 @@ public async Task GetAndApplySearchParameterUpdates(CancellationToken cancellati } // Once added to the definition manager we can update their status - await _searchParameterStatusManager.ApplySearchParameterStatus( updatedSearchParameterStatus, cancellationToken); @@ -371,5 +398,53 @@ private async Task GetSearchParameterByUrl(string url, Cancellati return null; } + + private async Task> GetSearchParametersByUrls(List urls, CancellationToken cancellationToken) + { + if (!urls.Any()) + { + 
return new Dictionary(); + } + + const int chunkSize = 1500; + var searchParametersByUrl = new Dictionary(); + + // Process URLs in chunks to avoid SQL query limitations + for (int i = 0; i < urls.Count; i += chunkSize) + { + var urlChunk = urls.Skip(i).Take(chunkSize).ToList(); + + using IScoped search = _searchServiceFactory.Invoke(); + + // Build a query like: url=url1,url2,url3 + var urlQueryValue = string.Join(",", urlChunk); + var queryParams = new List> + { + new Tuple("url", urlQueryValue), + }; + + var result = await search.Value.SearchAsync(KnownResourceTypes.SearchParameter, queryParams, cancellationToken); + + foreach (var searchResultEntry in result.Results) + { + var typedElement = searchResultEntry.Resource.RawResource.ToITypedElement(_modelInfoProvider); + var url = typedElement.GetStringScalar("url"); + + if (!string.IsNullOrEmpty(url)) + { + if (!searchParametersByUrl.ContainsKey(url)) + { + searchParametersByUrl[url] = typedElement; + } + else + { + _logger.LogWarning("More than one SearchParameter found with url {Url}. 
Using the first one found.", url); + } + } + } + } + + return searchParametersByUrl; + } } } diff --git a/src/Microsoft.Health.Fhir.Core/Features/Search/Registry/SearchParameterCacheRefreshBackgroundService.cs b/src/Microsoft.Health.Fhir.Core/Features/Search/Registry/SearchParameterCacheRefreshBackgroundService.cs index 403f9adcb7..8e501e14a1 100644 --- a/src/Microsoft.Health.Fhir.Core/Features/Search/Registry/SearchParameterCacheRefreshBackgroundService.cs +++ b/src/Microsoft.Health.Fhir.Core/Features/Search/Registry/SearchParameterCacheRefreshBackgroundService.cs @@ -4,6 +4,7 @@ // ------------------------------------------------------------------------------------------------- using System; +using System.Security.Cryptography; using System.Threading; using System.Threading.Tasks; using EnsureThat; @@ -32,6 +33,7 @@ public class SearchParameterCacheRefreshBackgroundService : BackgroundService, I private readonly SemaphoreSlim _refreshSemaphore; private bool _isInitialized; private CancellationToken _stoppingToken; + private DateTime _lastForceRefreshTime; public SearchParameterCacheRefreshBackgroundService( ISearchParameterStatusManager searchParameterStatusManager, @@ -55,6 +57,12 @@ public SearchParameterCacheRefreshBackgroundService( // Create semaphore to prevent concurrent refresh operations (max 1 concurrent operation) _refreshSemaphore = new SemaphoreSlim(1, 1); + + // Initialize last force refresh time to now with random offset (0-5 minutes) to stagger force refreshes across instances + // We use UtcNow instead of MinValue because SearchParameters are already loaded during initialization, + // so there's no need to do a full refresh on first timer execution + var randomOffsetMinutes = RandomNumberGenerator.GetInt32(0, 6); // 0-5 minutes + _lastForceRefreshTime = DateTime.UtcNow.AddMinutes(randomOffsetMinutes); } protected override async Task ExecuteAsync(CancellationToken stoppingToken) @@ -116,34 +124,67 @@ private async void OnRefreshTimer(object state) 
try { - _logger.LogDebug("Starting SearchParameter cache freshness check."); - - // First check if cache is stale using efficient database query - bool cacheIsStale = await _searchParameterStatusManager.EnsureCacheFreshnessAsync(_stoppingToken); + var timeSinceLastForceRefresh = DateTime.UtcNow - _lastForceRefreshTime; + var shouldForceRefresh = timeSinceLastForceRefresh >= TimeSpan.FromHours(1); - // Check again if shutdown was requested after the async call - if (_stoppingToken.IsCancellationRequested) + if (shouldForceRefresh) { - _logger.LogDebug("SearchParameter cache refresh was cancelled during freshness check."); - return; - } + _logger.LogInformation("Performing full SearchParameter database refresh (last force refresh: {TimeSinceLastRefresh} ago).", timeSinceLastForceRefresh); - if (cacheIsStale) - { - _logger.LogInformation("SearchParameter cache is stale. Performing full SearchParameter synchronization."); + // Get ALL search parameters from database to ensure complete synchronization + var allSearchParameterStatus = await _searchParameterStatusManager.GetAllSearchParameterStatus(_stoppingToken); + + // Check if shutdown was requested after the async call + if (_stoppingToken.IsCancellationRequested) + { + _logger.LogDebug("SearchParameter cache refresh was cancelled during database fetch."); + return; + } + + _logger.LogInformation("Retrieved {Count} search parameters from database for full refresh.", allSearchParameterStatus.Count); - // Cache is stale - perform full SearchParameter lifecycle management - await _searchParameterOperations.GetAndApplySearchParameterUpdates(_stoppingToken); + // Apply all search parameters (this will recalculate the hash) + await _searchParameterStatusManager.ApplySearchParameterStatus(allSearchParameterStatus, _stoppingToken); + + _lastForceRefreshTime = DateTime.UtcNow; // Check one more time if shutdown was requested after the async call if (!_stoppingToken.IsCancellationRequested) { - 
_logger.LogInformation("SearchParameter cache refresh completed successfully."); + _logger.LogInformation("SearchParameter full database refresh completed successfully."); } } else { - _logger.LogDebug("SearchParameter cache is up to date. No refresh needed."); + _logger.LogDebug("Starting SearchParameter cache freshness check (last force refresh: {TimeSinceLastRefresh} ago).", timeSinceLastForceRefresh); + + // Check if cache is stale using efficient database query + bool cacheIsStale = await _searchParameterStatusManager.EnsureCacheFreshnessAsync(_stoppingToken); + + // Check again if shutdown was requested after the async call + if (_stoppingToken.IsCancellationRequested) + { + _logger.LogDebug("SearchParameter cache refresh was cancelled during freshness check."); + return; + } + + if (cacheIsStale) + { + _logger.LogInformation("SearchParameter cache is stale. Performing incremental SearchParameter synchronization."); + + // Cache is stale - perform incremental SearchParameter lifecycle management + await _searchParameterOperations.GetAndApplySearchParameterUpdates(_stoppingToken); + + // Check one more time if shutdown was requested after the async call + if (!_stoppingToken.IsCancellationRequested) + { + _logger.LogInformation("SearchParameter incremental refresh completed successfully."); + } + } + else + { + _logger.LogDebug("SearchParameter cache is up to date. 
No refresh needed."); + } } } catch (OperationCanceledException) @@ -176,8 +217,25 @@ public async Task Handle(SearchParametersInitializedNotification notification, C // Only start the timer if the service hasn't been cancelled if (!_stoppingToken.IsCancellationRequested) { - // Start the timer now that search parameters are initialized - _refreshTimer.Change(TimeSpan.Zero, _refreshInterval); + // Add random initial delay to stagger first refresh across instances + // This prevents thundering herd problem when multiple pods start simultaneously + var maxInitialDelaySeconds = Math.Max(0, _coreFeatureConfiguration.Value.SearchParameterCacheRefreshMaxInitialDelaySeconds); + var randomInitialDelaySeconds = maxInitialDelaySeconds > 0 + ? RandomNumberGenerator.GetInt32(0, maxInitialDelaySeconds + 1) + : 0; + var initialDelay = TimeSpan.FromSeconds(randomInitialDelaySeconds); + + if (randomInitialDelaySeconds > 0) + { + _logger.LogInformation("Starting cache refresh timer with {InitialDelay} initial delay to stagger instance startup.", initialDelay); + } + else + { + _logger.LogInformation("Starting cache refresh timer immediately (no initial delay configured)."); + } + + // Start the timer with random initial delay, then use regular refresh interval + _refreshTimer.Change(initialDelay, _refreshInterval); } await Task.CompletedTask; diff --git a/src/Microsoft.Health.Fhir.Core/Features/Search/SearchResultReindex.cs b/src/Microsoft.Health.Fhir.Core/Features/Search/SearchResultReindex.cs index 97ce29bbf0..8a901bbba6 100644 --- a/src/Microsoft.Health.Fhir.Core/Features/Search/SearchResultReindex.cs +++ b/src/Microsoft.Health.Fhir.Core/Features/Search/SearchResultReindex.cs @@ -39,7 +39,6 @@ public SearchResultReindex(long count) /// /// The continuation token for the search results /// - [JsonIgnore] public string ContinuationToken { get; set; } /// diff --git a/src/Microsoft.Health.Fhir.CosmosDb/Features/Search/Queries/QueryBuilder.cs 
b/src/Microsoft.Health.Fhir.CosmosDb/Features/Search/Queries/QueryBuilder.cs index 07d7b97359..064f9da5cc 100644 --- a/src/Microsoft.Health.Fhir.CosmosDb/Features/Search/Queries/QueryBuilder.cs +++ b/src/Microsoft.Health.Fhir.CosmosDb/Features/Search/Queries/QueryBuilder.cs @@ -162,6 +162,11 @@ public QueryDefinition GenerateReindexSql(SearchOptions searchOptions, string se searchOptions.Expression.AcceptVisitor(expressionQueryBuilder); } + AppendFilterCondition( + "AND", + true, + (KnownResourceWrapperProperties.IsHistory, false)); + AppendFilterCondition( "AND", true, diff --git a/src/Microsoft.Health.Fhir.CosmosDb/Features/Storage/Queues/CosmosQueueClient.cs b/src/Microsoft.Health.Fhir.CosmosDb/Features/Storage/Queues/CosmosQueueClient.cs index dbcc053328..15d96ea18a 100644 --- a/src/Microsoft.Health.Fhir.CosmosDb/Features/Storage/Queues/CosmosQueueClient.cs +++ b/src/Microsoft.Health.Fhir.CosmosDb/Features/Storage/Queues/CosmosQueueClient.cs @@ -63,6 +63,29 @@ public async Task> EnqueueAsync( { EnsureArg.IsNotNull(definitions, nameof(definitions)); + // Check if the orchestrator job (where JobId == GroupId) has been cancelled + if (groupId.HasValue) + { + QueryDefinition orchestratorJobSpec = new QueryDefinition(@"SELECT VALUE c FROM root c + JOIN d in c.definitions + WHERE c.queueType = @queueType + AND c.groupId = @groupId + AND d.jobId = @groupId + AND d.cancelRequested = true") + .WithParameter("@queueType", queueType) + .WithParameter("@groupId", groupId.Value.ToString()); + + IReadOnlyList cancelledOrchestratorJobs = await ExecuteQueryAsync(orchestratorJobSpec, 1, queueType, cancellationToken); + + if (cancelledOrchestratorJobs.Any()) + { + _logger.LogWarning( + "Attempting to enqueue jobs for group {GroupId} but orchestrator job has been cancelled.", + groupId.Value); + throw new OperationCanceledException($"Job group {groupId.Value} has been cancelled."); + } + } + var id = GetLongId(); var jobInfos = new List(); diff --git 
a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/ConvertData/ConvertDataRequestHandlerTests.cs b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/ConvertData/ConvertDataRequestHandlerTests.cs index 5fac204917..d80b16da17 100644 --- a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/ConvertData/ConvertDataRequestHandlerTests.cs +++ b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/ConvertData/ConvertDataRequestHandlerTests.cs @@ -10,6 +10,7 @@ using Microsoft.Extensions.Logging.Abstractions; using Microsoft.Extensions.Options; using Microsoft.Health.Core.Features.Security.Authorization; +using Microsoft.Health.Extensions.Xunit; using Microsoft.Health.Fhir.Core.Configs; using Microsoft.Health.Fhir.Core.Features.Operations.ConvertData; using Microsoft.Health.Fhir.Core.Features.Security; @@ -29,7 +30,7 @@ namespace Microsoft.Health.Fhir.Shared.Core.UnitTests.Features.Operations.Conver [Trait(Traits.Category, Categories.Operations)] public class ConvertDataRequestHandlerTests { - [Fact] + [RetryFact(MaxRetries = 3, DelayBetweenRetriesMs = 5000)] public async Task GivenAHl7v2ConvertRequest_WhenConvertData_CorrectResponseShouldReturn() { var convertDataRequestHandler = GetRequestHandler(); diff --git a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Import/ImportResourceParserTests.cs b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Import/ImportResourceParserTests.cs index 403a087a5e..4b480d5c20 100644 --- a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Import/ImportResourceParserTests.cs +++ b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Import/ImportResourceParserTests.cs @@ -8,6 +8,7 @@ using Hl7.Fhir.Serialization; using Microsoft.Health.Core.Features.Context; using Microsoft.Health.Core.Features.Security; +using Microsoft.Health.Extensions.Xunit; using Microsoft.Health.Fhir.Core.Features.Compartment; 
using Microsoft.Health.Fhir.Core.Features.Context; using Microsoft.Health.Fhir.Core.Features.Definition; @@ -54,7 +55,7 @@ public ImportResourceParserTests() _importResourceParser = new(_jsonParser, _wrapperFactory); } - [Fact] + [RetryFact(MaxRetries = 3, DelayBetweenRetriesMs = 5000)] public void GivenImportWithSoftDeletedFile_WhenParsed_DeletedExtensionShouldBeRemoved() { Patient patient = new Patient(); diff --git a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexOrchestratorJobTests.cs b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexOrchestratorJobTests.cs index a01e2b04ee..1430bad5e7 100644 --- a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexOrchestratorJobTests.cs +++ b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexOrchestratorJobTests.cs @@ -1,4 +1,4 @@ -// ------------------------------------------------------------------------------------------------- +// ------------------------------------------------------------------------------------------------- // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. 
// ------------------------------------------------------------------------------------------------- @@ -8,10 +8,12 @@ using System.Collections.ObjectModel; using System.Linq; using System.Threading; -using MediatR; +using System.Threading.Tasks; +using Hl7.Fhir.Rest; using Microsoft.Extensions.Logging.Abstractions; using Microsoft.Extensions.Options; using Microsoft.Health.Fhir.Core.Configs; +using Microsoft.Health.Fhir.Core.Features; using Microsoft.Health.Fhir.Core.Features.Definition; using Microsoft.Health.Fhir.Core.Features.Operations; using Microsoft.Health.Fhir.Core.Features.Operations.Reindex; @@ -23,132 +25,315 @@ using Microsoft.Health.Fhir.Core.Models; using Microsoft.Health.Fhir.Core.Registration; using Microsoft.Health.Fhir.Core.UnitTests.Extensions; -using Microsoft.Health.Fhir.Core.UnitTests.Features.Search; using Microsoft.Health.Fhir.Tests.Common; -using Microsoft.Health.Fhir.Tests.Common.FixtureParameters; using Microsoft.Health.JobManagement; using Microsoft.Health.JobManagement.UnitTests; using Microsoft.Health.Test.Utilities; using Newtonsoft.Json; using NSubstitute; using Xunit; -using Task = System.Threading.Tasks.Task; namespace Microsoft.Health.Fhir.Core.UnitTests.Features.Operations.Reindex { [CollectionDefinition("ReindexOrchestratorJobTests")] [Trait(Traits.OwningTeam, OwningTeam.Fhir)] [Trait(Traits.Category, Categories.IndexAndReindex)] - public class ReindexOrchestratorJobTests : IClassFixture + public class ReindexOrchestratorJobTests { - private const int _mockedSearchCount = 5; - private readonly ISearchService _searchService = Substitute.For(); private readonly CancellationTokenSource _cancellationTokenSource = new CancellationTokenSource(); - private readonly IMediator _mediator = Substitute.For(); - private ISearchParameterStatusManager _searchParameterStatusmanager; - private Func _reindexOrchestratorJobTaskFactory; - private Func _reindexProcessingJobTaskFactory; + private readonly ISearchParameterStatusManager 
_searchParameterStatusManager; + private readonly ISearchParameterDefinitionManager _searchDefinitionManager; private readonly ISearchParameterOperations _searchParameterOperations; - private readonly IQueueClient _queueClient = new TestQueueClient(); - - private ISearchParameterDefinitionManager _searchDefinitionManager; - private CancellationToken _cancellationToken; + private IQueueClient _queueClient; + private readonly CancellationToken _cancellationToken; public ReindexOrchestratorJobTests() { + // Configure cancellation token source with 1 minute timeout + _cancellationTokenSource.CancelAfter(TimeSpan.FromMinutes(1)); _cancellationToken = _cancellationTokenSource.Token; - _searchDefinitionManager = Substitute.For(); - _searchParameterStatusmanager = Substitute.For(); + _searchDefinitionManager = Substitute.For(); + _searchParameterStatusManager = Substitute.For(); _searchParameterOperations = Substitute.For(); - var job = CreateReindexJobRecord(); - List searchParameterInfos = new List() + // Initialize a fresh queue client for each test + _queueClient = new TestQueueClient(); + } + + private void Dispose() + { + // Clean up the queue client if it's a TestQueueClient + if (_queueClient is TestQueueClient testQueueClient) { - new SearchParameterInfo( - "Account", - "status", - ValueSets.SearchParamType.Token, - url: new Uri("http://hl7.org/fhir/SearchParameter/Account-status"), - baseResourceTypes: new List() - { - "Account", - "_count", - "_type", - }) - { - IsSearchable = true, - SearchParameterStatus = SearchParameterStatus.Enabled, - }, - }; - ReadOnlyCollection status = new ReadOnlyCollection(new List() + testQueueClient.ClearJobs(); + } + + _cancellationTokenSource?.Dispose(); + } + + private ReindexOrchestratorJob CreateReindexOrchestratorJob( + IFhirRuntimeConfiguration runtimeConfig = null, + int searchParameterCacheRefreshIntervalSeconds = 1, + int reindexDelayMultiplier = 1) + { + runtimeConfig ??= new AzureHealthDataServicesRuntimeConfiguration(); 
+ + var coreFeatureConfig = Substitute.For>(); + coreFeatureConfig.Value.Returns(new CoreFeatureConfiguration + { + SearchParameterCacheRefreshIntervalSeconds = searchParameterCacheRefreshIntervalSeconds, + }); + + var operationsConfig = Substitute.For>(); + operationsConfig.Value.Returns(new OperationsConfiguration { - new ResourceSearchParameterStatus() + Reindex = new ReindexJobConfiguration { - LastUpdated = DateTime.UtcNow, - Uri = new Uri("http://hl7.org/fhir/SearchParameter/Account-status"), - Status = SearchParameterStatus.Enabled, + ReindexDelayMultiplier = reindexDelayMultiplier, }, }); - // Fix: Properly set up the mock with explicit CancellationToken argument matching - _searchParameterStatusmanager - .GetAllSearchParameterStatus(_cancellationToken) - .Returns(status); + return new ReindexOrchestratorJob( + _queueClient, + () => _searchService.CreateMockScope(), + _searchDefinitionManager, + ModelInfoProvider.Instance, + _searchParameterStatusManager, + _searchParameterOperations, + runtimeConfig, + NullLoggerFactory.Instance, + coreFeatureConfig, + operationsConfig); + } + + private async Task CreateReindexJobRecord( + uint maxResourcePerQuery = 100, + IReadOnlyDictionary paramHashMap = null, + List targetResourceTypes = null) + { + paramHashMap ??= new Dictionary { { "Patient", "patientHash" } }; + targetResourceTypes ??= new List(); + + var jobRecord = new ReindexJobRecord( + paramHashMap, + targetResourceTypes, + new List(), + new List(), + maxResourcePerQuery); + + // Enqueue the orchestrator job through the queue client + var orchestratorJobs = await _queueClient.EnqueueAsync( + (byte)QueueType.Reindex, + new[] { JsonConvert.SerializeObject(jobRecord) }, + groupId: 0, + forceOneActiveJobGroup: false, + _cancellationToken); + + // Return the enqueued job with Running status + var jobInfo = orchestratorJobs.First(); + jobInfo.Status = JobStatus.Running; + + return jobInfo; + } + + private SearchResult CreateSearchResult( + int resourceCount = 1, + 
string continuationToken = null, + long startSurrogateId = 1, + long endSurrogateId = 1000, + string resourceType = "Patient") + { + var resultList = new List(); - _searchDefinitionManager.AllSearchParameters.Returns(searchParameterInfos); + for (var i = 0; i < resourceCount; i++) + { + var wrapper = Substitute.For(); + var propertyInfo = wrapper.GetType().GetProperty("ResourceTypeName"); + propertyInfo?.SetValue(wrapper, resourceType); - IFhirRuntimeConfiguration fhirRuntimeConfiguration = new AzureHealthDataServicesRuntimeConfiguration(); + var entry = new SearchResultEntry(wrapper); + resultList.Add(entry); + } - _reindexOrchestratorJobTaskFactory = () => + var searchResult = new SearchResult(resultList, continuationToken, null, new List>()); + searchResult.MaxResourceSurrogateId = endSurrogateId; + searchResult.TotalCount = resourceCount; + searchResult.ReindexResult = new SearchResultReindex { - // Create a mock CoreFeatureConfiguration for the test - var coreFeatureConfig = Substitute.For>(); - coreFeatureConfig.Value.Returns(new CoreFeatureConfiguration - { - SearchParameterCacheRefreshIntervalSeconds = 1, // Use a short interval for tests - }); + Count = resourceCount, + StartResourceSurrogateId = startSurrogateId, + EndResourceSurrogateId = endSurrogateId, + }; + return searchResult; + } - // Create a mock OperationsConfiguration for the test - var operationsConfig = Substitute.For>(); - operationsConfig.Value.Returns(new OperationsConfiguration - { - Reindex = new ReindexJobConfiguration - { - ReindexDelayMultiplier = 1, // Use a short multiplier for tests - }, - }); + private SearchParameterInfo CreateSearchParameterInfo( + string url = "http://hl7.org/fhir/SearchParameter/Patient-name", + List baseResourceTypes = null, + string resourceType = "Patient") + { + baseResourceTypes ??= new List { resourceType }; - return new ReindexOrchestratorJob( - _queueClient, - () => _searchService.CreateMockScope(), - _searchDefinitionManager, - 
ModelInfoProvider.Instance, - _searchParameterStatusmanager, - _searchParameterOperations, - fhirRuntimeConfiguration, - NullLoggerFactory.Instance, - coreFeatureConfig, - operationsConfig); + return new SearchParameterInfo( + resourceType, + "name", + ValueSets.SearchParamType.String, + url: new Uri(url), + baseResourceTypes: baseResourceTypes) + { + IsSearchable = true, + SearchParameterStatus = SearchParameterStatus.Supported, }; } - [Theory(Skip = "Causing random timeouts")] - [InlineData(DataStore.SqlServer)] - [InlineData(DataStore.CosmosDb)] - public async Task GivenSupportedParams_WhenExecuted_ThenCorrectSearchIsPerformed(DataStore dataStore) + private async Task> WaitForJobsAsync(long groupId, TimeSpan timeout, int expectedMinimumJobs = 1) + { + var startTime = DateTime.UtcNow; + var pollInterval = TimeSpan.FromMilliseconds(100); + + while (DateTime.UtcNow - startTime < timeout) + { + var jobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, groupId, true, _cancellationToken); + var processingJobs = jobs.Where(j => j.Id != groupId).ToList(); + + if (processingJobs.Count >= expectedMinimumJobs) + { + return processingJobs; + } + + await Task.Delay(pollInterval, _cancellationToken); + } + + // Return whatever jobs we have after timeout + var finalJobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, groupId, true, _cancellationToken); + return finalJobs.Where(j => j.Id != groupId).ToList(); + } + + [Fact] + public async Task ExecuteAsync_WhenCancellationRequested_ReturnsJobCancelledResult() { - const int maxNumberOfResourcesPerQuery = 100; - const int startResourceSurrogateId = 1; - const int endResourceSurrogateId = 1000; - const string resourceTypeHash = "accountHash"; + // Arrange + var cancellationTokenSource = new CancellationTokenSource(); + cancellationTokenSource.CancelAfter(1); // Cancel after short delay - _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(resourceTypeHash); + var 
jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(); - IFhirRuntimeConfiguration fhirRuntimeConfiguration = dataStore == DataStore.SqlServer ? - new AzureHealthDataServicesRuntimeConfiguration() : - new AzureApiForFhirRuntimeConfiguration(); + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, cancellationTokenSource.Token); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); + Assert.NotNull(jobResult.Error); + Assert.Contains(jobResult.Error, e => e.Diagnostics.Contains("cancelled")); + } + + [Fact] + public async Task ExecuteAsync_WithExceptionInProcessing_ReturnsErrorResult() + { + // Arrange + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(Task.FromException>( + new InvalidOperationException("Database error"))); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); + Assert.NotNull(jobResult.Error); + Assert.Contains(jobResult.Error, e => e.Severity == OperationOutcomeConstants.IssueSeverity.Error); + } + + [Fact] + public async Task AddErrorResult_AddsErrorToExistingErrors() + { + // Arrange + var initialError = new OperationOutcomeIssue( + OperationOutcomeConstants.IssueSeverity.Information, + OperationOutcomeConstants.IssueType.Informational, + "Initial error"); + + var emptyStatus = new ReadOnlyCollection( + new List()); + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(emptyStatus); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List()); + + var jobInfo = await CreateReindexJobRecord(); + + jobInfo.Result = JsonConvert.SerializeObject(new ReindexOrchestratorJobResult + { + Error 
= new List { initialError }, + }); + + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); + Assert.True(jobResult.Error.Count >= 2, "Should have at least 2 errors (initial + new)"); + } + + [Fact] + public async Task GetDerivedResourceTypes_WithResourceBaseType_ReturnsAllResourceTypes() + { + // Arrange - Create a search parameter that applies to the base Resource type + var searchParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Resource-id", + baseResourceTypes: new List { KnownResourceTypes.Resource }, + resourceType: KnownResourceTypes.Resource); + + var searchParamStatus = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + // Get all resource types from the model info provider + var allResourceTypes = ModelInfoProvider.Instance.GetResourceTypeNames().ToList(); + + // Set up search results to return data for all resource types + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(x => + { + // Parse the resource type from the query parameters + var queryParams = x.ArgAt>>(0); + var resourceTypeParam = queryParams.FirstOrDefault(p => p.Item1 == KnownQueryParameterNames.Type); + var resourceType = resourceTypeParam?.Item2 ?? 
"Patient"; + + return CreateSearchResult( + resourceCount: 10, + resourceType: resourceType); + }); _searchService.GetSurrogateIdRanges( Arg.Any(), @@ -158,262 +343,1692 @@ public async Task GivenSupportedParams_WhenExecuted_ThenCorrectSearchIsPerformed Arg.Any(), Arg.Any(), Arg.Any(), - Arg.Any()) - .Returns(new List<(long, long)> - { - (startResourceSurrogateId, endResourceSurrogateId), - }); + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 10) })); - // Get one search parameter and configure it such that it needs to be reindexed - var param = new SearchParameterInfo( - "Account", - "status", - ValueSets.SearchParamType.Token, - url: new Uri("http://hl7.org/fhir/SearchParameter/Account-status"), - baseResourceTypes: new List() { "Account" }) // Fix: Only include valid resource types - { - IsSearchable = true, - SearchParameterStatus = SearchParameterStatus.Supported, // This parameter needs reindexing - }; + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); - // Create search parameter status that indicates it needs reindexing - ReadOnlyCollection status = new ReadOnlyCollection(new List() - { - new ResourceSearchParameterStatus() - { - LastUpdated = DateTime.UtcNow, - Uri = new Uri("http://hl7.org/fhir/SearchParameter/Account-status"), - Status = SearchParameterStatus.Supported, - }, - }); - var searchParameterStatusmanager = Substitute.For(); - searchParameterStatusmanager.GetAllSearchParameterStatus(Arg.Any()).Returns(status); + // Act + _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); - // Set up the search definition manager properly - var searchDefinitionManager = Substitute.For(); - searchDefinitionManager.AllSearchParameters.Returns(new List { param }); + // Wait for processing jobs to be created - expect many jobs since all resource types should be included + var processingJobs = await WaitForJobsAsync( + 
jobInfo.GroupId, + TimeSpan.FromSeconds(60), + expectedMinimumJobs: Math.Min(10, allResourceTypes.Count)); // Expect at least 10 different resource types - // Mock GetSearchParameters to return the Account-status parameter for "Account" resource type - searchDefinitionManager.GetSearchParameters("Account").Returns(new List { param }); + // Assert + Assert.True(processingJobs.Count > 0, "Processing jobs should have been created"); - // Mock GetSearchParameter to return the specific parameter by URL - searchDefinitionManager.GetSearchParameter("http://hl7.org/fhir/SearchParameter/Account-status").Returns(param); + // Get unique resource types from the processing jobs + var resourceTypesInJobs = processingJobs + .Select(j => JsonConvert.DeserializeObject(j.Definition)) + .Select(def => def.ResourceType) + .Distinct() + .ToList(); - // Create mock dependencies for ReindexProcessingJob - var fhirDataStore = Substitute.For(); - var resourceWrapperFactory = Substitute.For(); + // Verify that a significant number of resource types were included + // Since Resource is the base type, we should have jobs for MANY resource types + Assert.True( + resourceTypesInJobs.Count >= 10, + $"Should have jobs for at least 10 different resource types when base type is Resource. 
Found {resourceTypesInJobs.Count}: {string.Join(", ", resourceTypesInJobs)}"); - // Set up the factory for ReindexProcessingJob - _reindexProcessingJobTaskFactory = () => new ReindexProcessingJob( - () => _searchService.CreateMockScope(), - () => fhirDataStore.CreateMockScope(), - resourceWrapperFactory, - _searchParameterOperations, - NullLogger.Instance); + // Verify that both DomainResource types AND non-DomainResource types are included + Assert.Contains("Patient", resourceTypesInJobs); // DomainResource descendant + Assert.Contains("Observation", resourceTypesInJobs); // DomainResource descendant + + // These should be included since Resource is the base type (unlike DomainResource which excludes them) + var hasAnyNonDomainResource = resourceTypesInJobs.Any(rt => + rt == "Binary" || rt == "Bundle" || rt == "Parameters"); + + Assert.True( + hasAnyNonDomainResource, + $"When base type is Resource, should include non-DomainResource types (Binary, Bundle, Parameters). Found: {string.Join(", ", resourceTypesInJobs)}"); + + // Log the actual resource types found + var expectedResourceTypes = allResourceTypes.Count; + var foundResourceTypes = resourceTypesInJobs.Count; + } - var reindexOrchestratorJobTaskFactory = () => + [Fact] + public async Task GetDerivedResourceTypes_WithDomainResourceBaseType_ReturnsApplicableResourceTypes() + { + // Arrange - Create a search parameter that applies to DomainResource + var searchParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/DomainResource-text", + baseResourceTypes: new List { KnownResourceTypes.DomainResource }, + resourceType: KnownResourceTypes.DomainResource); + + var searchParamStatus = new ResourceSearchParameterStatus { - // Create a mock CoreFeatureConfiguration for the test - var coreFeatureConfig = Substitute.For>(); - coreFeatureConfig.Value.Returns(new CoreFeatureConfiguration - { - SearchParameterCacheRefreshIntervalSeconds = 1, // Use a short interval for tests - }); + LastUpdated = 
DateTime.UtcNow, + Uri = new Uri(searchParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam }); + + // Mock SearchParameterHashMap for RefreshSearchParameterCache + var paramHashMap = new Dictionary { { "Patient", "patientHash" } }; + _searchDefinitionManager.SearchParameterHashMap + .Returns(paramHashMap); - // Create a mock OperationsConfiguration for the test - var operationsConfig = Substitute.For>(); - operationsConfig.Value.Returns(new OperationsConfiguration + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + // Set up search results for various resource types + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(x => { - Reindex = new ReindexJobConfiguration - { - ReindexDelayMultiplier = 1, // Use a short multiplier for tests - }, + // Parse the resource type from the query parameters + var queryParams = x.ArgAt>>(0); + var resourceTypeParam = queryParams.FirstOrDefault(p => p.Item1 == KnownQueryParameterNames.Type); + var resourceType = resourceTypeParam?.Item2 ?? "Patient"; + + // Return count based on whether it's a DomainResource type + var isDomainResourceType = resourceType != "Binary" && + resourceType != "Bundle" && + resourceType != "Parameters"; + + return CreateSearchResult( + resourceCount: isDomainResourceType ? 
10 : 0, + resourceType: resourceType); }); - return new ReindexOrchestratorJob( - _queueClient, - () => _searchService.CreateMockScope(), - searchDefinitionManager, - ModelInfoProvider.Instance, - searchParameterStatusmanager, - _searchParameterOperations, - fhirRuntimeConfiguration, - NullLoggerFactory.Instance, - coreFeatureConfig, - operationsConfig); - }; + _searchService.GetSurrogateIdRanges( + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 10) })); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + + // Wait for processing jobs to be created + var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(60), expectedMinimumJobs: 1); + + // Assert + Assert.True(processingJobs.Count > 0, "Processing jobs should have been created"); + + // Get unique resource types from the processing jobs + var resourceTypesInJobs = processingJobs + .Select(j => JsonConvert.DeserializeObject(j.Definition)) + .Select(def => def.ResourceType) + .Distinct() + .ToList(); - var expectedResourceType = "Account"; // Fix: Use the actual resource type + // Verify that multiple resource types were included + Assert.True(resourceTypesInJobs.Count >= 3, $"Should have jobs for multiple resource types derived from DomainResource. 
Found: {string.Join(", ", resourceTypesInJobs)}"); - // Fix: Create a proper ReindexJobRecord (this was the main issue) - ReindexJobRecord job = new ReindexJobRecord( - new Dictionary() { { "Account", resourceTypeHash } }, // Resource type hash map - new List(), // No specific target resource types (will process all applicable) - new List(), // No specific target search parameter types - new List(), // No specific search parameter resource types - maxNumberOfResourcesPerQuery); // Max resources per query + // Verify that non-DomainResource types were excluded + Assert.DoesNotContain("Binary", resourceTypesInJobs); + Assert.DoesNotContain("Bundle", resourceTypesInJobs); + Assert.DoesNotContain("Parameters", resourceTypesInJobs); + } - JobInfo jobInfo = new JobInfo() + [Fact] + public async Task GetResourceCountForQueryAsync_WithValidQuery_ReturnsSearchResult() + { + // Arrange + var searchParam = CreateSearchParameterInfo(); + var searchParamStatus = new ResourceSearchParameterStatus { - Id = 1, - Definition = JsonConvert.SerializeObject(job), - QueueType = (byte)QueueType.Reindex, - GroupId = 1, - CreateDate = DateTime.UtcNow, - Status = JobStatus.Running, + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, }; - var mockSearchResult = new SearchResult(_mockedSearchCount, new List>()); - mockSearchResult.ReindexResult = new SearchResultReindex() + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam }); + + _searchDefinitionManager.GetSearchParameters("Patient") + .Returns(new List { searchParam }); + + _searchDefinitionManager.GetSearchParameter(searchParam.Url.OriginalString) + .Returns(searchParam); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + // Mock SearchParameterHashMap for 
RefreshSearchParameterCache + // IMPORTANT: Return a concrete Dictionary instance, not a mock + var paramHashMap = new Dictionary { - Count = _mockedSearchCount, - StartResourceSurrogateId = startResourceSurrogateId, - EndResourceSurrogateId = endResourceSurrogateId, + { "Patient", "patientHash" }, + { "Observation", "observationHash" }, + { "Condition", "conditionHash" }, }; - // Setup search result for count queries + // Return the concrete dictionary as IReadOnlyDictionary + _searchDefinitionManager.SearchParameterHashMap + .Returns(paramHashMap); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + // Create a search result with 100 resources + var searchResult = CreateSearchResult( + resourceCount: 100, + startSurrogateId: 1, + endSurrogateId: 100, + resourceType: "Patient"); + _searchService.SearchForReindexAsync( - Arg.Is>>(l => l.Any(t => t.Item1 == "_count") && l.Any(t => t.Item1 == "_type" && t.Item2 == expectedResourceType)), + Arg.Any>>(), Arg.Any(), - true, + Arg.Any(), Arg.Any(), - true). - Returns(mockSearchResult); + Arg.Any()) + .Returns(searchResult); - var defaultSearchResult = new SearchResult(0, new List>()); - _searchService.SearchForReindexAsync( - Arg.Is>>(l => l.Any(t => t.Item1 == "_count") && l.Any(t => t.Item1 == "_type" && t.Item2 != expectedResourceType)), - Arg.Any(), - true, + _searchService.GetSurrogateIdRanges( + "Patient", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), Arg.Any(), - true). 
- Returns(defaultSearchResult); + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 100) })); - // Setup search result for processing queries (non-count) - _searchService.SearchForReindexAsync( + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + + // Wait for processing jobs to be created + var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 1); + + // Assert - Verify that SearchForReindexAsync was called + await _searchService.Received().SearchForReindexAsync( Arg.Any>>(), Arg.Any(), - false, + Arg.Any(), Arg.Any(), - true). - Returns(CreateSearchResult(resourceType: resourceTypeHash)); + Arg.Any()); - // Execute the orchestrator job - var orchestratorTask = reindexOrchestratorJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken); + // Assert - Verify processing jobs were created + Assert.True(processingJobs.Count > 0, "Processing jobs should have been created based on search results"); - // Simulate processing job execution by running them in parallel - _ = Task.Run( - async () => - { - await Task.Delay(5000, _cancellationToken); // Give orchestrator time to create jobs + // Assert - Verify the job definitions contain the correct resource counts + var firstJob = processingJobs.First(); + var jobDef = JsonConvert.DeserializeObject(firstJob.Definition); - // Get all processing jobs created by the orchestrator - var processingJobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, jobInfo.GroupId, true, _cancellationToken); - var childJobs = processingJobs.Where(j => j.Id != jobInfo.Id).ToList(); + Assert.NotNull(jobDef); + Assert.Equal("Patient", jobDef.ResourceType); + Assert.NotNull(jobDef.ResourceCount); + Assert.True(jobDef.ResourceCount.Count > 0, "Resource count should be greater 
than 0"); + Assert.Equal(1, jobDef.ResourceCount.StartResourceSurrogateId); + Assert.Equal(100, jobDef.ResourceCount.EndResourceSurrogateId); - // Execute each processing job - foreach (var childJob in childJobs) - { - try - { - childJob.Status = JobStatus.Running; - var result = await _reindexProcessingJobTaskFactory().ExecuteAsync(childJob, _cancellationToken); - childJob.Status = JobStatus.Completed; - childJob.Result = result; - await _queueClient.CompleteJobAsync(childJob, false, _cancellationToken); - } - catch (Exception ex) - { - childJob.Status = JobStatus.Failed; - childJob.Result = JsonConvert.SerializeObject(new { Error = ex.Message }); - await _queueClient.CompleteJobAsync(childJob, false, _cancellationToken); - } - } - }, - _cancellationToken); + // Assert - Verify search parameter hash is set + Assert.NotNull(jobDef.ResourceTypeSearchParameterHashMap); + Assert.Equal("patientHash", jobDef.ResourceTypeSearchParameterHashMap); - // Wait for orchestrator job to complete - var jobResult = JsonConvert.DeserializeObject(await orchestratorTask); + // Assert - Verify search parameter URLs are included + Assert.NotNull(jobDef.SearchParameterUrls); + Assert.Contains(searchParam.Url.OriginalString, jobDef.SearchParameterUrls); + } - // Verify search for count was called - await _searchService.Received().SearchForReindexAsync( + [Fact] + public async Task EnqueueQueryProcessingJobsAsync_WithValidSearchParameters_CreatesProcessingJobs() + { + var searchParam = CreateSearchParameterInfo(); + var searchParamStatus = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam }); + + _searchDefinitionManager.GetSearchParameters("Patient") + .Returns(new 
List { searchParam }); + + _searchDefinitionManager.GetSearchParameter(searchParam.Url.OriginalString) + .Returns(searchParam); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var searchResultWithData = CreateSearchResult(resourceCount: 250); + _searchService.SearchForReindexAsync( Arg.Any>>(), Arg.Any(), - Arg.Is(true), + Arg.Any(), Arg.Any(), - true); + Arg.Any()) + .Returns(searchResultWithData); - // Verify specific search for Account resource type - await _searchService.Received().SearchForReindexAsync( - Arg.Is>>(l => l.Any(t => t.Item1 == "_count") && l.Any(t => t.Item1 == "_type" && t.Item2 == expectedResourceType)), + var ranges = new List<(long, long)> + { + (1, 100), + (101, 200), + (201, 250), + }; + _searchService.GetSurrogateIdRanges( Arg.Any(), - true, + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), Arg.Any(), - true); + Arg.Any()) + .Returns(ranges); - var reindexJobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, 1, true, _cancellationToken); - var processingJob = reindexJobs.FirstOrDefault(j => j.Id != jobInfo.Id); + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob( + new AzureHealthDataServicesRuntimeConfiguration(), + searchParameterCacheRefreshIntervalSeconds: 0); - Assert.NotNull(processingJob); - Assert.Equal(JobStatus.Completed, processingJob.Status); + // Act: Fire off execute asynchronously without awaiting + var executeTask = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); - var processingJobDefinition = JsonConvert.DeserializeObject(processingJob.Definition); - Assert.Equal(_mockedSearchCount, processingJobDefinition.ResourceCount.Count); - Assert.Equal(expectedResourceType, processingJobDefinition.ResourceType); - Assert.Contains(param.Url.ToString(), processingJobDefinition.SearchParameterUrls); + // Assert: Check that processing jobs were created + var processingJobs = await 
WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 1); + Assert.True(processingJobs.Count > 0, "Processing jobs should have been enqueued"); + + // Cancel the orchestrator job to stop it from waiting for completion + await _queueClient.CancelJobByGroupIdAsync((byte)QueueType.Reindex, jobInfo.GroupId, _cancellationToken); + + // Now safely await the result + var result = await executeTask; + var jobResult = JsonConvert.DeserializeObject(result); + + Assert.NotNull(jobResult); } - [Fact(Skip = "Causing random timeouts")] - public async Task GivenNoSupportedParams_WhenExecuted_ThenJobCompletesWithNoWork() + [Fact] + public async Task UpdateSearchParameterStatus_WithPendingDisableStatus_UpdatesToDisabled() { - var job = CreateReindexJobRecord(); + // Arrange + var searchParam = CreateSearchParameterInfo(); + var searchParamStatus = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam.Url.OriginalString), + Status = SearchParameterStatus.PendingDisable, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var emptySearchResult = new SearchResult(0, new List>()); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(emptySearchResult); - _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(job.ResourceTypeSearchParameterHashMap.First().Value); + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); - JobInfo jobInfo = new JobInfo() + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = 
JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); + await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync( + Arg.Is>(l => l.Contains(searchParam.Url.ToString())), + SearchParameterStatus.Disabled, + Arg.Any()); + } + + [Fact] + public async Task GivenNoSupportedParams_WhenExecuted_ThenJobCompletesWithNoWork() + { + // Arrange + var searchParam = CreateSearchParameterInfo(); + var searchParamStatus = new ResourceSearchParameterStatus { - Id = 3, - Definition = JsonConvert.SerializeObject(job), - QueueType = (byte)QueueType.Reindex, - GroupId = 3, - CreateDate = DateTime.UtcNow, - Status = JobStatus.Running, + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam.Url.OriginalString), + Status = SearchParameterStatus.PendingDelete, }; - var result = JsonConvert.DeserializeObject(await _reindexOrchestratorJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken)); - Assert.Equal("Nothing to process. Reindex complete.", result.Error.First().Diagnostics); - var jobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, jobInfo.GroupId, false, _cancellationToken); - Assert.False(jobs.Any()); + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var emptySearchResult = new SearchResult(0, new List>()); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(emptySearchResult); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert 
+ Assert.NotNull(jobResult); + await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync( + Arg.Is>(l => l.Contains(searchParam.Url.ToString())), + SearchParameterStatus.Deleted, + Arg.Any()); } - private SearchResult CreateSearchResult(string resourceType, string continuationToken = null, int resourceCount = 1) + [Fact] + public async Task UpdateSearchParameterStatus_WithNullReadySearchParameters_ReturnsEarly() { - var resultList = new List(); + // Arrange + var emptyStatus = new ReadOnlyCollection( + new List()); - for (var i = 0; i < resourceCount; i++) - { - var wrapper = Substitute.For(); - var propertyInfo = wrapper.GetType().GetProperty("ResourceTypeName"); - propertyInfo.SetValue(wrapper, resourceType); + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(emptyStatus); - var entry = new SearchResultEntry(wrapper); + _searchDefinitionManager.AllSearchParameters + .Returns(new List()); - resultList.Add(entry); - } + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); - var searchResult = new SearchResult(resultList, continuationToken, null, new List>()); - searchResult.MaxResourceSurrogateId = 1; - searchResult.TotalCount = resultList.Count; - return searchResult; + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); + await _searchParameterStatusManager.DidNotReceive().UpdateSearchParameterStatusAsync( + Arg.Any>(), + Arg.Any(), + Arg.Any()); } - private ReindexJobRecord CreateReindexJobRecord(uint maxResourcePerQuery = 100, IReadOnlyDictionary paramHashMap = null) + [Fact] + public async Task GetValidSearchParameterUrlsForResourceType_WithValidParameters_ReturnsFilteredUrls() { - if (paramHashMap == null) + // Arrange + var patientNameParam = CreateSearchParameterInfo( + url: 
"http://hl7.org/fhir/SearchParameter/Patient-name", + baseResourceTypes: new List { "Patient" }, + resourceType: "Patient"); + + var patientBirthdateParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-birthdate", + baseResourceTypes: new List { "Patient" }, + resourceType: "Patient"); + + // Create a third parameter that should be filtered OUT + // (not valid for Patient even though it's in the reindex job) + var observationCodeParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Observation-code", + baseResourceTypes: new List { "Observation" }, + resourceType: "Observation"); + + var searchParamStatus1 = new ResourceSearchParameterStatus { - paramHashMap = new Dictionary() { { "Patient", "patientHash" } }; + LastUpdated = DateTime.UtcNow, + Uri = new Uri(patientNameParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(patientBirthdateParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus3 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(observationCodeParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2, searchParamStatus3 }); + + // ALL three parameters are in the reindex job + _searchDefinitionManager.AllSearchParameters + .Returns(new List { patientNameParam, patientBirthdateParam, observationCodeParam }); + + // GetSearchParameters("Patient") returns ONLY the Patient parameters + // This is the first level of filtering + _searchDefinitionManager.GetSearchParameters("Patient") + .Returns(new List { patientNameParam, patientBirthdateParam }); + + _searchDefinitionManager.GetSearchParameter(Arg.Any()) + .Returns(x => 
x[0] switch + { + "http://hl7.org/fhir/SearchParameter/Patient-name" => patientNameParam, + "http://hl7.org/fhir/SearchParameter/Patient-birthdate" => patientBirthdateParam, + "http://hl7.org/fhir/SearchParameter/Observation-code" => observationCodeParam, + _ => throw new SearchParameterNotSupportedException("Not found"), + }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var searchResult = CreateSearchResult(resourceCount: 10); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(searchResult); + + _searchService.GetSurrogateIdRanges( + "Patient", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 10) })); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + + // Wait for processing jobs to be created + var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 1); + + // Assert - Verify jobs were created + Assert.True(processingJobs.Count > 0, "Processing jobs should have been created"); + + var firstJob = processingJobs.First(); + var jobDef = JsonConvert.DeserializeObject(firstJob.Definition); + + // Assert - Verify SearchParameterUrls exist and are filtered correctly + Assert.NotNull(jobDef.SearchParameterUrls); + + // Assert - Verify BOTH Patient parameters are included + Assert.Contains(patientNameParam.Url.OriginalString, jobDef.SearchParameterUrls); + Assert.Contains(patientBirthdateParam.Url.OriginalString, jobDef.SearchParameterUrls); + + // Assert - Verify Observation parameter is EXCLUDED (this is the key filtering behavior!) 
+ Assert.DoesNotContain(observationCodeParam.Url.OriginalString, jobDef.SearchParameterUrls); + + // Assert - Verify exactly 2 parameters (the two Patient parameters) + Assert.Equal(2, jobDef.SearchParameterUrls.Count); + + // Assert - Verify the job is for Patient resource type + Assert.Equal("Patient", jobDef.ResourceType); + } + + [Fact] + public async Task GetValidSearchParameterUrlsForResourceType_WithExceptionInValidation_UsesFallback() + { + // Arrange + var searchParam1 = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-name", + resourceType: "Patient"); + + var searchParam2 = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-birthdate", + resourceType: "Patient"); + + var searchParamStatus1 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam1.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam2.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2 }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam1, searchParam2 }); + + // Simulate an exception when trying to get search parameters for Patient + // This triggers the fallback behavior + _searchDefinitionManager.GetSearchParameters("Patient") + .Returns(x => throw new Exception("Validation error")); + + // The fallback should still allow GetSearchParameter to work for individual URLs + _searchDefinitionManager.GetSearchParameter(Arg.Any()) + .Returns(x => x[0] switch + { + "http://hl7.org/fhir/SearchParameter/Patient-name" => searchParam1, + "http://hl7.org/fhir/SearchParameter/Patient-birthdate" => searchParam2, + _ => throw new 
SearchParameterNotSupportedException("Not found"), + }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var searchResult = CreateSearchResult(resourceCount: 10); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(searchResult); + + _searchService.GetSurrogateIdRanges( + "Patient", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 10) })); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + + // Wait for processing jobs to be created + var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 1); + + // Assert - Verify that jobs were still created despite the exception + Assert.True(processingJobs.Count > 0, "Processing jobs should have been created even when validation throws exception"); + + // Assert - Verify that the fallback behavior was used + // The fallback returns ALL search parameters from _reindexJobRecord.SearchParams + var firstJob = processingJobs.First(); + var jobDef = JsonConvert.DeserializeObject(firstJob.Definition); + + Assert.NotNull(jobDef.SearchParameterUrls); + + // The fallback should include ALL search parameters from the reindex job + // Since the exception occurred, it should use _reindexJobRecord.SearchParams.ToList() + Assert.Contains(searchParam1.Url.OriginalString, jobDef.SearchParameterUrls); + Assert.Contains(searchParam2.Url.OriginalString, jobDef.SearchParameterUrls); + + // Verify that both parameters are present (the fallback includes all) + Assert.Equal(2, jobDef.SearchParameterUrls.Count); + } + + [Fact] + public async Task 
CheckJobRecordForAnyWork_WithZeroResources_ReturnsErrorIndicatingNoWork() + { + // Arrange + var emptyStatus = new ReadOnlyCollection( + new List()); + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(emptyStatus); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List()); + + var jobInfo = await CreateReindexJobRecord(); + + // Modify the job record to have zero resources + var jobRecord = JsonConvert.DeserializeObject(jobInfo.Definition); + jobRecord.Count = 0; + jobRecord.ResourceCounts.Clear(); + jobInfo.Definition = JsonConvert.SerializeObject(jobRecord); + + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert - Verify the observable behavior when CheckJobRecordForAnyWork returns false + Assert.NotNull(jobResult); + Assert.NotNull(jobResult.Error); + Assert.True(jobResult.Error.Count > 0, "Should have at least one error"); + + // Verify the error indicates no work to process - checking multiple possible error messages + var hasNoWorkError = jobResult.Error.Any(e => + e.Diagnostics != null && e.Diagnostics.Contains("Nothing to process", StringComparison.OrdinalIgnoreCase)); + + Assert.True(hasNoWorkError, $"Expected error indicating no resources to reindex. 
Actual errors: {string.Join(", ", jobResult.Error.Select(e => e.Diagnostics))}"); + } + + [Fact] + public async Task ProcessCompletedJobs_WithFailedJobs_HandlesFailed() + { + // Arrange - Create multiple search parameters + var patientNameParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-name", + resourceType: "Patient"); + + var patientBirthdateParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-birthdate", + resourceType: "Patient"); + + var observationCodeParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Observation-code", + baseResourceTypes: new List { "Observation" }, + resourceType: "Observation"); + + var searchParamStatus1 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(patientNameParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(patientBirthdateParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus3 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(observationCodeParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2, searchParamStatus3 }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { patientNameParam, patientBirthdateParam, observationCodeParam }); + + _searchDefinitionManager.GetSearchParameters("Patient") + .Returns(new List { patientNameParam, patientBirthdateParam }); + + _searchDefinitionManager.GetSearchParameters("Observation") + .Returns(new List { observationCodeParam }); + + _searchDefinitionManager.GetSearchParameter(Arg.Any()) + .Returns(x => x[0] switch + { + 
"http://hl7.org/fhir/SearchParameter/Patient-name" => patientNameParam, + "http://hl7.org/fhir/SearchParameter/Patient-birthdate" => patientBirthdateParam, + "http://hl7.org/fhir/SearchParameter/Observation-code" => observationCodeParam, + _ => throw new SearchParameterNotSupportedException("Not found"), + }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + // Return different results for different resource types + _searchService.SearchForReindexAsync( + Arg.Is>>(l => l.Any(t => t.Item2 == "Patient")), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(CreateSearchResult(resourceCount: 100, resourceType: "Patient")); + + _searchService.SearchForReindexAsync( + Arg.Is>>(l => l.Any(t => t.Item2 == "Observation")), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(CreateSearchResult(resourceCount: 50, resourceType: "Observation")); + + _searchService.GetSurrogateIdRanges( + "Patient", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 100) })); + + _searchService.GetSurrogateIdRanges( + "Observation", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 50) })); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act - Start the orchestrator + _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + + // Wait for processing jobs to be created + var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 2); + + // Assert - Verify jobs were created + Assert.True(processingJobs.Count > 0, "Processing jobs should have been created"); + + // **SIMULATE JOB FAILURE** - Find and fail Patient jobs specifically + var 
patientJobs = processingJobs + .Where(j => + { + try + { + var jobDef = JsonConvert.DeserializeObject(j.Definition); + return jobDef.ResourceType == "Patient"; + } + catch (JsonException) + { + // Skip jobs that can't be deserialized (e.g., orchestrator job) + return false; + } + }) + .ToList(); + + Assert.True(patientJobs.Count > 0, "Should have at least one Patient processing job to fail"); + + var jobToFail = patientJobs.First(); + + var jobDef = JsonConvert.DeserializeObject(jobToFail.Definition); + + // Create a failed result + var failedResult = new ReindexProcessingJobResult + { + SucceededResourceCount = 0, + FailedResourceCount = (int)jobDef.ResourceCount.Count, + }; + + jobToFail.Status = JobStatus.Failed; + jobToFail.Result = JsonConvert.SerializeObject(failedResult); + + // Update the job in the queue to mark it as failed + await _queueClient.CompleteJobAsync(jobToFail, false, _cancellationToken); + + // Wait a bit for the orchestrator to process the failed jobs + await Task.Delay(TimeSpan.FromSeconds(2), _cancellationToken); + + // Get the updated jobs + var updatedJobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, jobInfo.GroupId, true, _cancellationToken); + var failedJobs = updatedJobs.Where(j => j.Status == JobStatus.Failed && j.Id != jobInfo.Id).ToList(); + var succeededJobs = updatedJobs.Where(j => j.Status == JobStatus.Completed && j.Id != jobInfo.Id).ToList(); + + // Assert - Verify failed jobs exist + Assert.True(failedJobs.Count > 0, "Should have at least one failed job"); + + // Assert - Verify the failed job is the one we failed + var failedJobDef = JsonConvert.DeserializeObject(failedJobs.First().Definition); + Assert.Equal("Patient", failedJobDef.ResourceType); + + // Assert - Verify search parameters from failed jobs should NOT be marked as ready + // The orchestrator's ProcessCompletedJobs should: + // 1. Remove Patient search parameters from the ready list (because Patient job failed) + // 2. 
Keep Observation search parameters in the ready list (if Observation jobs succeeded) + + // Verify that Patient search parameters were NOT updated to Enabled + await _searchParameterStatusManager.DidNotReceive().UpdateSearchParameterStatusAsync( + Arg.Is>(l => l.Contains(patientNameParam.Url.ToString())), + SearchParameterStatus.Enabled, + Arg.Any()); + + await _searchParameterStatusManager.DidNotReceive().UpdateSearchParameterStatusAsync( + Arg.Is>(l => l.Contains(patientBirthdateParam.Url.ToString())), + SearchParameterStatus.Enabled, + Arg.Any()); + + // If we have succeeded Observation jobs, those parameters should be updated + if (succeededJobs.Any(j => + { + var def = JsonConvert.DeserializeObject(j.Definition); + return def.ResourceType == "Observation"; + })) + { + await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync( + Arg.Is>(l => l.Contains(observationCodeParam.Url.ToString())), + SearchParameterStatus.Enabled, + Arg.Any()); } - return new ReindexJobRecord(paramHashMap, new List(), new List(), new List(), maxResourcePerQuery); + // Assert - Verify the orchestrator logged the failures + // The result should contain error information about failed resources + var orchestratorJob = updatedJobs.FirstOrDefault(j => j.Id == jobInfo.Id); + if (orchestratorJob?.Result != null) + { + var orchestratorResult = JsonConvert.DeserializeObject(orchestratorJob.Result); + + // Failed jobs should be counted in FailedResources + Assert.True(orchestratorResult.FailedResources > 0, $"Expected FailedResources > 0, but got {orchestratorResult.FailedResources}"); + + // Error information should be present + Assert.NotNull(orchestratorResult.Error); + Assert.True(orchestratorResult.Error.Count > 0, "Should have error information about failed jobs"); + } + } + + [Fact] + public async Task CalculateTotalCount_WithMultipleResourceTypes_CountsAllResources() + { + // Create search parameters for multiple resource types + var patientParam = 
CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-name", + resourceType: "Patient"); + + var observationParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Observation-code", + baseResourceTypes: new List { "Observation" }, + resourceType: "Observation"); + + var conditionParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Condition-code", + baseResourceTypes: new List { "Condition" }, + resourceType: "Condition"); + + var searchParamStatus1 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(patientParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(observationParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus3 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(conditionParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2, searchParamStatus3 }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { patientParam, observationParam, conditionParam }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + // Return different counts for different resource types + _searchService.SearchForReindexAsync( + Arg.Is>>(l => + l.Any(t => t.Item2 == "Patient")), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(CreateSearchResult(resourceCount: 30, resourceType: "Patient")); + + _searchService.SearchForReindexAsync( + Arg.Is>>(l => + l.Any(t => t.Item2 == "Observation")), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(CreateSearchResult(resourceCount: 25, resourceType: 
"Observation")); + + _searchService.SearchForReindexAsync( + Arg.Is>>(l => + l.Any(t => t.Item2 == "Condition")), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(CreateSearchResult(resourceCount: 20, resourceType: "Condition")); + + _searchService.GetSurrogateIdRanges( + "Patient", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 30) })); + + _searchService.GetSurrogateIdRanges( + "Observation", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 25) })); + + _searchService.GetSurrogateIdRanges( + "Condition", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 20) })); + + // Arrange + var jobRecord = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + _ = orchestrator.ExecuteAsync(jobRecord, _cancellationToken); + + // Assert - Verify the jobs created and their ResourceCount.Count + var processingJobs = await WaitForJobsAsync(jobRecord.GroupId, TimeSpan.FromSeconds(60), expectedMinimumJobs: 3); + + // Verify processing jobs were created + Assert.True(processingJobs.Count > 0, "Processing jobs should have been created"); + + // Group jobs by resource type and verify counts + var jobsByResourceType = processingJobs + .Select(j => JsonConvert.DeserializeObject(j.Definition)) + .GroupBy(def => def.ResourceType) + .ToDictionary(g => g.Key, g => g.Sum(def => (int)def.ResourceCount.Count)); + + // Verify we have jobs for all three resource types + Assert.True(jobsByResourceType.ContainsKey("Patient"), "Should have Patient jobs"); + Assert.True(jobsByResourceType.ContainsKey("Observation"), "Should have Observation jobs"); + 
Assert.True(jobsByResourceType.ContainsKey("Condition"), "Should have Condition jobs"); + + // Verify each resource type has the correct total count + Assert.Equal(30, jobsByResourceType["Patient"]); + Assert.Equal(25, jobsByResourceType["Observation"]); + Assert.Equal(20, jobsByResourceType["Condition"]); + + // Verify the total matches the sum of all resource types + var totalFromJobs = jobsByResourceType.Values.Sum(); + Assert.Equal(75, totalFromJobs); + } + + [Fact] + public async Task ProcessCompletedJobsAndDetermineReadiness_WithReadyParameters_ReturnsReadyList() + { + // Arrange - Create search parameters for Patient + var patientNameParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-name", + resourceType: "Patient"); + + var patientBirthdateParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-birthdate", + resourceType: "Patient"); + + var searchParamStatus1 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(patientNameParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(patientBirthdateParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2 }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { patientNameParam, patientBirthdateParam }); + + _searchDefinitionManager.GetSearchParameters("Patient") + .Returns(new List { patientNameParam, patientBirthdateParam }); + + _searchDefinitionManager.GetSearchParameter(Arg.Any()) + .Returns(x => x[0] switch + { + "http://hl7.org/fhir/SearchParameter/Patient-name" => patientNameParam, + "http://hl7.org/fhir/SearchParameter/Patient-birthdate" => patientBirthdateParam, + _ => throw new 
SearchParameterNotSupportedException("Not found"), + }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + // Track whether jobs have been marked complete to change mock behavior + bool jobsCompleted = false; + + // Create search results with actual resources to trigger job creation + // BUT after jobs complete, return zero resources to simulate successful reindexing + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(callInfo => + { + // If jobs have been completed, return zero resources (successful reindex) + if (jobsCompleted) + { + return new SearchResult(0, new List>()); + } + + // Otherwise, return resources that need reindexing + return CreateSearchResult(resourceCount: 100, resourceType: "Patient"); + }); + + _searchService.GetSurrogateIdRanges( + "Patient", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 100) })); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act - Start the orchestrator + var executeTask = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + + // Wait for processing jobs to be created + var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 1); + Assert.True(processingJobs.Count > 0, "Processing jobs should have been created"); + + // Verify the created jobs have SearchParameterUrls in their definitions + foreach (var job in processingJobs) + { + var jobDef = JsonConvert.DeserializeObject(job.Definition); + Assert.NotNull(jobDef.SearchParameterUrls); + Assert.Contains(patientNameParam.Url.ToString(), jobDef.SearchParameterUrls); + Assert.Contains(patientBirthdateParam.Url.ToString(), jobDef.SearchParameterUrls); + } + + // Mark all jobs as 
completed to simulate successful reindexing + foreach (var job in processingJobs) + { + var jobDef = JsonConvert.DeserializeObject(job.Definition); + + var succeededResult = new ReindexProcessingJobResult + { + SucceededResourceCount = (int)jobDef.ResourceCount.Count, + FailedResourceCount = 0, + SearchParameterUrls = jobDef.SearchParameterUrls, + }; + + job.Status = JobStatus.Completed; + job.Result = JsonConvert.SerializeObject(succeededResult); + + await _queueClient.CompleteJobAsync(job, false, _cancellationToken); + } + + // Now that jobs are completed, change mock to return zero resources + // This simulates that reindexing was successful and no resources remain + jobsCompleted = true; + + // Assert - Verify search parameters were marked as ready (updated to Enabled) + // Retry every 1 second for up to 15 seconds until the method is called + var retryCount = 0; + var maxRetries = 15; + var retryInterval = TimeSpan.FromSeconds(1); + var receivedCall = false; + + while (retryCount < maxRetries && !receivedCall) + { + try + { + await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync( + Arg.Is>(l => + l.Contains(patientNameParam.Url.ToString()) || + l.Contains(patientBirthdateParam.Url.ToString())), + SearchParameterStatus.Enabled, + Arg.Any()); + + receivedCall = true; + } + catch (NSubstitute.Exceptions.ReceivedCallsException) + { + retryCount++; + if (retryCount < maxRetries) + { + await Task.Delay(retryInterval, _cancellationToken); + } + } + } + + Assert.True(receivedCall, $"UpdateSearchParameterStatusAsync was not called after {maxRetries} retries over {maxRetries} seconds. 
The orchestrator may not have processed the completed jobs yet."); + + // Wait for execution to complete + try + { + await executeTask; + } + catch (OperationCanceledException) + { + // Expected + } + + // Assert - Verify all jobs completed successfully + var allJobs = await _queueClient.GetJobByGroupIdAsync((byte)QueueType.Reindex, jobInfo.GroupId, true, _cancellationToken); + var completedJobs = allJobs.Where(j => j.Status == JobStatus.Completed && j.Id != jobInfo.Id).ToList(); + + Assert.True(completedJobs.Count > 0, "Should have completed jobs"); + Assert.All(completedJobs, job => + { + var def = JsonConvert.DeserializeObject(job.Definition); + var result = JsonConvert.DeserializeObject(job.Result); + + Assert.True(result.SucceededResourceCount > 0, "Job should have succeeded resources"); + Assert.Equal(0, result.FailedResourceCount); + Assert.NotNull(result.SearchParameterUrls); + }); + + // Assert - Verify orchestrator result shows success + var orchestratorJob = allJobs.FirstOrDefault(j => j.Id == jobInfo.Id); + if (orchestratorJob?.Result != null) + { + var orchestratorResult = JsonConvert.DeserializeObject(orchestratorJob.Result); + + Assert.True(orchestratorResult.SucceededResources > 0, "Should have succeeded resources"); + Assert.Equal(0, orchestratorResult.FailedResources); + } + } + + [Fact] + public async Task HandleException_LogsAndReturnsError() + { + // Arrange + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(Task.FromException>( + new SystemException("Critical error"))); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); + Assert.NotNull(jobResult.Error); + Assert.True(jobResult.Error.Count > 0); + 
Assert.Contains(jobResult.Error, e => e.Severity == OperationOutcomeConstants.IssueSeverity.Error); + } + + [Fact] + public async Task CreateReindexProcessingJobsAsync_WithTargetResourceTypes_FiltersCorrectly() + { + // Arrange + var targetResourceTypes = new List { "Patient" }; + + var patientParam = CreateSearchParameterInfo(resourceType: "Patient"); + var observationParam = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Observation-code", + resourceType: "Observation"); + + var searchParamStatus1 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(patientParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(observationParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2 }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { patientParam, observationParam }); + + _searchDefinitionManager.GetSearchParameters("Patient") + .Returns(new List { patientParam }); + + _searchDefinitionManager.GetSearchParameter(patientParam.Url.OriginalString) + .Returns(patientParam); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var searchResultWithData = CreateSearchResult(resourceCount: 250); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(searchResultWithData); + + var ranges = new List<(long, long)> + { + (1, 100), + (101, 200), + (201, 250), + }; + _searchService.GetSurrogateIdRanges( + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(ranges); + + var jobInfo = await CreateReindexJobRecord(targetResourceTypes: 
targetResourceTypes); + var orchestrator = CreateReindexOrchestratorJob( + new AzureHealthDataServicesRuntimeConfiguration(), + searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 1); + + foreach (var job in processingJobs) + { + var jobDef = JsonConvert.DeserializeObject(job.Definition); + Assert.Equal("Patient", jobDef.ResourceType); + } + } + + [Fact] + public async Task CreateReindexOrchestratorJobAsync_WithResourceTypesWithZeroCount_MarksParametersAsEnabled() + { + // Arrange + var searchParam = CreateSearchParameterInfo(); + var searchParamStatus = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + // Return zero count for the search result + var emptySearchResult = new SearchResult(0, new List>()); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(emptySearchResult); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); + + await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync( + Arg.Any>(), + SearchParameterStatus.Enabled, + Arg.Any()); + } + + [Fact] + public async Task 
CreateReindexProcessingJobsAsync_WithCaseVariantSearchParameterUrls_ReturnsBothEntries() + { + // Arrange - Test case where we have case-variant SearchParameter URLs with same status + // Create case-variant URLs (both should be treated separately with potential duplicates) + var searchParamLowercase = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/searchparameter/patient-name", + resourceType: "Patient"); + + var searchParamMixedCase = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-name", + resourceType: "Patient"); + + var searchParamStatus1 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParamLowercase.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParamMixedCase.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2 }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParamLowercase, searchParamMixedCase }); + + _searchDefinitionManager.GetSearchParameters("Patient") + .Returns(new List { searchParamLowercase, searchParamMixedCase }); + + _searchDefinitionManager.GetSearchParameter(Arg.Any()) + .Returns(x => x[0] switch + { + "http://hl7.org/fhir/searchparameter/patient-name" => searchParamLowercase, + "http://hl7.org/fhir/SearchParameter/Patient-name" => searchParamMixedCase, + _ => throw new SearchParameterNotSupportedException("Not found"), + }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var searchResult = CreateSearchResult(resourceCount: 10); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(searchResult); + + 
_searchService.GetSurrogateIdRanges( + "Patient", + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(Task.FromResult>( + new List<(long StartId, long EndId)> { (1, 10) })); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + _ = orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + + var processingJobs = await WaitForJobsAsync(jobInfo.GroupId, TimeSpan.FromSeconds(30), expectedMinimumJobs: 1); + + Assert.True(processingJobs.Count > 0, "Should have created at least one processing job"); + + var firstJob = processingJobs.First(); + var jobDef = JsonConvert.DeserializeObject(firstJob.Definition); + + Assert.NotNull(jobDef.SearchParameterUrls); + Assert.Contains(searchParamLowercase.Url.OriginalString, jobDef.SearchParameterUrls); + Assert.Contains(searchParamMixedCase.Url.OriginalString, jobDef.SearchParameterUrls); + + // Verify both case variants are present + Assert.Equal(2, jobDef.SearchParameterUrls.Count(url => + url.Equals(searchParamLowercase.Url.OriginalString, StringComparison.Ordinal) || + url.Equals(searchParamMixedCase.Url.OriginalString, StringComparison.Ordinal))); + } + + [Fact] + public async Task CreateReindexProcessingJobsAsync_WithCaseVariantDifferentStatuses_ReturnsBothEntriesWithRespectiveStatuses() + { + // Arrange - Test case where we have case variant URLs with different statuses (Supported and PendingDelete) + // Create case-variant URLs with different statuses + var searchParamLowercase = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/searchparameter/patient-identifier", + resourceType: "Patient"); + + var searchParamMixedCase = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-Identifier", + resourceType: "Patient"); + + var searchParamStatus1 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new 
Uri(searchParamLowercase.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParamMixedCase.Url.OriginalString), + Status = SearchParameterStatus.PendingDelete, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2 }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParamLowercase, searchParamMixedCase }); + + _searchDefinitionManager.GetSearchParameters("Patient") + .Returns(new List { searchParamLowercase, searchParamMixedCase }); + + _searchDefinitionManager.GetSearchParameter(Arg.Any()) + .Returns(x => x[0] switch + { + "http://hl7.org/fhir/searchparameter/patient-identifier" => searchParamLowercase, + "http://hl7.org/fhir/SearchParameter/Patient-Identifier" => searchParamMixedCase, + _ => throw new SearchParameterNotSupportedException("Not found"), + }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var searchResult = CreateSearchResult(resourceCount: 0); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(searchResult); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); + await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync( + Arg.Is>(l => l.Contains(searchParamLowercase.Url.ToString())), + SearchParameterStatus.Enabled, + Arg.Any()); + + await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync( + Arg.Is>(l => 
l.Contains(searchParamMixedCase.Url.ToString())), + SearchParameterStatus.Deleted, + Arg.Any()); + } + + [Fact] + public async Task CreateReindexProcessingJobsAsync_WithMultipleCaseVariantsAndPendingDisable_MaintainsAllVariants() + { + // Arrange - Test with PendingDisable status + var searchParam1 = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/searchparameter/patient-birthdate", + resourceType: "Patient"); + + var searchParam2 = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Patient-BirthDate", + resourceType: "Patient"); + + var searchParamStatus1 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam1.Url.OriginalString), + Status = SearchParameterStatus.PendingDisable, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam2.Url.OriginalString), + Status = SearchParameterStatus.PendingDisable, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2 }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam1, searchParam2 }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var emptySearchResult = new SearchResult(0, new List>()); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(emptySearchResult); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); + await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync( + Arg.Is>(l => l.Contains(searchParam1.Url.ToString())), + 
SearchParameterStatus.Disabled, + Arg.Any()); + + await _searchParameterStatusManager.Received().UpdateSearchParameterStatusAsync( + Arg.Is>(l => l.Contains(searchParam2.Url.ToString())), + SearchParameterStatus.Disabled, + Arg.Any()); + } + + [Fact] + public async Task CheckForCompletionAsync_WithCaseVariantUrls_MaintainsBothVariants() + { + // Arrange - Simulate completion checking with case variant URLs + var searchParam1 = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/searchparameter/observation-code", + resourceType: "Observation"); + + var searchParam2 = CreateSearchParameterInfo( + url: "http://hl7.org/fhir/SearchParameter/Observation-Code", + resourceType: "Observation"); + + var searchParamStatus1 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam1.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + var searchParamStatus2 = new ResourceSearchParameterStatus + { + LastUpdated = DateTime.UtcNow, + Uri = new Uri(searchParam2.Url.OriginalString), + Status = SearchParameterStatus.Supported, + }; + + _searchParameterStatusManager.GetAllSearchParameterStatus(_cancellationToken) + .Returns(new List { searchParamStatus1, searchParamStatus2 }); + + _searchDefinitionManager.AllSearchParameters + .Returns(new List { searchParam1, searchParam2 }); + + _searchDefinitionManager.GetSearchParameters("Observation") + .Returns(new List { searchParam1, searchParam2 }); + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()) + .Returns("hash"); + + var searchResult = CreateSearchResult(resourceCount: 100); + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + Arg.Any(), + Arg.Any(), + Arg.Any()) + .Returns(searchResult); + + var jobInfo = await CreateReindexJobRecord(); + var orchestrator = CreateReindexOrchestratorJob(searchParameterCacheRefreshIntervalSeconds: 0); + + // Act + var result = await orchestrator.ExecuteAsync(jobInfo, _cancellationToken); + var 
jobResult = JsonConvert.DeserializeObject(result); + + // Assert + Assert.NotNull(jobResult); } } } diff --git a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexProcessingJobTests.cs b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexProcessingJobTests.cs index cf7856b928..2eb916d4d4 100644 --- a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexProcessingJobTests.cs +++ b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Operations/Reindex/ReindexProcessingJobTests.cs @@ -125,23 +125,6 @@ private SearchResultEntry CreateSearchResultEntry(string id, string type) null)); } - private SearchResult CreateSearchResult(string continuationToken = null, int resourceCount = 1) - { - var resultList = new List(); - - for (var i = 0; i < resourceCount; i++) - { - var wrapper = Substitute.For(); - var entry = new SearchResultEntry(wrapper); - resultList.Add(entry); - } - - var searchResult = new SearchResult(resultList, continuationToken, null, new List>()); - searchResult.MaxResourceSurrogateId = 1; - searchResult.TotalCount = resultList.Count; - return searchResult; - } - [Fact] public async Task GivenSurrogateIdRange_WhenExecuted_ThenAdditionalQueryAdded() { @@ -260,5 +243,665 @@ public async Task GivenSurrogateIdRange_WhenHashDoesNotMatch_ThenRaiseError() Assert.Contains("Search Parameter hash does not match. 
Resubmit reindex job to try again.", exception.Message); } + + [Fact] + public async Task ExecuteAsync_WithNullJobInfo_ThrowsArgumentNullException() + { + var job = _reindexProcessingJobTaskFactory(); + await Assert.ThrowsAsync( + () => job.ExecuteAsync(null, _cancellationToken)); + } + + [Fact] + public async Task ExecuteAsync_WithValidJobInfo_ReturnsSerializedResult() + { + var expectedResourceType = "Patient"; + var job = new ReindexProcessingJobDefinition() + { + MaximumNumberOfResourcesPerQuery = 100, + MaximumNumberOfResourcesPerWrite = 100, + ResourceType = expectedResourceType, + ResourceCount = new SearchResultReindex() + { + Count = 2, + EndResourceSurrogateId = 100, + StartResourceSurrogateId = 1, + ContinuationToken = null, + }, + ResourceTypeSearchParameterHashMap = "patientHash", + SearchParameterUrls = new List() { "http://hl7.org/fhir/SearchParam/Patient-name" }, + TypeId = (int)JobType.ReindexProcessing, + }; + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(job.ResourceTypeSearchParameterHashMap); + + var jobInfo = new JobInfo() + { + Id = 1, + Definition = JsonConvert.SerializeObject(job), + QueueType = (byte)QueueType.Reindex, + GroupId = 1, + CreateDate = DateTime.UtcNow, + Status = JobStatus.Running, + }; + + var searchResultEntries = Enumerable.Range(1, 2) + .Select(i => CreateSearchResultEntry(i.ToString(), expectedResourceType)) + .ToList(); + + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + false, + Arg.Any(), + true) + .Returns(new SearchResult( + searchResultEntries, + null, + null, + new List>())); + + var result = await _reindexProcessingJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken); + + Assert.NotEmpty(result); + var jobResult = JsonConvert.DeserializeObject(result); + Assert.NotNull(jobResult); + Assert.Equal(2, jobResult.SucceededResourceCount); + } + + [Fact] + public async Task ValidateSearchParametersHash_WithEmptyCurrentHash_ThrowsException() + { + var 
expectedResourceType = "Observation"; + var job = new ReindexProcessingJobDefinition() + { + MaximumNumberOfResourcesPerQuery = 100, + MaximumNumberOfResourcesPerWrite = 100, + ResourceType = expectedResourceType, + ResourceCount = new SearchResultReindex() + { + Count = 1, + EndResourceSurrogateId = 100, + StartResourceSurrogateId = 1, + }, + ResourceTypeSearchParameterHashMap = "observationHash", + SearchParameterUrls = new List() { "http://hl7.org/fhir/SearchParam/Observation-code" }, + TypeId = (int)JobType.ReindexProcessing, + }; + + // Return empty hash to trigger validation error + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(string.Empty); + + var jobInfo = new JobInfo() + { + Id = 1, + Definition = JsonConvert.SerializeObject(job), + QueueType = (byte)QueueType.Reindex, + GroupId = 1, + CreateDate = DateTime.UtcNow, + Status = JobStatus.Running, + }; + + var exception = await Assert.ThrowsAsync( + () => _reindexProcessingJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken)); + + Assert.Contains("hash does not match", exception.Message); + } + + [Fact] + public async Task ValidateSearchParametersHash_WithNullCurrentHash_ThrowsException() + { + var expectedResourceType = "Condition"; + var job = new ReindexProcessingJobDefinition() + { + MaximumNumberOfResourcesPerQuery = 100, + MaximumNumberOfResourcesPerWrite = 100, + ResourceType = expectedResourceType, + ResourceCount = new SearchResultReindex() + { + Count = 1, + EndResourceSurrogateId = 100, + StartResourceSurrogateId = 1, + }, + ResourceTypeSearchParameterHashMap = "conditionHash", + SearchParameterUrls = new List() { "http://hl7.org/fhir/SearchParam/Condition-code" }, + TypeId = (int)JobType.ReindexProcessing, + }; + + // Return null hash to trigger validation error + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns((string)null); + + var jobInfo = new JobInfo() + { + Id = 1, + Definition = 
JsonConvert.SerializeObject(job), + QueueType = (byte)QueueType.Reindex, + GroupId = 1, + CreateDate = DateTime.UtcNow, + Status = JobStatus.Running, + }; + + var exception = await Assert.ThrowsAsync( + () => _reindexProcessingJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken)); + + Assert.Contains("hash does not match", exception.Message); + } + + [Fact] + public async Task ProcessSearchResultsAsync_WithValidResults_UpdatesAllResources() + { + var resourceType = "Patient"; + var batchSize = 2; + var resources = new List() + { + new ResourceWrapper( + "1", + "1", + resourceType, + new RawResource("data1", FhirResourceFormat.Json, isMetaSet: false), + null, + DateTimeOffset.MinValue, + false, + null, + null, + null), + new ResourceWrapper( + "2", + "1", + resourceType, + new RawResource("data2", FhirResourceFormat.Json, isMetaSet: false), + null, + DateTimeOffset.MinValue, + false, + null, + null, + null), + }; + + var searchResults = new List() + { + new SearchResultEntry(resources[0]), + new SearchResultEntry(resources[1]), + }; + + var searchResult = new SearchResult( + searchResults, + null, + null, + new List>()); + + var paramHashMap = new Dictionary + { + { resourceType, "patientHash" }, + }; + + var job = _reindexProcessingJobTaskFactory(); + + await job.ProcessSearchResultsAsync(searchResult, paramHashMap, batchSize, _cancellationToken); + + // Verify that bulk update was called with the correct batch + await _fhirDataStore.Received(1).BulkUpdateSearchParameterIndicesAsync( + Arg.Is>(r => r.Count == 2), + Arg.Any()); + } + + [Fact] + public async Task ProcessSearchResultsAsync_WithZeroBatchSize_SetsDefaultBatchSize() + { + var resourceType = "Observation"; + var resources = new List() + { + new ResourceWrapper( + "1", + "1", + resourceType, + new RawResource("data1", FhirResourceFormat.Json, isMetaSet: false), + null, + DateTimeOffset.MinValue, + false, + null, + null, + null), + }; + + var searchResults = new List() + { + new 
SearchResultEntry(resources[0]), + }; + + var searchResult = new SearchResult( + searchResults, + null, + null, + new List>()); + + var paramHashMap = new Dictionary + { + { resourceType, "observationHash" }, + }; + + var job = _reindexProcessingJobTaskFactory(); + + // Pass zero batch size - should default to 500 + await job.ProcessSearchResultsAsync(searchResult, paramHashMap, 0, _cancellationToken); + + await _fhirDataStore.Received(1).BulkUpdateSearchParameterIndicesAsync( + Arg.Any>(), + Arg.Any()); + } + + [Fact] + public async Task ProcessSearchResultsAsync_WithMultipleBatches_ProcessesInBatches() + { + var resourceType = "Patient"; + var batchSize = 2; + + // Create 5 resources to test batching + var resources = new List(); + for (int i = 1; i <= 5; i++) + { + resources.Add(new ResourceWrapper( + i.ToString(), + "1", + resourceType, + new RawResource($"data{i}", FhirResourceFormat.Json, isMetaSet: false), + null, + DateTimeOffset.MinValue, + false, + null, + null, + null)); + } + + var searchResults = resources.Select(r => new SearchResultEntry(r)).ToList(); + + var searchResult = new SearchResult( + searchResults, + null, + null, + new List>()); + + var paramHashMap = new Dictionary + { + { resourceType, "patientHash" }, + }; + + var job = _reindexProcessingJobTaskFactory(); + + await job.ProcessSearchResultsAsync(searchResult, paramHashMap, batchSize, _cancellationToken); + + // Should be called 3 times: batch 1 (2 resources), batch 2 (2 resources), batch 3 (1 resource) + await _fhirDataStore.Received(3).BulkUpdateSearchParameterIndicesAsync( + Arg.Any>(), + Arg.Any()); + } + + [Fact] + public async Task ProcessSearchResultsAsync_WithMissingHashMap_UsesEmptyString() + { + var resourceType = "Patient"; + var batchSize = 10; + + var resources = new List() + { + new ResourceWrapper( + "1", + "1", + resourceType, + new RawResource("data1", FhirResourceFormat.Json, isMetaSet: false), + null, + DateTimeOffset.MinValue, + false, + null, + null, + null), + }; + + 
var searchResults = new List() + { + new SearchResultEntry(resources[0]), + }; + + var searchResult = new SearchResult( + searchResults, + null, + null, + new List>()); + + // Empty hash map - should use empty string for missing resource types + var paramHashMap = new Dictionary(); + + var job = _reindexProcessingJobTaskFactory(); + + await job.ProcessSearchResultsAsync(searchResult, paramHashMap, batchSize, _cancellationToken); + + await _fhirDataStore.Received(1).BulkUpdateSearchParameterIndicesAsync( + Arg.Is>(r => + r.First().SearchParameterHash == string.Empty), + Arg.Any()); + } + + [Fact] + public async Task ProcessSearchResultsAsync_WithCancellationToken_StopsProcessing() + { + var resourceType = "Patient"; + var batchSize = 10; + + var resources = new List() + { + new ResourceWrapper( + "1", + "1", + resourceType, + new RawResource("data1", FhirResourceFormat.Json, isMetaSet: false), + null, + DateTimeOffset.MinValue, + false, + null, + null, + null), + }; + + var searchResults = new List() + { + new SearchResultEntry(resources[0]), + }; + + var searchResult = new SearchResult( + searchResults, + null, + null, + new List>()); + + var paramHashMap = new Dictionary + { + { resourceType, "patientHash" }, + }; + + var cancellationTokenSource = new CancellationTokenSource(); + cancellationTokenSource.Cancel(); + + var job = _reindexProcessingJobTaskFactory(); + + await job.ProcessSearchResultsAsync(searchResult, paramHashMap, batchSize, cancellationTokenSource.Token); + + // Should not call bulk update when cancelled + await _fhirDataStore.DidNotReceive().BulkUpdateSearchParameterIndicesAsync( + Arg.Any>(), + Arg.Any()); + } + + [Fact] + public async Task ProcessQueryAsync_WithNullSearchResult_ThrowsOperationFailedException() + { + var expectedResourceType = "Patient"; + var job = new ReindexProcessingJobDefinition() + { + MaximumNumberOfResourcesPerQuery = 100, + MaximumNumberOfResourcesPerWrite = 100, + ResourceType = expectedResourceType, + ResourceCount = 
new SearchResultReindex() + { + Count = 1, + EndResourceSurrogateId = 100, + StartResourceSurrogateId = 1, + }, + ResourceTypeSearchParameterHashMap = "patientHash", + SearchParameterUrls = new List() { "http://hl7.org/fhir/SearchParam/Patient-name" }, + TypeId = (int)JobType.ReindexProcessing, + }; + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(job.ResourceTypeSearchParameterHashMap); + + var jobInfo = new JobInfo() + { + Id = 1, + Definition = JsonConvert.SerializeObject(job), + QueueType = (byte)QueueType.Reindex, + GroupId = 1, + CreateDate = DateTime.UtcNow, + Status = JobStatus.Running, + }; + + // Return null search result + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + false, + Arg.Any(), + true) + .Returns((SearchResult)null); + + // When null search result is returned, the job should handle it gracefully and return error in result + var result = await _reindexProcessingJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + Assert.NotNull(jobResult.Error); + Assert.Contains("null search result", jobResult.Error); + } + + [Fact] + public async Task ProcessQueryAsync_WithSearchServiceException_ThrowsReindexException() + { + var expectedResourceType = "Patient"; + var job = new ReindexProcessingJobDefinition() + { + MaximumNumberOfResourcesPerQuery = 100, + MaximumNumberOfResourcesPerWrite = 100, + ResourceType = expectedResourceType, + ResourceCount = new SearchResultReindex() + { + Count = 1, + EndResourceSurrogateId = 100, + StartResourceSurrogateId = 1, + }, + ResourceTypeSearchParameterHashMap = "patientHash", + SearchParameterUrls = new List() { "http://hl7.org/fhir/SearchParam/Patient-name" }, + TypeId = (int)JobType.ReindexProcessing, + }; + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(job.ResourceTypeSearchParameterHashMap); + + var jobInfo = new JobInfo() + { + Id = 1, + Definition 
= JsonConvert.SerializeObject(job), + QueueType = (byte)QueueType.Reindex, + GroupId = 1, + CreateDate = DateTime.UtcNow, + Status = JobStatus.Running, + }; + + // Throw exception from search service + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + false, + Arg.Any(), + true) + .Returns(Task.FromException(new InvalidOperationException("Search service error"))); + + // When search service throws an exception, the job should handle it gracefully and return error in result + var result = await _reindexProcessingJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + Assert.NotNull(jobResult.Error); + Assert.Contains("Error running reindex query", jobResult.Error); + } + + [Fact] + public async Task ProcessQueryAsync_WithGeneralException_CatchesAndSetsError() + { + var expectedResourceType = "Patient"; + var job = new ReindexProcessingJobDefinition() + { + MaximumNumberOfResourcesPerQuery = 100, + MaximumNumberOfResourcesPerWrite = 100, + ResourceType = expectedResourceType, + ResourceCount = new SearchResultReindex() + { + Count = 1, + EndResourceSurrogateId = 100, + StartResourceSurrogateId = 1, + }, + ResourceTypeSearchParameterHashMap = "patientHash", + SearchParameterUrls = new List() { "http://hl7.org/fhir/SearchParam/Patient-name" }, + TypeId = (int)JobType.ReindexProcessing, + }; + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(job.ResourceTypeSearchParameterHashMap); + + var jobInfo = new JobInfo() + { + Id = 1, + Definition = JsonConvert.SerializeObject(job), + QueueType = (byte)QueueType.Reindex, + GroupId = 1, + CreateDate = DateTime.UtcNow, + Status = JobStatus.Running, + }; + + var searchResultEntries = Enumerable.Range(1, 1) + .Select(i => CreateSearchResultEntry(i.ToString(), expectedResourceType)) + .ToList(); + + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + false, + Arg.Any(), + true) + .Returns(new 
SearchResult( + searchResultEntries, + null, + null, + new List>())); + + // Throw general exception from bulk update + _fhirDataStore.BulkUpdateSearchParameterIndicesAsync( + Arg.Any>(), + Arg.Any()) + .Returns(Task.FromException(new InvalidOperationException("General error during bulk update"))); + + var result = await _reindexProcessingJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + Assert.NotNull(jobResult.Error); + Assert.Contains("General error", jobResult.Error); + Assert.Equal(1, jobResult.FailedResourceCount); + } + + [Fact] + public async Task GetResourcesToReindexAsync_WithContinuationToken_IncludesTokenInQuery() + { + var expectedResourceType = "Patient"; + var continuationToken = "test-continuation-token"; + var job = new ReindexProcessingJobDefinition() + { + MaximumNumberOfResourcesPerQuery = 100, + MaximumNumberOfResourcesPerWrite = 100, + ResourceType = expectedResourceType, + ResourceCount = new SearchResultReindex() + { + Count = 5, + EndResourceSurrogateId = 500, + StartResourceSurrogateId = 1, + ContinuationToken = continuationToken, + }, + ResourceTypeSearchParameterHashMap = "patientHash", + SearchParameterUrls = new List() { "http://hl7.org/fhir/SearchParam/Patient-name" }, + TypeId = (int)JobType.ReindexProcessing, + }; + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(job.ResourceTypeSearchParameterHashMap); + + var jobInfo = new JobInfo() + { + Id = 1, + Definition = JsonConvert.SerializeObject(job), + QueueType = (byte)QueueType.Reindex, + GroupId = 1, + CreateDate = DateTime.UtcNow, + Status = JobStatus.Running, + }; + + var searchResultEntries = Enumerable.Range(1, 5) + .Select(i => CreateSearchResultEntry(i.ToString(), expectedResourceType)) + .ToList(); + + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + false, + Arg.Any(), + true) + .Returns(new SearchResult( + searchResultEntries, + null, + null, + new 
List>())); + + var result = await _reindexProcessingJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + Assert.Equal(5, jobResult.SucceededResourceCount); + } + + [Fact] + public async Task GetResourcesToReindexAsync_WithSurrogateIdRange_IncludesRangeInQuery() + { + var expectedResourceType = "Patient"; + var startId = 100L; + var endId = 500L; + var job = new ReindexProcessingJobDefinition() + { + MaximumNumberOfResourcesPerQuery = 100, + MaximumNumberOfResourcesPerWrite = 100, + ResourceType = expectedResourceType, + ResourceCount = new SearchResultReindex() + { + Count = 3, + EndResourceSurrogateId = endId, + StartResourceSurrogateId = startId, + ContinuationToken = null, + }, + ResourceTypeSearchParameterHashMap = "patientHash", + SearchParameterUrls = new List() { "http://hl7.org/fhir/SearchParam/Patient-name" }, + TypeId = (int)JobType.ReindexProcessing, + }; + + _searchParameterOperations.GetResourceTypeSearchParameterHashMap(Arg.Any()).Returns(job.ResourceTypeSearchParameterHashMap); + + var jobInfo = new JobInfo() + { + Id = 1, + Definition = JsonConvert.SerializeObject(job), + QueueType = (byte)QueueType.Reindex, + GroupId = 1, + CreateDate = DateTime.UtcNow, + Status = JobStatus.Running, + }; + + var searchResultEntries = Enumerable.Range(1, 3) + .Select(i => CreateSearchResultEntry(i.ToString(), expectedResourceType)) + .ToList(); + + _searchService.SearchForReindexAsync( + Arg.Any>>(), + Arg.Any(), + false, + Arg.Any(), + true) + .Returns(new SearchResult( + searchResultEntries, + null, + null, + new List>())); + + var result = await _reindexProcessingJobTaskFactory().ExecuteAsync(jobInfo, _cancellationToken); + var jobResult = JsonConvert.DeserializeObject(result); + + Assert.Equal(3, jobResult.SucceededResourceCount); + } } } diff --git a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Resources/Upsert/BulkUpdateServiceTests.cs 
b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Resources/Upsert/BulkUpdateServiceTests.cs index 04a3feaf91..801c5c6928 100644 --- a/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Resources/Upsert/BulkUpdateServiceTests.cs +++ b/src/Microsoft.Health.Fhir.Shared.Core.UnitTests/Features/Resources/Upsert/BulkUpdateServiceTests.cs @@ -19,6 +19,7 @@ using Microsoft.Health.Core.Features.Context; using Microsoft.Health.Core.Features.Security; using Microsoft.Health.Extensions.DependencyInjection; +using Microsoft.Health.Extensions.Xunit; using Microsoft.Health.Fhir.Core.Configs; using Microsoft.Health.Fhir.Core.Exceptions; using Microsoft.Health.Fhir.Core.Extensions; @@ -386,7 +387,7 @@ public async Task UpdateMultipleAsync_WhenSingleMatchAndMultipleIncludePagesWith Assert.True(result.ResourcesIgnored["Observation"] == 1); // Observations ignored as no applicable patch request } - [Theory] + [RetryTheory(MaxRetries = 3)] [InlineData(true)] [InlineData(false)] public async Task UpdateMultipleAsync_WhenSingleMatchAndMoreThanMaxParallelThreadsIncludePagesWithGivenReadNextPage_ResourcesAreUpdated(bool readNextPage) @@ -614,7 +615,7 @@ public async Task UpdateMultipleAsync__WhenMultipleMatchAndMultipleIncludePagesW } } - [Theory] + [RetryTheory] [InlineData(true)] [InlineData(false)] public async Task UpdateMultipleAsync__WhenMoreThanMaxParallelThreadsMatchAndIncludePagesWithGivenReadNextPage_ResourcesAreUpdated(bool readNextPage) diff --git a/src/Microsoft.Health.Fhir.Shared.Web/appsettings.json b/src/Microsoft.Health.Fhir.Shared.Web/appsettings.json index 003ffb8e35..eb1f4d2286 100644 --- a/src/Microsoft.Health.Fhir.Shared.Web/appsettings.json +++ b/src/Microsoft.Health.Fhir.Shared.Web/appsettings.json @@ -70,7 +70,8 @@ "Enabled": true, "ConsecutiveFailuresThreshold": 5, "MaximumNumberOfResourcesPerQuery": 10000, - "ReindexDelayMultiplier": 3 + "ReindexDelayMultiplier": 3, + "MaxRunningTaskCount": 4 }, "ConvertData": { "Enabled": false, diff --git 
a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Microsoft.Health.Fhir.Shared.Tests.E2E.projitems b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Microsoft.Health.Fhir.Shared.Tests.E2E.projitems index 3397ee9c98..71b8da03cd 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Microsoft.Health.Fhir.Shared.Tests.E2E.projitems +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Microsoft.Health.Fhir.Shared.Tests.E2E.projitems @@ -92,6 +92,8 @@ + + @@ -143,6 +145,7 @@ + PreserveNewest diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/BulkUpdateTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/BulkUpdateTests.cs index 776a8ce74e..52426e0492 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/BulkUpdateTests.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/BulkUpdateTests.cs @@ -152,12 +152,13 @@ public async Task GivenBulkUpdateRequestOnSystemLevel_WhenCompleted_ThenExcluded expectedResults.ResourcesUpdated.Add("Patient", 2); expectedResults.ResourcesUpdated.Add("Location", 1); expectedResults.ResourcesUpdated.Add("Organization", 1); - expectedResults.ResourcesIgnored.Add("StructureDefinition", 2); - expectedResults.ResourcesIgnored.Add("SearchParameter", 2); + expectedResults.ResourcesIgnored.Add("StructureDefinition", 4); // Changed: use 4 StructureDefinitions instead of 2 + 2 SearchParameters var tag = new Coding(string.Empty, Guid.NewGuid().ToString()); // Create resources of different types with the same tag + // Use StructureDefinition resources which are excluded from bulk update but don't have + // side effects like SearchParameter (which pollutes the SearchParam table and causes test conflicts) var structureDefinition = Samples.GetJsonSample("StructureDefinition-us-core-birthsex"); structureDefinition.Meta = new Meta(); structureDefinition.Meta.Tag.Add(tag); @@ -168,24 +169,15 @@ public async Task GivenBulkUpdateRequestOnSystemLevel_WhenCompleted_ThenExcluded structureDefinition.Meta.Tag.Add(tag); await 
_fhirClient.CreateAsync(structureDefinition); - var randomName = Guid.NewGuid().ToString().ComputeHash()[28..].ToLower(); - var searchParam = Samples.GetJsonSample("SearchParameter-Patient-foo"); - searchParam.Meta = new Meta(); - searchParam.Meta.Tag.Add(tag); - searchParam.Name = randomName; - searchParam.Url = searchParam.Url.Replace("foo", randomName); - searchParam.Code = randomName; - searchParam.Id = randomName; - await _fhirClient.CreateAsync(searchParam); - - searchParam = Samples.GetJsonSample("SearchParameter-SpecimenStatus"); - searchParam.Meta = new Meta(); - searchParam.Meta.Tag.Add(tag); - searchParam.Name = randomName; - searchParam.Url = searchParam.Url.Replace("foo", randomName); - searchParam.Code = randomName; - searchParam.Id = randomName; - await _fhirClient.CreateAsync(searchParam); + structureDefinition = Samples.GetJsonSample("StructureDefinition-us-core-race"); + structureDefinition.Meta = new Meta(); + structureDefinition.Meta.Tag.Add(tag); + await _fhirClient.CreateAsync(structureDefinition); + + structureDefinition = Samples.GetJsonSample("StructureDefinition-us-core-careplan"); + structureDefinition.Meta = new Meta(); + structureDefinition.Meta.Tag.Add(tag); + await _fhirClient.CreateAsync(structureDefinition); // Create resources of different types with the same tag await _fhirClient.CreateResourcesAsync(2, tag.Code); diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/BundleTransactionTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/BundleTransactionTests.cs index 0b3488e3ae..65077c7186 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/BundleTransactionTests.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/BundleTransactionTests.cs @@ -9,6 +9,7 @@ using System.Net; using Hl7.Fhir.Model; using Hl7.Fhir.Utility; +using Microsoft.Health.Extensions.Xunit; using Microsoft.Health.Fhir.Client; using Microsoft.Health.Fhir.Core.Extensions; using Microsoft.Health.Fhir.Core.Features.Resources; @@ -102,7 
+103,7 @@ public async Task GivenABundleWithInvalidRoutes_WhenSubmittingATransaction_ThenB ValidateOperationOutcome(expectedDiagnostics, expectedCodeType, fhirException.OperationOutcome); } - [Theory] + [RetryTheory] [Trait(Traits.Priority, Priority.One)] [InlineData(FhirBundleProcessingLogic.Parallel)] [InlineData(FhirBundleProcessingLogic.Sequential)] diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/HistoryTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/HistoryTests.cs index 768389f519..caa67fbb10 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/HistoryTests.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/HistoryTests.cs @@ -14,6 +14,7 @@ using System.Web; using Hl7.Fhir.Model; using Hl7.Fhir.Serialization; +using Microsoft.Health.Extensions.Xunit; using Microsoft.Health.Fhir.Client; using Microsoft.Health.Fhir.Core.Extensions; using Microsoft.Health.Fhir.Tests.Common; @@ -93,7 +94,7 @@ public async Task GivenATypeAndId_WhenGettingResourceHistory_TheServerShouldRetu userMessage: $"Record 0's latest update ({readResponse[0].Resource.Meta.LastUpdated}) is not greater or equal than Record's 1 latest update ({readResponse[1].Resource.Meta.LastUpdated})."); } - [Fact] + [RetryFact] [Trait(Traits.Priority, Priority.One)] public async Task GivenTestResourcesWithUpdatesAndDeletes_WhenGettingResourceHistoryCount_TheServerShouldReturnCorrectCount() { diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Reindex/ReindexTestFixture.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Reindex/ReindexTestFixture.cs new file mode 100644 index 0000000000..c64989b557 --- /dev/null +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Reindex/ReindexTestFixture.cs @@ -0,0 +1,80 @@ +// ------------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License (MIT). 
See LICENSE in the repo root for license information. +// ------------------------------------------------------------------------------------------------- + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Threading.Tasks; +using Hl7.Fhir.Model; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Microsoft.Health.Fhir.Client; +using Microsoft.Health.Fhir.Core.Configs; +using Microsoft.Health.Fhir.Tests.Common.FixtureParameters; +using Microsoft.Health.Fhir.Tests.E2E.Rest; +using Microsoft.Health.JobManagement; +using Task = System.Threading.Tasks.Task; + +namespace Microsoft.Health.Fhir.Tests.E2E.Rest.Reindex +{ + /// + /// Extended HTTP integration test fixture that overrides the reindex delay multiplier for faster E2E tests. + /// Uses the minimum sync interval between instances to optimize test execution time while maintaining + /// consistency with production behavior. + /// + public class ReindexTestFixture : HttpIntegrationTestFixture + { + public ReindexTestFixture(DataStore dataStore, Format format, TestFhirServerFactory testFhirServerFactory) + : base(dataStore, format, testFhirServerFactory) + { + } + + /// + /// Override configuration settings for faster reindex testing. + /// Sets the ReindexDelayMultiplier to 1 (minimum) to reduce delay while maintaining + /// the SearchParameterCacheRefreshIntervalSeconds for consistency with production behavior. + /// Also initializes the test fixture by cleaning up test data resources. 
+ /// + protected override async Task OnInitializedAsync() + { + await base.OnInitializedAsync(); + + // Override the reindex delay configuration if the server is in-process + if (IsUsingInProcTestServer && TestFhirServer is InProcTestFhirServer inProcServer) + { + var serviceProvider = inProcServer.Server.Services; + + try + { + // Update OperationsConfiguration for ReindexDelayMultiplier + // Setting to 1 uses the minimum delay while maintaining the sync interval + var operationsOptions = serviceProvider.GetRequiredService>(); + var coreConfigurations = serviceProvider.GetRequiredService>(); + if (operationsOptions?.Value?.Reindex != null) + { + operationsOptions.Value.Reindex.ReindexDelayMultiplier = 1; + coreConfigurations.Value.SearchParameterCacheRefreshIntervalSeconds = 2; + } + + // Override job hosting polling frequency for faster dequeuing + var jobHosting = serviceProvider.GetService(); + if (jobHosting != null) + { + jobHosting.PollingFrequencyInSeconds = 2; + } + } + catch (Exception ex) + { + var logger = serviceProvider.GetService>(); + logger?.LogWarning(ex, "Failed to configure reindex delay multiplier, using default values"); + + // Continue with default configuration if override fails + } + } + } + } +} diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Reindex/ReindexTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Reindex/ReindexTests.cs new file mode 100644 index 0000000000..ed0f8524c7 --- /dev/null +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Reindex/ReindexTests.cs @@ -0,0 +1,1299 @@ +// ------------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License (MIT). See LICENSE in the repo root for license information. 
// -------------------------------------------------------------------------------------------------

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Net;
using System.Threading;
using System.Threading.Tasks;
using Hl7.Fhir.Model;
using Microsoft.Health.Extensions.Xunit;
using Microsoft.Health.Fhir.Client;
using Microsoft.Health.Fhir.Core.Features.Operations;
using Microsoft.Health.Fhir.Tests.Common;
using Microsoft.Health.Fhir.Tests.Common.FixtureParameters;
using Microsoft.Health.Test.Utilities;
using Xunit;
using Task = System.Threading.Tasks.Task;

namespace Microsoft.Health.Fhir.Tests.E2E.Rest.Reindex
{
    /// <summary>
    /// End-to-end tests for the $reindex operation. The collection disables parallelization
    /// because reindex jobs operate on server-global search parameter state.
    /// </summary>
    [CollectionDefinition(Categories.IndexAndReindex, DisableParallelization = true)]
    [Trait(Traits.OwningTeam, OwningTeam.Fhir)]
    [Trait(Traits.Category, Categories.IndexAndReindex)]
    [HttpIntegrationFixtureArgumentSets(DataStore.All, Format.Json)]
    public class ReindexTests : IClassFixture<ReindexTestFixture>
    {
        private readonly ReindexTestFixture _fixture;

        public ReindexTests(ReindexTestFixture fixture)
        {
            _fixture = fixture;
        }

        [RetryFact(MaxRetries = 3, DelayBetweenRetriesMs = 30000, RetryOnAssertionFailure = true)]
        public async Task GivenReindexJobWithMixedZeroAndNonZeroCountResources_WhenReindexCompletes_ThenSearchParametersShouldWork()
        {
            // Cancel any running reindex jobs before starting this test
            await CancelAnyRunningReindexJobsAsync();

            // Scenario 1: Test that search parameter status is only updated when ALL jobs complete successfully
            // This tests both zero-count and non-zero-count resource scenarios with multiple resource types
            var mixedBaseSearchParam = new SearchParameter();
            var personSearchParam = new SearchParameter();
            var randomSuffix = Guid.NewGuid().ToString("N").Substring(0, 8);
            var testResources = new List<(string resourceType, string resourceId)>();
            var supplyDeliveryCount = 2000;
            var personCount = 1000;
            (FhirResponse<Parameters> response, Uri jobUri) value = default;

            try
            {
                // Set up test data using the common setup method
                System.Diagnostics.Debug.WriteLine($"Setting up test data for SupplyDelivery and Person resources...");

                // Setup Persons first, then SupplyDeliveries sequentially (not in parallel)
                // This ensures Persons are fully created before SupplyDeliveries start
                var (personResources, finalPersonCount) = await SetupTestDataAsync("Person", personCount, randomSuffix, CreatePersonResourceAsync);
                testResources.AddRange(personResources);

                // CRITICAL: Verify we got what we expected
                Assert.True(
                    finalPersonCount >= personCount,
                    $"Failed to create sufficient Person resources. Expected: {personCount}, Got: {finalPersonCount}");

                var (supplyDeliveryResources, finalSupplyDeliveryCount) = await SetupTestDataAsync("SupplyDelivery", supplyDeliveryCount, randomSuffix, CreateSupplyDeliveryResourceAsync);
                testResources.AddRange(supplyDeliveryResources);

                // CRITICAL: Verify we got what we expected
                Assert.True(
                    finalSupplyDeliveryCount >= supplyDeliveryCount,
                    $"Failed to create sufficient SupplyDelivery resources. Expected: {supplyDeliveryCount}, Got: {finalSupplyDeliveryCount}");

                System.Diagnostics.Debug.WriteLine($"Test data setup complete - SupplyDelivery: {finalSupplyDeliveryCount}, Person: {finalPersonCount}");

                // Create a single search parameter that applies to BOTH SupplyDelivery and Immunization
                // This allows us to test the scenario where one resource type has data and another has none
                mixedBaseSearchParam = await CreateCustomSearchParameterAsync(
                    $"custom-mixed-base-{randomSuffix}",
                    ["SupplyDelivery", "Immunization"], // Applies to both resource types
                    "SupplyDelivery.status", // Valid for SupplyDelivery
                    SearchParamType.Token);
                Assert.NotNull(mixedBaseSearchParam);

                // Create a separate search parameter specifically for Person resources
                // This will create a separate reindex job for Person resources
                personSearchParam = await CreateCustomSearchParameterAsync(
                    $"custom-person-name-{randomSuffix}",
                    ["Person"], // Applies only to Person
                    "Person.name.given", // Valid for Person name
                    SearchParamType.String);
                Assert.NotNull(personSearchParam);

                // Create reindex job targeting both search parameters
                // This will trigger multiple reindex jobs (one for SupplyDelivery/Immunization, one for Person)
                var parameters = new Parameters
                {
                    Parameter = new List<Parameters.ParameterComponent>
                    {
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerQuery",
                            Value = new Integer(500),
                        },
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerWrite",
                            Value = new Integer(500),
                        },
                    },
                };

                value = await _fixture.TestFhirClient.PostReindexJobAsync(parameters);

                Assert.Equal(HttpStatusCode.Created, value.response.Response.StatusCode);
                Assert.NotNull(value.jobUri);

                // Wait for job to complete (this will wait for all sub-jobs to complete)
                var jobStatus = await WaitForJobCompletionAsync(value.jobUri, TimeSpan.FromSeconds(300));
                Assert.True(
                    jobStatus == OperationStatus.Completed,
                    $"Expected Completed, got {jobStatus}");

                await Task.Delay(TimeSpan.FromMinutes(1));

                // Verify search parameter is working for SupplyDelivery (which has data)
                // Use the ACTUAL count we got, not the desired count
                await VerifySearchParameterIsWorkingAsync(
                    $"SupplyDelivery?{mixedBaseSearchParam.Code}=in-progress",
                    mixedBaseSearchParam.Code,
                    expectedResourceType: "SupplyDelivery",
                    shouldFindRecords: true);

                // Verify search parameter is working for Immunization (which has no data)
                // We expect no immunization records to be returned (empty result set)
                await VerifySearchParameterIsWorkingAsync(
                    $"Immunization?{mixedBaseSearchParam.Code}=207",
                    mixedBaseSearchParam.Code,
                    expectedResourceType: "Immunization",
                    shouldFindRecords: false);

                // Verify search parameter is working for Person (which has data)
                await VerifySearchParameterIsWorkingAsync(
                    $"Person?{personSearchParam.Code}=Test",
                    personSearchParam.Code,
                    expectedResourceType: "Person",
                    shouldFindRecords: true);
            }
            finally
            {
                // Cleanup all test data including resources and search parameters
                System.Diagnostics.Debug.WriteLine($"Starting cleanup of {testResources.Count} test resources...");
                await CleanupTestDataAsync(testResources, mixedBaseSearchParam, personSearchParam);
                System.Diagnostics.Debug.WriteLine("Cleanup completed");
            }
        }

        [RetryFact(MaxRetries = 3, DelayBetweenRetriesMs = 30000, RetryOnAssertionFailure = true)]
        public async Task GivenReindexJobWithResourceAndAddedAfterSingleCustomSearchParameterAndBeforeReindex_WhenReindexCompletes_ThenSearchParameterShouldWork()
        {
            // Cancel any running reindex jobs before starting this test
            await CancelAnyRunningReindexJobsAsync();

            // Scenario 2: single custom search parameter created before the resource and before reindex.
            var randomSuffix = Guid.NewGuid().ToString("N").Substring(0, 8);
            var specimenSearchParam = new SearchParameter();
            // NOTE(review): immunizationSearchParam is never created in this test; it is passed to
            // cleanup only, where an empty Id causes it to be skipped. Kept for symmetry with the
            // multi-parameter variant below.
            var immunizationSearchParam = new SearchParameter();
            var testResources = new List<(string resourceType, string resourceId)>();
            (FhirResponse<Parameters> response, Uri jobUri) value = default;

            try
            {
                // Create custom search parameter with a valid FHIRPath expression
                specimenSearchParam = await CreateCustomSearchParameterAsync($"custom-parameter-before-specimen-{randomSuffix}", ["Specimen"], "Specimen.type", SearchParamType.Token);

                // Create test resources that will be indexed using SetupTestDataAsync
                var (specimenResources, finalSpecimenCount) = await SetupTestDataAsync("Specimen", 1, randomSuffix, CreateSpecimenResourceAsync);
                testResources.AddRange(specimenResources);

                // Create reindex job
                var parameters = new Parameters
                {
                    Parameter = new List<Parameters.ParameterComponent>
                    {
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerQuery",
                            Value = new Integer(10000),
                        },
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerWrite",
                            Value = new Integer(1000),
                        },
                    },
                };

                value = await _fixture.TestFhirClient.PostReindexJobAsync(parameters);

                Assert.Equal(HttpStatusCode.Created, value.response.Response.StatusCode);
                Assert.NotNull(value.jobUri);

                // Wait for job to complete
                var jobStatus = await WaitForJobCompletionAsync(value.jobUri, TimeSpan.FromSeconds(300));
                Assert.True(
                    jobStatus == OperationStatus.Completed,
                    $"Expected Completed, got {jobStatus}");

                // The valid search parameter should still be usable
                await VerifySearchParameterIsWorkingAsync(
                    $"Specimen?{specimenSearchParam.Code}=119295008",
                    specimenSearchParam.Code,
                    "Specimen",
                    true);
            }
            finally
            {
                // Cleanup all test data including resources and search parameters
                await CleanupTestDataAsync(testResources, specimenSearchParam, immunizationSearchParam);
            }
        }

        [RetryFact(MaxRetries = 3, DelayBetweenRetriesMs = 30000, RetryOnAssertionFailure = true)]
        public async Task GivenReindexJobWithResourceAndAddedAfterMultiCustomSearchParameterAndBeforeReindex_WhenReindexCompletes_ThenSearchParametersShouldWork()
        {
            // Cancel any running reindex jobs before starting this test
            await CancelAnyRunningReindexJobsAsync();

            // Scenario 2 (multi-parameter variant): two custom search parameters created before reindex.
            var randomSuffix = Guid.NewGuid().ToString("N").Substring(0, 8);
            var specimenSearchParam = new SearchParameter();
            var immunizationSearchParam = new SearchParameter();
            var testResources = new List<(string resourceType, string resourceId)>();
            (FhirResponse<Parameters> response, Uri jobUri) value = default;

            try
            {
                // Create custom search parameters with valid FHIRPath expressions
                specimenSearchParam = await CreateCustomSearchParameterAsync($"custom-parameter-before-specimen-{randomSuffix}", ["Specimen"], "Specimen.type", SearchParamType.Token);
                immunizationSearchParam = await CreateCustomSearchParameterAsync($"custom-parameter-before-imm-{randomSuffix}", ["Immunization"], "Immunization.vaccineCode", SearchParamType.Token);

                // Create test resources that will be indexed using SetupTestDataAsync
                var (specimenResources, finalSpecimenCount) = await SetupTestDataAsync("Specimen", 1, randomSuffix, CreateSpecimenResourceAsync);
                testResources.AddRange(specimenResources);

                // Create reindex job targeting both search parameters
                var parameters = new Parameters
                {
                    Parameter = new List<Parameters.ParameterComponent>
                    {
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerQuery",
                            Value = new Integer(10000),
                        },
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerWrite",
                            Value = new Integer(1000),
                        },
                    },
                };

                value = await _fixture.TestFhirClient.PostReindexJobAsync(parameters);

                Assert.Equal(HttpStatusCode.Created, value.response.Response.StatusCode);
                Assert.NotNull(value.jobUri);

                // Wait for job to complete
                var jobStatus = await WaitForJobCompletionAsync(value.jobUri, TimeSpan.FromSeconds(300));
                Assert.True(
                    jobStatus == OperationStatus.Completed,
                    $"Expected Completed, got {jobStatus}");

                // No explicit delay needed here: VerifySearchParameterIsWorkingAsync retries
                // internally while the search parameter cache refreshes.

                // The valid search parameter should still be usable
                await VerifySearchParameterIsWorkingAsync(
                    $"Specimen?{specimenSearchParam.Code}=119295008",
                    specimenSearchParam.Code,
                    "Specimen",
                    true);
            }
            finally
            {
                // Cleanup all test data including resources and search parameters
                await CleanupTestDataAsync(testResources, specimenSearchParam, immunizationSearchParam);
            }
        }

        [RetryFact(MaxRetries = 3, RetryOnAssertionFailure = true)]
        public async Task GivenReindexWithCaseVariantSearchParameterUrls_WhenBothHaveSameStatus_ThenBothShouldBeProcessedCorrectly()
        {
            // Cancel any running reindex jobs before starting this test
            await CancelAnyRunningReindexJobsAsync();

            // Scenario 3a: Case variant search parameter URLs with same status (Supported, Supported)
            // Both should be treated as separate entries and processed correctly
            var randomSuffix = Guid.NewGuid().ToString("N").Substring(0, 8);
            var lowerCaseParam = new SearchParameter();
            var upperCaseParam = new SearchParameter();
            var testResources = new List<(string resourceType, string resourceId)>();
            (FhirResponse<Parameters> response, Uri jobUri) value = default;

            try
            {
                // Create test resource using SetupTestDataAsync
                var (specimenResources, finalSpecimenCount) = await SetupTestDataAsync("Specimen", 1, randomSuffix, CreateSpecimenResourceAsync);
                testResources.AddRange(specimenResources);

                // Create search parameters whose codes differ only by case
                lowerCaseParam = await CreateCustomSearchParameterAsync($"custom-case-sensitive-{randomSuffix}", ["Specimen"], "Specimen.type", SearchParamType.Token);
                upperCaseParam = await CreateCustomSearchParameterAsync($"custom-case-Sensitive-{randomSuffix}", ["Specimen"], "Specimen.status", SearchParamType.Token);
                Assert.NotNull(lowerCaseParam);
                Assert.NotNull(upperCaseParam);

                // Create reindex job targeting both case-variant search parameters
                var parameters = new Parameters
                {
                    Parameter = new List<Parameters.ParameterComponent>
                    {
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerQuery",
                            Value = new Integer(10000),
                        },
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerWrite",
                            Value = new Integer(1000),
                        },
                    },
                };

                value = await _fixture.TestFhirClient.PostReindexJobAsync(parameters);

                Assert.Equal(HttpStatusCode.Created, value.response.Response.StatusCode);
                Assert.NotNull(value.jobUri);

                // Wait for job completion
                var jobStatus = await WaitForJobCompletionAsync(value.jobUri, TimeSpan.FromSeconds(300));
                Assert.True(
                    jobStatus == OperationStatus.Completed,
                    $"Expected Completed, got {jobStatus}");

                // Verify both search parameters are working after reindex
                await VerifySearchParameterIsWorkingAsync(
                    $"Specimen?{lowerCaseParam.Code}=119295008",
                    lowerCaseParam.Code);
                await VerifySearchParameterIsWorkingAsync(
                    $"Specimen?{upperCaseParam.Code}=available",
                    upperCaseParam.Code);
            }
            finally
            {
                // Cleanup all test data including resources and search parameters
                await CleanupTestDataAsync(testResources, lowerCaseParam, upperCaseParam);
            }
        }
        [RetryFact(MaxRetries = 3, DelayBetweenRetriesMs = 30000, RetryOnAssertionFailure = true)]
        public async Task GivenReindexWithCaseVariantSearchParameterUrls_WhenHavingDifferentStatuses_ThenBothSearchParametersShouldWork()
        {
            // Cancel any running reindex jobs before starting this test
            await CancelAnyRunningReindexJobsAsync();

            // Scenario 3b: Case variant search parameter URLs with different statuses
            // Verify both are set to the correct status when all jobs complete
            var randomSuffix = Guid.NewGuid().ToString("N").Substring(0, 8);
            var specimenTypeParam = new SearchParameter();
            var specimenStatusParam = new SearchParameter();
            var testResources = new List<(string resourceType, string resourceId)>();
            (FhirResponse<Parameters> response, Uri jobUri) value = default;

            try
            {
                // Create test resource using SetupTestDataAsync
                var (specimenResources, finalSpecimenCount) = await SetupTestDataAsync("Specimen", 1, randomSuffix, CreateSpecimenResourceAsync);
                testResources.AddRange(specimenResources);

                // Create custom search parameters with different expressions and unique codes
                specimenTypeParam = await CreateCustomSearchParameterAsync($"custom-diff-type-{randomSuffix}", ["Specimen"], "Specimen.type", SearchParamType.Token);
                specimenStatusParam = await CreateCustomSearchParameterAsync($"custom-diff-status-{randomSuffix}", ["Specimen"], "Specimen.status", SearchParamType.Token);
                Assert.NotNull(specimenTypeParam);
                Assert.NotNull(specimenStatusParam);

                // Create reindex job
                var parameters = new Parameters
                {
                    Parameter = new List<Parameters.ParameterComponent>
                    {
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerQuery",
                            Value = new Integer(10000),
                        },
                        new Parameters.ParameterComponent
                        {
                            Name = "maximumNumberOfResourcesPerWrite",
                            Value = new Integer(1000),
                        },
                    },
                };

                value = await _fixture.TestFhirClient.PostReindexJobAsync(parameters);

                Assert.Equal(HttpStatusCode.Created, value.response.Response.StatusCode);
                Assert.NotNull(value.jobUri);

                // Wait for job completion
                var jobStatus = await WaitForJobCompletionAsync(value.jobUri, TimeSpan.FromSeconds(300));
                Assert.True(
                    jobStatus == OperationStatus.Completed,
                    $"Expected Completed, got {jobStatus}");

                await Task.Delay(5000); // Wait 5 seconds for cache refresh

                // Verify both search parameters are working after reindex
                await VerifySearchParameterIsWorkingAsync(
                    $"Specimen?{specimenTypeParam.Code}=119295008",
                    specimenTypeParam.Code);
                await VerifySearchParameterIsWorkingAsync(
                    $"Specimen?{specimenStatusParam.Code}=available",
                    specimenStatusParam.Code);
            }
            finally
            {
                // Cleanup all test data including resources and search parameters
                await CleanupTestDataAsync(testResources, specimenTypeParam, specimenStatusParam);
            }
        }
data including resources and search parameters + await CleanupTestDataAsync(testResources, specimenTypeParam, specimenStatusParam); + } + } + + [RetryFact(MaxRetries = 3, DelayBetweenRetriesMs = 30000, RetryOnAssertionFailure = true)] + public async Task GivenSearchParameterAddedAndReindexed_WhenSearchParameterIsDeleted_ThenAfterReindexSearchParameterShouldNotBeSupported() + { + // Cancel any running reindex jobs before starting this test + await CancelAnyRunningReindexJobsAsync(); + + // Comprehensive lifecycle test: + // 1. Create a Specimen record with specific data + // 2. Add a custom search parameter + // 3. Reindex to index the new search parameter + // 4. Verify the search parameter works and returns the Specimen + // 5. Delete the search parameter + // 6. Reindex to remove the search parameter from the index + // 7. Verify the search parameter is no longer supported (returns not-supported error) + var randomSuffix = Guid.NewGuid().ToString("N").Substring(0, 8); + var searchParam = new SearchParameter(); + var specimenId = string.Empty; + var testResources = new List<(string resourceType, string resourceId)>(); + (FhirResponse response, Uri jobUri) reindexRequest1 = default; + (FhirResponse response, Uri jobUri) reindexRequest2 = default; + + try + { + // Step 1: Create a Specimen with specific data that matches our search parameter expression + var specimenType = $"LifecycleTest{randomSuffix}"; + var (specimenResources, finalSpecimenCount) = await SetupTestDataAsync("Specimen", 1, randomSuffix, CreateSpecimenResourceAsync); + testResources.AddRange(specimenResources); + + if (specimenResources.Count > 0) + { + specimenId = specimenResources[0].resourceId; + System.Diagnostics.Debug.WriteLine($"Created specimen with ID {specimenId} and type {specimenType}"); + } + + // Step 2: Create a custom search parameter for Specimen.type + searchParam = await CreateCustomSearchParameterAsync( + $"custom-lifecycle-{randomSuffix}", + ["Specimen"], + "Specimen.type", + 
SearchParamType.Token); + Assert.NotNull(searchParam); + System.Diagnostics.Debug.WriteLine($"Created search parameter with code {searchParam.Code} and ID {searchParam.Id}"); + + // Step 3: Reindex to index the newly created search parameter + var reindexParams = new Parameters + { + Parameter = new List + { + new Parameters.ParameterComponent + { + Name = "maximumNumberOfResourcesPerQuery", + Value = new Integer(10000), + }, + new Parameters.ParameterComponent + { + Name = "maximumNumberOfResourcesPerWrite", + Value = new Integer(1000), + }, + }, + }; + + reindexRequest1 = await _fixture.TestFhirClient.PostReindexJobAsync(reindexParams); + Assert.Equal(HttpStatusCode.Created, reindexRequest1.response.Response.StatusCode); + Assert.NotNull(reindexRequest1.jobUri); + System.Diagnostics.Debug.WriteLine("Started first reindex job to index the new search parameter"); + + var jobStatus1 = await WaitForJobCompletionAsync(reindexRequest1.jobUri, TimeSpan.FromSeconds(240)); + Assert.True( + jobStatus1 == OperationStatus.Completed, + $"First reindex job should complete successfully, but got {jobStatus1}"); + System.Diagnostics.Debug.WriteLine("First reindex job completed successfully"); + + // Step 4: Verify the search parameter works by searching for the specimen + var searchQuery = $"Specimen?{searchParam.Code}=119295008"; + await VerifySearchParameterIsWorkingAsync( + searchQuery, + searchParam.Code, + expectedResourceType: "Specimen", + shouldFindRecords: true); + + // Step 5: Delete the search parameter + await _fixture.TestFhirClient.DeleteAsync($"SearchParameter/{searchParam.Id}"); + System.Diagnostics.Debug.WriteLine($"Deleted search parameter {searchParam.Code} (ID: {searchParam.Id})"); + + // Step 6: Reindex to remove the search parameter from the index + reindexRequest2 = await _fixture.TestFhirClient.PostReindexJobAsync(reindexParams); + Assert.Equal(HttpStatusCode.Created, reindexRequest2.response.Response.StatusCode); + Assert.NotNull(reindexRequest2.jobUri); 
+ System.Diagnostics.Debug.WriteLine("Started second reindex job to remove the deleted search parameter"); + + var jobStatus2 = await WaitForJobCompletionAsync(reindexRequest2.jobUri, TimeSpan.FromSeconds(240)); + Assert.True( + jobStatus2 == OperationStatus.Completed, + $"Second reindex job should complete successfully, but got {jobStatus2}"); + System.Diagnostics.Debug.WriteLine("Second reindex job completed successfully"); + + // Step 7: Verify the search parameter is no longer supported + var postDeleteSearchResponse = await _fixture.TestFhirClient.SearchAsync(searchQuery); + Assert.NotNull(postDeleteSearchResponse); + System.Diagnostics.Debug.WriteLine($"Executed search query after deletion: {searchQuery}"); + + // Verify that a "NotSupported" error is now present + var hasNotSupportedErrorAfterDelete = HasNotSupportedError(postDeleteSearchResponse.Resource); + + Assert.True( + hasNotSupportedErrorAfterDelete, + $"Search parameter {searchParam.Code} should NOT be supported after deletion and reindex. 
Got 'NotSupported' error in response."); + System.Diagnostics.Debug.WriteLine($"Search parameter {searchParam.Code} correctly returns 'NotSupported' error after deletion"); + } + finally + { + // Cleanup any remaining resources + await CleanupTestDataAsync(testResources, searchParam); + } + } + + private async Task CreatePersonResourceAsync(string id, string name) + { + var person = new Person + { + Id = id, + Name = new List + { + new HumanName { Given = new[] { name } }, + }, + }; + + // Return the person object without posting - will be posted in parallel batches + return await Task.FromResult(person); + } + + /// + /// Helper method to create and post a resource in a single operation + /// Returns a tuple indicating success and the resource/ID + /// + private async Task<(bool success, T resource, string id)> CreateAndPostResourceWithStatusAsync(string id, string name, Func> createResourceFunc) + where T : Resource + { + try + { + var resource = await createResourceFunc(id, name); + + // Post the resource using the client's CreateAsync method + var response = await _fixture.TestFhirClient.CreateAsync(resource); + + if (response?.Resource != null && !string.IsNullOrEmpty(response.Resource.Id)) + { + System.Diagnostics.Debug.WriteLine($"Successfully created {typeof(T).Name}/{response.Resource.Id}"); + return (true, response.Resource, response.Resource.Id); + } + + System.Diagnostics.Debug.WriteLine($"Failed to create resource {id}: Response was null or had no ID"); + return (false, resource, id); + } + catch (Exception ex) + { + System.Diagnostics.Debug.WriteLine($"Failed to create resource {id}: {ex.Message}"); + return (false, null, id); + } + } + + /// + /// Helper method to create and post a resource in a single operation + /// + private async Task CreateAndPostResourceAsync(string id, string name, Func> createResourceFunc) + where T : Resource + { + var resource = await createResourceFunc(id, name); + + try + { + // Post the resource using the client's 
        /// <summary>
        /// Creates and posts a custom SearchParameter resource for the given base resource types.
        /// Resource type names that fail to parse into the SDK enum are silently skipped.
        /// Throws if the server rejects the create.
        /// </summary>
        private async Task<SearchParameter> CreateCustomSearchParameterAsync(string code, string[] baseResourceTypes, string expression, SearchParamType searchParamType = SearchParamType.String)
        {
            // NOTE(review): generic type arguments below were reconstructed from a mangled source —
            // R5 SDKs use VersionIndependentResourceTypesAll for SearchParameter.Base, earlier
            // versions use ResourceType; confirm against the Hl7.Fhir SDK version in use.
#if R5
            var baseResourceTypeList = new List<VersionIndependentResourceTypesAll?>();
#else
            var baseResourceTypeList = new List<ResourceType?>();
#endif

            if (baseResourceTypes != null && baseResourceTypes.Length > 0)
            {
                foreach (var resourceType in baseResourceTypes)
                {
#if R5
                    if (Enum.TryParse<VersionIndependentResourceTypesAll>(resourceType, ignoreCase: true, out var parsedType))
#else
                    if (Enum.TryParse<ResourceType>(resourceType, ignoreCase: true, out var parsedType))
#endif
                    {
                        baseResourceTypeList.Add(parsedType);
                    }
                }
            }

            var searchParam = new SearchParameter
            {
                Url = $"http://example.org/fhir/SearchParameter/{code}",
                Name = code,
                Status = PublicationStatus.Active,
                Code = code,
                Type = searchParamType,
                Expression = expression,
                Description = new Markdown($"Custom search parameter for {string.Join(", ", baseResourceTypes ?? Array.Empty<string>())}"),
                Version = "1.0.0",
                Base = baseResourceTypeList,
            };

            try
            {
                var result = await _fixture.TestFhirClient.CreateAsync(searchParam);
                return result;
            }
            catch (Exception ex)
            {
                // Log the exception for debugging
                System.Diagnostics.Debug.WriteLine($"Failed to create search parameter: {ex.Message}");
                throw;
            }
        }

        /// <summary>
        /// Creates and posts an Observation referencing the given Person.
        /// On failure the un-posted Observation is returned (errors are swallowed deliberately).
        /// </summary>
        private async Task<Observation> CreateObservationResourceAsync(string id, string personId)
        {
            var observation = new Observation
            {
                Id = id,
                Status = ObservationStatus.Final,
                Code = new CodeableConcept("http://loinc.org", "1234-5"),
                Subject = new ResourceReference($"Person/{personId}"),
            };

            try
            {
                var result = await _fixture.TestFhirClient.CreateAsync(observation);
                return result;
            }
            catch
            {
                return observation;
            }
        }

        /// <summary>
        /// Builds an unsaved Specimen resource; callers post it themselves.
        /// The <paramref name="name"/> parameter is unused — kept so the signature matches the
        /// Func&lt;string, string, Task&lt;T&gt;&gt; factory shape used by SetupTestDataAsync.
        /// </summary>
        private async Task<Specimen> CreateSpecimenResourceAsync(string id, string name)
        {
            var specimen = new Specimen
            {
                Id = id,
                Status = Specimen.SpecimenStatus.Available,
                Type = new CodeableConcept("http://snomed.info/sct", "119295008", "Specimen"),
                Subject = new ResourceReference($"Patient/{id}"),
            };

            // Return the specimen object without posting - bundle transaction will handle the post
            return await Task.FromResult(specimen);
        }

        /// <summary>
        /// Builds an unsaved SupplyDelivery resource referencing the given patient; callers post it in batches.
        /// </summary>
        private async Task<SupplyDelivery> CreateSupplyDeliveryResourceAsync(string id, string patientId)
        {
            var supplyDelivery = new SupplyDelivery
            {
                Id = id,
                Status = SupplyDelivery.SupplyDeliveryStatus.InProgress,
                Patient = new ResourceReference($"Patient/{patientId}"),
            };

            // Return the supply delivery object without posting - will be posted in parallel batches
            return await Task.FromResult(supplyDelivery);
        }

        /// <summary>
        /// Cleanup method that handles both test data resources (Person, Observation, etc.) and search parameters.
        /// Deletes resources in parallel batches of 500; individual failures are logged and skipped.
        /// </summary>
        private async Task CleanupTestDataAsync(List<(string resourceType, string resourceId)> testResources, params SearchParameter[] searchParameters)
        {
            // Delete test data resources (Person, Observation, etc.) in parallel batches
            if (testResources != null && testResources.Count > 0)
            {
                System.Diagnostics.Debug.WriteLine($"Starting cleanup of {testResources.Count} test resources...");

                const int batchSize = 500; // Process 500 resources at a time in parallel
                int totalDeleted = 0;
                int totalFailed = 0;

                for (int batchStart = 0; batchStart < testResources.Count; batchStart += batchSize)
                {
                    int currentBatchSize = Math.Min(batchSize, testResources.Count - batchStart);
                    var batchTasks = new List<Task>();

                    // Create deletion tasks for this batch
                    for (int i = 0; i < currentBatchSize; i++)
                    {
                        var (resourceType, resourceId) = testResources[batchStart + i];

                        if (!string.IsNullOrEmpty(resourceId))
                        {
                            batchTasks.Add(DeleteResourceAsync(resourceType, resourceId));
                        }
                    }

                    try
                    {
                        // Execute all deletes in parallel.
                        // NOTE(review): DeleteResourceAsync swallows its own exceptions, so this
                        // WhenAll rarely faults; totalDeleted counts the whole batch as deleted.
                        await Task.WhenAll(batchTasks);
                        totalDeleted += currentBatchSize;

                        System.Diagnostics.Debug.WriteLine($"Deleted batch {(batchStart / batchSize) + 1}: {currentBatchSize} resources (total: {totalDeleted}/{testResources.Count})");
                    }
                    catch (Exception ex)
                    {
                        totalFailed += currentBatchSize;
                        System.Diagnostics.Debug.WriteLine($"Failed to delete batch at offset {batchStart}: {ex.Message}");

                        // Continue with next batch instead of failing completely
                    }
                }

                System.Diagnostics.Debug.WriteLine($"Cleanup completed: {totalDeleted} deleted successfully, {totalFailed} failed");
            }

            await Task.Delay(TimeSpan.FromSeconds(3));

            // Delete search parameters (soft delete with reindex finalization)
            await CleanupSearchParametersAsync(searchParameters);
        }

        /// <summary>
        /// Deletes a single resource; failures are logged and swallowed so sibling deletions continue.
        /// </summary>
        private async Task DeleteResourceAsync(string resourceType, string resourceId)
        {
            try
            {
                await _fixture.TestFhirClient.DeleteAsync($"{resourceType}/{resourceId}");
                System.Diagnostics.Debug.WriteLine($"Deleted {resourceType}/{resourceId}");
            }
            catch (Exception ex)
            {
                System.Diagnostics.Debug.WriteLine($"Failed to delete {resourceType}/{resourceId}: {ex.Message}");

                // Don't throw - allow other deletions to continue
            }
        }

        /// <summary>
        /// Soft-deletes the given search parameters; null entries and entries without an Id are skipped.
        /// </summary>
        private async Task CleanupSearchParametersAsync(params SearchParameter[] searchParameters)
        {
            // Validate input
            if (searchParameters == null || searchParameters.Length == 0)
            {
                return;
            }

            // Delete created search parameters
            foreach (var param in searchParameters)
            {
                try
                {
                    if (param != null && !string.IsNullOrEmpty(param.Id))
                    {
                        // Attempt to delete the search parameter
                        // Note: This performs a soft delete, the parameter may be in PendingDelete status
                        await _fixture.TestFhirClient.DeleteAsync($"SearchParameter/{param.Id}");
                    }
                }
                catch (Exception ex)
                {
                    // Log but continue cleanup even if delete fails
                    System.Diagnostics.Debug.WriteLine($"Failed to delete SearchParameter/{param?.Id}: {ex.Message}");
                }
            }

            // Allow time for soft deletes to be processed
            await Task.Delay(500);

            // Note: Final reindex is handled by ReindexTestFixture.OnDisposedAsync() which runs
            // once at the very end after all tests complete. This eliminates the need to reindex
            // after each individual test cleanup.
        }
+ } + + private async Task WaitForJobCompletionAsync(Uri jobUri, TimeSpan timeout) + { + var stopwatch = Stopwatch.StartNew(); + OperationStatus lastStatus = OperationStatus.Queued; + + while (stopwatch.Elapsed < timeout) + { + try + { + var jobResponse = await _fixture.TestFhirClient.CheckJobAsync(jobUri); + + if (jobResponse.Resource?.Parameter != null) + { + var statusParam = jobResponse.Resource.Parameter + .FirstOrDefault(p => p.Name == "status"); + + if (statusParam?.Value != null) + { + string statusString = null; + + // Handle both FhirString and Code value types + if (statusParam.Value is FhirString fhirString) + { + statusString = fhirString.Value; + } + else if (statusParam.Value is Code code) + { + statusString = code.Value; + } + + if (!string.IsNullOrEmpty(statusString) && + Enum.TryParse(statusString, true, out var status)) + { + lastStatus = status; + + if (status == OperationStatus.Completed || + status == OperationStatus.Failed || + status == OperationStatus.Canceled) + { + return status; + } + } + } + } + } + catch + { + // Continue polling even if there are errors + } + + await Task.Delay(1000); + } + + return lastStatus; + } + + /// + /// Checks if a search response contains a "NotSupported" error in an OperationOutcome. + /// This indicates that a search parameter is not supported (likely because it was deleted or failed to index). + /// + /// The search response bundle to check + /// True if a "NotSupported" error is found, false otherwise + private static bool HasNotSupportedError(Bundle searchResponse) + { + return searchResponse?.Entry?.Any(e => + e.Resource is OperationOutcome oo && + oo.Issue?.Any(i => i.Code?.ToString() == "NotSupported") == true) ?? false; + } + + /// + /// Verifies that a search parameter is properly indexed and working by executing a search query with retry logic. + /// Retries handle timing issues with search parameter cache refresh after reindex operations. 
+ /// If the search parameter is not indexed, the response will contain an OperationOutcome with code "NotSupported". + /// This method ensures the search parameter is fully functional after reindex operations and validates + /// that results are returned (or not found, depending on the scenario). + /// + /// Handles pagination by following NextLink entries until all results are retrieved. + /// + /// The search query to execute (e.g., "Patient?code=value") + /// The search parameter code for error messaging + /// The resource type being searched for (e.g., "Patient") + /// Whether we expect to find records. True if data should be returned, false if expecting empty result + /// Maximum number of retry attempts if the search parameter is not ready + /// Delay in milliseconds between retry attempts + private async Task VerifySearchParameterIsWorkingAsync( + string searchQuery, + string searchParameterCode, + string expectedResourceType = null, + bool shouldFindRecords = true, + int maxRetries = 9, + int retryDelayMs = 20000) + { + Exception lastException = null; + + for (int attempt = 1; attempt <= maxRetries; attempt++) + { + try + { + // Use a reasonable default page size + const int pageSize = 100; + + // Add _count parameter to control pagination + var queryWithCount = searchQuery.Contains("?") + ? 
$"{searchQuery}&_count={pageSize}" + : $"{searchQuery}?_count={pageSize}"; + + var allResourcesFound = new List(); + string nextLink = queryWithCount; + int pageCount = 0; + int totalEntriesRetrieved = 0; + + System.Diagnostics.Debug.WriteLine( + $"Search parameter {searchParameterCode} - attempt {attempt} of {maxRetries}, starting search with page size {pageSize}."); + + // Follow pagination until we get all results + while (!string.IsNullOrEmpty(nextLink)) + { + pageCount++; + System.Diagnostics.Debug.WriteLine($"Search parameter {searchParameterCode} - fetching page {pageCount}"); + + var searchResponse = await _fixture.TestFhirClient.SearchAsync(nextLink); + Assert.NotNull(searchResponse); + + // Verify the search parameter is supported (no "NotSupported" in the response) + var hasNotSupportedError = HasNotSupportedError(searchResponse.Resource); + + Assert.False( + hasNotSupportedError, + $"Search parameter {searchParameterCode} should be supported after reindex for {expectedResourceType}. Got 'NotSupported' error in response."); + + // Collect all entries from this page + if (searchResponse.Resource?.Entry != null) + { + allResourcesFound.AddRange(searchResponse.Resource.Entry); + totalEntriesRetrieved += searchResponse.Resource.Entry.Count; + } + + // Check if there's a next link for pagination + nextLink = searchResponse.Resource?.NextLink?.OriginalString; + System.Diagnostics.Debug.WriteLine( + $"Search parameter {searchParameterCode} - page {pageCount} returned {searchResponse.Resource?.Entry?.Count ?? 0} entries (total so far: {totalEntriesRetrieved}). Next link: {(string.IsNullOrEmpty(nextLink) ? "none" : "present")}"); + } + + System.Diagnostics.Debug.WriteLine( + $"Search parameter {searchParameterCode} is working - search executed successfully without 'NotSupported' error on attempt {attempt}. 
Total pages fetched: {pageCount}, Total entries retrieved: {totalEntriesRetrieved}"); + + // Validate record expectations if specified + if (!string.IsNullOrEmpty(expectedResourceType)) + { + var resourcesFound = allResourcesFound + .Where(e => e.Resource?.TypeName == expectedResourceType) + .ToList(); + + if (shouldFindRecords) + { + Assert.NotEmpty(resourcesFound); + Assert.True( + resourcesFound.Count > 0, + $"Expected to find {expectedResourceType} records for search parameter {searchParameterCode}, but none were returned."); + + System.Diagnostics.Debug.WriteLine( + $"Search parameter {searchParameterCode} correctly returned {resourcesFound.Count} {expectedResourceType} record(s) across {pageCount} page(s)"); + } + else + { + Assert.Empty(resourcesFound); + Assert.True( + resourcesFound.Count == 0, + $"Expected no {expectedResourceType} records for search parameter {searchParameterCode}, but found {resourcesFound.Count}."); + System.Diagnostics.Debug.WriteLine( + $"Search parameter {searchParameterCode} correctly returned no {expectedResourceType} records"); + } + } + + // Log the successful search query for reference + System.Diagnostics.Debug.WriteLine($"Search query: {searchQuery} executed successfully on attempt {attempt} across {pageCount} page(s)."); + + // Success - return without retrying + return; + } + catch (Exception ex) + { + lastException = ex; + + if (attempt < maxRetries) + { + System.Diagnostics.Debug.WriteLine( + $"Search parameter {searchParameterCode} verification failed on attempt {attempt} of {maxRetries}. " + + $"Retrying in {retryDelayMs}ms. Error: {ex.Message}"); + + await Task.Delay(retryDelayMs); + } + } + } + + // All retries exhausted, throw the last exception with context + throw new Xunit.Sdk.XunitException( + $"Search parameter {searchParameterCode} should be usable after reindex (failed after {maxRetries} attempts). 
Error: {lastException?.Message}", + lastException); + } + + /// + /// Gets the current count of resources for a specific resource type. + /// + /// The FHIR resource type to count (e.g., "Person", "Specimen") + /// The total count of resources matching the criteria + private async Task GetResourceCountAsync(string resourceType) + { + string query = $"{resourceType}?_summary=count"; + + Bundle bundle = await _fixture.TestFhirClient.SearchAsync(query); + return bundle.Total ?? 0; + } + + /// + /// Sets up test data by creating the specified number of resources. + /// Returns the list of created resource IDs for cleanup. + /// Uses parallel individual creates for improved performance with retry logic. + /// + /// The FHIR resource type to create (e.g., "Person", "Specimen") + /// The number of resources to create + /// Random suffix for unique resource IDs + /// Function to create a single resource given an ID and name + /// A tuple containing the list of created resource IDs and the count of successfully created resources + private async Task<(List<(string resourceType, string resourceId)> createdResources, int finalCount)> SetupTestDataAsync( + string resourceType, + int desiredCount, + string randomSuffix, + Func> createResourceFunc) + where T : Resource + { + var createdResources = new List<(string resourceType, string resourceId)>(); + + System.Diagnostics.Debug.WriteLine($"Creating {desiredCount} new {resourceType} resources..."); + + // Create resources in batches using parallel individual creates for better performance + const int batchSize = 500; // Process 500 resources at a time in parallel + const int maxCreateRetries = 3; // Retry failed creates up to 3 times + int totalCreated = 0; + var failedIds = new List(); + + for (int batchStart = 0; batchStart < desiredCount; batchStart += batchSize) + { + int currentBatchSize = Math.Min(batchSize, desiredCount - batchStart); + + // Track which resources in this batch need to be created/retried + var 
resourcesToCreateInBatch = new List<(int index, string id, string name)>(); + for (int i = 0; i < currentBatchSize; i++) + { + int index = batchStart + i; + string id = $"{resourceType.ToLowerInvariant()}-{randomSuffix}-{index}"; + string name = $"Test {resourceType} {index}"; + resourcesToCreateInBatch.Add((index, id, name)); + } + + // Retry failed creates up to maxCreateRetries times + for (int retryAttempt = 0; retryAttempt < maxCreateRetries && resourcesToCreateInBatch.Any(); retryAttempt++) + { + if (retryAttempt > 0) + { + // Exponential backoff for retries + var delayMs = 1000 * (int)Math.Pow(2, retryAttempt - 1); // 1s, 2s, 4s + System.Diagnostics.Debug.WriteLine($"Retrying {resourcesToCreateInBatch.Count} failed resources after {delayMs}ms delay (attempt {retryAttempt + 1}/{maxCreateRetries})"); + await Task.Delay(delayMs); + } + + var batchTasks = resourcesToCreateInBatch + .Select(r => CreateAndPostResourceWithStatusAsync(r.id, r.name, createResourceFunc)) + .ToList(); + + try + { + // Execute all creates in parallel + var results = await Task.WhenAll(batchTasks); + + // Track successes and failures + var nextRetryBatch = new List<(int index, string id, string name)>(); + for (int i = 0; i < results.Length; i++) + { + var (success, resource, id) = results[i]; + var originalResource = resourcesToCreateInBatch[i]; + + if (success && resource != null && !string.IsNullOrEmpty(resource.Id)) + { + createdResources.Add((resourceType, resource.Id)); + totalCreated++; + } + else + { + // Queue for retry if we haven't exhausted retries + if (retryAttempt < maxCreateRetries - 1) + { + nextRetryBatch.Add(originalResource); + } + else + { + // Final failure after all retries + failedIds.Add(id); + } + } + } + + resourcesToCreateInBatch = nextRetryBatch; + + System.Diagnostics.Debug.WriteLine( + $"Batch {(batchStart / batchSize) + 1} attempt {retryAttempt + 1}: " + + $"{totalCreated} total created, {resourcesToCreateInBatch.Count} pending retry, " + + 
$"{failedIds.Count} permanently failed"); + } + catch (Exception ex) + { + System.Diagnostics.Debug.WriteLine($"Failed to create batch at offset {batchStart}: {ex.Message}"); + if (retryAttempt == maxCreateRetries - 1) + { + // Add all remaining to failed list + failedIds.AddRange(resourcesToCreateInBatch.Select(r => r.id)); + } + } + } + + // Delay between batches to avoid overwhelming the server + if (batchStart + batchSize < desiredCount) + { + await Task.Delay(500); + } + } + + // Report on any failures + if (failedIds.Any()) + { + System.Diagnostics.Debug.WriteLine($"WARNING: {failedIds.Count} resources failed to create after {maxCreateRetries} retries"); + System.Diagnostics.Debug.WriteLine($"Failed IDs (first 10): {string.Join(", ", failedIds.Take(10))}"); + } + + // Calculate acceptable threshold (allow 5% failure rate for transient issues) + var acceptableMinimum = (int)(desiredCount * 0.95); + + // Assert that we created enough resources + if (totalCreated < acceptableMinimum) + { + var errorMsg = $"CRITICAL: Failed to create sufficient {resourceType} resources. " + + $"Desired: {desiredCount}, Acceptable minimum: {acceptableMinimum}, " + + $"Successfully created: {totalCreated}, Failed: {failedIds.Count}"; + System.Diagnostics.Debug.WriteLine(errorMsg); + Assert.Fail(errorMsg); + } + else if (totalCreated < desiredCount) + { + // Log warning but don't fail + System.Diagnostics.Debug.WriteLine( + $"WARNING: Created {totalCreated}/{desiredCount} {resourceType} resources " + + $"(within acceptable threshold of {acceptableMinimum})"); + } + + System.Diagnostics.Debug.WriteLine($"Successfully created {totalCreated} new {resourceType} resources."); + + // Return the ACTUAL count of resources we created and have IDs for + return (createdResources, totalCreated); + } + + /// + /// Checks for any running reindex jobs and cancels them before starting a new test. + /// This ensures test isolation and prevents conflicts between concurrent reindex operations. 
+ /// Uses GET $reindex to check for active jobs (works for both SQL Server and Cosmos DB). + /// + /// Cancellation token + private async Task CancelAnyRunningReindexJobsAsync(CancellationToken cancellationToken = default) + { + try + { + System.Diagnostics.Debug.WriteLine("Checking for any running reindex jobs via GET $reindex..."); + + // Use GET to $reindex to check for active jobs + var response = await _fixture.TestFhirClient.HttpClient.GetAsync("$reindex", cancellationToken); + + if (!response.IsSuccessStatusCode) + { + System.Diagnostics.Debug.WriteLine($"GET $reindex returned non-success status: {response.StatusCode}. No active jobs to cancel."); + return; + } + + // Parse the response as a Parameters resource + var content = await response.Content.ReadAsStringAsync(); + + // Deserialize the Parameters resource + Parameters parameters = null; + try + { + // Use the FHIR parser to deserialize the response + var parser = new Hl7.Fhir.Serialization.FhirJsonParser(); + parameters = parser.Parse(content); + } + catch (Exception parseEx) + { + System.Diagnostics.Debug.WriteLine($"Failed to parse $reindex response as Parameters: {parseEx.Message}"); + return; + } + + if (parameters?.Parameter == null || !parameters.Parameter.Any()) + { + System.Diagnostics.Debug.WriteLine("No parameters found in $reindex response. No active jobs to cancel."); + return; + } + + // Extract job ID and status from the parameters + var idParam = parameters.Parameter.FirstOrDefault(p => p.Name == "id"); + var statusParam = parameters.Parameter.FirstOrDefault(p => p.Name == "status"); + + if (idParam?.Value == null) + { + System.Diagnostics.Debug.WriteLine("No job ID found in $reindex response. 
No active jobs to cancel."); + return; + } + + // Extract the job ID + string jobId = null; + if (idParam.Value is FhirString fhirString) + { + jobId = fhirString.Value; + } + else if (idParam.Value is Integer integer) + { + jobId = integer.Value.ToString(); + } + else + { + jobId = idParam.Value.ToString(); + } + + if (string.IsNullOrEmpty(jobId)) + { + System.Diagnostics.Debug.WriteLine("Job ID is empty. No active jobs to cancel."); + return; + } + + // Job is active (Running or Queued), cancel it + System.Diagnostics.Debug.WriteLine($"Job {jobId} is active. Attempting to cancel..."); + + // Use the correct URI format: /_operations/reindex/{jobId} + var jobUri = new Uri($"{_fixture.TestFhirClient.HttpClient.BaseAddress}_operations/reindex/{jobId}"); + + // Send DELETE request to cancel the job + using var deleteRequest = new System.Net.Http.HttpRequestMessage + { + Method = System.Net.Http.HttpMethod.Delete, + RequestUri = jobUri, + }; + + var cancelResponse = await _fixture.TestFhirClient.HttpClient.SendAsync(deleteRequest, cancellationToken); + System.Diagnostics.Debug.WriteLine($"Cancel request for job {jobId} completed with status: {cancelResponse.StatusCode}"); + + // Wait for the job to reach a terminal status + if (cancelResponse.IsSuccessStatusCode) + { + System.Diagnostics.Debug.WriteLine($"Waiting for job {jobId} to reach terminal state..."); + var finalStatus = await WaitForJobCompletionAsync(jobUri, TimeSpan.FromSeconds(120)); + System.Diagnostics.Debug.WriteLine($"Job {jobId} reached final status: {finalStatus}"); + + // Add a small delay to ensure system is ready + await Task.Delay(2000, cancellationToken); + } + + System.Diagnostics.Debug.WriteLine("Completed checking and canceling running reindex jobs"); + } + catch (Exception ex) + { + // Log but don't fail - this is a cleanup/safety check + System.Diagnostics.Debug.WriteLine($"Error in CancelAnyRunningReindexJobsAsync: {ex.Message}"); + } + } + } +} diff --git 
a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/CustomSearchParamTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/CustomSearchParamTests.cs index c49a0a0d5e..46f5b2ddf7 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/CustomSearchParamTests.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/CustomSearchParamTests.cs @@ -11,6 +11,7 @@ using Hl7.Fhir.Model; using Hl7.Fhir.Serialization; using Microsoft.Health.Core.Extensions; +using Microsoft.Health.Extensions.Xunit; using Microsoft.Health.Fhir.Client; using Microsoft.Health.Fhir.Core.Features; using Microsoft.Health.Fhir.Core.Features.Operations; @@ -23,13 +24,13 @@ namespace Microsoft.Health.Fhir.Tests.E2E.Rest.Search { - [CollectionDefinition(Categories.CustomSearch, DisableParallelization = true)] - [Collection(Categories.CustomSearch)] + [CollectionDefinition(Categories.IndexAndReindex, DisableParallelization = true)] + [Collection(Categories.IndexAndReindex)] [Trait(Traits.OwningTeam, OwningTeam.Fhir)] [Trait(Traits.Category, Categories.Search)] - [Trait(Traits.Category, Categories.CustomSearch)] + [Trait(Traits.Category, Categories.IndexAndReindex)] [HttpIntegrationFixtureArgumentSets(DataStore.All, Format.Json)] - public class CustomSearchParamTests : SearchTestsBase, IAsyncLifetime + public class CustomSearchParamTests : SearchTestsBase { private readonly HttpIntegrationTestFixture _fixture; private readonly ITestOutputHelper _output; @@ -42,188 +43,7 @@ public CustomSearchParamTests(HttpIntegrationTestFixture fixture, ITestOutputHel _output = output; } - public async Task InitializeAsync() - { - await Client.DeleteAllResources(ResourceType.Specimen, null); - await Client.DeleteAllResources(ResourceType.Immunization, null); - } - - [SkippableFact] - public async Task GivenANewSearchParam_WhenReindexingComplete_ThenResourcesSearchedWithNewParamReturned() - { - Skip.If(!_fixture.IsUsingInProcTestServer, "Reindex is not enabled on this server."); - - 
var randomName = Guid.NewGuid().ToString().ComputeHash()[..14].ToLower(); - SearchParameter searchParam = Samples.GetJsonSample("SearchParameter-SpecimenStatus"); - searchParam.Name = randomName; - searchParam.Url = searchParam.Url.Replace("foo", randomName); - searchParam.Code = randomName + "Code"; - - // POST a new Specimen - Specimen specimen = Samples.GetJsonSample("Specimen"); - specimen.Status = Specimen.SpecimenStatus.Available; - var tag = new Coding(null, randomName); - specimen.Meta = new Meta(); - specimen.Meta.Tag.Add(tag); - FhirResponse expectedSpecimen = await Client.CreateAsync(specimen); - - // POST a second Specimen to show it is filtered and not returned when using the new search parameter - Specimen specimen2 = Samples.GetJsonSample("Specimen"); - specimen2.Status = Specimen.SpecimenStatus.EnteredInError; - specimen2.Meta = new Meta(); - specimen2.Meta.Tag.Add(tag); - await Client.CreateAsync(specimen2); - - // POST a new Search parameter - FhirResponse searchParamPosted = null; - int retryCount = 0; - bool success = true; - try - { - searchParamPosted = await Client.CreateAsync(searchParam); - _output.WriteLine($"SearchParameter is posted {searchParam.Url}"); - - // Start a reindex job - Uri reindexJobUri; - FhirResponse reindexJobResult; - (reindexJobResult, reindexJobUri) = await RunReindexToCompletion(new Parameters()); - - Parameters.ParameterComponent param = reindexJobResult.Resource.Parameter.FirstOrDefault(p => p.Name == JobRecordProperties.SearchParams); - Assert.Contains(searchParamPosted.Resource.Url, param?.Value?.ToString()); - - do - { - success = true; - retryCount++; - try - { - await ExecuteAndValidateBundle( - $"Specimen?{searchParam.Code}={Specimen.SpecimenStatus.Available.ToString().ToLower()}&_tag={tag.Code}", - expectedSpecimen.Resource); - - _output.WriteLine($"Success on attempt {retryCount} of {MaxRetryCount}"); - } - catch (Exception ex) - { - string error = $"Attempt {retryCount} of {MaxRetryCount}: Failed to 
validate bundle: {ex}"; - _output.WriteLine(error); - success = false; - await Task.Delay(TimeSpan.FromSeconds(10)); - } - } - while (!success && retryCount < MaxRetryCount); - - Assert.True(success); - } - catch (FhirClientException ex) when (ex.StatusCode == HttpStatusCode.BadRequest && ex.Message.Contains("not enabled")) - { - _output.WriteLine($"Skipping because reindex is disabled."); - return; - } - catch (Exception e) - { - _output.WriteLine($"Exception: {e.Message}"); - _output.WriteLine($"Stack Trace: {e.StackTrace}"); - throw; - } - finally - { - // Clean up new SearchParameter - await DeleteSearchParameterAndVerify(searchParamPosted?.Resource); - } - } - - [SkippableFact] - public async Task GivenASearchParam_WhenUpdatingParam_ThenResourcesIndexedWithUpdatedParam() - { - Skip.If(!_fixture.IsUsingInProcTestServer, "Reindex is not enabled on this server."); - - var randomName = Guid.NewGuid().ToString().ComputeHash()[28..].ToLower(); - var patient = new Patient { Name = new List { new HumanName { Family = randomName } } }; - SearchParameter searchParam = Samples.GetJsonSample("SearchParameter-Patient-foo"); - searchParam.Name = randomName; - searchParam.Url = searchParam.Url.Replace("foo", randomName); - searchParam.Code = randomName; - searchParam.Id = randomName; - - // POST a new patient - FhirResponse expectedPatient = await Client.CreateAsync(patient); - - // POST a new Search parameter - FhirResponse searchParamPosted = null; - int retryCount = 0; - bool success = true; - try - { - searchParamPosted = await Client.CreateAsync(searchParam); - - // now update the new search parameter - var randomNameUpdated = randomName + "U"; - searchParamPosted.Resource.Name = randomNameUpdated; - searchParamPosted.Resource.Url = "http://hl7.org/fhir/SearchParameter/Patient-" + randomNameUpdated; - searchParamPosted.Resource.Code = randomNameUpdated; - searchParamPosted = await Client.UpdateAsync(searchParamPosted.Resource); - - Uri reindexJobUri; - FhirResponse 
reindexJobResult; - - // Reindex just a single patient, so we can try searching with a partially indexed search param - (reindexJobResult, reindexJobUri) = await RunReindexToCompletion(new Parameters(), $"Patient/{expectedPatient.Resource.Id}/"); - - Parameters.ParameterComponent param = reindexJobResult.Resource.Parameter.FirstOrDefault(p => p.Name == randomNameUpdated); - if (param == null) - { - _output.WriteLine($"Parameter with name equal to randomly generated name of this test case: {randomNameUpdated} not found in reindex result."); - } - - Assert.NotNull(param); - Assert.Equal(randomName, param.Value.ToString()); - - do - { - success = true; - retryCount++; - try - { - // When reindex job complete, search for resources using new parameter - await ExecuteAndValidateBundle( - $"Patient?{searchParamPosted.Resource.Code}:exact={randomName}", - Tuple.Create(KnownHeaders.PartiallyIndexedParamsHeaderName, "true"), - expectedPatient.Resource); - - _output.WriteLine($"Success on attempt {retryCount} of {MaxRetryCount}"); - } - catch (Exception ex) - { - string error = $"Attempt {retryCount} of {MaxRetryCount}: Failed to validate bundle: {ex}"; - _output.WriteLine(error); - success = false; - await Task.Delay(TimeSpan.FromSeconds(10)); - } - } - while (!success && retryCount < MaxRetryCount); - - Assert.True(success); - } - catch (FhirClientException ex) when (ex.StatusCode == HttpStatusCode.BadRequest && ex.Message.Contains("not enabled")) - { - _output.WriteLine($"Skipping because reindex is disabled."); - return; - } - catch (Exception e) - { - _output.WriteLine($"Exception: {e.Message}"); - _output.WriteLine($"Stack Trace: {e.StackTrace}"); - throw; - } - finally - { - // Clean up new SearchParameter - await DeleteSearchParameterAndVerify(searchParamPosted?.Resource); - } - } - - [Theory] + [RetryTheory] [InlineData("SearchParameterBadSyntax", "The search parameter definition contains one or more invalid entries.")] #if Stu3 || R4 || R4B 
[InlineData("SearchParameterInvalidBase", "Literal 'foo' is not a valid value for enumeration 'ResourceType'")] @@ -249,91 +69,5 @@ public async Task GivenAnInvalidSearchParam_WhenCreatingParam_ThenMeaningfulErro Assert.Contains(ex.OperationOutcome.Issue, i => i.Diagnostics.Contains(errorMessage)); } } - - [Fact] - public async Task GivenNonParametersRequestBody_WhenReindexSent_ThenBadRequest() - { - string body = Samples.GetJson("PatientWithMinimalData"); - FhirClientException ex = await Assert.ThrowsAsync(async () => await Client.PostAsync("$reindex", body)); - Assert.Equal(HttpStatusCode.BadRequest, ex.StatusCode); - } - - private async Task<(FhirResponse response, Uri uri)> RunReindexToCompletion(Parameters reindexParameters, string uniqueResource = null) - { - Uri reindexJobUri; - FhirResponse response; - (response, reindexJobUri) = await Client.PostReindexJobAsync(reindexParameters, uniqueResource); - - // this becomes null when the uniqueResource gets passed in - if (reindexJobUri != null) - { - response = await Client.WaitForReindexStatus(reindexJobUri, "Completed"); - - _output.WriteLine("ReindexJobDocument:"); - var serializer = new FhirJsonSerializer(); - serializer.Settings.Pretty = true; - _output.WriteLine(serializer.SerializeToString(response.Resource)); - - var floatParse = float.TryParse( - response.Resource.Parameter.FirstOrDefault(p => p.Name == JobRecordProperties.ResourcesSuccessfullyReindexed).Value.ToString(), - out float resourcesReindexed); - - Assert.True(floatParse); - Assert.True(resourcesReindexed > 0.0); - } - else - { - _output.WriteLine("response.Resource.Parameter output:"); - foreach (Parameters.ParameterComponent paramComponent in response.Resource.Parameter) - { - _output.WriteLine($" {paramComponent.Name}: {paramComponent.Value}"); - } - } - - return (response, reindexJobUri); - } - - private async Task DeleteSearchParameterAndVerify(SearchParameter searchParam) - { - if (searchParam != null) - { - // Clean up new 
SearchParameter - // When there are multiple instances of the fhir-server running, it could take some time - // for the search parameter/reindex updates to propagate to all instances. Hence we are - // adding some retries below to account for that delay. - int retryCount = 0; - bool success = true; - do - { - success = true; - retryCount++; - try - { - await Client.DeleteAsync(searchParam); - } - catch (Exception exp) - { - _output.WriteLine($"Attempt {retryCount} of {MaxRetryCount}: CustomSearchParameter test experienced issue attempted to clean up SearchParameter {searchParam.Url}. The exception is {exp}"); - if (exp is FhirClientException fhirException && fhirException.OperationOutcome?.Issue != null) - { - foreach (OperationOutcome.IssueComponent issueComponent in fhirException.OperationOutcome.Issue) - { - _output.WriteLine("FhirException OperationOutome message from trying to delete SearchParameter is CustomSearchParam test: {0}", issueComponent.Diagnostics); - } - } - - success = false; - await Task.Delay(TimeSpan.FromSeconds(10)); - } - } - while (!success && retryCount < MaxRetryCount); - - Assert.True(success); - FhirClientException ex = await Assert.ThrowsAsync(() => Client.ReadAsync(ResourceType.SearchParameter, searchParam.Id)); - Assert.Contains("Gone", ex.Message); - } - } - - public Task DisposeAsync() => Task.CompletedTask; } } diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/SearchProxyTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/SearchProxyTests.cs index 228aa37237..e1a880c086 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/SearchProxyTests.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/SearchProxyTests.cs @@ -54,7 +54,7 @@ public SearchProxyTests(HttpIntegrationTestFixture fixture) _proxyClient.HttpClient.DefaultRequestHeaders.Add(XForwardedPrefix, Prefix); } - [Fact] + [RetryFact] public async Task GivenSearch_WhenIncludingForwardedHeaders_ThenModifyResponseUrls() 
{ // Ensure at least one patient exists @@ -74,7 +74,7 @@ await Client.CreateResourcesAsync( AssertSingletonPatientBundle(searchset); } - [Fact] + [RetryFact] public async Task GivenSearchBundle_WhenIncludingForwardedHeaders_ThenModifyResponseUrls() { // Ensure at least one patient exists diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/SortTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/SortTests.cs index d1cbdf7b2c..ec2f90e58d 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/SortTests.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.E2E/Rest/Search/SortTests.cs @@ -489,7 +489,7 @@ public async Task GivenQueryWithRevinclude_WhenSearchedWithSortParamOnDatetimeWi observations.Add(obs.First()); } - resources.AddRange(patients.Reverse()); + resources.AddRange(patients.AsEnumerable().Reverse()); resources.AddRange(observations); // Ask to get all patient with specific tag order by birthdate (timestamp) @@ -561,7 +561,7 @@ public async Task GivenQueryWithRevinclude_WhenSearchedWithSortParamOnLastupdate observations.Add(obs.First()); } - resources.AddRange(patients.Reverse()); + resources.AddRange(patients.AsEnumerable().Reverse()); observations.Reverse(); resources.AddRange(observations); diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Features/Operations/Reindex/ReindexJobTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Features/Operations/Reindex/ReindexJobTests.cs index 71116b318b..f34e7f00a1 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Features/Operations/Reindex/ReindexJobTests.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Features/Operations/Reindex/ReindexJobTests.cs @@ -1132,7 +1132,7 @@ private async Task CreateSearchParam(string searchParamName, Se { var searchParam = new SearchParameter { - Url = $"http://hl7.org/fhir/SearchParameter/{baseType}-{searchParamName}", + Url = 
$"http://hl7.org/fhir/SearchParameter/{baseType}-{searchParamName}-Integration-ReindexJobTest", Type = searchParamType, Expression = expression, Name = searchParamName, @@ -1338,13 +1338,33 @@ private async Task DeleteTestResources(CancellationToken cancellationToken = def if (!string.IsNullOrEmpty(url)) { - // Delete from definition manager + // Delete from definition manager (safe - no exception if not found) _searchParameterDefinitionManager.DeleteSearchParameter(url); _searchParameterDefinitionManager2?.DeleteSearchParameter(url); - // Delete status - await _testHelper.DeleteSearchParameterStatusAsync(url, cancellationToken); - await _searchParameterStatusManager2?.DeleteSearchParameterStatusAsync(url, cancellationToken); + // Delete status - wrap in try-catch to handle already-deleted scenarios + try + { + await _testHelper.DeleteSearchParameterStatusAsync(url, cancellationToken); + } + catch (SearchParameterNotSupportedException) + { + // Already deleted by test cleanup - this is fine + _output.WriteLine($"SearchParameter status already deleted: {url}"); + } + + try + { + if (_searchParameterStatusManager2 != null) + { + await _searchParameterStatusManager2.DeleteSearchParameterStatusAsync(url, cancellationToken); + } + } + catch (SearchParameterNotSupportedException) + { + // Already deleted by test cleanup - this is fine + _output.WriteLine($"SearchParameter status already deleted from manager2: {url}"); + } _output.WriteLine($"Deleted SearchParameter definition and status: {url}"); } diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Persistence/FhirStorageTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Persistence/FhirStorageTests.cs index 9b83c3a5d1..52355e31c0 100644 --- a/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Persistence/FhirStorageTests.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Persistence/FhirStorageTests.cs @@ -17,6 +17,7 @@ using Microsoft.Data.SqlClient; using 
Microsoft.Health.Abstractions.Exceptions; using Microsoft.Health.Abstractions.Features.Transactions; +using Microsoft.Health.Extensions.Xunit; using Microsoft.Health.Fhir.Core; using Microsoft.Health.Fhir.Core.Exceptions; using Microsoft.Health.Fhir.Core.Extensions; @@ -69,7 +70,7 @@ public FhirStorageTests(FhirStorageTestsFixture fixture) protected Mediator Mediator { get; } - [Theory] + [RetryTheory(MaxRetries = 3, DelayBetweenRetriesMs = 5000)] [InlineData(5)] // should succeed [InlineData(35)] // shoul fail [FhirStorageTestsFixtureArgumentSets(DataStore.SqlServer)] @@ -113,7 +114,7 @@ INSERT INTO Resource SELECT * FROM inserted -- this will cause dup key exception } } - [Theory] + [RetryTheory(MaxRetries = 3, DelayBetweenRetriesMs = 5000)] [InlineData(true)] [InlineData(false)] [FhirStorageTestsFixtureArgumentSets(DataStore.SqlServer)] @@ -259,7 +260,7 @@ private async Task UpdateResource(Patient patient) await _fixture.SqlHelper.ExecuteSqlCmd($"UPDATE dbo.Resource SET ResourceId = '{oldId}', Version = 3 WHERE ResourceId = '{newId}' AND Version = 1"); } - [Fact] + [RetryFact] public async Task GivenAResource_WhenSaving_ThenTheMetaIsUpdated_AndLastUpdatedIsWithin1sec() { var saveResult = await Mediator.UpsertResourceAsync(Samples.GetJsonSample("Weight")); @@ -1189,7 +1190,7 @@ private async Task CreatePatientSearchParam(string searchParamN { var searchParam = new SearchParameter { - Url = $"http://hl7.org/fhir/SearchParameter/Patient-{searchParamName}", + Url = $"http://hl7.org/fhir/SearchParameter/Patient-{searchParamName}-Integration-FhirStorageTests", Type = type, #if Stu3 || R4 || R4B Base = new List() { ResourceType.Patient }, diff --git a/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Persistence/SqlServerSchemaUpgradeTests.cs b/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Persistence/SqlServerSchemaUpgradeTests.cs index 860da9fd4f..d0a60bd77c 100644 --- 
a/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Persistence/SqlServerSchemaUpgradeTests.cs +++ b/test/Microsoft.Health.Fhir.Shared.Tests.Integration/Persistence/SqlServerSchemaUpgradeTests.cs @@ -17,6 +17,7 @@ using Microsoft.Extensions.Logging.Abstractions; using Microsoft.Extensions.Options; using Microsoft.Health.Extensions.DependencyInjection; +using Microsoft.Health.Extensions.Xunit; using Microsoft.Health.Fhir.Core.Configs; using Microsoft.Health.Fhir.Core.Features.Definition; using Microsoft.Health.Fhir.Core.Features.Persistence; @@ -92,7 +93,7 @@ public async Task GivenTwoSchemaInitializationMethods_WhenCreatingTwoDatabases_B } } - [Fact] + [RetryFact] public void GivenASchemaVersion_WhenApplyingDiffTwice_ShouldSucceed() { var versions = Enum.GetValues(typeof(SchemaVersion)).OfType().ToList().Select(x => Convert.ToInt32(x)).ToList(); diff --git a/test/Microsoft.Health.Fhir.Stu3.Tests.E2E/Rest/VersionSpecificTests.cs b/test/Microsoft.Health.Fhir.Stu3.Tests.E2E/Rest/VersionSpecificTests.cs index a842c9ca7d..a7a212f989 100644 --- a/test/Microsoft.Health.Fhir.Stu3.Tests.E2E/Rest/VersionSpecificTests.cs +++ b/test/Microsoft.Health.Fhir.Stu3.Tests.E2E/Rest/VersionSpecificTests.cs @@ -5,6 +5,7 @@ using System.Net; using Hl7.Fhir.Model; +using Microsoft.Health.Extensions.Xunit; using Microsoft.Health.Fhir.Client; using Microsoft.Health.Fhir.Tests.Common; using Microsoft.Health.Fhir.Tests.Common.FixtureParameters; @@ -21,13 +22,13 @@ namespace Microsoft.Health.Fhir.Tests.E2E.Rest [Trait(Traits.Category, Categories.DomainLogicValidation)] public partial class VersionSpecificTests : IClassFixture { - [Fact] + [RetryFact] public async Task GivenStu3Server_WhenCapabilityStatementIsRetrieved_ThenCorrectVersionShouldBeReturned() { await TestCapabilityStatementFhirVersion("3.0.2"); } - [Fact] + [RetryFact] [HttpIntegrationFixtureArgumentSets(DataStore.SqlServer)] public async Task GivenAnObservation_WithInvalidDecimalSpecification_ThenBadRequestShouldBeReturned() {