diff --git a/.github/PULL_REQUEST_TEMPLATE b/.github/PULL_REQUEST_TEMPLATE.md similarity index 100% rename from .github/PULL_REQUEST_TEMPLATE rename to .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/workflows/api-ci-pr.yml b/.github/workflows/api-ci-pr.yml index 63479bb..23a2ce5 100644 --- a/.github/workflows/api-ci-pr.yml +++ b/.github/workflows/api-ci-pr.yml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme and is legally attributed to the Department for Business and Trade (UK) as the governing entity. -name: Frontend CI on PR +name: API CI on PR on: workflow_call: @@ -11,8 +11,8 @@ on: paths: - "api/**" - "requirements.txt" - - ".github/workflows/frontend-ci-pr.yml" - - ".github/workflows/frontend-ci-pr-merge.yml" + - ".github/workflows/api-ci-pr.yml" + - ".github/workflows/api-ci-pr-merge.yml" workflow_dispatch: permissions: @@ -30,7 +30,7 @@ jobs: uses: actions/checkout@v4 - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.28.0 + uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # v0.33.1 with: scan-type: "fs" scan-ref: . 
@@ -40,10 +40,33 @@ jobs: severity: "CRITICAL,HIGH" continue-on-error: true + unit-tests: + name: Run Unit Tests + runs-on: ubuntu-latest + timeout-minutes: 10 + env: + GITHUB_ACCESS_TOKEN: ${{ secrets.GH_PACKAGES_PAT }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run unit tests + run: python -m pytest unit_tests/ -v + docker-build-test-scan: name: Build, Docker Build, Test & Scan runs-on: ubuntu-latest - needs: security-scanning + needs: [security-scanning, unit-tests] timeout-minutes: 20 env: GITHUB_ACCESS_TOKEN: ${{ secrets.GH_PACKAGES_PAT }} @@ -57,7 +80,7 @@ jobs: docker build --secret id=pat_token,env=GITHUB_ACCESS_TOKEN -t iris/api:latest -f Dockerfile . - name: Run Trivy Scan on Docker Image - uses: aquasecurity/trivy-action@0.28.0 + uses: aquasecurity/trivy-action@b6643a29fecd7f34b3597bc6acb0a98b03d33ff8 # v0.33.1 with: image-ref: "iris/api:latest" format: "table" diff --git a/.github/workflows/oss-checker.yml b/.github/workflows/oss-checker.yml index 2bf4025..e940929 100644 --- a/.github/workflows/oss-checker.yml +++ b/.github/workflows/oss-checker.yml @@ -4,11 +4,28 @@ name: Run OSS check helper on: + pull_request: + types: + - opened + - synchronize + - reopened + - labeled + - unlabeled workflow_dispatch: +permissions: + contents: read + jobs: oss-checks: + if: github.actor != 'dependabot[bot]' && + (github.event.repository.private == false || + (github.event.repository.private == true && + contains(join(github.event.pull_request.labels.*.name), 'oss-preparation'))) runs-on: ubuntu-latest + outputs: + summary-table: ${{ steps.summarise_results.outputs.summaryTable }} + has-results: ${{ steps.summarise_results.outputs.hasResults }} steps: - name: Fetch GitHub App token for target repo @@ -30,95 +47,377 @@ jobs: 
permission-contents: read - name: Checkout target repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: token: ${{ steps.target_token.outputs.token }} - name: Checkout OSPO source repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: repository: National-Digital-Twin/ospo-resources path: ospo-resources token: ${{ steps.ospo_token.outputs.token }} - name: Checkout archetypes source repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: repository: National-Digital-Twin/archetypes path: archetypes - name: Test for presence of OSS files and variation from templated content - run: | - missing_files=() - unchanged_files=() - - while IFS= read -r file || [ -n "$file" ]; do - # Skip comments and empty lines - if [[ -z "$file" || "$file" == \#* ]]; then - continue - fi - - target_path="$file" - archetypes_path="archetypes/$file" - - if [ ! -f "$target_path" ]; then - echo "Missing OSS file in target repository: $target_path" - missing_files+=("$file") - elif cmp -s "$target_path" "$archetypes_path"; then - echo "OSS file unchanged from archetypes template: $target_path" - unchanged_files+=("$file") - else - echo "OSS file present and different from the archetypes template: $target_path" - fi - done < ospo-resources/oss-checklist-files.txt - - echo "" - if [ ${#missing_files[@]} -ne 0 ]; then - echo "The following OSS required files are missing:" - printf '%s\n' "${missing_files[@]}" - fi - - if [ ${#unchanged_files[@]} -ne 0 ]; then - echo "The following OSS required files are unchanged from the archetypes template:" - printf '%s\n' "${unchanged_files[@]}" - fi - - if [ ${#missing_files[@]} -ne 0 ] || [ ${#unchanged_files[@]} -ne 0 ]; then - echo "OSS required file check failed." - exit 1 - else - echo "All OSS files are present and have been updated from their original templated content." 
- fi + uses: actions/github-script@v8 + with: + script: | + const { existsSync, readFileSync, writeFileSync } = require('fs'); + + const checklistPath = 'ospo-resources/oss-checklist-files.txt'; + const checklist = readFileSync(checklistPath, 'utf8') + .split('\n') + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith('#')); + + const results = []; + const prHead = context.payload.pull_request?.head; + const repoFullName = prHead?.repo?.full_name ?? process.env.GITHUB_REPOSITORY ?? 'unknown/unknown'; + const commitSha = prHead?.sha ?? process.env.GITHUB_SHA ?? 'unknown'; + + for (const relativePath of checklist) { + const record = { + path: relativePath, + status: 'passed', + checks: { + exists: false, + differsFromTemplate: null, + }, + failureReasons: [], + }; + + const targetPath = relativePath; + const archetypePath = `archetypes/${relativePath}`; + + const fileExists = existsSync(targetPath); + record.checks.exists = fileExists; + + if (!fileExists) { + record.status = 'failed'; + record.failureReasons.push('missing or misnamed'); + core.info(`Missing or misnamed OSS file in target repository: ${targetPath}`); + results.push(record); + continue; + } + + const targetContent = readFileSync(targetPath, 'utf8'); + + if (existsSync(archetypePath)) { + const archetypeContent = readFileSync(archetypePath, 'utf8'); + const differsFromTemplate = targetContent !== archetypeContent; + record.checks.differsFromTemplate = differsFromTemplate; + + if (!differsFromTemplate) { + record.failureReasons.push('unchanged from archetype template'); + core.info(`OSS file unchanged from archetypes template: ${targetPath}`); + } else { + core.info(`OSS file present and different from the archetypes template: ${targetPath}`); + } + } else { + record.checks.differsFromTemplate = null; + core.info(`Template file missing for ${relativePath}; skipping template comparison.`); + } + + record.status = record.failureReasons.length > 0 ? 
'failed' : 'passed'; + results.push(record); + } + + const passed = results.filter((result) => result.status === 'passed').length; + const failed = results.length - passed; + const score = results.length > 0 ? Number((passed / results.length).toFixed(2)) : 0; + + const report = { + runMetadata: { + checklistFile: checklistPath, + timestamp: new Date().toISOString(), + repo: repoFullName, + commit: commitSha, + checkType: 'OSS', + }, + files: results, + summary: { + total: results.length, + passed, + failed, + score, + }, + }; + + const reportPath = 'oss-results.json'; + writeFileSync(reportPath, JSON.stringify(report, null, 2)); + core.info(`Wrote checklist summary to ${reportPath}`); + + if (failed > 0) { + const failedFiles = results + .filter((result) => result.status === 'failed') + .map((result) => result.path); + core.setFailed(`The following files failed checks:\n${failedFiles.join('\n')}`); + } else { + core.info('All OSS files are present and have been updated from their original templated content.'); + } - name: Check GitHub template files are present - run: | - echo "Checking for pull request and issue template files" - - missing_templates=() - - files_to_check=( - ".github/PULL_REQUEST_TEMPLATE.md" - ".github/ISSUE_TEMPLATE/bug_report.md" - ".github/ISSUE_TEMPLATE/feature_request.md" - ) - - for file in "${files_to_check[@]}"; do - if [ ! -f "$file" ]; then - missing_templates+=("$file") - fi - done - - if [ ${#missing_templates[@]} -ne 0 ]; then - echo "" - echo "Required GitHub template files not found:" - printf ' - %s\n' "${missing_templates[@]}" - echo "" - echo "These files help improve project collaboration and are considered best practice." - echo "These need to be included in repository contents to improve the developer and repository consumer experience." - - # Fail the job - echo "Missing required GitHub template files." - exit 1 - else - echo "Required pull request and issue template files present." 
- fi + uses: actions/github-script@v8 + if: success() || failure() + with: + script: | + const { existsSync, writeFileSync } = require('fs'); + + core.info('Checking for pull request and issue template files'); + + const filesToCheck = [ + '.github/PULL_REQUEST_TEMPLATE.md', + '.github/ISSUE_TEMPLATE/bug_report.md', + '.github/ISSUE_TEMPLATE/feature_request.md', + ]; + + const results = filesToCheck.map((filePath) => { + const exists = existsSync(filePath); + return { + path: filePath, + status: exists ? 'passed' : 'failed', + checks: { + exists, + differsFromTemplate: null, + }, + failureReasons: exists ? [] : ['missing or misnamed'], + }; + }); + + const passed = results.filter((result) => result.status === 'passed').length; + const failed = results.length - passed; + const score = results.length > 0 ? Number((passed / results.length).toFixed(2)) : 0; + + const prHead = context.payload.pull_request?.head; + const report = { + runMetadata: { + timestamp: new Date().toISOString(), + repo: prHead?.repo?.full_name ?? process.env.GITHUB_REPOSITORY ?? 'unknown/unknown', + commit: prHead?.sha ?? process.env.GITHUB_SHA ?? 
'unknown', + checkType: 'template', + }, + files: results, + summary: { + total: results.length, + passed, + failed, + score, + }, + }; + + const reportPath = 'template-results.json'; + writeFileSync(reportPath, JSON.stringify(report, null, 2)); + core.info(`Wrote template checklist summary to ${reportPath}`); + + if (failed > 0) { + const missingTemplates = results + .filter((result) => result.status === 'failed') + .map((result) => result.path); + + core.info(''); + core.info('Required GitHub template files were not found or did not match expected casing:'); + missingTemplates.forEach((file) => core.info(` - ${file}`)); + core.info(''); + core.info('These files help improve project collaboration and are considered best practice.'); + core.info('These need to be included in repository contents to improve the developer and repository consumer experience.'); + core.setFailed('Missing or misnamed GitHub template files.'); + } else { + core.info('Required pull request and issue template files present.'); + } + + - name: Generate summary + id: summarise_results + if: always() + uses: actions/github-script@v8 + with: + script: | + const { existsSync, readFileSync } = require('fs'); + + const reportFiles = [ + 'oss-results.json', + 'template-results.json', + ]; + + const reports = reportFiles + .filter((reportPath) => { + const present = existsSync(reportPath); + if (!present) { + core.info(`Summary step skipping missing report: ${reportPath}`); + } + return present; + }) + .map((reportPath) => JSON.parse(readFileSync(reportPath, 'utf8'))); + + if (reports.length === 0) { + core.info('No report files found; skipping combined summary.'); + core.setOutput('hasResults', 'false'); + return; + } + + const allResults = reports.flatMap((report) => + report.files.map((file) => ({ + ...file, + category: report.runMetadata?.checkType ?? 'unknown', + repo: report.runMetadata?.repo ?? process.env.GITHUB_REPOSITORY ?? 'unknown/unknown', + commit: report.runMetadata?.commit ?? 
process.env.GITHUB_SHA ?? 'unknown', + })), + ); + + const combinedTableMarkdown = [ + '| 📄 File | ✅ Result | 🧾 Details |', + '| :--- | :---: | :--- |', + ...allResults.map((result) => { + const href = `https://github.com/${result.repo}/blob/${result.commit}/${result.path}`; + const details = result.failureReasons.length > 0 + ? result.failureReasons.join('; ') + : 'Compliant'; + const statusLabel = result.status === 'passed' ? '🟢 Pass' : '🔴 Fail'; + return `| [${result.path}](${href}) | ${statusLabel} | ${details} |`; + }), + ].join('\n'); + + const total = allResults.length; + const passed = allResults.filter((result) => result.status === 'passed').length; + const failed = total - passed; + const score = total > 0 ? (passed / total) * 100 : 0; + const summary = { total, passed, failed, score }; + + const overallStatus = summary.failed === 0 + ? '🎉 Overall status: PASS (all files compliant).' + : '⚠️ Overall status: FAIL (see table below for details).'; + + const summaryMarkdown = [ + '| 📊 Total Files | 🟢 Passed | 🔴 Failed | 🧮 Score |', + '| ---: | ---: | ---: | ---: |', + `| ${summary.total} | ${summary.passed} | ${summary.failed} | ${summary.score.toFixed(0)}% |` + ].join('\n'); + + const prHead = context.payload.pull_request?.head; + const repoFullName = prHead?.repo?.full_name ?? process.env.GITHUB_REPOSITORY ?? 'unknown/unknown'; + const fullSha = prHead?.sha ?? process.env.GITHUB_SHA ?? ''; + const shortSha = fullSha?.slice(0, 7) ?? 'unknown'; + const commitUrl = fullSha + ? `https://github.com/${repoFullName}/commit/${fullSha}` + : null; + const commitLine = commitUrl + ? 
`Results from commit [\`${shortSha}\`](${commitUrl}).` : `Results from commit \`${shortSha}\`.`; + + await core.summary + .addRaw('# OSS Check Results ⚙️\n', true) + .addRaw(`\n${combinedTableMarkdown}\n`, true) + .addRaw('\n# Summary 🏁\n', true) + .addRaw(`\n${overallStatus}\n`, true) + .addRaw(`\n${summaryMarkdown}\n`, true) + .addRaw(`\n${commitLine}\n`, true) + .write(); + + core.setOutput('hasResults', 'true'); + core.setOutput('summaryTable', summaryMarkdown); + + if (summary.failed > 0) { + core.setFailed('OSS checks detected one or more failing files.'); + } + + - name: Upload OSS result artifacts + if: ${{ steps.summarise_results.outputs.hasResults == 'true' }} + uses: actions/upload-artifact@v6 + with: + name: oss-checks-${{ github.run_id }} + retention-days: 30 + path: | + oss-results.json + template-results.json + + comment-on-results: + needs: oss-checks + if: >- + always() && + github.event_name == 'pull_request' && + needs.oss-checks.outputs.has-results == 'true' + runs-on: ubuntu-latest + + permissions: + pull-requests: write + + steps: + - name: Comment with OSS summary + uses: actions/github-script@v8 + env: + SUMMARY_TABLE: ${{ needs.oss-checks.outputs.summary-table }} + JOB_RESULT: ${{ needs.oss-checks.result }} + with: + script: | + const { owner, repo } = context.repo; + const prNumber = context.payload.pull_request?.number; + + if (!prNumber) { + core.info('No pull request context; skipping comment step.'); + return; + } + + const jobSummaryUrl = `https://github.com/${owner}/${repo}/actions/runs/${context.runId}`; + const prHead = context.payload.pull_request?.head; + const headCommitSha = prHead?.sha ?? process.env.GITHUB_SHA ?? ''; + const shortSha = headCommitSha ? headCommitSha.slice(0, 7) : 'unknown'; + const runResult = process.env.JOB_RESULT?.toLowerCase() ?? ''; + const isFailure = runResult === 'failure'; + + const marker = '<!-- oss-checks-summary -->'; + const heading = isFailure + ? 
'## ⚠️ OSS Checks Failed' + : '## ✅ OSS Checks Passed'; + const narration = isFailure + ? 'One or more OSS checks failed in this run.' + : 'All tracked OSS checks passed in this run.'; + + const bodySections = [ + heading, + narration, + process.env.SUMMARY_TABLE, + `Results from commit ${shortSha}, view the full [job summary↗️](${jobSummaryUrl}) for detailed results.` + ]; + + const existingComments = await github.paginate( + github.rest.issues.listComments, + { + owner, + repo, + issue_number: prNumber, + per_page: 100, + }, + ); + + const previous = existingComments.find((comment) => + comment.body?.includes(marker), + ); + + if(previous) { + bodySections.push(':recycle: This comment has been updated with latest results.'); + } + + const body = `${marker}\n${bodySections.join('\n\n')}\n${marker}`; + + if (previous) { + core.info(`Updating existing OSS summary comment (${previous.id}).`); + await github.rest.issues.updateComment({ + owner, + repo, + comment_id: previous.id, + body, + }); + } else { + core.info('Creating new OSS summary comment.'); + await github.rest.issues.createComment({ + owner, + repo, + issue_number: prNumber, + body, + }); + } diff --git a/.github/workflows/publish-github-release.yml b/.github/workflows/publish-github-release.yml index 23a5669..90ff017 100644 --- a/.github/workflows/publish-github-release.yml +++ b/.github/workflows/publish-github-release.yml @@ -57,7 +57,7 @@ jobs: needs: [versioning] steps: - name: Checkout Code - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Generate SPDX SBOM run: | @@ -72,7 +72,7 @@ jobs: echo "$api_response" | jq '.sbom' > sbom.spdx.json - name: Upload SBOM Artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: sbom path: sbom.spdx.json @@ -83,7 +83,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 with: fetch-depth: 0 @@ -100,7 +100,7 @@ jobs: runs-on: ubuntu-latest steps: - 
name: Download SBOM Artifact - uses: actions/download-artifact@v4 + uses: actions/download-artifact@v7 with: name: sbom diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..4eadc01 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,9 @@ +repos: + - repo: local + hooks: + - id: trufflehog + name: TruffleHog + description: Detect secrets in your data. + entry: bash -c 'docker run --rm -v "$(pwd):/workdir" -i --rm trufflesecurity/trufflehog:latest git file:///workdir --since-commit HEAD --results=verified,unknown --fail' + language: system + stages: ["pre-commit", "pre-push"] diff --git a/CHANGELOG.md b/CHANGELOG.md index a8e3268..467fbee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,74 @@ This project follows **Semantic Versioning (SemVer)** ([semver.org](https://semv - **Build metadata** – If needed, use `+build` (e.g., `2.1.0+20250314`). --- + +## [0.95.1] - 2026-01-14 + +- [NON-REQ]: updated indices to optimise for performance of the building feature charts + +## [0.95.0] - 2026-01-12 + +### Features + +- [NON-REQ]: added materialized view for analytics dashboard +- [DPAV-1779]: add endpoints for initial dashboard charts +- [DPAV-1779]: add endpoint for building fuel type chart +- [DPAV-1926]: updated and split the analytics view for dashboards +- [NON-REQ]: updated dashboard queries to reference renamed view +- [NON-REQ]: updated epc analytics view to include epc active field +- [NON-REQ]: added partial indices for base charts +- [NON-REQ]: added way to retrieve data for extreme weather chart +- [DPAV-1956]: updated view, query and routes to match extreme weather national dashboard spec +- [DPAV-1961]: added query and route to fetch data for the in date vs expired epc chart +- [DPAV-1951]: add covering index for historical EPC queries +- [DPAV-1951]: add active_snapshots to build_epc_analytics +- [DPAV-2060]: add database query timeout +- [DPAV-1951]: add building_epc_analytics_aggregates materialized 
view +- [DPAV-2060]: Ensure DEFAULT_QUERY_TIMEOUT is set with fallback +- [DPAV-1968]: add support for named areas dashboard +- [DPAV-1961]: support area filters on expired vs in-date epc +- [DPAV-1957]: add area columns to the extreme weather analytics view +- [DPAV-1970]: add active partial indices for building_epc_analytics table +- [DPAV-1970]: add composite partial index for fuel charts +- [DPAV-1963]: update dashboard building attributes endpoint +- [DPAV-1953]: add dashboard endpoint for epc over time chart +- [DPAV-2118]: combine Welsh regions +- [DPAV-1779]: New EPC charts +- [DPAV-1958] & [DPAV-1959]: add endpoints for grouped sap timeline charts +- [DPAV-2060]: reverted db query timeout back to 29 seconds + +### Bug fixes + +- [NON-REQ]: fixed down revision id to reference penultimate revision +- [NON-REQ]: added gpkg table to entrypoint script to fix data loading error +- [NON-REQ]: standardized index names across migrations +- [NON-REQ]: moved where statement to subquery to ngd attributes query +- [DPAV-2060]: fix execute_with_timeout reset +- [DPAV-1926]: changed the logic for geom matching to contains +- [DPAV-1926]: corrected the join logic in the sync region fk script + +## [0.94.4] - 2025-10-24 + +- Fixed and removed visual overlap of region polygons introduced in 0.94.3. + +## [0.94.3] - 2025-10-23 + +- Fixed issue with EPC analytics at the region level + +## [0.94.2] - 2025-10-22 + +- [DPAV-1922] Updated EPC-related queries to fetch latest EPC records. Also added migration to update view definition for EPC analytics. +- [DPAV-1922] New migration to reduce time taken by regions materialized view refresh + +## [0.94.1] - 2025-10-14 + +- [DPAV-1731] Added an `is_residential` flag to `iris.building` via Alembic migration to support distinguishing residential dwellings in downstream queries. 
+- Added OS NGD Buildings attributes to `/buildings/{uprn}`: roof material, solar panel presence, roof shape, and roof aspect areas (N, NE, E, SE, S, SW, W, NW, indeterminable). +- Added PostGIS fallback for NGD attributes when graph data is missing; extended mappers/DTOs accordingly. +- Included OS roof data in filterable buildings and filter summary; only include aspect directions with area > 0. +- New climate GeoJSON endpoints: `/data/climate/hot-summer-days`, `/data/climate/icing-days`, `/data/climate/wind-driven-rain`. +- Added underlying geometry data for Wales. + ## [0.92.1] - 2025-07-22 - Updated changelog with note for release 0.92.0 diff --git a/Dockerfile b/Dockerfile index fb33269..ea7b946 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,7 @@ ARG PIP_EXTRA_INDEX_URL RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ cargo \ + gdal-bin \ git \ libffi-dev \ librdkafka-dev \ @@ -19,7 +20,8 @@ COPY requirements.txt . RUN --mount=type=secret,id=pat_token \ export GITHUB_ACCESS_TOKEN=$(cat /run/secrets/pat_token) && \ pip install --no-cache-dir --upgrade -r requirements.txt - +COPY developer-resources/load_gpkg_to_postgis.py load_gpkg_to_postgis.py COPY . . -CMD ["python", "api/main.py", "--host", "0.0.0.0"] +RUN chmod +x ./entrypoint.sh +ENTRYPOINT ["./entrypoint.sh"] \ No newline at end of file diff --git a/Makefile b/Makefile index 11bc4a5..00b593f 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,54 @@ -docker-image: - docker build --no-cache -t iris/write-api:latest . +docker-build: + docker build --no-cache --secret id=pat_token,env=GITHUB_ACCESS_TOKEN -t iris/write-api:latest . 
docker-run: - docker run --rm --name iris-write-api -e PORT=3010 -e DEV=True -e JENA_PROTOCOL=http -e JENA_URL=127.0.0.1 -e JENA_PORT:3030 -p 3010:3010 iris/write-api:latest + docker run -d --rm --name iris-write-api --network developer-resources_iris -e PORT=3010 -e DEV=True -e JENA_PROTOCOL=http -e JENA_URL=127.0.0.1 -e JENA_PORT:3030 -e DB_HOST=postgis -p 3010:3010 iris/write-api:latest run-api: - uvicorn api.main:app --reload --port 5021 + python developer-resources/sync_region_fks_dbu.py + python api/main.py test: - python -m pytest \ No newline at end of file + python -m pytest + +load-met-office-data: + MATERIALIZED_VIEW=iris.wind_driven_rain_projections_geojson TARGET_TABLE=wind_driven_rain_projections GPKG_SOURCE=https://services.arcgis.com/Lq3V5RFuTBC9I7kv/arcgis/rest/services/Annual_Index_of_Wind_Driven_Rain_Projections_5km/FeatureServer/replicafilescache/Annual_Index_of_Wind_Driven_Rain_Projections_5km_-6134910210859057092.gpkg GPKG_TABLE=Annual_Index_of_Wind_Driven_Rain___Projections__5km_ python developer-resources/load_gpkg_to_postgis.py + MATERIALIZED_VIEW=iris.icing_days_geojson TARGET_TABLE=annual_count_of_icing_days_1991_2020 GPKG_SOURCE=https://services.arcgis.com/Lq3V5RFuTBC9I7kv/arcgis/rest/services/Annual_Count_of_Icing_Days_1991_2020/FeatureServer/replicafilescache/Annual_Count_of_Icing_Days_1991_2020_5977951113111576455.gpkg GPKG_TABLE=annual_count_of_icing_days_1991_2020 python developer-resources/load_gpkg_to_postgis.py + MATERIALIZED_VIEW=iris.hot_summer_days_geojson TARGET_TABLE=annual_count_of_hot_summer_days_projections_12km GPKG_SOURCE=https://services.arcgis.com/Lq3V5RFuTBC9I7kv/arcgis/rest/services/Annual_Count_of_Hot_Days___Projections__12km_grid_/FeatureServer/replicafilescache/Annual_Count_of_Hot_Days___Projections__12km_grid__5151054028377652076.gpkg GPKG_TABLE=annual_count_of_hot_summer_days_projections_12km python developer-resources/load_gpkg_to_postgis.py + +load-epc-data: + JOIN_VIEW=iris.uk_ward 
DATA_VIEW=iris.uk_ward_epc_data MATERIALIZED_VIEW=iris.uk_ward_epc TARGET_TABLE=district_borough_unitary_ward GPKG_SOURCE=https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect GPKG_TABLE=district_borough_unitary_ward python developer-resources/load_gpkg_to_postgis.py + JOIN_VIEW=iris.uk_ward DATA_VIEW=iris.uk_ward_epc_data MATERIALIZED_VIEW=iris.uk_ward_epc TARGET_TABLE=unitary_electoral_division GPKG_SOURCE=https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect GPKG_TABLE=unitary_electoral_division python developer-resources/load_gpkg_to_postgis.py + JOIN_VIEW=iris.uk_region DATA_VIEW=iris.uk_region_epc_data MATERIALIZED_VIEW=iris.uk_region_epc TARGET_TABLE=scotland_and_wales_region GPKG_SOURCE=https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect GPKG_TABLE=scotland_and_wales_region python developer-resources/load_gpkg_to_postgis.py + JOIN_VIEW=iris.uk_region DATA_VIEW=iris.uk_region_epc_data MATERIALIZED_VIEW=iris.uk_region_epc TARGET_TABLE=english_region GPKG_SOURCE=https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect GPKG_TABLE=english_region python developer-resources/load_gpkg_to_postgis.py + DATA_VIEW=iris.district_borough_unitary_epc_data MATERIALIZED_VIEW=iris.district_borough_unitary_epc TARGET_TABLE=district_borough_unitary GPKG_SOURCE=https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect GPKG_TABLE=district_borough_unitary python developer-resources/load_gpkg_to_postgis.py + DATA_VIEW=iris.boundary_line_ceremonial_counties_epc_data MATERIALIZED_VIEW=iris.boundary_line_ceremonial_counties_epc TARGET_TABLE=boundary_line_ceremonial_counties GPKG_SOURCE=https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect GPKG_TABLE=boundary_line_ceremonial_counties python 
developer-resources/load_gpkg_to_postgis.py + +load-country: + TARGET_TABLE=country_region GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=country_region python developer-resources/load_gpkg_to_postgis.py + +load-counties: + TARGET_TABLE=boundary_line_ceremonial_counties GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=boundary_line_ceremonial_counties python developer-resources/load_gpkg_to_postgis.py + +load-districts: + TARGET_TABLE=district_borough_unitary GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=district_borough_unitary python developer-resources/load_gpkg_to_postgis.py + python developer-resources/sync_region_fks_dbu.py + +load-english-region: + TARGET_TABLE=english_region GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=english_region python developer-resources/load_gpkg_to_postgis.py + +load-scotland-and-wales-region: + TARGET_TABLE=scotland_and_wales_region GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=scotland_and_wales_region python developer-resources/load_gpkg_to_postgis.py + +load-wards: + TARGET_TABLE=district_borough_unitary_ward GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=district_borough_unitary_ward python developer-resources/load_gpkg_to_postgis.py + TARGET_TABLE=unitary_electoral_division GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=unitary_electoral_division python developer-resources/load_gpkg_to_postgis.py + +migrate: + alembic upgrade head + +iris-api-resources-up: + docker compose -f 
developer-resources/docker-compose.yml up -d + +iris-api-resources-down: + docker compose -f developer-resources/docker-compose.yml down diff --git a/api/alembic/versions/0cba3d41c22e_001_create_building_table.py b/api/alembic/versions/0cba3d41c22e_001_create_building_table.py index ea3b6d3..dbdd7b9 100644 --- a/api/alembic/versions/0cba3d41c22e_001_create_building_table.py +++ b/api/alembic/versions/0cba3d41c22e_001_create_building_table.py @@ -37,7 +37,7 @@ def upgrade() -> None: op.execute( """ - CREATE INDEX point_ix + CREATE INDEX building_point_idx ON iris.building USING GIST (point); """ diff --git a/api/alembic/versions/0e6126841f0c_015_update_buildings_is_residential.py b/api/alembic/versions/0e6126841f0c_015_update_buildings_is_residential.py new file mode 100644 index 0000000..3ad71af --- /dev/null +++ b/api/alembic/versions/0e6126841f0c_015_update_buildings_is_residential.py @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""update_buildings_is_residential + +Revision ID: 0e6126841f0c +Revises: cc816c325e2a +Create Date: 2025-09-17 13:39:21.303885 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = '0e6126841f0c' +down_revision: Union[str, None] = 'cc816c325e2a' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + op.execute(sa.text("SET LOCAL application_name = 'alembic_015_is_residential';")) + + + op.execute( + """ + ALTER TABLE iris.building ADD COLUMN "is_residential" BOOLEAN not null DEFAULT false; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute(sa.text("SET LOCAL application_name = 'alembic_015_is_residential';")) + + op.execute( + """ + ALTER TABLE iris.building DROP COLUMN "is_residential"; + """ + ) + \ No newline at end of file diff --git a/api/alembic/versions/10f244f0a95e_012_alter_structure_unit_add_roof_attributes.py b/api/alembic/versions/10f244f0a95e_012_alter_structure_unit_add_roof_attributes.py new file mode 100644 index 0000000..7d49c6e --- /dev/null +++ b/api/alembic/versions/10f244f0a95e_012_alter_structure_unit_add_roof_attributes.py @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""012_alter_structure_unit_add_roof_attributes + +Revision ID: 10f244f0a95e +Revises: d99eb3e1e4ab +Create Date: 2025-08-21 10:31:46.144023 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = '10f244f0a95e' +down_revision: Union[str, None] = 'd99eb3e1e4ab' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + """ Alter table ADD COLUMN roof attributes.""" + op.execute( + """ + ALTER TABLE iris.structure_unit + ADD COLUMN has_roof_solar_panels BOOLEAN NULL, + ADD COLUMN roof_material TEXT NULL, + ADD COLUMN roof_aspect_area_facing_north_m2 DOUBLE PRECISION NULL, + ADD COLUMN roof_aspect_area_facing_east_m2 DOUBLE PRECISION NULL, + ADD COLUMN roof_aspect_area_facing_south_m2 DOUBLE PRECISION NULL, + ADD COLUMN roof_aspect_area_facing_west_m2 DOUBLE PRECISION NULL, + ADD COLUMN roof_aspect_area_facing_north_east_m2 DOUBLE PRECISION NULL, + ADD COLUMN roof_aspect_area_facing_south_east_m2 DOUBLE PRECISION NULL, + ADD COLUMN roof_aspect_area_facing_south_west_m2 DOUBLE PRECISION NULL, + ADD COLUMN roof_aspect_area_facing_north_west_m2 DOUBLE PRECISION NULL, + ADD COLUMN roof_aspect_area_indeterminable_m2 DOUBLE PRECISION NULL; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + """ Alter table DROP COLUMN IF EXISTS.""" + op.execute( + """ + ALTER TABLE iris.structure_unit + DROP COLUMN IF EXISTS has_roof_solar_panels, + DROP COLUMN IF EXISTS roof_material, + DROP COLUMN IF EXISTS roof_aspect_area_facing_north_m2, + DROP COLUMN IF EXISTS roof_aspect_area_facing_east_m2, + DROP COLUMN IF EXISTS roof_aspect_area_facing_south_m2, + DROP COLUMN IF EXISTS roof_aspect_area_facing_west_m2, + DROP COLUMN IF EXISTS roof_aspect_area_facing_north_east_m2, + DROP COLUMN IF EXISTS roof_aspect_area_facing_south_east_m2, + DROP COLUMN IF EXISTS roof_aspect_area_facing_south_west_m2, + DROP COLUMN IF EXISTS roof_aspect_area_facing_north_west_m2, + DROP COLUMN IF EXISTS roof_aspect_area_indeterminable_m2; + """ + ) + diff --git a/api/alembic/versions/2215b32f49a9_008_create_uk_ward_table_view.py
b/api/alembic/versions/2215b32f49a9_008_create_uk_ward_table_view.py new file mode 100644 index 0000000..13ab0fb --- /dev/null +++ b/api/alembic/versions/2215b32f49a9_008_create_uk_ward_table_view.py @@ -0,0 +1,245 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""008_create_uk_ward_table_view + +Revision ID: 2215b32f49a9 +Revises: a75353f01fa0 +Create Date: 2025-08-19 16:01:44.272631 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "2215b32f49a9" +down_revision: Union[str, None] = "a75353f01fa0" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + """ Create lookup table building_epc""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_uprn_idx ON iris.building(uprn); + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.building_epc AS + SELECT a.uprn, b.epc_rating, a.point + FROM iris.building a + LEFT JOIN iris.epc_assessment b + ON a.uprn = b.uprn; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_idx + ON iris.building_epc + USING GIST (point); + """ + ) + """ Create id for iris.district_borough_unitary_ward""" + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS iris.district_borough_unitary_ward_fid_seq1 + INCREMENT 1 + START 1 + MINVALUE 1 + MAXVALUE 2147483647 + CACHE 1; + """ + ) + + """ Create table for iris.district_borough_unitary_ward""" + op.execute( + """ + CREATE TABLE IF NOT EXISTS iris.district_borough_unitary_ward + ( + fid integer NOT NULL DEFAULT nextval('iris.district_borough_unitary_ward_fid_seq1'::regclass), + name character varying, + area_code character varying, + area_description character varying, + 
file_name character varying, + feature_serial_number integer, + collection_serial_number integer, + global_polygon_id integer, + admin_unit_id integer, + census_code character varying, + hectares double precision, + non_inland_area double precision, + area_type_code character varying, + area_type_description character varying, + non_area_type_code character varying, + non_area_type_description character varying, + geometry geometry(MultiPolygon,4326), + CONSTRAINT district_borough_unitary_ward_P PRIMARY KEY (fid) + ) + """ + ) + + """ Create geo index for district_borough_unitary_ward""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS district_borough_unitary_ward_geometry_idx + ON iris.district_borough_unitary_ward USING gist + (geometry) + TABLESPACE pg_default; + """ + ) + """ Create id for unitary_electoral_division""" + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS iris.unitary_electoral_division_fid_seq1 + INCREMENT 1 + START 1 + MINVALUE 1 + MAXVALUE 2147483647 + CACHE 1; + """ + ) + """ Create table for iris.unitary_electoral_division""" + op.execute( + """ + CREATE TABLE IF NOT EXISTS iris.unitary_electoral_division + ( + fid integer NOT NULL DEFAULT nextval('iris.unitary_electoral_division_fid_seq1'::regclass), + name character varying, + area_code character varying, + area_description character varying, + file_name character varying, + feature_serial_number integer, + collection_serial_number integer, + global_polygon_id integer, + admin_unit_id integer, + census_code character varying, + hectares double precision, + non_inland_area double precision, + area_type_code character varying, + area_type_description character varying, + non_area_type_code character varying, + non_area_type_description character varying, + geometry geometry(MultiPolygon,4326), + CONSTRAINT unitary_electoral_division_P PRIMARY KEY (fid) + ) + """ + ) + + """ Create geo index for unitary_electoral_division""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS 
unitary_electoral_division_geometry_idx + ON iris.unitary_electoral_division USING gist + (geometry) + TABLESPACE pg_default; + """ + ) + + """ Create table uk_ward""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_ward + AS + SELECT * FROM iris.district_borough_unitary_ward + UNION + SELECT * FROM iris.unitary_electoral_division; + """ + ) + """ Create geo index for uk_ward""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS uk_ward_geometry_idx + ON iris.uk_ward USING gist + (geometry) + TABLESPACE pg_default; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_ward_epc_data + AS + SELECT + b.name, + COUNT (a.point) AS total, + COUNT(CASE WHEN a.epc_rating = 'A' THEN 1 END) AS epc_a, + COUNT(CASE WHEN a.epc_rating = 'B' THEN 1 END) AS epc_b, + COUNT(CASE WHEN a.epc_rating = 'C' THEN 1 END) AS epc_c, + COUNT(CASE WHEN a.epc_rating = 'D' THEN 1 END) AS epc_d, + COUNT(CASE WHEN a.epc_rating = 'E' THEN 1 END) AS epc_e, + COUNT(CASE WHEN a.epc_rating = 'F' THEN 1 END) AS epc_f, + COUNT(CASE WHEN a.epc_rating = 'G' THEN 1 END) AS epc_g, + COUNT(CASE WHEN a.epc_rating IS NULL THEN 1 END) AS epc_null, + b.geometry + FROM iris.building_epc a + LEFT JOIN iris.uk_ward b + ON ST_Intersects(b.geometry, a.point) + GROUP BY b.name, b.geometry + WITH NO DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_ward_epc + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.uk_ward_epc_data t + WITH NO DATA; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_ward_epc; + """ + ) + + op.execute( + """ + DROP INDEX IF EXISTS iris.uk_ward_geometry_idx; + """ + ) + + 
op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_ward; + """ + ) + op.execute( + """ + DROP TABLE IF EXISTS iris.district_borough_unitary_ward; + """ + ) + op.execute( + """ + DROP TABLE IF EXISTS iris.unitary_electoral_division; + """ + ) + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.uk_ward_objectid_seq; + """ + ) + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.district_borough_unitary_ward_fid_seq1; + """ + ) + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.unitary_electoral_division_fid_seq1; + """ + ) diff --git a/api/alembic/versions/2599ec4b20bd_004_create_tables_views_wind_driven_.py b/api/alembic/versions/2599ec4b20bd_004_create_tables_views_wind_driven_.py new file mode 100644 index 0000000..87a820d --- /dev/null +++ b/api/alembic/versions/2599ec4b20bd_004_create_tables_views_wind_driven_.py @@ -0,0 +1,150 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""004_create_tables_views_wind_driven_rain + +Revision ID: 2599ec4b20bd +Revises: d12ce7dc9019 +Create Date: 2025-08-11 17:38:06.036314 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic.
+revision: str = "2599ec4b20bd" +down_revision: Union[str, None] = "d12ce7dc9019" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + """ Create table for wind-driven rain.""" + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS iris.wind_driven_rain_projections_objectid_seq + INCREMENT 1 + START 1 + MINVALUE 1 + MAXVALUE 2147483647 + CACHE 1; + """ + ) + + op.execute( + """ + CREATE TABLE IF NOT EXISTS iris.wind_driven_rain_projections + ( + objectid integer NOT NULL DEFAULT nextval('iris.wind_driven_rain_projections_objectid_seq'::regclass), + wall_orientation integer, + wdr_baseline_lower double precision, + wdr_baseline_median double precision, + wdr_baseline_upper double precision, + wdr_20_lower double precision, + wdr_20_median double precision, + wdr_20_upper double precision, + wdr_40_lower double precision, + wdr_40_median double precision, + wdr_40_upper double precision, + x_coord double precision, + y_coord double precision, + shape geometry(MultiPolygon,4326), + CONSTRAINT wind_driven_rain_projections__pkey PRIMARY KEY (objectid) + ) + """ + ) + + op.execute( + """ + ALTER SEQUENCE iris.wind_driven_rain_projections_objectid_seq + OWNED BY iris.wind_driven_rain_projections.objectid; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS wind_driven_rain_projections_shape_idx + ON iris.wind_driven_rain_projections USING gist + (shape) + TABLESPACE pg_default; + """ + ) + + """Create views for wind-driven rain.""" + + op.execute( + """ + CREATE OR REPLACE VIEW iris.median_projections_per_shape + AS + SELECT x_coord, + y_coord, + max(shape::text) AS shape, + max(wdr_20_median) FILTER (WHERE wall_orientation = 0) AS wdr20_0, + max(wdr_40_median) FILTER (WHERE wall_orientation = 0) AS wdr40_0, + max(wdr_20_median) FILTER (WHERE wall_orientation = 45) AS wdr20_45, + max(wdr_40_median) FILTER (WHERE wall_orientation = 45) AS wdr40_45, + 
max(wdr_20_median) FILTER (WHERE wall_orientation = 90) AS wdr20_90, + max(wdr_40_median) FILTER (WHERE wall_orientation = 90) AS wdr40_90, + max(wdr_20_median) FILTER (WHERE wall_orientation = 135) AS wdr20_135, + max(wdr_40_median) FILTER (WHERE wall_orientation = 135) AS wdr40_135, + max(wdr_20_median) FILTER (WHERE wall_orientation = 180) AS wdr20_180, + max(wdr_40_median) FILTER (WHERE wall_orientation = 180) AS wdr40_180, + max(wdr_20_median) FILTER (WHERE wall_orientation = 225) AS wdr20_225, + max(wdr_40_median) FILTER (WHERE wall_orientation = 225) AS wdr40_225, + max(wdr_20_median) FILTER (WHERE wall_orientation = 270) AS wdr20_270, + max(wdr_40_median) FILTER (WHERE wall_orientation = 270) AS wdr40_270, + max(wdr_20_median) FILTER (WHERE wall_orientation = 315) AS wdr20_315, + max(wdr_40_median) FILTER (WHERE wall_orientation = 315) AS wdr40_315 + FROM iris.wind_driven_rain_projections + GROUP BY x_coord, y_coord; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.wind_driven_rain_projections_geojson + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(shape)::json, 'properties', to_jsonb(t.*) - 'geom'::text))) AS geojson + FROM iris.median_projections_per_shape t + WITH DATA; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.wind_driven_rain_projections_geojson; + """ + ) + + op.execute( + """ + DROP VIEW iris.median_projections_per_shape; + """ + ) + + op.execute( + """ + DROP INDEX IF EXISTS iris.wind_driven_rain_projections_shape_idx; + """ + ) + + op.execute( + """ + DROP TABLE IF EXISTS iris.wind_driven_rain_projections CASCADE; + """ + ) + + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.wind_driven_rain_projections_objectid_seq; + """ + ) diff --git a/api/alembic/versions/28c37c30e0e4_025_create_indices_for_base_charts.py 
b/api/alembic/versions/28c37c30e0e4_025_create_indices_for_base_charts.py new file mode 100644 index 0000000..743184d --- /dev/null +++ b/api/alembic/versions/28c37c30e0e4_025_create_indices_for_base_charts.py @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""025_create_indices_for_base_charts + +Revision ID: 28c37c30e0e4 +Revises: b929538f7ee1 +Create Date: 2025-10-30 16:28:17.527840 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "28c37c30e0e4" +down_revision: Union[str, None] = "b929538f7ee1" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_fuel_types_idx + ON iris.building_epc_analytics (type, fuel_type) + WHERE epc_active = true + AND type IS NOT NULL + AND fuel_type IS NOT NULL; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_epc_ratings_idx + ON iris.building_epc_analytics (epc_rating, region_name) + WHERE epc_active = true; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_attributes_idx + ON iris.building_epc_analytics ( + region_name, + has_roof_solar_panels, + window_glazing, + floor_construction, + roof_insulation_thickness, + roof_construction, + wall_construction + ) + WHERE epc_active = true; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + op.execute( + """ + DROP INDEX IF EXISTS building_epc_analytics_fuel_types_idx; + """ + ) + + op.execute( + """ + DROP INDEX IF EXISTS building_epc_analytics_epc_ratings_idx; + """ + ) + + op.execute( + """ + DROP INDEX IF EXISTS 
building_epc_analytics_region_attributes_idx; + """ + ) diff --git a/api/alembic/versions/29a78cfac78a_020_recreate_regions_view.py b/api/alembic/versions/29a78cfac78a_020_recreate_regions_view.py new file mode 100644 index 0000000..2b505c6 --- /dev/null +++ b/api/alembic/versions/29a78cfac78a_020_recreate_regions_view.py @@ -0,0 +1,167 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""020_recreate_regions_view + +Revision ID: 29a78cfac78a +Revises: f7639f884c24 +Create Date: 2025-10-23 15:45:36.986302 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "29a78cfac78a" +down_revision: Union[str, None] = "f7639f884c24" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + """ Add simplified geometry column to english_region table""" + op.execute( + """ + ALTER TABLE iris.english_region ADD COLUMN IF NOT EXISTS geom_simplified geometry; + """ + ) + + """ Update column to add simplified geometry""" + op.execute( + """ + UPDATE iris.english_region SET geom_simplified = ST_Simplify(geometry, 0.0001); + """ + ) + + """ Create index for simplified geometry column""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS english_region_geom_simplified_idx ON iris.english_region USING gist (geom_simplified); + """ + ) + + """ Recreate materialised views for regions""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc_data; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc_data + AS + WITH regions AS ( + SELECT name, geom_simplified AS geometry + FROM 
iris.english_region + UNION ALL + SELECT name, ST_Simplify(geometry, 0.0001) AS geometry + FROM iris.country_region + WHERE name = 'Wales' + ) + SELECT + r.name AS name, + SUM(e.total) AS total, + SUM(e.epc_a) AS epc_a, + SUM(e.epc_b) AS epc_b, + SUM(e.epc_c) AS epc_c, + SUM(e.epc_d) AS epc_d, + SUM(e.epc_e) AS epc_e, + SUM(e.epc_f) AS epc_f, + SUM(e.epc_g) AS epc_g, + SUM(e.epc_null) AS epc_null, + r.geometry + FROM regions r + JOIN iris.district_borough_unitary_epc_data e + ON ST_Contains(r.geometry, ST_Simplify(e.geometry, 0.0001)) + GROUP BY r.name, r.geometry + WITH NO DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.uk_region_epc_data t + WITH NO DATA; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + """ Recreate materialised views for regions""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc_data; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc_data + AS + SELECT + b.name, + COUNT (a.point) AS total, + COUNT(CASE WHEN a.epc_rating = 'A' THEN 1 END) AS epc_a, + COUNT(CASE WHEN a.epc_rating = 'B' THEN 1 END) AS epc_b, + COUNT(CASE WHEN a.epc_rating = 'C' THEN 1 END) AS epc_c, + COUNT(CASE WHEN a.epc_rating = 'D' THEN 1 END) AS epc_d, + COUNT(CASE WHEN a.epc_rating = 'E' THEN 1 END) AS epc_e, + COUNT(CASE WHEN a.epc_rating = 'F' THEN 1 END) AS epc_f, + COUNT(CASE WHEN a.epc_rating = 'G' THEN 1 END) AS epc_g, + COUNT(CASE WHEN a.epc_rating IS NULL THEN 1 END) AS epc_null, + b.geometry + FROM iris.building_epc 
a + LEFT JOIN iris.english_region b + ON ST_Intersects(b.geometry, a.point) + GROUP BY b.name, b.geometry + WITH NO DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.uk_region_epc_data t + WITH NO DATA; + """ + ) + + op.execute( + """ + DROP INDEX IF EXISTS english_region_geom_simplified_idx; + """ + ) + + op.execute( + """ + ALTER TABLE iris.english_region DROP COLUMN IF EXISTS geom_simplified; + """ + ) diff --git a/api/alembic/versions/37989279ce33_016_add_region_link_to_district_borough_unitary.py b/api/alembic/versions/37989279ce33_016_add_region_link_to_district_borough_unitary.py new file mode 100644 index 0000000..651780b --- /dev/null +++ b/api/alembic/versions/37989279ce33_016_add_region_link_to_district_borough_unitary.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""add region link to district_borough_unitary + +Revision ID: 37989279ce33 +Revises: 0e6126841f0c +Create Date: 2025-10-02 12:04:32.313209 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic.
+revision: str = "37989279ce33" +down_revision: Union[str, None] = "0e6126841f0c" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Add english_region_fid and scotland_and_wales_region_fid columns to district borough unitary table + and assign it a value based on location + """ + op.execute( + """ + ALTER TABLE iris.district_borough_unitary + ADD COLUMN english_region_fid INTEGER, + ADD COLUMN scotland_and_wales_region_fid INTEGER, + ADD CONSTRAINT english_region_fid_fk FOREIGN KEY(english_region_fid) REFERENCES iris.english_region(fid), + ADD CONSTRAINT scotland_and_wales_region_fid_fk FOREIGN KEY(scotland_and_wales_region_fid) REFERENCES iris.scotland_and_wales_region(fid); + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + op.execute( + """ + ALTER TABLE iris.district_borough_unitary + DROP CONSTRAINT english_region_fid_fk, + DROP CONSTRAINT scotland_and_wales_region_fid_fk; + """ + ) + + op.execute( + """ + ALTER TABLE iris.district_borough_unitary + DROP COLUMN english_region_fid, + DROP COLUMN scotland_and_wales_region_fid; + """ + ) diff --git a/api/alembic/versions/3e9303a52bf0_023_update_and_split_analytics_view.py b/api/alembic/versions/3e9303a52bf0_023_update_and_split_analytics_view.py new file mode 100644 index 0000000..8bee50c --- /dev/null +++ b/api/alembic/versions/3e9303a52bf0_023_update_and_split_analytics_view.py @@ -0,0 +1,287 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""023_update_and_split_analytics_view + +Revision ID: 3e9303a52bf0 +Revises: 47ed09fc85cc +Create Date: 2025-10-24 13:57:09.344870 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "3e9303a52bf0" +down_revision: Union[str, None] = "47ed09fc85cc" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _create_indices(view: str) -> None: + op.execute( + f""" + CREATE INDEX IF NOT EXISTS {view}_uprn_idx ON iris.{view}(uprn); + """ + ) + + op.execute( + f""" + CREATE INDEX IF NOT EXISTS {view}_point_idx ON iris.{view} USING GIST(point); + """ + ) + + op.execute( + f""" + CREATE INDEX IF NOT EXISTS {view}_region_name_idx ON iris.{view}(region_name); + """ + ) + + op.execute( + f""" + CREATE INDEX IF NOT EXISTS {view}_lodgement_date_idx ON iris.{view}(lodgement_date); + """ + ) + + +def upgrade() -> None: + """Upgrade schema.""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.analytics; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.building_epc_analytics + AS ( + SELECT + b.uprn, + b.point, + b.is_residential, + ea.lodgement_date, + ea.epc_rating, + ea.sap_rating, + ea.expiry_date, + COALESCE(su_epc.type, su_build.type) AS type, + COALESCE(su_epc.built_form, su_build.built_form) AS built_form, + COALESCE(su_epc.fuel_type, su_build.fuel_type) AS fuel_type, + COALESCE(su_epc.window_glazing, su_build.window_glazing) AS window_glazing, + COALESCE(su_epc.wall_construction, su_build.wall_construction) AS wall_construction, + COALESCE(su_epc.wall_insulation, su_build.wall_insulation) AS wall_insulation, + COALESCE(su_epc.roof_construction, su_build.roof_construction) AS roof_construction, + COALESCE(su_epc.roof_insulation, su_build.roof_insulation) AS roof_insulation, + COALESCE(su_epc.roof_insulation_thickness, su_build.roof_insulation_thickness) AS roof_insulation_thickness, + COALESCE(su_epc.floor_construction, su_build.floor_construction) AS floor_construction, + COALESCE(su_epc.floor_insulation, su_build.floor_insulation) AS floor_insulation, + COALESCE(su_epc.has_roof_solar_panels, su_build.has_roof_solar_panels) AS 
has_roof_solar_panels, + COALESCE(su_epc.roof_material, su_build.roof_material) AS roof_material, + COALESCE(su_epc.roof_aspect_area_facing_north_m2, su_build.roof_aspect_area_facing_north_m2) AS roof_aspect_area_facing_north_m2, + COALESCE(su_epc.roof_aspect_area_facing_east_m2, su_build.roof_aspect_area_facing_east_m2) AS roof_aspect_area_facing_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_m2, su_build.roof_aspect_area_facing_south_m2) AS roof_aspect_area_facing_south_m2, + COALESCE(su_epc.roof_aspect_area_facing_west_m2, su_build.roof_aspect_area_facing_west_m2) AS roof_aspect_area_facing_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_east_m2, su_build.roof_aspect_area_facing_north_east_m2) AS roof_aspect_area_facing_north_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_east_m2, su_build.roof_aspect_area_facing_south_east_m2) AS roof_aspect_area_facing_south_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_west_m2, su_build.roof_aspect_area_facing_south_west_m2) AS roof_aspect_area_facing_south_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_west_m2, su_build.roof_aspect_area_facing_north_west_m2) AS roof_aspect_area_facing_north_west_m2, + COALESCE(su_epc.roof_aspect_area_indeterminable_m2, su_build.roof_aspect_area_indeterminable_m2) AS roof_aspect_area_indeterminable_m2, + COALESCE(su_epc.roof_shape, su_build.roof_shape) AS roof_shape, + COALESCE(er.name, sawr.name) AS region_name, + blcc.name AS county_name, + dbu.name AS district_name, + COALESCE(dbuw.name, ued.name) AS ward_name + FROM iris.building b + LEFT JOIN iris.epc_assessment ea ON ea.uprn = b.uprn + LEFT JOIN iris.structure_unit su_epc ON su_epc.epc_assessment_id = ea.id + LEFT JOIN iris.structure_unit su_build + ON su_build.uprn = b.uprn + AND su_build.epc_assessment_id IS NULL + AND ea.id IS NULL + JOIN iris.boundary_line_ceremonial_counties blcc ON ST_INTERSECTS(blcc.geometry, b.point) + JOIN iris.district_borough_unitary dbu ON 
ST_INTERSECTS(dbu.geometry, b.point) + LEFT JOIN iris.district_borough_unitary_ward dbuw ON ST_INTERSECTS(dbuw.geometry, b.point) + LEFT JOIN iris.unitary_electoral_division ued ON ST_INTERSECTS(ued.geometry, b.point) + LEFT JOIN iris.english_region er ON er.fid = dbu.english_region_fid + LEFT JOIN iris.scotland_and_wales_region sawr ON sawr.fid = dbu.scotland_and_wales_region_fid + WHERE su_epc.epc_assessment_id IS NOT NULL OR su_build.uprn IS NOT NULL + ) + WITH NO DATA; + """ + ) + + _create_indices("building_epc_analytics") + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_county_name_idx ON iris.building_epc_analytics(county_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_district_name_idx ON iris.building_epc_analytics(district_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_ward_name_idx ON iris.building_epc_analytics(ward_name); + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.building_weather_analytics + AS ( + WITH wdrp_0 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 0 + ), + wdrp_45 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 45 + ), + wdrp_90 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 90 + ), + wdrp_135 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 135 + ), + wdrp_180 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 180 + ), + wdrp_225 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 225 + ), + wdrp_270 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 270 + ), + wdrp_315 AS ( + SELECT wdr_40_median, shape + FROM 
iris.wind_driven_rain_projections + WHERE wall_orientation = 315 + ) + SELECT + b.uprn, + b.point, + acoid.icingdays as icing_days, + wdrp_0.wdr_40_median as wdr_40_median_0, + wdrp_45.wdr_40_median as wdr_40_median_45, + wdrp_90.wdr_40_median as wdr_40_median_90, + wdrp_135.wdr_40_median as wdr_40_median_135, + wdrp_180.wdr_40_median as wdr_40_median_180, + wdrp_225.wdr_40_median as wdr_40_median_225, + wdrp_270.wdr_40_median as wdr_40_median_270, + wdrp_315.wdr_40_median as wdr_40_median_315, + acohdp.hsd_40_median + FROM iris.building b + JOIN iris.annual_count_of_icing_days_1991_2020 acoid ON ST_CONTAINS(acoid.shape, b.point) + JOIN wdrp_0 ON ST_CONTAINS(wdrp_0.shape, b.point) + JOIN wdrp_45 ON ST_CONTAINS(wdrp_45.shape, b.point) + JOIN wdrp_90 ON ST_CONTAINS(wdrp_90.shape, b.point) + JOIN wdrp_135 ON ST_CONTAINS(wdrp_135.shape, b.point) + JOIN wdrp_180 ON ST_CONTAINS(wdrp_180.shape, b.point) + JOIN wdrp_225 ON ST_CONTAINS(wdrp_225.shape, b.point) + JOIN wdrp_270 ON ST_CONTAINS(wdrp_270.shape, b.point) + JOIN wdrp_315 ON ST_CONTAINS(wdrp_315.shape, b.point) + JOIN iris.annual_count_of_hot_summer_days_projections_12km acohdp ON ST_CONTAINS(acohdp.shape, b.point) + ) + WITH NO DATA; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_weather_analytics_uprn_idx ON iris.building_weather_analytics(uprn); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_weather_analytics_point_idx ON iris.building_weather_analytics USING GIST(point); + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_weather_analytics; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.analytics + AS ( + SELECT + b.uprn, + b.point, + b.is_residential, + ea.lodgement_date, + ea.epc_rating, + ea.sap_rating, + ea.expiry_date, + COALESCE(su_epc.type, 
su_build.type) AS type, + COALESCE(su_epc.built_form, su_build.built_form) AS built_form, + COALESCE(su_epc.fuel_type, su_build.fuel_type) AS fuel_type, + COALESCE(su_epc.window_glazing, su_build.window_glazing) AS window_glazing, + COALESCE(su_epc.wall_construction, su_build.wall_construction) AS wall_construction, + COALESCE(su_epc.wall_insulation, su_build.wall_insulation) AS wall_insulation, + COALESCE(su_epc.roof_construction, su_build.roof_construction) AS roof_construction, + COALESCE(su_epc.roof_insulation, su_build.roof_insulation) AS roof_insulation, + COALESCE(su_epc.roof_insulation_thickness, su_build.roof_insulation_thickness) AS roof_insulation_thickness, + COALESCE(su_epc.floor_construction, su_build.floor_construction) AS floor_construction, + COALESCE(su_epc.floor_insulation, su_build.floor_insulation) AS floor_insulation, + COALESCE(su_epc.has_roof_solar_panels, su_build.has_roof_solar_panels) AS has_roof_solar_panels, + COALESCE(su_epc.roof_material, su_build.roof_material) AS roof_material, + COALESCE(su_epc.roof_aspect_area_facing_north_m2, su_build.roof_aspect_area_facing_north_m2) AS roof_aspect_area_facing_north_m2, + COALESCE(su_epc.roof_aspect_area_facing_east_m2, su_build.roof_aspect_area_facing_east_m2) AS roof_aspect_area_facing_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_m2, su_build.roof_aspect_area_facing_south_m2) AS roof_aspect_area_facing_south_m2, + COALESCE(su_epc.roof_aspect_area_facing_west_m2, su_build.roof_aspect_area_facing_west_m2) AS roof_aspect_area_facing_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_east_m2, su_build.roof_aspect_area_facing_north_east_m2) AS roof_aspect_area_facing_north_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_east_m2, su_build.roof_aspect_area_facing_south_east_m2) AS roof_aspect_area_facing_south_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_west_m2, su_build.roof_aspect_area_facing_south_west_m2) AS roof_aspect_area_facing_south_west_m2, + 
COALESCE(su_epc.roof_aspect_area_facing_north_west_m2, su_build.roof_aspect_area_facing_north_west_m2) AS roof_aspect_area_facing_north_west_m2, + COALESCE(su_epc.roof_aspect_area_indeterminable_m2, su_build.roof_aspect_area_indeterminable_m2) AS roof_aspect_area_indeterminable_m2, + COALESCE(su_epc.roof_shape, su_build.roof_shape) AS roof_shape, + COALESCE(er.name, sawr.name) AS region_name + FROM iris.building b + LEFT JOIN iris.epc_assessment ea ON ea.uprn = b.uprn + LEFT JOIN iris.structure_unit su_epc ON su_epc.epc_assessment_id = ea.id + LEFT JOIN iris.structure_unit su_build + ON su_build.uprn = b.uprn + AND su_build.epc_assessment_id IS NULL + AND ea.id IS NULL + JOIN iris.district_borough_unitary dbu ON ST_INTERSECTS(dbu.geometry, b.point) + LEFT JOIN iris.english_region er ON er.fid = dbu.english_region_fid + LEFT JOIN iris.scotland_and_wales_region sawr ON sawr.fid = dbu.scotland_and_wales_region_fid + WHERE su_epc.epc_assessment_id IS NOT NULL OR su_build.uprn IS NOT NULL + ) + WITH NO DATA; + """ + ) + + _create_indices("analytics") diff --git a/api/alembic/versions/4137d5faffb3_032_recreate_extreme_weather_view.py b/api/alembic/versions/4137d5faffb3_032_recreate_extreme_weather_view.py new file mode 100644 index 0000000..7330515 --- /dev/null +++ b/api/alembic/versions/4137d5faffb3_032_recreate_extreme_weather_view.py @@ -0,0 +1,256 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""032_recreate_extreme_weather_view + +Revision ID: 4137d5faffb3 +Revises: 86f3db5e9bbe +Create Date: 2025-11-12 14:57:16.032597 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision: str = '4137d5faffb3' +down_revision: Union[str, None] = '86f3db5e9bbe' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Recreate building_extreme_weather_analytics with geographic area columns.""" + + op.execute(""" + DROP MATERIALIZED VIEW IF EXISTS iris.building_extreme_weather_analytics CASCADE; + """) + + op.execute(""" + CREATE MATERIALIZED VIEW iris.building_extreme_weather_analytics AS ( + WITH buildings_affected_by_icing_days AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE icing_days > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY icing_days + ) + FROM iris.building_weather_analytics + ) + ), + buildings_affected_by_hsds AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE hsd_40_median > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY hsd_40_median + ) + FROM iris.building_weather_analytics + ) + ), + buildings_affected_by_wdrp AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE wdr_40_median_0 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_0 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_45 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_45 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_90 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_90 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_135 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_135 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_180 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_180 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_225 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_225 + ) + FROM 
iris.building_weather_analytics + ) + OR wdr_40_median_270 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_270 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_315 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_315 + ) + FROM iris.building_weather_analytics + ) + ) + SELECT DISTINCT ON (bwa.uprn) + bwa.uprn, + bwa.point, + babi.affected as affected_by_icing_days, + babh.affected as affected_by_hsds, + babw.affected as affected_by_wdr, + bea.region_name, + bea.county_name, + bea.district_name, + bea.ward_name + FROM iris.building_weather_analytics bwa + LEFT JOIN buildings_affected_by_icing_days babi ON bwa.uprn = babi.uprn + LEFT JOIN buildings_affected_by_hsds babh ON bwa.uprn = babh.uprn + LEFT JOIN buildings_affected_by_wdrp babw ON bwa.uprn = babw.uprn + LEFT JOIN iris.building_epc_analytics bea ON bwa.uprn = bea.uprn + ORDER BY bwa.uprn, bea.lodgement_date DESC NULLS LAST + ) + WITH NO DATA; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_uprn_idx + ON iris.building_extreme_weather_analytics (uprn); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_point_idx + ON iris.building_extreme_weather_analytics USING GIST (point); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_region_name_idx + ON iris.building_extreme_weather_analytics (region_name); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_county_name_idx + ON iris.building_extreme_weather_analytics (county_name); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_district_name_idx + ON iris.building_extreme_weather_analytics (district_name); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_ward_name_idx + ON iris.building_extreme_weather_analytics (ward_name); + """) + + +def downgrade() -> None: 
+ """Revert to original building_extreme_weather_analytics view.""" + + op.execute(""" + DROP MATERIALIZED VIEW IF EXISTS iris.building_extreme_weather_analytics CASCADE; + """) + + op.execute(""" + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.building_extreme_weather_analytics AS ( + WITH buildings_affected_by_icing_days AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE icing_days > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY icing_days + ) + FROM iris.building_weather_analytics + ) + ), + buildings_affected_by_hsds AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE hsd_40_median > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY hsd_40_median + ) + FROM iris.building_weather_analytics + ) + ), + buildings_affected_by_wdrp AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE wdr_40_median_0 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_0 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_45 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_45 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_90 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_90 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_135 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_135 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_180 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_180 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_225 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_225 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_270 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_270 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_315 > ( + SELECT percentile_cont(0.75) 
WITHIN GROUP ( + ORDER BY wdr_40_median_315 + ) + FROM iris.building_weather_analytics + ) + ) + SELECT bwa.uprn, bwa.point, babi.affected as affected_by_icing_days, + babh.affected as affected_by_hsds, babw.affected as affected_by_wdr + FROM iris.building_weather_analytics bwa + LEFT JOIN buildings_affected_by_icing_days babi ON bwa.uprn = babi.uprn + LEFT JOIN buildings_affected_by_hsds babh ON bwa.uprn = babh.uprn + LEFT JOIN buildings_affected_by_wdrp babw ON bwa.uprn = babw.uprn + ) + WITH NO DATA; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_uprn_idx + ON iris.building_extreme_weather_analytics (uprn); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_point_idx + ON iris.building_extreme_weather_analytics USING GIST (point); + """) + diff --git a/api/alembic/versions/44de9a59d873_035_recreate_building_epc_analytics.py b/api/alembic/versions/44de9a59d873_035_recreate_building_epc_analytics.py new file mode 100644 index 0000000..1458ea9 --- /dev/null +++ b/api/alembic/versions/44de9a59d873_035_recreate_building_epc_analytics.py @@ -0,0 +1,699 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""035_recreate_building_epc_analytics + +Revision ID: 44de9a59d873 +Revises: 5c66700e5c72 +Create Date: 2026-01-06 16:07:40.889098 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "44de9a59d873" +down_revision: Union[str, None] = "5c66700e5c72" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _create_indices(): + """Create all indexes on building_epc_analytics (from migrations + - 024 + - 025 + - new timeline indexes mentioned in 030 + - 033 + - 034).""" + + # From migration 024 - basic indexes + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_uprn_idx + ON iris.building_epc_analytics(uprn); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_point_idx + ON iris.building_epc_analytics USING GIST(point); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_name_idx + ON iris.building_epc_analytics(region_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_lodgement_date_idx + ON iris.building_epc_analytics(lodgement_date); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_county_name_idx + ON iris.building_epc_analytics(county_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_district_name_idx + ON iris.building_epc_analytics(district_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_ward_name_idx + ON iris.building_epc_analytics(ward_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_epc_active_idx + ON iris.building_epc_analytics(epc_active); + """ + ) + + # From migration 025 - partial indexes for active EPC charts + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_fuel_types_idx + ON iris.building_epc_analytics (type, fuel_type) + WHERE epc_active = true + AND type IS NOT NULL + AND fuel_type IS NOT NULL; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_epc_ratings_idx + ON iris.building_epc_analytics (epc_rating, 
region_name) + WHERE epc_active = true; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_attributes_idx + ON iris.building_epc_analytics ( + region_name, + has_roof_solar_panels, + window_glazing, + floor_construction, + roof_insulation_thickness, + roof_construction, + wall_construction + ) + WHERE epc_active = true; + """ + ) + + # New indexes for timeline charts (historical data via active_snapshots) + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_epc_rating_idx + ON iris.building_epc_analytics(epc_rating); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_type_idx + ON iris.building_epc_analytics(type); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_point_active_idx + ON iris.building_epc_analytics USING GIST(point) + WHERE epc_active = true; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_name_active_idx + ON iris.building_epc_analytics (region_name) + WHERE epc_active = true; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_county_name_active_idx + ON iris.building_epc_analytics (county_name) + WHERE epc_active = true; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_district_name_active_idx + ON iris.building_epc_analytics (district_name) + WHERE epc_active = true; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_ward_name_active_idx + ON iris.building_epc_analytics (ward_name) + WHERE epc_active = true; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_fuel_types_idx + ON iris.building_epc_analytics (region_name, type, fuel_type) + WHERE epc_active = true AND type IS NOT NULL AND fuel_type IS NOT NULL; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_county_fuel_types_idx + ON 
iris.building_epc_analytics (county_name, type, fuel_type) + WHERE epc_active = true AND type IS NOT NULL AND fuel_type IS NOT NULL; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_district_fuel_types_idx + ON iris.building_epc_analytics (district_name, type, fuel_type) + WHERE epc_active = true AND type IS NOT NULL AND fuel_type IS NOT NULL; + """ + ) + + +def _create_building_epc_analytics_aggregates_view(): + op.execute( + """ + CREATE MATERIALIZED VIEW iris.building_epc_analytics_aggregates AS + WITH snapshot_dates AS ( + SELECT generate_series( + DATE_TRUNC('year', (SELECT MIN(lodgement_date) FROM iris.building_epc_analytics WHERE lodgement_date IS NOT NULL))::date + interval '1 year' - interval '1 day', + DATE_TRUNC('year', CURRENT_DATE)::date + interval '1 year' - interval '1 day', + interval '1 year' + )::date as snapshot_date + ), + issued_counts AS ( + SELECT + sd.snapshot_date, + bea.region_name, + bea.county_name, + bea.district_name, + bea.ward_name, + bea.type, + COUNT(DISTINCT bea.uprn) as total_issued_count + FROM snapshot_dates sd + CROSS JOIN iris.building_epc_analytics bea + WHERE bea.lodgement_date <= sd.snapshot_date + AND bea.active_snapshots IS NOT NULL + GROUP BY sd.snapshot_date, bea.region_name, bea.county_name, bea.district_name, bea.ward_name, bea.type + ), + active_aggregates AS ( + SELECT + unnest(active_snapshots) as snapshot_date, + region_name, + county_name, + district_name, + ward_name, + type, + COUNT(*) as active_epc_count, + SUM(sap_rating) as sum_sap_rating, + COUNT(*) FILTER (WHERE epc_rating = 'A') as count_rating_a, + COUNT(*) FILTER (WHERE epc_rating = 'B') as count_rating_b, + COUNT(*) FILTER (WHERE epc_rating = 'C') as count_rating_c, + COUNT(*) FILTER (WHERE epc_rating = 'D') as count_rating_d, + COUNT(*) FILTER (WHERE epc_rating = 'E') as count_rating_e, + COUNT(*) FILTER (WHERE epc_rating = 'F') as count_rating_f, + COUNT(*) FILTER (WHERE epc_rating = 'G') as count_rating_g + FROM 
iris.building_epc_analytics + WHERE active_snapshots IS NOT NULL + GROUP BY snapshot_date, region_name, county_name, district_name, ward_name, type + ) + SELECT + aa.snapshot_date, + aa.region_name, + aa.county_name, + aa.district_name, + aa.ward_name, + aa.type, + aa.active_epc_count, + aa.sum_sap_rating, + aa.count_rating_a, + aa.count_rating_b, + aa.count_rating_c, + aa.count_rating_d, + aa.count_rating_e, + aa.count_rating_f, + aa.count_rating_g, + (ic.total_issued_count - aa.active_epc_count) as expired_epc_count + FROM active_aggregates aa + JOIN issued_counts ic + ON aa.snapshot_date = ic.snapshot_date + AND aa.region_name IS NOT DISTINCT FROM ic.region_name + AND aa.county_name IS NOT DISTINCT FROM ic.county_name + AND aa.district_name IS NOT DISTINCT FROM ic.district_name + AND aa.ward_name IS NOT DISTINCT FROM ic.ward_name + AND aa.type IS NOT DISTINCT FROM ic.type + WITH NO DATA; + """ + ) + + op.execute( + """ + CREATE INDEX building_epc_analytics_aggregates_snapshot_date_idx + ON iris.building_epc_analytics_aggregates(snapshot_date); + """ + ) + + op.execute( + """ + CREATE INDEX building_epc_analytics_aggregates_region_snapshot_idx + ON iris.building_epc_analytics_aggregates(region_name, snapshot_date); + """ + ) + + op.execute( + """ + CREATE INDEX building_epc_analytics_aggregates_county_snapshot_idx + ON iris.building_epc_analytics_aggregates(county_name, snapshot_date); + """ + ) + + op.execute( + """ + CREATE INDEX building_epc_analytics_aggregates_district_snapshot_idx + ON iris.building_epc_analytics_aggregates(district_name, snapshot_date); + """ + ) + + op.execute( + """ + CREATE INDEX building_epc_analytics_aggregates_ward_snapshot_idx + ON iris.building_epc_analytics_aggregates(ward_name, snapshot_date); + """ + ) + + op.execute( + """ + CREATE INDEX building_epc_analytics_aggregates_snapshot_type_idx + ON iris.building_epc_analytics_aggregates(snapshot_date, type); + """ + ) + + +def _create_building_extreme_weather_analytics_view(): + 
op.execute( + """ + CREATE MATERIALIZED VIEW iris.building_extreme_weather_analytics AS ( + WITH buildings_affected_by_icing_days AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE icing_days > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY icing_days + ) + FROM iris.building_weather_analytics + ) + ), + buildings_affected_by_hsds AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE hsd_40_median > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY hsd_40_median + ) + FROM iris.building_weather_analytics + ) + ), + buildings_affected_by_wdrp AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE wdr_40_median_0 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_0 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_45 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_45 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_90 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_90 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_135 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_135 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_180 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_180 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_225 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_225 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_270 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_270 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_315 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_315 + ) + FROM iris.building_weather_analytics + ) + ) + SELECT DISTINCT ON (bwa.uprn) + bwa.uprn, + bwa.point, + babi.affected as affected_by_icing_days, 
+ babh.affected as affected_by_hsds, + babw.affected as affected_by_wdr, + bea.region_name, + bea.county_name, + bea.district_name, + bea.ward_name + FROM iris.building_weather_analytics bwa + LEFT JOIN buildings_affected_by_icing_days babi ON bwa.uprn = babi.uprn + LEFT JOIN buildings_affected_by_hsds babh ON bwa.uprn = babh.uprn + LEFT JOIN buildings_affected_by_wdrp babw ON bwa.uprn = babw.uprn + LEFT JOIN iris.building_epc_analytics bea ON bwa.uprn = bea.uprn + ORDER BY bwa.uprn, bea.lodgement_date DESC NULLS LAST + ) + WITH NO DATA; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_uprn_idx + ON iris.building_extreme_weather_analytics (uprn); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_point_idx + ON iris.building_extreme_weather_analytics USING GIST (point); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_region_name_idx + ON iris.building_extreme_weather_analytics (region_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_county_name_idx + ON iris.building_extreme_weather_analytics (county_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_district_name_idx + ON iris.building_extreme_weather_analytics (district_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_ward_name_idx + ON iris.building_extreme_weather_analytics (ward_name); + """ + ) + + +def upgrade() -> None: + """Upgrade schema.""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics_aggregates; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_extreme_weather_analytics; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS 
iris.building_epc_analytics + AS ( + WITH active_epcs AS ( + SELECT DISTINCT ON (epc_assessment.uprn) epc_assessment.id, + epc_assessment.uprn, + epc_assessment.epc_rating, + epc_assessment.lodgement_date, + epc_assessment.sap_rating, + epc_assessment.expiry_date + FROM iris.epc_assessment + WHERE epc_assessment.lodgement_date IS NOT NULL AND epc_assessment.expiry_date >= CURRENT_DATE + ORDER BY epc_assessment.uprn, epc_assessment.lodgement_date DESC + ), year_end_dates AS ( + SELECT generate_series(date_trunc('year'::text, (( SELECT min(epc_assessment.lodgement_date) AS min + FROM iris.epc_assessment + WHERE epc_assessment.lodgement_date IS NOT NULL))::timestamp with time zone)::date + '1 year'::interval - '1 day'::interval, date_trunc('year'::text, CURRENT_DATE::timestamp with time zone)::date + '1 year'::interval - '1 day'::interval, '1 year'::interval)::date AS snapshot_date + ), snapshot_lookup AS ( + WITH epc_snapshots AS ( + SELECT ea_1.uprn, + ea_1.lodgement_date, + yed.snapshot_date, + row_number() OVER ( + PARTITION BY ea_1.uprn, yed.snapshot_date ORDER BY ea_1.lodgement_date DESC + ) AS rn + FROM iris.epc_assessment ea_1 + CROSS JOIN year_end_dates yed + WHERE ea_1.lodgement_date <= yed.snapshot_date AND ea_1.expiry_date >= yed.snapshot_date AND ea_1.lodgement_date IS NOT NULL AND ea_1.expiry_date IS NOT NULL + ) + SELECT epc_snapshots.uprn, + epc_snapshots.lodgement_date, + array_agg(epc_snapshots.snapshot_date ORDER BY epc_snapshots.snapshot_date) AS active_snapshots + FROM epc_snapshots + WHERE epc_snapshots.rn = 1 + GROUP BY epc_snapshots.uprn, epc_snapshots.lodgement_date + ) + SELECT b.uprn, + b.point, + b.is_residential, + ea.lodgement_date, + ea.epc_rating, + ea.sap_rating, + ea.expiry_date, + COALESCE(su_epc.type, su_build.type) AS type, + COALESCE(su_epc.built_form, su_build.built_form) AS built_form, + COALESCE(su_epc.fuel_type, su_build.fuel_type) AS fuel_type, + COALESCE(su_epc.window_glazing, su_build.window_glazing) AS window_glazing, + 
COALESCE(su_epc.wall_construction, su_build.wall_construction) AS wall_construction, + COALESCE(su_epc.wall_insulation, su_build.wall_insulation) AS wall_insulation, + COALESCE(su_epc.roof_construction, su_build.roof_construction) AS roof_construction, + COALESCE(su_epc.roof_insulation, su_build.roof_insulation) AS roof_insulation, + COALESCE(su_epc.roof_insulation_thickness, su_build.roof_insulation_thickness) AS roof_insulation_thickness, + COALESCE(su_epc.floor_construction, su_build.floor_construction) AS floor_construction, + COALESCE(su_epc.floor_insulation, su_build.floor_insulation) AS floor_insulation, + COALESCE(su_epc.has_roof_solar_panels, su_build.has_roof_solar_panels) AS has_roof_solar_panels, + COALESCE(su_epc.roof_material, su_build.roof_material) AS roof_material, + COALESCE(su_epc.roof_aspect_area_facing_north_m2, su_build.roof_aspect_area_facing_north_m2) AS roof_aspect_area_facing_north_m2, + COALESCE(su_epc.roof_aspect_area_facing_east_m2, su_build.roof_aspect_area_facing_east_m2) AS roof_aspect_area_facing_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_m2, su_build.roof_aspect_area_facing_south_m2) AS roof_aspect_area_facing_south_m2, + COALESCE(su_epc.roof_aspect_area_facing_west_m2, su_build.roof_aspect_area_facing_west_m2) AS roof_aspect_area_facing_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_east_m2, su_build.roof_aspect_area_facing_north_east_m2) AS roof_aspect_area_facing_north_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_east_m2, su_build.roof_aspect_area_facing_south_east_m2) AS roof_aspect_area_facing_south_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_west_m2, su_build.roof_aspect_area_facing_south_west_m2) AS roof_aspect_area_facing_south_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_west_m2, su_build.roof_aspect_area_facing_north_west_m2) AS roof_aspect_area_facing_north_west_m2, + COALESCE(su_epc.roof_aspect_area_indeterminable_m2, 
su_build.roof_aspect_area_indeterminable_m2) AS roof_aspect_area_indeterminable_m2, + COALESCE(su_epc.roof_shape, su_build.roof_shape) AS roof_shape, + COALESCE(er.name, sawr.name) AS region_name, + blcc.name AS county_name, + dbu.name AS district_name, + COALESCE(dbuw.name, ued.name) AS ward_name, + CASE + WHEN aes.id IS NOT NULL THEN true + ELSE false + END AS epc_active, + sl.active_snapshots + FROM iris.building b + LEFT JOIN iris.epc_assessment ea ON ea.uprn = b.uprn + LEFT JOIN active_epcs aes ON ea.id = aes.id + LEFT JOIN snapshot_lookup sl ON sl.uprn = b.uprn AND sl.lodgement_date = ea.lodgement_date + LEFT JOIN iris.structure_unit su_epc ON su_epc.epc_assessment_id = ea.id + LEFT JOIN iris.structure_unit su_build ON su_build.uprn = b.uprn AND su_build.epc_assessment_id IS NULL AND ea.id IS NULL + JOIN iris.boundary_line_ceremonial_counties blcc ON st_contains(blcc.geometry, b.point) + JOIN iris.district_borough_unitary dbu ON st_contains(dbu.geometry, b.point) + LEFT JOIN iris.district_borough_unitary_ward dbuw ON st_contains(dbuw.geometry, b.point) + LEFT JOIN iris.unitary_electoral_division ued ON st_contains(ued.geometry, b.point) + LEFT JOIN iris.english_region er ON er.fid = dbu.english_region_fid + LEFT JOIN iris.scotland_and_wales_region sawr ON sawr.fid = dbu.scotland_and_wales_region_fid + WHERE su_epc.epc_assessment_id IS NOT NULL OR su_build.uprn IS NOT NULL + ) WITH NO DATA; + """ + ) + + _create_indices() + + _create_building_epc_analytics_aggregates_view() + + _create_building_extreme_weather_analytics_view() + + +def downgrade() -> None: + """Downgrade schema.""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics_aggregates; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_extreme_weather_analytics; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS 
iris.building_epc_analytics + AS ( + WITH active_epcs AS ( + SELECT DISTINCT ON (epc_assessment.uprn) epc_assessment.id, + epc_assessment.uprn, + epc_assessment.epc_rating, + epc_assessment.lodgement_date, + epc_assessment.sap_rating, + epc_assessment.expiry_date + FROM iris.epc_assessment + WHERE epc_assessment.lodgement_date IS NOT NULL AND epc_assessment.expiry_date >= CURRENT_DATE + ORDER BY epc_assessment.uprn, epc_assessment.lodgement_date DESC + ), year_end_dates AS ( + SELECT generate_series(date_trunc('year'::text, (( SELECT min(epc_assessment.lodgement_date) AS min + FROM iris.epc_assessment + WHERE epc_assessment.lodgement_date IS NOT NULL))::timestamp with time zone)::date + '1 year'::interval - '1 day'::interval, date_trunc('year'::text, CURRENT_DATE::timestamp with time zone)::date + '1 year'::interval - '1 day'::interval, '1 year'::interval)::date AS snapshot_date + ), snapshot_lookup AS ( + WITH epc_snapshots AS ( + SELECT ea_1.uprn, + ea_1.lodgement_date, + yed.snapshot_date, + row_number() OVER ( + PARTITION BY ea_1.uprn, yed.snapshot_date ORDER BY ea_1.lodgement_date DESC + ) AS rn + FROM iris.epc_assessment ea_1 + CROSS JOIN year_end_dates yed + WHERE ea_1.lodgement_date <= yed.snapshot_date AND ea_1.expiry_date >= yed.snapshot_date AND ea_1.lodgement_date IS NOT NULL AND ea_1.expiry_date IS NOT NULL + ) + SELECT epc_snapshots.uprn, + epc_snapshots.lodgement_date, + array_agg(epc_snapshots.snapshot_date ORDER BY epc_snapshots.snapshot_date) AS active_snapshots + FROM epc_snapshots + WHERE epc_snapshots.rn = 1 + GROUP BY epc_snapshots.uprn, epc_snapshots.lodgement_date + ) + SELECT b.uprn, + b.point, + b.is_residential, + ea.lodgement_date, + ea.epc_rating, + ea.sap_rating, + ea.expiry_date, + COALESCE(su_epc.type, su_build.type) AS type, + COALESCE(su_epc.built_form, su_build.built_form) AS built_form, + COALESCE(su_epc.fuel_type, su_build.fuel_type) AS fuel_type, + COALESCE(su_epc.window_glazing, su_build.window_glazing) AS window_glazing, + 
COALESCE(su_epc.wall_construction, su_build.wall_construction) AS wall_construction, + COALESCE(su_epc.wall_insulation, su_build.wall_insulation) AS wall_insulation, + COALESCE(su_epc.roof_construction, su_build.roof_construction) AS roof_construction, + COALESCE(su_epc.roof_insulation, su_build.roof_insulation) AS roof_insulation, + COALESCE(su_epc.roof_insulation_thickness, su_build.roof_insulation_thickness) AS roof_insulation_thickness, + COALESCE(su_epc.floor_construction, su_build.floor_construction) AS floor_construction, + COALESCE(su_epc.floor_insulation, su_build.floor_insulation) AS floor_insulation, + COALESCE(su_epc.has_roof_solar_panels, su_build.has_roof_solar_panels) AS has_roof_solar_panels, + COALESCE(su_epc.roof_material, su_build.roof_material) AS roof_material, + COALESCE(su_epc.roof_aspect_area_facing_north_m2, su_build.roof_aspect_area_facing_north_m2) AS roof_aspect_area_facing_north_m2, + COALESCE(su_epc.roof_aspect_area_facing_east_m2, su_build.roof_aspect_area_facing_east_m2) AS roof_aspect_area_facing_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_m2, su_build.roof_aspect_area_facing_south_m2) AS roof_aspect_area_facing_south_m2, + COALESCE(su_epc.roof_aspect_area_facing_west_m2, su_build.roof_aspect_area_facing_west_m2) AS roof_aspect_area_facing_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_east_m2, su_build.roof_aspect_area_facing_north_east_m2) AS roof_aspect_area_facing_north_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_east_m2, su_build.roof_aspect_area_facing_south_east_m2) AS roof_aspect_area_facing_south_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_west_m2, su_build.roof_aspect_area_facing_south_west_m2) AS roof_aspect_area_facing_south_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_west_m2, su_build.roof_aspect_area_facing_north_west_m2) AS roof_aspect_area_facing_north_west_m2, + COALESCE(su_epc.roof_aspect_area_indeterminable_m2, 
su_build.roof_aspect_area_indeterminable_m2) AS roof_aspect_area_indeterminable_m2, + COALESCE(su_epc.roof_shape, su_build.roof_shape) AS roof_shape, + COALESCE(er.name, sawr.name) AS region_name, + blcc.name AS county_name, + dbu.name AS district_name, + COALESCE(dbuw.name, ued.name) AS ward_name, + CASE + WHEN aes.id IS NOT NULL THEN true + ELSE false + END AS epc_active, + sl.active_snapshots + FROM iris.building b + LEFT JOIN iris.epc_assessment ea ON ea.uprn = b.uprn + LEFT JOIN active_epcs aes ON ea.id = aes.id + LEFT JOIN snapshot_lookup sl ON sl.uprn = b.uprn AND sl.lodgement_date = ea.lodgement_date + LEFT JOIN iris.structure_unit su_epc ON su_epc.epc_assessment_id = ea.id + LEFT JOIN iris.structure_unit su_build ON su_build.uprn = b.uprn AND su_build.epc_assessment_id IS NULL AND ea.id IS NULL + JOIN iris.boundary_line_ceremonial_counties blcc ON st_intersects(blcc.geometry, b.point) + JOIN iris.district_borough_unitary dbu ON st_intersects(dbu.geometry, b.point) + LEFT JOIN iris.district_borough_unitary_ward dbuw ON st_intersects(dbuw.geometry, b.point) + LEFT JOIN iris.unitary_electoral_division ued ON st_intersects(ued.geometry, b.point) + LEFT JOIN iris.english_region er ON er.fid = dbu.english_region_fid + LEFT JOIN iris.scotland_and_wales_region sawr ON sawr.fid = dbu.scotland_and_wales_region_fid + WHERE su_epc.epc_assessment_id IS NOT NULL OR su_build.uprn IS NOT NULL + ) WITH NO DATA; + """ + ) + + _create_indices() + + _create_building_epc_analytics_aggregates_view() + + _create_building_extreme_weather_analytics_view() diff --git a/api/alembic/versions/47ed09fc85cc_022_update_regions_view_remove_visual_.py b/api/alembic/versions/47ed09fc85cc_022_update_regions_view_remove_visual_.py new file mode 100644 index 0000000..243574e --- /dev/null +++ b/api/alembic/versions/47ed09fc85cc_022_update_regions_view_remove_visual_.py @@ -0,0 +1,176 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. 
This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""022_update_regions_view_remove_visual_buffer + +Revision ID: 47ed09fc85cc +Revises: e72a7ae5be78 +Create Date: 2025-10-24 09:55:13.319383 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '47ed09fc85cc' +down_revision: Union[str, None] = 'e72a7ae5be78' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + """ Recreate materialised views for regions""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc_data; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc_data + AS + WITH regions_buffered AS( + SELECT + name, + geom_simplified as geometry, + ST_Transform( -- convert back to WGS84 + ST_Buffer( -- buffer in metres (because 27700 uses metres) + ST_Transform(geom_simplified, 27700), + 5000 -- buffer distance in metres + ), + 4326 -- back to WGS84 + ) AS geometry_buff_5k + FROM iris.english_region + UNION ALL + SELECT + name, + geometry, + ST_Transform( + ST_Buffer( + ST_Transform(geometry, 27700), + 5000 + ), + 4326 + ) AS geometry_buff_5k + FROM iris.country_region + WHERE name = 'Wales') + SELECT + r.name AS name, + SUM(e.total) AS total, + SUM(e.epc_a) AS epc_a, + SUM(e.epc_b) AS epc_b, + SUM(e.epc_c) AS epc_c, + SUM(e.epc_d) AS epc_d, + SUM(e.epc_e) AS epc_e, + SUM(e.epc_f) AS epc_f, + SUM(e.epc_g) AS epc_g, + SUM(e.epc_null) AS epc_null, + r.geometry + FROM regions_buffered r + JOIN iris.district_borough_unitary_epc_data e + ON ST_Contains(r.geometry_buff_5k, ST_Simplify(e.geometry, 0.0001)) + GROUP BY r.name, r.geometry + WITH NO 
DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.uk_region_epc_data t + WITH NO DATA; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + """ Recreate materialised views for regions""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc_data; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc_data + AS + WITH regions_buffered AS( + SELECT + name, + ST_Transform( -- convert back to WGS84 + ST_Buffer( -- buffer in metres (because 27700 uses metres) + ST_Transform(geom_simplified, 27700), + 5000 -- buffer distance in metres + ), + 4326 -- back to WGS84 + ) AS geometry + FROM iris.english_region + UNION ALL + SELECT + name, + ST_Transform( + ST_Buffer( + ST_Transform(geometry, 27700), + 5000 + ), + 4326 + ) AS geometry + FROM iris.country_region + WHERE name = 'Wales') + SELECT + r.name AS name, + SUM(e.total) AS total, + SUM(e.epc_a) AS epc_a, + SUM(e.epc_b) AS epc_b, + SUM(e.epc_c) AS epc_c, + SUM(e.epc_d) AS epc_d, + SUM(e.epc_e) AS epc_e, + SUM(e.epc_f) AS epc_f, + SUM(e.epc_g) AS epc_g, + SUM(e.epc_null) AS epc_null, + r.geometry + FROM regions_buffered r + JOIN iris.district_borough_unitary_epc_data e + ON ST_Contains(r.geometry, ST_Simplify(e.geometry, 0.0001)) + GROUP BY r.name, r.geometry + WITH NO DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc + 
TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.uk_region_epc_data t + WITH NO DATA; + """ + ) diff --git a/api/alembic/versions/5c66700e5c72_034_epc_analytics_fuel_indexes.py b/api/alembic/versions/5c66700e5c72_034_epc_analytics_fuel_indexes.py new file mode 100644 index 0000000..c914511 --- /dev/null +++ b/api/alembic/versions/5c66700e5c72_034_epc_analytics_fuel_indexes.py @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""034_epc_analytics_fuel_indexes + +Revision ID: 5c66700e5c72 +Revises: be69024c1b9c +Create Date: 2025-11-18 12:45:58.731082 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision: str = '5c66700e5c72' +down_revision: Union[str, None] = 'be69024c1b9c' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_fuel_types_idx + ON iris.building_epc_analytics (region_name, type, fuel_type) + WHERE epc_active = true AND type IS NOT NULL AND fuel_type IS NOT NULL; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_county_fuel_types_idx + ON iris.building_epc_analytics (county_name, type, fuel_type) + WHERE epc_active = true AND type IS NOT NULL AND fuel_type IS NOT NULL; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_district_fuel_types_idx + ON iris.building_epc_analytics (district_name, type, fuel_type) + WHERE epc_active = true AND type IS NOT NULL AND fuel_type IS NOT NULL; + """) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_district_fuel_types_idx;") + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_county_fuel_types_idx;") + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_region_fuel_types_idx;") diff --git a/api/alembic/versions/5d3b574055a5_030_recreate_building_epc_analytics_.py b/api/alembic/versions/5d3b574055a5_030_recreate_building_epc_analytics_.py new file mode 100644 index 0000000..7533a9c --- /dev/null +++ b/api/alembic/versions/5d3b574055a5_030_recreate_building_epc_analytics_.py @@ -0,0 +1,368 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. 
+ +"""030_recreate_building_epc_analytics_with_active_snapshot_dates + +Revision ID: 5d3b574055a5 +Revises: 9e07a98054cc +Create Date: 2025-11-06 12:32:51.655070 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = '5d3b574055a5' +down_revision: Union[str, None] = '9e07a98054cc' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _create_indices(): + """Create all indexes on building_epc_analytics (from migrations 024, 025, and new timeline indexes).""" + + # From migration 024 - basic indexes + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_uprn_idx + ON iris.building_epc_analytics(uprn); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_point_idx + ON iris.building_epc_analytics USING GIST(point); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_name_idx + ON iris.building_epc_analytics(region_name); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_lodgement_date_idx + ON iris.building_epc_analytics(lodgement_date); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_county_name_idx + ON iris.building_epc_analytics(county_name); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_district_name_idx + ON iris.building_epc_analytics(district_name); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_ward_name_idx + ON iris.building_epc_analytics(ward_name); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_epc_active_idx + ON iris.building_epc_analytics(epc_active); + """) + + # From migration 025 - partial indexes for active EPC charts + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_fuel_types_idx + ON iris.building_epc_analytics (type, fuel_type) + WHERE epc_active = true + AND type IS 
NOT NULL + AND fuel_type IS NOT NULL; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_epc_ratings_idx + ON iris.building_epc_analytics (epc_rating, region_name) + WHERE epc_active = true; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_attributes_idx + ON iris.building_epc_analytics ( + region_name, + has_roof_solar_panels, + window_glazing, + floor_construction, + roof_insulation_thickness, + roof_construction, + wall_construction + ) + WHERE epc_active = true; + """) + + # New indexes for timeline charts (historical data via active_snapshots) + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_epc_rating_idx + ON iris.building_epc_analytics(epc_rating); + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_type_idx + ON iris.building_epc_analytics(type); + """) + + +def upgrade() -> None: + """Recreate building_epc_analytics with active_snapshots column.""" + + # Drop existing materialized view + op.execute(""" + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics CASCADE; + """) + + # Recreate with active_snapshots column + op.execute(""" + CREATE MATERIALIZED VIEW iris.building_epc_analytics AS ( + WITH active_epcs AS ( + -- Find the most recent valid EPC per UPRN (for epc_active flag) + SELECT DISTINCT ON (uprn) * + FROM iris.epc_assessment + WHERE lodgement_date IS NOT NULL AND expiry_date >= CURRENT_DATE + ORDER BY uprn, lodgement_date DESC + ), + year_end_dates AS ( + -- Generate all year-end dates from earliest to current year + SELECT generate_series( + DATE_TRUNC('year', (SELECT MIN(lodgement_date) FROM iris.epc_assessment WHERE lodgement_date IS NOT NULL))::date + interval '1 year' - interval '1 day', + DATE_TRUNC('year', CURRENT_DATE)::date + interval '1 year' - interval '1 day', + interval '1 year' + )::date as snapshot_date + ), + snapshot_lookup AS ( + -- For each EPC, find year-end dates where it was the active EPC (most recent valid 
one) + WITH epc_snapshots AS ( + SELECT + ea.uprn, + ea.lodgement_date, + yed.snapshot_date, + ROW_NUMBER() OVER ( + PARTITION BY ea.uprn, yed.snapshot_date + ORDER BY ea.lodgement_date DESC + ) as rn + FROM iris.epc_assessment ea + CROSS JOIN year_end_dates yed + WHERE ea.lodgement_date <= yed.snapshot_date + AND ea.expiry_date >= yed.snapshot_date + AND ea.lodgement_date IS NOT NULL + AND ea.expiry_date IS NOT NULL + ) + SELECT + uprn, + lodgement_date, + ARRAY_AGG(snapshot_date ORDER BY snapshot_date) as active_snapshots + FROM epc_snapshots + WHERE rn = 1 -- Only the most recent EPC at each snapshot + GROUP BY uprn, lodgement_date + ) + SELECT + b.uprn, + b.point, + b.is_residential, + ea.lodgement_date, + ea.epc_rating, + ea.sap_rating, + ea.expiry_date, + COALESCE(su_epc.type, su_build.type) AS type, + COALESCE(su_epc.built_form, su_build.built_form) AS built_form, + COALESCE(su_epc.fuel_type, su_build.fuel_type) AS fuel_type, + COALESCE(su_epc.window_glazing, su_build.window_glazing) AS window_glazing, + COALESCE(su_epc.wall_construction, su_build.wall_construction) AS wall_construction, + COALESCE(su_epc.wall_insulation, su_build.wall_insulation) AS wall_insulation, + COALESCE(su_epc.roof_construction, su_build.roof_construction) AS roof_construction, + COALESCE(su_epc.roof_insulation, su_build.roof_insulation) AS roof_insulation, + COALESCE(su_epc.roof_insulation_thickness, su_build.roof_insulation_thickness) AS roof_insulation_thickness, + COALESCE(su_epc.floor_construction, su_build.floor_construction) AS floor_construction, + COALESCE(su_epc.floor_insulation, su_build.floor_insulation) AS floor_insulation, + COALESCE(su_epc.has_roof_solar_panels, su_build.has_roof_solar_panels) AS has_roof_solar_panels, + COALESCE(su_epc.roof_material, su_build.roof_material) AS roof_material, + COALESCE(su_epc.roof_aspect_area_facing_north_m2, su_build.roof_aspect_area_facing_north_m2) AS roof_aspect_area_facing_north_m2, + 
COALESCE(su_epc.roof_aspect_area_facing_east_m2, su_build.roof_aspect_area_facing_east_m2) AS roof_aspect_area_facing_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_m2, su_build.roof_aspect_area_facing_south_m2) AS roof_aspect_area_facing_south_m2, + COALESCE(su_epc.roof_aspect_area_facing_west_m2, su_build.roof_aspect_area_facing_west_m2) AS roof_aspect_area_facing_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_east_m2, su_build.roof_aspect_area_facing_north_east_m2) AS roof_aspect_area_facing_north_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_east_m2, su_build.roof_aspect_area_facing_south_east_m2) AS roof_aspect_area_facing_south_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_west_m2, su_build.roof_aspect_area_facing_south_west_m2) AS roof_aspect_area_facing_south_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_west_m2, su_build.roof_aspect_area_facing_north_west_m2) AS roof_aspect_area_facing_north_west_m2, + COALESCE(su_epc.roof_aspect_area_indeterminable_m2, su_build.roof_aspect_area_indeterminable_m2) AS roof_aspect_area_indeterminable_m2, + COALESCE(su_epc.roof_shape, su_build.roof_shape) AS roof_shape, + COALESCE(er.name, sawr.name) AS region_name, + blcc.name AS county_name, + dbu.name AS district_name, + COALESCE(dbuw.name, ued.name) AS ward_name, + CASE + WHEN aes.id IS NOT NULL THEN true + ELSE false + END AS epc_active, + sl.active_snapshots + FROM iris.building b + LEFT JOIN iris.epc_assessment ea ON ea.uprn = b.uprn + LEFT JOIN active_epcs aes ON ea.id = aes.id + LEFT JOIN snapshot_lookup sl ON sl.uprn = b.uprn AND sl.lodgement_date = ea.lodgement_date + LEFT JOIN iris.structure_unit su_epc ON su_epc.epc_assessment_id = ea.id + LEFT JOIN iris.structure_unit su_build + ON su_build.uprn = b.uprn + AND su_build.epc_assessment_id IS NULL + AND ea.id IS NULL + JOIN iris.boundary_line_ceremonial_counties blcc ON ST_INTERSECTS(blcc.geometry, b.point) + JOIN iris.district_borough_unitary dbu ON 
ST_INTERSECTS(dbu.geometry, b.point) + LEFT JOIN iris.district_borough_unitary_ward dbuw ON ST_INTERSECTS(dbuw.geometry, b.point) + LEFT JOIN iris.unitary_electoral_division ued ON ST_INTERSECTS(ued.geometry, b.point) + LEFT JOIN iris.english_region er ON er.fid = dbu.english_region_fid + LEFT JOIN iris.scotland_and_wales_region sawr ON sawr.fid = dbu.scotland_and_wales_region_fid + WHERE su_epc.epc_assessment_id IS NOT NULL OR su_build.uprn IS NOT NULL + ) + WITH NO DATA; + """) + + # Create all indexes + _create_indices() + + # Drop building_epc_active_by_year as it's no longer needed + op.execute(""" + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_active_by_year CASCADE; + """) + + +def downgrade() -> None: + """Revert to previous building_epc_analytics definition without active_snapshots.""" + + # Drop current view + op.execute(""" + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics CASCADE; + """) + + # Recreate previous definition (from migration 024, without active_snapshots) + op.execute(""" + CREATE MATERIALIZED VIEW iris.building_epc_analytics AS ( + WITH active_epcs AS ( + SELECT DISTINCT ON (uprn) * + FROM iris.epc_assessment + WHERE lodgement_date IS NOT NULL AND expiry_date >= CURRENT_DATE + ORDER BY uprn, lodgement_date DESC + ) + SELECT + b.uprn, + b.point, + b.is_residential, + ea.lodgement_date, + ea.epc_rating, + ea.sap_rating, + ea.expiry_date, + COALESCE(su_epc.type, su_build.type) AS type, + COALESCE(su_epc.built_form, su_build.built_form) AS built_form, + COALESCE(su_epc.fuel_type, su_build.fuel_type) AS fuel_type, + COALESCE(su_epc.window_glazing, su_build.window_glazing) AS window_glazing, + COALESCE(su_epc.wall_construction, su_build.wall_construction) AS wall_construction, + COALESCE(su_epc.wall_insulation, su_build.wall_insulation) AS wall_insulation, + COALESCE(su_epc.roof_construction, su_build.roof_construction) AS roof_construction, + COALESCE(su_epc.roof_insulation, su_build.roof_insulation) AS roof_insulation, + 
COALESCE(su_epc.roof_insulation_thickness, su_build.roof_insulation_thickness) AS roof_insulation_thickness, + COALESCE(su_epc.floor_construction, su_build.floor_construction) AS floor_construction, + COALESCE(su_epc.floor_insulation, su_build.floor_insulation) AS floor_insulation, + COALESCE(su_epc.has_roof_solar_panels, su_build.has_roof_solar_panels) AS has_roof_solar_panels, + COALESCE(su_epc.roof_material, su_build.roof_material) AS roof_material, + COALESCE(su_epc.roof_aspect_area_facing_north_m2, su_build.roof_aspect_area_facing_north_m2) AS roof_aspect_area_facing_north_m2, + COALESCE(su_epc.roof_aspect_area_facing_east_m2, su_build.roof_aspect_area_facing_east_m2) AS roof_aspect_area_facing_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_m2, su_build.roof_aspect_area_facing_south_m2) AS roof_aspect_area_facing_south_m2, + COALESCE(su_epc.roof_aspect_area_facing_west_m2, su_build.roof_aspect_area_facing_west_m2) AS roof_aspect_area_facing_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_east_m2, su_build.roof_aspect_area_facing_north_east_m2) AS roof_aspect_area_facing_north_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_east_m2, su_build.roof_aspect_area_facing_south_east_m2) AS roof_aspect_area_facing_south_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_west_m2, su_build.roof_aspect_area_facing_south_west_m2) AS roof_aspect_area_facing_south_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_west_m2, su_build.roof_aspect_area_facing_north_west_m2) AS roof_aspect_area_facing_north_west_m2, + COALESCE(su_epc.roof_aspect_area_indeterminable_m2, su_build.roof_aspect_area_indeterminable_m2) AS roof_aspect_area_indeterminable_m2, + COALESCE(su_epc.roof_shape, su_build.roof_shape) AS roof_shape, + COALESCE(er.name, sawr.name) AS region_name, + blcc.name AS county_name, + dbu.name AS district_name, + COALESCE(dbuw.name, ued.name) AS ward_name, + CASE + WHEN aes.id IS NOT NULL THEN true + ELSE false + END AS epc_active + 
FROM iris.building b + LEFT JOIN iris.epc_assessment ea ON ea.uprn = b.uprn + LEFT JOIN active_epcs aes ON ea.id = aes.id + LEFT JOIN iris.structure_unit su_epc ON su_epc.epc_assessment_id = ea.id + LEFT JOIN iris.structure_unit su_build + ON su_build.uprn = b.uprn + AND su_build.epc_assessment_id IS NULL + AND ea.id IS NULL + JOIN iris.boundary_line_ceremonial_counties blcc ON ST_INTERSECTS(blcc.geometry, b.point) + JOIN iris.district_borough_unitary dbu ON ST_INTERSECTS(dbu.geometry, b.point) + LEFT JOIN iris.district_borough_unitary_ward dbuw ON ST_INTERSECTS(dbuw.geometry, b.point) + LEFT JOIN iris.unitary_electoral_division ued ON ST_INTERSECTS(ued.geometry, b.point) + LEFT JOIN iris.english_region er ON er.fid = dbu.english_region_fid + LEFT JOIN iris.scotland_and_wales_region sawr ON sawr.fid = dbu.scotland_and_wales_region_fid + WHERE su_epc.epc_assessment_id IS NOT NULL OR su_build.uprn IS NOT NULL + ) + WITH NO DATA; + """) + + # Recreate indexes + _create_indices() + + # Recreate building_epc_active_by_year + op.execute(""" + CREATE MATERIALIZED VIEW iris.building_epc_active_by_year AS + WITH RECURSIVE date_range AS ( + SELECT + (DATE_TRUNC('year', MIN(lodgement_date))::date + interval '1 year' - interval '1 day')::date as snapshot_date, + (DATE_TRUNC('year', CURRENT_DATE)::date + interval '1 year' - interval '1 day')::date as max_date + FROM iris.building_epc_analytics + WHERE lodgement_date IS NOT NULL + + UNION ALL + + SELECT + (snapshot_date + interval '1 year')::date, + max_date + FROM date_range + WHERE snapshot_date < max_date + ), + snapshots AS ( + SELECT snapshot_date FROM date_range WHERE snapshot_date IS NOT NULL + ), + snapshot_per_year AS ( + SELECT + s.snapshot_date, + b.uprn, + b.lodgement_date, + ROW_NUMBER() OVER ( + PARTITION BY s.snapshot_date, b.uprn + ORDER BY b.lodgement_date DESC + ) as rn + FROM snapshots s + INNER JOIN iris.building_epc_analytics b + ON b.lodgement_date <= s.snapshot_date + AND b.expiry_date >= s.snapshot_date + 
WHERE b.lodgement_date IS NOT NULL + AND b.expiry_date IS NOT NULL + AND b.sap_rating IS NOT NULL + ) + SELECT + snapshot_date, + uprn, + lodgement_date + FROM snapshot_per_year + WHERE rn = 1 + WITH NO DATA; + """) + + op.execute(""" + CREATE INDEX building_epc_active_by_year_snapshot_date_idx + ON iris.building_epc_active_by_year(snapshot_date); + """) + + op.execute(""" + CREATE INDEX building_epc_active_by_year_epc_ref_idx + ON iris.building_epc_active_by_year(uprn, lodgement_date); + """) + + op.execute(""" + CREATE INDEX building_epc_analytics_uprn_lodgement_idx + ON iris.building_epc_analytics(uprn, lodgement_date); + """) diff --git a/api/alembic/versions/67aaf92accd2_026_update_weather_analytics_view.py b/api/alembic/versions/67aaf92accd2_026_update_weather_analytics_view.py new file mode 100644 index 0000000..4343ceb --- /dev/null +++ b/api/alembic/versions/67aaf92accd2_026_update_weather_analytics_view.py @@ -0,0 +1,203 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""026_update_weather_analytics_view + +Revision ID: 67aaf92accd2 +Revises: 28c37c30e0e4 +Create Date: 2025-10-31 23:34:09.454308 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "67aaf92accd2" +down_revision: Union[str, None] = "28c37c30e0e4" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _create_indices(): + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_weather_analytics_uprn_idx ON iris.building_weather_analytics(uprn); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_weather_analytics_point_idx ON iris.building_weather_analytics USING GIST(point); + """ + ) + + +def upgrade() -> None: + """Upgrade schema.""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_weather_analytics; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.building_weather_analytics + AS ( + WITH wdrp_0 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 0 + ), + wdrp_45 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 45 + ), + wdrp_90 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 90 + ), + wdrp_135 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 135 + ), + wdrp_180 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 180 + ), + wdrp_225 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 225 + ), + wdrp_270 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 270 + ), + wdrp_315 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 315 + ) + SELECT + b.uprn, + b.point, + acoid.icingdays as icing_days, + wdrp_0.wdr_40_median as wdr_40_median_0, + wdrp_45.wdr_40_median as wdr_40_median_45, + wdrp_90.wdr_40_median as wdr_40_median_90, + wdrp_135.wdr_40_median as wdr_40_median_135, + 
wdrp_180.wdr_40_median as wdr_40_median_180, + wdrp_225.wdr_40_median as wdr_40_median_225, + wdrp_270.wdr_40_median as wdr_40_median_270, + wdrp_315.wdr_40_median as wdr_40_median_315, + acohdp.hsd_40_median + FROM iris.building b + JOIN iris.annual_count_of_icing_days_1991_2020 acoid ON ST_CONTAINS(acoid.shape, b.point) + JOIN wdrp_0 ON ST_CONTAINS(wdrp_0.shape, b.point) + JOIN wdrp_45 ON ST_CONTAINS(wdrp_45.shape, b.point) + JOIN wdrp_90 ON ST_CONTAINS(wdrp_90.shape, b.point) + JOIN wdrp_135 ON ST_CONTAINS(wdrp_135.shape, b.point) + JOIN wdrp_180 ON ST_CONTAINS(wdrp_180.shape, b.point) + JOIN wdrp_225 ON ST_CONTAINS(wdrp_225.shape, b.point) + JOIN wdrp_270 ON ST_CONTAINS(wdrp_270.shape, b.point) + JOIN wdrp_315 ON ST_CONTAINS(wdrp_315.shape, b.point) + JOIN iris.annual_count_of_hot_summer_days_projections_12km acohdp ON ST_CONTAINS(acohdp.shape, b.point) + WHERE b.is_residential = true + ) + WITH NO DATA; + """ + ) + + _create_indices() + + +def downgrade() -> None: + """Downgrade schema.""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_weather_analytics; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.building_weather_analytics + AS ( + WITH wdrp_0 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 0 + ), + wdrp_45 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 45 + ), + wdrp_90 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 90 + ), + wdrp_135 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 135 + ), + wdrp_180 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 180 + ), + wdrp_225 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 225 + ), + wdrp_270 AS ( + SELECT wdr_40_median, shape + FROM 
iris.wind_driven_rain_projections + WHERE wall_orientation = 270 + ), + wdrp_315 AS ( + SELECT wdr_40_median, shape + FROM iris.wind_driven_rain_projections + WHERE wall_orientation = 315 + ) + SELECT + b.uprn, + b.point, + acoid.icingdays as icing_days, + wdrp_0.wdr_40_median as wdr_40_median_0, + wdrp_45.wdr_40_median as wdr_40_median_45, + wdrp_90.wdr_40_median as wdr_40_median_90, + wdrp_135.wdr_40_median as wdr_40_median_135, + wdrp_180.wdr_40_median as wdr_40_median_180, + wdrp_225.wdr_40_median as wdr_40_median_225, + wdrp_270.wdr_40_median as wdr_40_median_270, + wdrp_315.wdr_40_median as wdr_40_median_315, + acohdp.hsd_40_median + FROM iris.building b + JOIN iris.annual_count_of_icing_days_1991_2020 acoid ON ST_CONTAINS(acoid.shape, b.point) + JOIN wdrp_0 ON ST_CONTAINS(wdrp_0.shape, b.point) + JOIN wdrp_45 ON ST_CONTAINS(wdrp_45.shape, b.point) + JOIN wdrp_90 ON ST_CONTAINS(wdrp_90.shape, b.point) + JOIN wdrp_135 ON ST_CONTAINS(wdrp_135.shape, b.point) + JOIN wdrp_180 ON ST_CONTAINS(wdrp_180.shape, b.point) + JOIN wdrp_225 ON ST_CONTAINS(wdrp_225.shape, b.point) + JOIN wdrp_270 ON ST_CONTAINS(wdrp_270.shape, b.point) + JOIN wdrp_315 ON ST_CONTAINS(wdrp_315.shape, b.point) + JOIN iris.annual_count_of_hot_summer_days_projections_12km acohdp ON ST_CONTAINS(acohdp.shape, b.point) + ) + WITH NO DATA; + """ + ) + + _create_indices() diff --git a/api/alembic/versions/86f3db5e9bbe_031_create_view_analytics_aggregates.py b/api/alembic/versions/86f3db5e9bbe_031_create_view_analytics_aggregates.py new file mode 100644 index 0000000..c0d00a3 --- /dev/null +++ b/api/alembic/versions/86f3db5e9bbe_031_create_view_analytics_aggregates.py @@ -0,0 +1,135 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. 
+ +"""031_create_view_analytics_aggregates + +Revision ID: 86f3db5e9bbe +Revises: 5d3b574055a5 +Create Date: 2025-11-07 12:11:19.381293 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = '86f3db5e9bbe' +down_revision: Union[str, None] = '5d3b574055a5' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Create building_epc_analytics_aggregates materialized view.""" + + op.execute(""" + CREATE MATERIALIZED VIEW iris.building_epc_analytics_aggregates AS + WITH snapshot_dates AS ( + SELECT generate_series( + DATE_TRUNC('year', (SELECT MIN(lodgement_date) FROM iris.building_epc_analytics WHERE lodgement_date IS NOT NULL))::date + interval '1 year' - interval '1 day', + DATE_TRUNC('year', CURRENT_DATE)::date + interval '1 year' - interval '1 day', + interval '1 year' + )::date as snapshot_date + ), + issued_counts AS ( + SELECT + sd.snapshot_date, + bea.region_name, + bea.county_name, + bea.district_name, + bea.ward_name, + bea.type, + COUNT(DISTINCT bea.uprn) as total_issued_count + FROM snapshot_dates sd + CROSS JOIN iris.building_epc_analytics bea + WHERE bea.lodgement_date <= sd.snapshot_date + AND bea.active_snapshots IS NOT NULL + GROUP BY sd.snapshot_date, bea.region_name, bea.county_name, bea.district_name, bea.ward_name, bea.type + ), + active_aggregates AS ( + SELECT + unnest(active_snapshots) as snapshot_date, + region_name, + county_name, + district_name, + ward_name, + type, + COUNT(*) as active_epc_count, + SUM(sap_rating) as sum_sap_rating, + COUNT(*) FILTER (WHERE epc_rating = 'A') as count_rating_a, + COUNT(*) FILTER (WHERE epc_rating = 'B') as count_rating_b, + COUNT(*) FILTER (WHERE epc_rating = 'C') as count_rating_c, + COUNT(*) FILTER (WHERE epc_rating = 'D') as count_rating_d, + COUNT(*) FILTER (WHERE epc_rating = 'E') as count_rating_e, + COUNT(*) FILTER (WHERE epc_rating = 'F') as 
count_rating_f, + COUNT(*) FILTER (WHERE epc_rating = 'G') as count_rating_g + FROM iris.building_epc_analytics + WHERE active_snapshots IS NOT NULL + GROUP BY snapshot_date, region_name, county_name, district_name, ward_name, type + ) + SELECT + aa.snapshot_date, + aa.region_name, + aa.county_name, + aa.district_name, + aa.ward_name, + aa.type, + aa.active_epc_count, + aa.sum_sap_rating, + aa.count_rating_a, + aa.count_rating_b, + aa.count_rating_c, + aa.count_rating_d, + aa.count_rating_e, + aa.count_rating_f, + aa.count_rating_g, + (ic.total_issued_count - aa.active_epc_count) as expired_epc_count + FROM active_aggregates aa + JOIN issued_counts ic + ON aa.snapshot_date = ic.snapshot_date + AND aa.region_name IS NOT DISTINCT FROM ic.region_name + AND aa.county_name IS NOT DISTINCT FROM ic.county_name + AND aa.district_name IS NOT DISTINCT FROM ic.district_name + AND aa.ward_name IS NOT DISTINCT FROM ic.ward_name + AND aa.type IS NOT DISTINCT FROM ic.type + WITH NO DATA; + """) + + op.execute(""" + CREATE INDEX building_epc_analytics_aggregates_snapshot_date_idx + ON iris.building_epc_analytics_aggregates(snapshot_date); + """) + + op.execute(""" + CREATE INDEX building_epc_analytics_aggregates_region_snapshot_idx + ON iris.building_epc_analytics_aggregates(region_name, snapshot_date); + """) + + op.execute(""" + CREATE INDEX building_epc_analytics_aggregates_county_snapshot_idx + ON iris.building_epc_analytics_aggregates(county_name, snapshot_date); + """) + + op.execute(""" + CREATE INDEX building_epc_analytics_aggregates_district_snapshot_idx + ON iris.building_epc_analytics_aggregates(district_name, snapshot_date); + """) + + op.execute(""" + CREATE INDEX building_epc_analytics_aggregates_ward_snapshot_idx + ON iris.building_epc_analytics_aggregates(ward_name, snapshot_date); + """) + + op.execute(""" + CREATE INDEX building_epc_analytics_aggregates_snapshot_type_idx + ON iris.building_epc_analytics_aggregates(snapshot_date, type); + """) + + +def downgrade() 
-> None: + """Drop the aggregates materialized view.""" + op.execute(""" + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics_aggregates CASCADE; + """) diff --git a/api/alembic/versions/8f985d72a651_009_create_uk_region_table_view.py b/api/alembic/versions/8f985d72a651_009_create_uk_region_table_view.py new file mode 100644 index 0000000..d5e0d0b --- /dev/null +++ b/api/alembic/versions/8f985d72a651_009_create_uk_region_table_view.py @@ -0,0 +1,204 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""009_create_uk_region_table_view + +Revision ID: 8f985d72a651 +Revises: 2215b32f49a9 +Create Date: 2025-08-26 13:48:42.927690 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "8f985d72a651" +down_revision: Union[str, None] = "2215b32f49a9" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + """ Create id for iris.scotland_and_wales_region""" + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS iris.scotland_and_wales_region_fid_seq1 + INCREMENT 1 + START 1 + MINVALUE 1 + MAXVALUE 2147483647 + CACHE 1; + """ + ) + + """ Create table for iris.scotland_and_wales_region""" + op.execute( + """ + CREATE TABLE IF NOT EXISTS iris.scotland_and_wales_region + ( + fid integer NOT NULL DEFAULT nextval('iris.scotland_and_wales_region_fid_seq1'::regclass), + name character varying, + area_code character varying, + area_description character varying, + file_name character varying, + feature_serial_number integer, + collection_serial_number integer, + global_polygon_id integer, + admin_unit_id integer, + census_code character varying, + hectares double precision, + non_inland_area double precision, + 
area_type_code character varying, + area_type_description character varying, + non_area_type_code character varying, + non_area_type_description character varying, + geometry geometry(MultiPolygon,4326), + CONSTRAINT scotland_and_wales_region_P PRIMARY KEY (fid) + ) + """ + ) + + """ Create geo index for district_borough_unitary_ward""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS scotland_and_wales_region_geometry_idx + ON iris.scotland_and_wales_region USING gist + (geometry) + TABLESPACE pg_default; + """ + ) + + """ Create id for english_region""" + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS iris.english_region_fid_seq1 + INCREMENT 1 + START 1 + MINVALUE 1 + MAXVALUE 2147483647 + CACHE 1; + """ + ) + """ Create table for iris.english_region""" + op.execute( + """ + CREATE TABLE IF NOT EXISTS iris.english_region + ( + fid integer NOT NULL DEFAULT nextval('iris.english_region_fid_seq1'::regclass), + name character varying, + area_code character varying, + area_description character varying, + file_name character varying, + feature_serial_number integer, + collection_serial_number integer, + global_polygon_id integer, + admin_unit_id integer, + census_code character varying, + hectares double precision, + non_inland_area double precision, + area_type_code character varying, + area_type_description character varying, + non_area_type_code character varying, + non_area_type_description character varying, + geometry geometry(MultiPolygon,4326), + CONSTRAINT english_region_P PRIMARY KEY (fid) + ) + """ + ) + + """ Create geo index for english_region""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS english_region_geometry_idx + ON iris.english_region USING gist + (geometry) + TABLESPACE pg_default; + """ + ) + + """ Create table uk_region""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region + AS + SELECT * FROM iris.scotland_and_wales_region + UNION + SELECT * FROM iris.english_region; + """ + ) + + op.execute( + """ + CREATE 
MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc_data + AS + SELECT + b.name, + COUNT (a.point) AS total, + COUNT(CASE WHEN a.epc_rating = 'A' THEN 1 END) AS epc_a, + COUNT(CASE WHEN a.epc_rating = 'B' THEN 1 END) AS epc_b, + COUNT(CASE WHEN a.epc_rating = 'C' THEN 1 END) AS epc_c, + COUNT(CASE WHEN a.epc_rating = 'D' THEN 1 END) AS epc_d, + COUNT(CASE WHEN a.epc_rating = 'E' THEN 1 END) AS epc_e, + COUNT(CASE WHEN a.epc_rating = 'F' THEN 1 END) AS epc_f, + COUNT(CASE WHEN a.epc_rating = 'G' THEN 1 END) AS epc_g, + COUNT(CASE WHEN a.epc_rating IS NULL THEN 1 END) AS epc_null, + b.geometry + FROM iris.building_epc a + LEFT JOIN iris.uk_region b + ON ST_Intersects(b.geometry, a.point) + GROUP BY b.name, b.geometry + WITH NO DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.uk_region_epc_data t + WITH NO DATA; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc; + """ + ) + + op.execute( + """ + DROP TABLE IF EXISTS iris.uk_region; + """ + ) + op.execute( + """ + DROP TABLE IF EXISTS iris.scotland_and_wales_region; + """ + ) + op.execute( + """ + DROP TABLE IF EXISTS iris.english_region; + """ + ) + + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.scotland_and_wales_region_fid_seq1; + """ + ) + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.english_region_fid_seq1; + """ + ) diff --git a/api/alembic/versions/993c0cf8ea04_010_create_uk_district_table_view.py b/api/alembic/versions/993c0cf8ea04_010_create_uk_district_table_view.py new file mode 100644 index 0000000..31ac2e4 --- /dev/null +++ 
b/api/alembic/versions/993c0cf8ea04_010_create_uk_district_table_view.py @@ -0,0 +1,139 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""010_create_uk_district_table_view + +Revision ID: 993c0cf8ea04 +Revises: 8f985d72a651 +Create Date: 2025-08-27 10:55:17.873861 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "993c0cf8ea04" +down_revision: Union[str, None] = "8f985d72a651" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + """ Create id for iris.district_borough_unitary""" + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS iris.district_borough_unitary_fid_seq1 + INCREMENT 1 + START 1 + MINVALUE 1 + MAXVALUE 2147483647 + CACHE 1; + """ + ) + + """ Create table for iris.district_borough_unitary""" + op.execute( + """ + CREATE TABLE IF NOT EXISTS iris.district_borough_unitary + ( + fid integer NOT NULL DEFAULT nextval('iris.district_borough_unitary_fid_seq1'::regclass), + name character varying, + area_code character varying, + area_description character varying, + file_name character varying, + feature_serial_number integer, + collection_serial_number integer, + global_polygon_id integer, + admin_unit_id integer, + census_code character varying, + hectares double precision, + non_inland_area double precision, + area_type_code character varying, + area_type_description character varying, + non_area_type_code character varying, + non_area_type_description character varying, + geometry geometry(MultiPolygon,4326), + CONSTRAINT district_borough_unitary_P PRIMARY KEY (fid) + ) + """ + ) + + """ Create geo index for district_borough_unitary""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS 
district_borough_unitary_geometry_idx + ON iris.district_borough_unitary USING gist + (geometry) + TABLESPACE pg_default; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.district_borough_unitary_epc_data + AS + SELECT + b.name, + COUNT (a.point) AS total, + COUNT(CASE WHEN a.epc_rating = 'A' THEN 1 END) AS epc_a, + COUNT(CASE WHEN a.epc_rating = 'B' THEN 1 END) AS epc_b, + COUNT(CASE WHEN a.epc_rating = 'C' THEN 1 END) AS epc_c, + COUNT(CASE WHEN a.epc_rating = 'D' THEN 1 END) AS epc_d, + COUNT(CASE WHEN a.epc_rating = 'E' THEN 1 END) AS epc_e, + COUNT(CASE WHEN a.epc_rating = 'F' THEN 1 END) AS epc_f, + COUNT(CASE WHEN a.epc_rating = 'G' THEN 1 END) AS epc_g, + COUNT(CASE WHEN a.epc_rating IS NULL THEN 1 END) AS epc_null, + b.geometry + FROM iris.building_epc a + LEFT JOIN iris.district_borough_unitary b + ON ST_Intersects(b.geometry, a.point) + GROUP BY b.name, b.geometry + WITH NO DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.district_borough_unitary_epc + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.district_borough_unitary_epc_data t + WITH NO DATA; + """ + ) + + +def downgrade() -> None: + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.district_borough_unitary_epc; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.district_borough_unitary_epc_data; + """ + ) + op.execute( + """ + DROP INDEX IF EXISTS iris.district_borough_unitary_geometry_idx; + """ + ) + + op.execute( + """ + DROP TABLE IF EXISTS iris.district_borough_unitary; + """ + ) + + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.district_borough_unitary_fid_seq1; + """ + ) diff --git 
a/api/alembic/versions/9e07a98054cc_029_index_for_analytics_historical_.py b/api/alembic/versions/9e07a98054cc_029_index_for_analytics_historical_.py new file mode 100644 index 0000000..157ca57 --- /dev/null +++ b/api/alembic/versions/9e07a98054cc_029_index_for_analytics_historical_.py @@ -0,0 +1,40 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""029_index_for_analytics_historical_lookups + +Revision ID: 9e07a98054cc +Revises: cf22dad02891 +Create Date: 2025-11-05 13:11:48.398969 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. +revision: str = '9e07a98054cc' +down_revision: Union[str, None] = 'cf22dad02891' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_uprn_lodgement_idx;") + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_uprn_lodgement_idx + ON iris.building_epc_analytics (uprn, lodgement_date) + INCLUDE (sap_rating, epc_rating, type, point); + """) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_uprn_lodgement_idx;") + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_uprn_lodgement_idx + ON iris.building_epc_analytics(uprn, lodgement_date); + """) diff --git a/api/alembic/versions/a75353f01fa0_006_create_hot_summer_days_table_views.py b/api/alembic/versions/a75353f01fa0_006_create_hot_summer_days_table_views.py new file mode 100644 index 0000000..c2df530 --- /dev/null +++ b/api/alembic/versions/a75353f01fa0_006_create_hot_summer_days_table_views.py @@ -0,0 +1,146 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. 
This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""006_create_hot_summer_days_table_views + +Revision ID: a75353f01fa0 +Revises: bbd1a2348c7b +Create Date: 2025-08-18 10:52:55.528477 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "a75353f01fa0" +down_revision: Union[str, None] = "bbd1a2348c7b" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + """ Create table for hot summer days.""" + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS iris.annual_count_of_hot_summer_days_projections_12km_objectid_seq + INCREMENT 1 + START 1 + MINVALUE 1 + MAXVALUE 2147483647 + CACHE 1; + """ + ) + + """ Create table for hot summer days.""" + op.execute( + """ + CREATE TABLE IF NOT EXISTS iris.annual_count_of_hot_summer_days_projections_12km + ( + objectid integer NOT NULL DEFAULT nextval('iris.annual_count_of_hot_summer_days_projections_12km_objectid_seq'::regclass), + latitude double precision, + longitude double precision, + projection_y_coordinate double precision, + projection_x_coordinate double precision, + hsd_baseline_81_00_lower double precision, + hsd_baseline_81_00_median double precision, + hsd_baseline_81_00_upper double precision, + hsd_baseline_01_20_lower double precision, + hsd_baseline_01_20_median double precision, + hsd_baseline_01_20_upper double precision, + hsd_15_lower double precision, + hsd_15_median double precision, + hsd_15_upper double precision, + hsd_20_lower double precision, + hsd_20_median double precision, + hsd_20_upper double precision, + hsd_25_lower double precision, + hsd_25_median double precision, + hsd_25_upper double precision, + hsd_30_lower double precision, + hsd_30_median double precision, + hsd_30_upper double precision, 
+ hsd_40_lower double precision, + hsd_40_median double precision, + hsd_40_upper double precision, + shape geometry(MultiPolygon,4326), + CONSTRAINT annual_count_of_hot_summer_days_projections_12km_pkey PRIMARY KEY (objectid) + ) + """ + ) + + """ Alter sequence owner.""" + op.execute( + """ + ALTER SEQUENCE iris.annual_count_of_hot_summer_days_projections_12km_objectid_seq + OWNED BY iris.annual_count_of_hot_summer_days_projections_12km.objectid; + """ + ) + + """ Create index for hot summer days table.""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS annual_count_of_hot_summer_days_projections_12km_shape_idx + ON iris.annual_count_of_hot_summer_days_projections_12km USING gist + (shape) + TABLESPACE pg_default; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE OR REPLACE VIEW iris.median_summer_days_per_projection + AS + SELECT objectid, latitude, longitude, hsd_baseline_01_20_median, hsd_15_median, hsd_20_median, hsd_25_median, hsd_30_median, hsd_40_median, shape + FROM iris.annual_count_of_hot_summer_days_projections_12km; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.hot_summer_days_geojson + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(shape)::json, 'properties', to_jsonb(t.*) - 'shape'::text))) AS geojson + FROM iris.median_summer_days_per_projection t + WITH DATA; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.hot_summer_days_days_geojson; + """ + ) + + op.execute( + """ + DROP VIEW IF EXISTS iris.median_summer_days_per_projection; + """ + ) + + op.execute( + """ + DROP INDEX IF EXISTS iris.annual_count_of_hot_summer_days_projections_12km_shape_idx; """ + ) + + op.execute( + """ + DROP TABLE IF EXISTS 
iris.annual_count_of_hot_summer_days_projections_12km; + """ + ) + + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.annual_count_of_hot_summer_days_projections_12km_objectid_seq; + """ + ) diff --git a/api/alembic/versions/a83943138c33_027_add_extreme_weather_view.py b/api/alembic/versions/a83943138c33_027_add_extreme_weather_view.py new file mode 100644 index 0000000..80dc455 --- /dev/null +++ b/api/alembic/versions/a83943138c33_027_add_extreme_weather_view.py @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""027_add_extreme_weather_view + +Revision ID: a83943138c33 +Revises: 67aaf92accd2 +Create Date: 2025-11-01 00:39:50.863001 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "a83943138c33" +down_revision: Union[str, None] = "67aaf92accd2" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.building_extreme_weather_analytics + AS ( + WITH buildings_affected_by_icing_days AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE icing_days > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY icing_days + ) + FROM iris.building_weather_analytics + ) + ), + buildings_affected_by_hsds AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE hsd_40_median > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY hsd_40_median + ) + FROM iris.building_weather_analytics + ) + ), + buildings_affected_by_wdrp AS ( + SELECT uprn, true AS affected + FROM iris.building_weather_analytics + WHERE wdr_40_median_0 > ( + SELECT percentile_cont(0.75) WITHIN GROUP 
( + ORDER BY wdr_40_median_0 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_45 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_45 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_90 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_90 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_135 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_135 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_180 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_180 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_225 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_225 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_270 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_270 + ) + FROM iris.building_weather_analytics + ) + OR wdr_40_median_315 > ( + SELECT percentile_cont(0.75) WITHIN GROUP ( + ORDER BY wdr_40_median_315 + ) + FROM iris.building_weather_analytics + ) + ) + SELECT bwa.uprn, bwa.point, babi.affected as affected_by_icing_days, + babh.affected as affected_by_hsds, babw.affected as affected_by_wdr + FROM iris.building_weather_analytics bwa + LEFT JOIN buildings_affected_by_icing_days babi ON bwa.uprn = babi.uprn + LEFT JOIN buildings_affected_by_hsds babh ON bwa.uprn = babh.uprn + LEFT JOIN buildings_affected_by_wdrp babw ON bwa.uprn = babw.uprn + ) + WITH NO DATA; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_uprn_idx + ON iris.building_extreme_weather_analytics (uprn); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_extreme_weather_analytics_point_idx + ON iris.building_extreme_weather_analytics USING GIST (point); + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS 
iris.building_extreme_weather_analytics; + """ + ) diff --git a/api/alembic/versions/b0c5faee6d07_017_alter_epc_assessment_add_indexes.py b/api/alembic/versions/b0c5faee6d07_017_alter_epc_assessment_add_indexes.py new file mode 100644 index 0000000..51e845e --- /dev/null +++ b/api/alembic/versions/b0c5faee6d07_017_alter_epc_assessment_add_indexes.py @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""017_alter_epc_assessment_add_indexes + +Revision ID: b0c5faee6d07 +Revises: 37989279ce33 +Create Date: 2025-10-10 10:44:31.110775 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "b0c5faee6d07" +down_revision: Union[str, None] = "37989279ce33" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + # Add sap_score and expiry_date columns to epc_assessment table + op.execute( + """ + ALTER TABLE iris.epc_assessment + ADD COLUMN IF NOT EXISTS sap_rating INTEGER; + """ + ) + op.execute( + """ + ALTER TABLE iris.epc_assessment + ADD COLUMN IF NOT EXISTS expiry_date DATE; + """ + ) + + # Add composite unique index for upserts on (uprn, lodgement_date) + op.execute( + """ + CREATE UNIQUE INDEX IF NOT EXISTS epc_uprn_lodgement_date_idx + ON iris.epc_assessment(uprn, lodgement_date); + """ + ) + + # Add index on expiry_date for filtering active/expired certificates + op.execute( + """ + CREATE INDEX IF NOT EXISTS epc_expiry_date_idx + ON iris.epc_assessment(expiry_date); + """ + ) + + # Add missing index on structure_unit.uprn for join performance + op.execute( + """ + CREATE INDEX IF NOT EXISTS structure_unit_uprn_idx ON iris.structure_unit(uprn); + """ + ) + + +def downgrade() -> None: + 
"""Downgrade schema.""" + + op.execute( + """ + DROP INDEX IF EXISTS iris.epc_uprn_lodgement_date_idx; + """ + ) + + op.execute( + """ + ALTER TABLE iris.epc_assessment + DROP COLUMN IF EXISTS sap_rating; + """ + ) + + op.execute( + """ + ALTER TABLE iris.epc_assessment + DROP COLUMN IF EXISTS expiry_date; + """ + ) diff --git a/api/alembic/versions/b55e05000f66_018_create_view_for_analytics_dashboard.py b/api/alembic/versions/b55e05000f66_018_create_view_for_analytics_dashboard.py new file mode 100644 index 0000000..0ac5968 --- /dev/null +++ b/api/alembic/versions/b55e05000f66_018_create_view_for_analytics_dashboard.py @@ -0,0 +1,113 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""018_create_view_for_analytics_dashboard + +Revision ID: b55e05000f66 +Revises: b0c5faee6d07 +Create Date: 2025-10-10 10:45:39.530115 + +""" + +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "b55e05000f66" +down_revision: Union[str, None] = "b0c5faee6d07" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + # COALESCE picks structure_unit data from EPC path if available, otherwise from building path + # This handles both buildings with EPC assessments (via su_epc) and without (via su_build) + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.analytics + AS ( + SELECT + b.uprn, + b.point, + b.is_residential, + ea.lodgement_date, + ea.epc_rating, + ea.sap_rating, + ea.expiry_date, + COALESCE(su_epc.type, su_build.type) AS type, + COALESCE(su_epc.built_form, su_build.built_form) AS built_form, + COALESCE(su_epc.fuel_type, su_build.fuel_type) AS fuel_type, + COALESCE(su_epc.window_glazing, su_build.window_glazing) AS window_glazing, + COALESCE(su_epc.wall_construction, su_build.wall_construction) AS wall_construction, + COALESCE(su_epc.wall_insulation, su_build.wall_insulation) AS wall_insulation, + COALESCE(su_epc.roof_construction, su_build.roof_construction) AS roof_construction, + COALESCE(su_epc.roof_insulation, su_build.roof_insulation) AS roof_insulation, + COALESCE(su_epc.roof_insulation_thickness, su_build.roof_insulation_thickness) AS roof_insulation_thickness, + COALESCE(su_epc.floor_construction, su_build.floor_construction) AS floor_construction, + COALESCE(su_epc.floor_insulation, su_build.floor_insulation) AS floor_insulation, + COALESCE(su_epc.has_roof_solar_panels, su_build.has_roof_solar_panels) AS has_roof_solar_panels, + COALESCE(su_epc.roof_material, su_build.roof_material) AS roof_material, + COALESCE(su_epc.roof_aspect_area_facing_north_m2, su_build.roof_aspect_area_facing_north_m2) AS roof_aspect_area_facing_north_m2, + COALESCE(su_epc.roof_aspect_area_facing_east_m2, su_build.roof_aspect_area_facing_east_m2) AS roof_aspect_area_facing_east_m2, + 
COALESCE(su_epc.roof_aspect_area_facing_south_m2, su_build.roof_aspect_area_facing_south_m2) AS roof_aspect_area_facing_south_m2, + COALESCE(su_epc.roof_aspect_area_facing_west_m2, su_build.roof_aspect_area_facing_west_m2) AS roof_aspect_area_facing_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_east_m2, su_build.roof_aspect_area_facing_north_east_m2) AS roof_aspect_area_facing_north_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_east_m2, su_build.roof_aspect_area_facing_south_east_m2) AS roof_aspect_area_facing_south_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_west_m2, su_build.roof_aspect_area_facing_south_west_m2) AS roof_aspect_area_facing_south_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_west_m2, su_build.roof_aspect_area_facing_north_west_m2) AS roof_aspect_area_facing_north_west_m2, + COALESCE(su_epc.roof_aspect_area_indeterminable_m2, su_build.roof_aspect_area_indeterminable_m2) AS roof_aspect_area_indeterminable_m2, + COALESCE(su_epc.roof_shape, su_build.roof_shape) AS roof_shape, + COALESCE(er.name, sawr.name) AS region_name + FROM iris.building b + LEFT JOIN iris.epc_assessment ea ON ea.uprn = b.uprn + LEFT JOIN iris.structure_unit su_epc ON su_epc.epc_assessment_id = ea.id + LEFT JOIN iris.structure_unit su_build + ON su_build.uprn = b.uprn + AND su_build.epc_assessment_id IS NULL + AND ea.id IS NULL + JOIN iris.district_borough_unitary dbu ON ST_INTERSECTS(dbu.geometry, b.point) + LEFT JOIN iris.english_region er ON er.fid = dbu.english_region_fid + LEFT JOIN iris.scotland_and_wales_region sawr ON sawr.fid = dbu.scotland_and_wales_region_fid + WHERE su_epc.epc_assessment_id IS NOT NULL OR su_build.uprn IS NOT NULL + ) + WITH NO DATA; + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS analytics_uprn_idx ON iris.analytics(uprn); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS analytics_point_idx ON iris.analytics USING GIST(point); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT 
EXISTS analytics_region_name_idx ON iris.analytics(region_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS analytics_lodgement_date_idx ON iris.analytics(lodgement_date); + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + # Drop materialized view (automatically drops all its indexes) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.analytics; + """ + ) diff --git a/api/alembic/versions/b929538f7ee1_024_add_epc_active_to_building_epc_.py b/api/alembic/versions/b929538f7ee1_024_add_epc_active_to_building_epc_.py new file mode 100644 index 0000000..68471ad --- /dev/null +++ b/api/alembic/versions/b929538f7ee1_024_add_epc_active_to_building_epc_.py @@ -0,0 +1,211 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""024_add_epc_active_to_building_epc_analytics_view + +Revision ID: b929538f7ee1 +Revises: 3e9303a52bf0 +Create Date: 2025-10-29 11:25:08.825643 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "b929538f7ee1" +down_revision: Union[str, None] = "3e9303a52bf0" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _create_indices(): + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_uprn_idx ON iris.building_epc_analytics(uprn); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_point_idx ON iris.building_epc_analytics USING GIST(point); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_name_idx ON iris.building_epc_analytics(region_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_lodgement_date_idx ON iris.building_epc_analytics(lodgement_date); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_county_name_idx ON iris.building_epc_analytics(county_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_district_name_idx ON iris.building_epc_analytics(district_name); + """ + ) + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_ward_name_idx ON iris.building_epc_analytics(ward_name); + """ + ) + + +def upgrade() -> None: + """Upgrade schema.""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.building_epc_analytics + AS ( + WITH active_epcs AS ( + SELECT DISTINCT ON (uprn) * + FROM iris.epc_assessment + WHERE lodgement_date IS NOT NULL AND expiry_date >= CURRENT_DATE + ORDER BY uprn, lodgement_date DESC + ) + SELECT b.uprn, + b.point, + b.is_residential, + ea.lodgement_date, + ea.epc_rating, + ea.sap_rating, + ea.expiry_date, + COALESCE(su_epc.type, su_build.type) AS type, + COALESCE(su_epc.built_form, su_build.built_form) AS built_form, + COALESCE(su_epc.fuel_type, su_build.fuel_type) AS fuel_type, + COALESCE(su_epc.window_glazing, 
su_build.window_glazing) AS window_glazing, + COALESCE(su_epc.wall_construction, su_build.wall_construction) AS wall_construction, + COALESCE(su_epc.wall_insulation, su_build.wall_insulation) AS wall_insulation, + COALESCE(su_epc.roof_construction, su_build.roof_construction) AS roof_construction, + COALESCE(su_epc.roof_insulation, su_build.roof_insulation) AS roof_insulation, + COALESCE(su_epc.roof_insulation_thickness, su_build.roof_insulation_thickness) AS roof_insulation_thickness, + COALESCE(su_epc.floor_construction, su_build.floor_construction) AS floor_construction, + COALESCE(su_epc.floor_insulation, su_build.floor_insulation) AS floor_insulation, + COALESCE(su_epc.has_roof_solar_panels, su_build.has_roof_solar_panels) AS has_roof_solar_panels, + COALESCE(su_epc.roof_material, su_build.roof_material) AS roof_material, + COALESCE(su_epc.roof_aspect_area_facing_north_m2, su_build.roof_aspect_area_facing_north_m2) AS roof_aspect_area_facing_north_m2, + COALESCE(su_epc.roof_aspect_area_facing_east_m2, su_build.roof_aspect_area_facing_east_m2) AS roof_aspect_area_facing_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_m2, su_build.roof_aspect_area_facing_south_m2) AS roof_aspect_area_facing_south_m2, + COALESCE(su_epc.roof_aspect_area_facing_west_m2, su_build.roof_aspect_area_facing_west_m2) AS roof_aspect_area_facing_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_east_m2, su_build.roof_aspect_area_facing_north_east_m2) AS roof_aspect_area_facing_north_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_east_m2, su_build.roof_aspect_area_facing_south_east_m2) AS roof_aspect_area_facing_south_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_west_m2, su_build.roof_aspect_area_facing_south_west_m2) AS roof_aspect_area_facing_south_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_west_m2, su_build.roof_aspect_area_facing_north_west_m2) AS roof_aspect_area_facing_north_west_m2, + 
COALESCE(su_epc.roof_aspect_area_indeterminable_m2, su_build.roof_aspect_area_indeterminable_m2) AS roof_aspect_area_indeterminable_m2, + COALESCE(su_epc.roof_shape, su_build.roof_shape) AS roof_shape, + COALESCE(er.name, sawr.name) AS region_name, + blcc.name AS county_name, + dbu.name AS district_name, + COALESCE(dbuw.name, ued.name) AS ward_name, + CASE + WHEN aes.id IS NOT NULL THEN true + ELSE false + END AS epc_active + FROM iris.building b + LEFT JOIN iris.epc_assessment ea ON ea.uprn = b.uprn + LEFT JOIN active_epcs aes ON ea.id = aes.id + LEFT JOIN iris.structure_unit su_epc ON su_epc.epc_assessment_id = ea.id + LEFT JOIN iris.structure_unit su_build ON su_build.uprn = b.uprn AND su_build.epc_assessment_id IS NULL AND ea.id IS NULL + JOIN iris.boundary_line_ceremonial_counties blcc ON st_intersects(blcc.geometry, b.point) + JOIN iris.district_borough_unitary dbu ON st_intersects(dbu.geometry, b.point) + LEFT JOIN iris.district_borough_unitary_ward dbuw ON st_intersects(dbuw.geometry, b.point) + LEFT JOIN iris.unitary_electoral_division ued ON st_intersects(ued.geometry, b.point) + LEFT JOIN iris.english_region er ON er.fid = dbu.english_region_fid + LEFT JOIN iris.scotland_and_wales_region sawr ON sawr.fid = dbu.scotland_and_wales_region_fid + WHERE su_epc.epc_assessment_id IS NOT NULL OR su_build.uprn IS NOT NULL + ) + WITH NO DATA; + """ + ) + + _create_indices() + + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_analytics_epc_active_idx ON iris.building_epc_analytics(epc_active) + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_analytics; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.building_epc_analytics + AS ( + SELECT b.uprn, + b.point, + b.is_residential, + ea.lodgement_date, + ea.epc_rating, + ea.sap_rating, + ea.expiry_date, + COALESCE(su_epc.type, su_build.type) AS type, + COALESCE(su_epc.built_form, 
su_build.built_form) AS built_form, + COALESCE(su_epc.fuel_type, su_build.fuel_type) AS fuel_type, + COALESCE(su_epc.window_glazing, su_build.window_glazing) AS window_glazing, + COALESCE(su_epc.wall_construction, su_build.wall_construction) AS wall_construction, + COALESCE(su_epc.wall_insulation, su_build.wall_insulation) AS wall_insulation, + COALESCE(su_epc.roof_construction, su_build.roof_construction) AS roof_construction, + COALESCE(su_epc.roof_insulation, su_build.roof_insulation) AS roof_insulation, + COALESCE(su_epc.roof_insulation_thickness, su_build.roof_insulation_thickness) AS roof_insulation_thickness, + COALESCE(su_epc.floor_construction, su_build.floor_construction) AS floor_construction, + COALESCE(su_epc.floor_insulation, su_build.floor_insulation) AS floor_insulation, + COALESCE(su_epc.has_roof_solar_panels, su_build.has_roof_solar_panels) AS has_roof_solar_panels, + COALESCE(su_epc.roof_material, su_build.roof_material) AS roof_material, + COALESCE(su_epc.roof_aspect_area_facing_north_m2, su_build.roof_aspect_area_facing_north_m2) AS roof_aspect_area_facing_north_m2, + COALESCE(su_epc.roof_aspect_area_facing_east_m2, su_build.roof_aspect_area_facing_east_m2) AS roof_aspect_area_facing_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_m2, su_build.roof_aspect_area_facing_south_m2) AS roof_aspect_area_facing_south_m2, + COALESCE(su_epc.roof_aspect_area_facing_west_m2, su_build.roof_aspect_area_facing_west_m2) AS roof_aspect_area_facing_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_east_m2, su_build.roof_aspect_area_facing_north_east_m2) AS roof_aspect_area_facing_north_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_east_m2, su_build.roof_aspect_area_facing_south_east_m2) AS roof_aspect_area_facing_south_east_m2, + COALESCE(su_epc.roof_aspect_area_facing_south_west_m2, su_build.roof_aspect_area_facing_south_west_m2) AS roof_aspect_area_facing_south_west_m2, + COALESCE(su_epc.roof_aspect_area_facing_north_west_m2, 
su_build.roof_aspect_area_facing_north_west_m2) AS roof_aspect_area_facing_north_west_m2, + COALESCE(su_epc.roof_aspect_area_indeterminable_m2, su_build.roof_aspect_area_indeterminable_m2) AS roof_aspect_area_indeterminable_m2, + COALESCE(su_epc.roof_shape, su_build.roof_shape) AS roof_shape, + COALESCE(er.name, sawr.name) AS region_name, + blcc.name AS county_name, + dbu.name AS district_name, + COALESCE(dbuw.name, ued.name) AS ward_name + FROM iris.building b + LEFT JOIN iris.epc_assessment ea ON ea.uprn = b.uprn + LEFT JOIN iris.structure_unit su_epc ON su_epc.epc_assessment_id = ea.id + LEFT JOIN iris.structure_unit su_build ON su_build.uprn = b.uprn AND su_build.epc_assessment_id IS NULL AND ea.id IS NULL + JOIN iris.boundary_line_ceremonial_counties blcc ON st_intersects(blcc.geometry, b.point) + JOIN iris.district_borough_unitary dbu ON st_intersects(dbu.geometry, b.point) + LEFT JOIN iris.district_borough_unitary_ward dbuw ON st_intersects(dbuw.geometry, b.point) + LEFT JOIN iris.unitary_electoral_division ued ON st_intersects(ued.geometry, b.point) + LEFT JOIN iris.english_region er ON er.fid = dbu.english_region_fid + LEFT JOIN iris.scotland_and_wales_region sawr ON sawr.fid = dbu.scotland_and_wales_region_fid + WHERE su_epc.epc_assessment_id IS NOT NULL OR su_build.uprn IS NOT NULL + ) + WITH NO DATA; + """ + ) + + _create_indices() diff --git a/api/alembic/versions/b92c77db6ba3_036_updating_building_epc_analytics_indices.py b/api/alembic/versions/b92c77db6ba3_036_updating_building_epc_analytics_indices.py new file mode 100644 index 0000000..a82c095 --- /dev/null +++ b/api/alembic/versions/b92c77db6ba3_036_updating_building_epc_analytics_indices.py @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. 
+
+"""036_updating_building_epc_analytics_indices
+
+Revision ID: b92c77db6ba3
+Revises: 44de9a59d873
+Create Date: 2026-01-14 13:28:51.347954
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "b92c77db6ba3"
+down_revision: Union[str, None] = "44de9a59d873"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    op.execute(
+        """
+        DROP INDEX IF EXISTS iris.building_epc_analytics_region_attributes_idx;
+        """
+    )
+
+    op.execute(
+        """
+        DROP INDEX IF EXISTS iris.building_epc_analytics_region_fuel_types_idx;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS building_epc_analytics_region_attributes_idx
+        ON iris.building_epc_analytics (
+            region_name,
+            has_roof_solar_panels,
+            window_glazing,
+            floor_construction,
+            roof_insulation_thickness,
+            roof_construction,
+            wall_construction,
+            epc_rating
+        )
+        WHERE epc_active = true;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS building_epc_analytics_region_fuel_types_idx
+        ON iris.building_epc_analytics (region_name, type, fuel_type, epc_rating)
+        WHERE epc_active = true AND type IS NOT NULL AND fuel_type IS NOT NULL;
+        """
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    op.execute(
+        """
+        DROP INDEX IF EXISTS iris.building_epc_analytics_region_attributes_idx;
+        """
+    )
+
+    op.execute(
+        """
+        DROP INDEX IF EXISTS iris.building_epc_analytics_region_fuel_types_idx;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS building_epc_analytics_region_attributes_idx
+        ON iris.building_epc_analytics (
+            region_name,
+            has_roof_solar_panels,
+            window_glazing,
+            floor_construction,
+            roof_insulation_thickness,
+            roof_construction,
+            wall_construction
+        )
+        WHERE epc_active = true;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS 
building_epc_analytics_region_fuel_types_idx + ON iris.building_epc_analytics (region_name, type, fuel_type) + WHERE epc_active = true AND type IS NOT NULL AND fuel_type IS NOT NULL; + """ + ) + # ### end Alembic commands ### diff --git a/api/alembic/versions/bbd1a2348c7b_005_create_icing_days_tables_view.py b/api/alembic/versions/bbd1a2348c7b_005_create_icing_days_tables_view.py new file mode 100644 index 0000000..b6d4823 --- /dev/null +++ b/api/alembic/versions/bbd1a2348c7b_005_create_icing_days_tables_view.py @@ -0,0 +1,108 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""005_create_icing_days_tables_view + +Revision ID: bbd1a2348c7b +Revises: 2599ec4b20bd +Create Date: 2025-08-14 15:19:20.548773 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "bbd1a2348c7b" +down_revision: Union[str, None] = "2599ec4b20bd" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + """ Create sequence for icing days table primary key.""" + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS iris.annual_count_of_icing_days_1991_2020_objectid_seq + INCREMENT 1 + START 1 + MINVALUE 1 + MAXVALUE 2147483647 + CACHE 1; + """ + ) + + """ Create table for icing days.""" + op.execute( + """ + CREATE TABLE IF NOT EXISTS iris.annual_count_of_icing_days_1991_2020 + ( + objectid integer NOT NULL DEFAULT nextval('iris.annual_count_of_icing_days_1991_2020_objectid_seq'::regclass), + icingdays double precision, + shape geometry(MultiPolygon,4326), + CONSTRAINT annual_count_of_icing_days_1991_2020_pkey PRIMARY KEY (objectid) + ) + """ + ) + + """ Alter sequence owner.""" + op.execute( + """ + ALTER SEQUENCE iris.annual_count_of_icing_days_1991_2020_objectid_seq + OWNED BY iris.annual_count_of_icing_days_1991_2020.objectid; + """ + ) + + """ Create index for icing days table.""" + op.execute( + """ + CREATE INDEX IF NOT EXISTS annual_count_of_icing_days_1991_2020_shape_idx + ON iris.annual_count_of_icing_days_1991_2020 USING gist + (shape) + TABLESPACE pg_default; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.icing_days_geojson + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(shape)::json, 'properties', to_jsonb(t.*) - 'shape'::text))) AS geojson + FROM iris.annual_count_of_icing_days_1991_2020 t + WITH DATA; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.icing_days_geojson; + """ + ) + + op.execute( + """ + DROP INDEX IF EXISTS 
iris.annual_count_of_icing_days_1991_2020_shape_idx; + """ + ) + + op.execute( + """ + DROP TABLE IF EXISTS iris.annual_count_of_icing_days_1991_2020; + """ + ) + + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.annual_count_of_icing_days_1991_2020_objectid_seq; + """ + ) + diff --git a/api/alembic/versions/be69024c1b9c_033_epc_analytics_active_indexes.py b/api/alembic/versions/be69024c1b9c_033_epc_analytics_active_indexes.py new file mode 100644 index 0000000..d9e9440 --- /dev/null +++ b/api/alembic/versions/be69024c1b9c_033_epc_analytics_active_indexes.py @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""033_epc_analytics_active_indexes + +Revision ID: be69024c1b9c +Revises: 4137d5faffb3 +Create Date: 2025-11-17 14:13:47.589788 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision: str = 'be69024c1b9c' +down_revision: Union[str, None] = '4137d5faffb3' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_point_active_idx + ON iris.building_epc_analytics USING GIST(point) + WHERE epc_active = true; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_region_name_active_idx + ON iris.building_epc_analytics (region_name) + WHERE epc_active = true; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_county_name_active_idx + ON iris.building_epc_analytics (county_name) + WHERE epc_active = true; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_district_name_active_idx + ON iris.building_epc_analytics (district_name) + WHERE epc_active = true; + """) + + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_ward_name_active_idx + ON iris.building_epc_analytics (ward_name) + WHERE epc_active = true; + """) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_ward_name_active_idx;") + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_district_name_active_idx;") + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_county_name_active_idx;") + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_region_name_active_idx;") + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_point_active_idx;") diff --git a/api/alembic/versions/cc816c325e2a_014_add_country_table.py b/api/alembic/versions/cc816c325e2a_014_add_country_table.py new file mode 100644 index 0000000..8e36bdd --- /dev/null +++ b/api/alembic/versions/cc816c325e2a_014_add_country_table.py @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. 
This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""014_add_country_table + +Revision ID: cc816c325e2a +Revises: cf408e1ffd0e +Create Date: 2025-09-10 10:40:29.793570 + +""" +from typing import Sequence, Union + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "cc816c325e2a" +down_revision: Union[str, None] = "cf408e1ffd0e" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + """ Create table for country regions.""" + op.execute( + """ + CREATE SEQUENCE IF NOT EXISTS iris.country_region_fid_seq + INCREMENT 1 + START 1 + MINVALUE 1 + MAXVALUE 2147483647 + CACHE 1; + """ + ) + + """ Create table for country_region.""" + op.execute( + """ + CREATE TABLE IF NOT EXISTS iris.country_region + ( + fid integer NOT NULL DEFAULT nextval('iris.country_region_fid_seq'::regclass), + name character varying(100), + area_code character varying(3), + area_description character varying(50), + file_name character varying(100), + feature_serial_number integer, + collection_serial_number integer NOT NULL, + global_polygon_id integer NOT NULL, + admin_unit_id integer NOT NULL, + census_code character varying(9), + hectares double precision NOT NULL, + non_inland_area double precision NOT NULL, + area_type_code character varying(2) NOT NULL, + area_type_description character varying(25)NOT NULL, + non_area_type_code character varying(3), + non_area_type_description character varying(36), + geometry geometry(MultiPolygon,4326) NOT NULL, + CONSTRAINT country_region_pkey PRIMARY KEY (fid) + ) + """ + ) + + """ Alter sequence owner.""" + op.execute( + """ + ALTER SEQUENCE iris.country_region_fid_seq + OWNED BY iris.country_region.fid; + """ + ) + + """ Create index for country_region table.""" + op.execute( + """ + CREATE INDEX 
IF NOT EXISTS country_region_geometry_idx + ON iris.country_region USING gist + (geometry) + TABLESPACE pg_default; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute( + """ + DROP INDEX IF EXISTS iris.country_region_geometry_idx; """ + ) + + op.execute( + """ + DROP TABLE IF EXISTS iris.country_region; + """ + ) + + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.country_region_fid_seq; + """ + ) diff --git a/api/alembic/versions/cf22dad02891_028_create_view_for_analytics_.py b/api/alembic/versions/cf22dad02891_028_create_view_for_analytics_.py new file mode 100644 index 0000000..461cd09 --- /dev/null +++ b/api/alembic/versions/cf22dad02891_028_create_view_for_analytics_.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""028_create_view_for_analytics_historical_lookups + +Revision ID: cf22dad02891 +Revises: a83943138c33 +Create Date: 2025-11-03 13:15:34.854944 + +""" +from typing import Sequence, Union + +from alembic import op + + +# revision identifiers, used by Alembic. 
+revision: str = 'cf22dad02891' +down_revision: Union[str, None] = 'a83943138c33' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Create normalized materialized view for historical EPC snapshots.""" + + # composite index on building_epc_analytics for efficient joins + op.execute(""" + CREATE INDEX IF NOT EXISTS building_epc_analytics_uprn_lodgement_idx + ON iris.building_epc_analytics(uprn, lodgement_date); + """) + + op.execute(""" + CREATE MATERIALIZED VIEW iris.building_epc_active_by_year AS + WITH RECURSIVE date_range AS ( + -- Generate range of years from earliest EPC to current year (Dec 31st) + SELECT + (DATE_TRUNC('year', MIN(lodgement_date))::date + interval '1 year' - interval '1 day')::date as snapshot_date, + (DATE_TRUNC('year', CURRENT_DATE)::date + interval '1 year' - interval '1 day')::date as max_date + FROM iris.building_epc_analytics + WHERE lodgement_date IS NOT NULL + + UNION ALL + + SELECT + (snapshot_date + interval '1 year')::date, + max_date + FROM date_range + WHERE snapshot_date < max_date + ), + snapshots AS ( + SELECT snapshot_date FROM date_range WHERE snapshot_date IS NOT NULL + ), + snapshot_per_year AS ( + -- For each year-end date and UPRN, find which EPC certificate was active + SELECT + s.snapshot_date, + b.uprn, + b.lodgement_date, + ROW_NUMBER() OVER ( + PARTITION BY s.snapshot_date, b.uprn + ORDER BY b.lodgement_date DESC + ) as rn + FROM snapshots s + INNER JOIN iris.building_epc_analytics b + ON b.lodgement_date <= s.snapshot_date + AND b.expiry_date >= s.snapshot_date + WHERE b.lodgement_date IS NOT NULL + AND b.expiry_date IS NOT NULL + AND b.sap_rating IS NOT NULL + ) + SELECT + snapshot_date, + uprn, + lodgement_date + FROM snapshot_per_year + WHERE rn = 1 + WITH NO DATA; + """) + + op.execute(""" + CREATE INDEX building_epc_active_by_year_snapshot_date_idx + ON iris.building_epc_active_by_year(snapshot_date); + """) + + 
op.execute(""" + CREATE INDEX building_epc_active_by_year_epc_ref_idx + ON iris.building_epc_active_by_year(uprn, lodgement_date); + """) + + +def downgrade() -> None: + """Drop the materialized view and its indexes.""" + op.execute("DROP MATERIALIZED VIEW IF EXISTS iris.building_epc_active_by_year CASCADE;") + op.execute("DROP INDEX IF EXISTS iris.building_epc_analytics_uprn_lodgement_idx;") diff --git a/api/alembic/versions/cf408e1ffd0e_013_add_uprn_structure_unit.py b/api/alembic/versions/cf408e1ffd0e_013_add_uprn_structure_unit.py new file mode 100644 index 0000000..ecd782d --- /dev/null +++ b/api/alembic/versions/cf408e1ffd0e_013_add_uprn_structure_unit.py @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""013_add_uprn_structure_unit + +Revision ID: cf408e1ffd0e +Revises: 10f244f0a95e +Create Date: 2025-09-08 14:35:48.115234 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = 'cf408e1ffd0e' +down_revision: Union[str, None] = '10f244f0a95e' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.execute( + """ + ALTER TABLE iris.structure_unit + ADD COLUMN uprn TEXT NULL, + ADD COLUMN roof_shape TEXT NULL; + """ + ) + + op.execute( + """ + ALTER TABLE iris.structure_unit + ADD CONSTRAINT fk_building_uprn FOREIGN KEY (uprn) REFERENCES iris.building(uprn) ON DELETE CASCADE; + """ + ) + + +def downgrade() -> None: + """Downgrade schema.""" + + op.execute( + """ + ALTER TABLE iris.structure_unit + DROP CONSTRAINT fk_building_uprn; + """ + ) + + op.execute( + """ + ALTER TABLE iris.structure_unit + DROP COLUMN IF EXISTS uprn, + DROP COLUMN IF EXISTS roof_shape; + """ + ) + diff --git a/api/alembic/versions/d12ce7dc9019_003_create_structure_unit_table.py b/api/alembic/versions/d12ce7dc9019_003_create_structure_unit_table.py index 861b3af..e397e45 100644 --- a/api/alembic/versions/d12ce7dc9019_003_create_structure_unit_table.py +++ b/api/alembic/versions/d12ce7dc9019_003_create_structure_unit_table.py @@ -50,7 +50,7 @@ def upgrade() -> None: op.execute( """ - CREATE INDEX idx_structure_unit_epc ON iris.structure_unit(epc_assessment_id); + CREATE INDEX structure_unit_epc_assessment_id_idx ON iris.structure_unit(epc_assessment_id); """ ) @@ -59,7 +59,7 @@ def downgrade() -> None: """Downgrade schema.""" op.execute( """ - DROP INDEX IF EXISTS idx_structure_unit_epc; + DROP INDEX IF EXISTS structure_unit_epc_assessment_id_idx; """ ) diff --git a/api/alembic/versions/d99eb3e1e4ab_011_create_uk_counties_table_view.py b/api/alembic/versions/d99eb3e1e4ab_011_create_uk_counties_table_view.py new file mode 100644 index 0000000..36672a5 --- /dev/null +++ b/api/alembic/versions/d99eb3e1e4ab_011_create_uk_counties_table_view.py @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. 
This work has been developed by the National Digital Twin Programme
+# and is legally attributed to the Department for Business and Trade (UK) as the governing entity.
+
+"""011_create_uk_counties_table_view
+
+Revision ID: d99eb3e1e4ab
+Revises: 993c0cf8ea04
+Create Date: 2025-08-27 16:38:28.857698
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "d99eb3e1e4ab"
+down_revision: Union[str, None] = "993c0cf8ea04"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+# NOTE: the auto-generated no-op upgrade()/downgrade() stubs that Alembic
+# emitted here are commented out below. They duplicated the real
+# implementations later in this file, and Python binds a module-level name
+# to its *last* definition, so the stubs were silently-shadowed dead code.
+# def upgrade() -> None:
+#     """Upgrade schema."""
+#     # ### commands auto generated by Alembic - please adjust! ###
+#     pass
+#     # ### end Alembic commands ###
+# def downgrade() -> None:
+#     """Downgrade schema."""
+#     pass
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    """ Create id for iris.boundary_line_ceremonial_counties"""
+    op.execute(
+        """
+        CREATE SEQUENCE IF NOT EXISTS iris.boundary_line_ceremonial_counties_fid_seq1
+            INCREMENT 1
+            START 1
+            MINVALUE 1
+            MAXVALUE 2147483647
+            CACHE 1;
+        """
+    )
+
+    """ Create table for iris.boundary_line_ceremonial_counties"""
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS iris.boundary_line_ceremonial_counties
+        (
+            fid integer NOT NULL DEFAULT nextval('iris.boundary_line_ceremonial_counties_fid_seq1'::regclass),
+            name character varying,
+            Area_description character varying,
+            geometry geometry(MultiPolygon,4326),
+            CONSTRAINT boundary_line_ceremonial_counties_P PRIMARY KEY (fid)
+        )
+        """
+    )
+
+    """ Create geo index for boundary_line_ceremonial_counties"""
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS boundary_line_ceremonial_counties_geometry_idx
+        ON iris.boundary_line_ceremonial_counties USING gist
+        (geometry)
+        TABLESPACE pg_default;
+        """
+    )
+
+    op.execute(
+        """
+        CREATE MATERIALIZED VIEW IF NOT EXISTS 
iris.boundary_line_ceremonial_counties_epc_data + AS + SELECT + b.name, + COUNT (a.point) AS total, + COUNT(CASE WHEN a.epc_rating = 'A' THEN 1 END) AS epc_a, + COUNT(CASE WHEN a.epc_rating = 'B' THEN 1 END) AS epc_b, + COUNT(CASE WHEN a.epc_rating = 'C' THEN 1 END) AS epc_c, + COUNT(CASE WHEN a.epc_rating = 'D' THEN 1 END) AS epc_d, + COUNT(CASE WHEN a.epc_rating = 'E' THEN 1 END) AS epc_e, + COUNT(CASE WHEN a.epc_rating = 'F' THEN 1 END) AS epc_f, + COUNT(CASE WHEN a.epc_rating = 'G' THEN 1 END) AS epc_g, + COUNT(CASE WHEN a.epc_rating IS NULL THEN 1 END) AS epc_null, + b.geometry + FROM iris.building_epc a + LEFT JOIN iris.boundary_line_ceremonial_counties b + ON ST_Intersects(b.geometry, a.point) + GROUP BY b.name, b.geometry + WITH NO DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.boundary_line_ceremonial_counties_epc + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.boundary_line_ceremonial_counties_epc_data t + WITH NO DATA; + """ + ) + + +def downgrade() -> None: + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.boundary_line_ceremonial_counties_epc; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.boundary_line_ceremonial_counties_epc_data; + """ + ) + op.execute( + """ + DROP INDEX IF EXISTS iris.boundary_line_ceremonial_counties_geometry_idx; + """ + ) + + op.execute( + """ + DROP TABLE IF EXISTS iris.boundary_line_ceremonial_counties; + """ + ) + + op.execute( + """ + DROP SEQUENCE IF EXISTS iris.boundary_line_ceremonial_counties_fid_seq1; + """ + ) diff --git a/api/alembic/versions/e4517d52c442_002_create_epc_assessment_table.py b/api/alembic/versions/e4517d52c442_002_create_epc_assessment_table.py 
index 3b4e137..0a79210 100644 --- a/api/alembic/versions/e4517d52c442_002_create_epc_assessment_table.py +++ b/api/alembic/versions/e4517d52c442_002_create_epc_assessment_table.py @@ -42,7 +42,7 @@ def upgrade() -> None: op.execute( """ - CREATE INDEX idx_epc_uprn ON iris.epc_assessment(uprn); + CREATE INDEX epc_uprn_idx ON iris.epc_assessment(uprn); """ ) @@ -51,7 +51,7 @@ def downgrade() -> None: """Downgrade schema.""" op.execute( """ - DROP INDEX IF EXISTS idx_epc_uprn; + DROP INDEX IF EXISTS epc_uprn_idx; """ ) diff --git a/api/alembic/versions/e72a7ae5be78_021_update_regions_view.py b/api/alembic/versions/e72a7ae5be78_021_update_regions_view.py new file mode 100644 index 0000000..01b212c --- /dev/null +++ b/api/alembic/versions/e72a7ae5be78_021_update_regions_view.py @@ -0,0 +1,159 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""021_update_regions_view + +Revision ID: e72a7ae5be78 +Revises: 29a78cfac78a +Create Date: 2025-10-23 18:45:49.632786 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision: str = 'e72a7ae5be78' +down_revision: Union[str, None] = '29a78cfac78a' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + + """ Recreate materialised views for regions""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc_data; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc_data + AS + WITH regions_buffered AS( + SELECT + name, + ST_Transform( -- convert back to WGS84 + ST_Buffer( -- buffer in metres (because 27700 uses metres) + ST_Transform(geom_simplified, 27700), + 5000 -- buffer distance in metres + ), + 4326 -- back to WGS84 + ) AS geometry + FROM iris.english_region + UNION ALL + SELECT + name, + ST_Transform( + ST_Buffer( + ST_Transform(geometry, 27700), + 5000 + ), + 4326 + ) AS geometry + FROM iris.country_region + WHERE name = 'Wales') + SELECT + r.name AS name, + SUM(e.total) AS total, + SUM(e.epc_a) AS epc_a, + SUM(e.epc_b) AS epc_b, + SUM(e.epc_c) AS epc_c, + SUM(e.epc_d) AS epc_d, + SUM(e.epc_e) AS epc_e, + SUM(e.epc_f) AS epc_f, + SUM(e.epc_g) AS epc_g, + SUM(e.epc_null) AS epc_null, + r.geometry + FROM regions_buffered r + JOIN iris.district_borough_unitary_epc_data e + ON ST_Contains(r.geometry, ST_Simplify(e.geometry, 0.0001)) + GROUP BY r.name, r.geometry + WITH NO DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.uk_region_epc_data t + WITH NO DATA; + """ + ) + + +def downgrade() -> None: + 
"""Downgrade schema.""" + + """ Recreate materialised views for regions""" + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc; + """ + ) + + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc_data; + """ + ) + + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc_data + AS + WITH regions AS ( + SELECT name, geom_simplified AS geometry + FROM iris.english_region + UNION ALL + SELECT name, ST_Simplify(geometry, 0.0001) AS geometry + FROM iris.country_region + WHERE name = 'Wales' + ) + SELECT + r.name AS name, + SUM(e.total) AS total, + SUM(e.epc_a) AS epc_a, + SUM(e.epc_b) AS epc_b, + SUM(e.epc_c) AS epc_c, + SUM(e.epc_d) AS epc_d, + SUM(e.epc_e) AS epc_e, + SUM(e.epc_f) AS epc_f, + SUM(e.epc_g) AS epc_g, + SUM(e.epc_null) AS epc_null, + r.geometry + FROM regions r + JOIN iris.district_borough_unitary_epc_data e + ON ST_Contains(r.geometry, ST_Simplify(e.geometry, 0.0001)) + GROUP BY r.name, r.geometry + WITH NO DATA; + """ + ) + + """ Create materialized view containing GeoJSON.""" + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc + TABLESPACE pg_default + AS + SELECT jsonb_build_object('type', 'FeatureCollection', 'features', jsonb_agg(jsonb_build_object('type', 'Feature', 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, 'properties', to_jsonb(t.*) - 'geometry'::text))) AS geojson + FROM iris.uk_region_epc_data t + WITH NO DATA; + """ + ) diff --git a/api/alembic/versions/f7639f884c24_019_update_building_epc_view.py b/api/alembic/versions/f7639f884c24_019_update_building_epc_view.py new file mode 100644 index 0000000..6cfde93 --- /dev/null +++ b/api/alembic/versions/f7639f884c24_019_update_building_epc_view.py @@ -0,0 +1,330 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. 
This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +"""019_update_building_epc_view + +Revision ID: f7639f884c24 +Revises: b55e05000f66 +Create Date: 2025-10-16 12:35:56.419788 + +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "f7639f884c24" +down_revision: Union[str, None] = "b55e05000f66" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def _drop_epc_dependants() -> None: + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_ward_epc; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_ward_epc_data; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.uk_region_epc_data; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.district_borough_unitary_epc; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.district_borough_unitary_epc_data; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.boundary_line_ceremonial_counties_epc; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.boundary_line_ceremonial_counties_epc_data; + """ + ) + + +def _recreate_epc_dependants() -> None: + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_ward_epc_data + AS + SELECT + ward.name, + COUNT(building.point) AS total, + COUNT(CASE WHEN building.epc_rating = 'A' THEN 1 END) AS epc_a, + COUNT(CASE WHEN building.epc_rating = 'B' THEN 1 END) AS epc_b, + COUNT(CASE WHEN building.epc_rating = 'C' THEN 1 END) AS epc_c, + COUNT(CASE WHEN building.epc_rating = 'D' THEN 1 END) AS epc_d, + COUNT(CASE WHEN building.epc_rating = 'E' THEN 1 END) AS epc_e, + COUNT(CASE WHEN building.epc_rating = 
'F' THEN 1 END) AS epc_f, + COUNT(CASE WHEN building.epc_rating = 'G' THEN 1 END) AS epc_g, + COUNT(CASE WHEN building.epc_rating IS NULL THEN 1 END) AS epc_null, + ward.geometry + FROM iris.building_epc AS building + LEFT JOIN iris.uk_ward AS ward + ON ST_Intersects(ward.geometry, building.point) + GROUP BY ward.name, ward.geometry + WITH NO DATA; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_ward_epc + AS + SELECT jsonb_build_object( + 'type', 'FeatureCollection', + 'features', + jsonb_agg( + jsonb_build_object( + 'type', 'Feature', + 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, + 'properties', to_jsonb(t.*) - 'geometry'::text + ) + ) + ) AS geojson + FROM iris.uk_ward_epc_data AS t + WITH NO DATA; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc_data + AS + SELECT + region.name, + COUNT(building.point) AS total, + COUNT(CASE WHEN building.epc_rating = 'A' THEN 1 END) AS epc_a, + COUNT(CASE WHEN building.epc_rating = 'B' THEN 1 END) AS epc_b, + COUNT(CASE WHEN building.epc_rating = 'C' THEN 1 END) AS epc_c, + COUNT(CASE WHEN building.epc_rating = 'D' THEN 1 END) AS epc_d, + COUNT(CASE WHEN building.epc_rating = 'E' THEN 1 END) AS epc_e, + COUNT(CASE WHEN building.epc_rating = 'F' THEN 1 END) AS epc_f, + COUNT(CASE WHEN building.epc_rating = 'G' THEN 1 END) AS epc_g, + COUNT(CASE WHEN building.epc_rating IS NULL THEN 1 END) AS epc_null, + region.geometry + FROM iris.building_epc AS building + LEFT JOIN iris.uk_region AS region + ON ST_Intersects(region.geometry, building.point) + GROUP BY region.name, region.geometry + WITH NO DATA; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.uk_region_epc + AS + SELECT jsonb_build_object( + 'type', 'FeatureCollection', + 'features', + jsonb_agg( + jsonb_build_object( + 'type', 'Feature', + 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, + 'properties', to_jsonb(t.*) - 'geometry'::text + ) + ) 
+ ) AS geojson + FROM iris.uk_region_epc_data AS t + WITH NO DATA; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.district_borough_unitary_epc_data + AS + SELECT + district.name, + COUNT(building.point) AS total, + COUNT(CASE WHEN building.epc_rating = 'A' THEN 1 END) AS epc_a, + COUNT(CASE WHEN building.epc_rating = 'B' THEN 1 END) AS epc_b, + COUNT(CASE WHEN building.epc_rating = 'C' THEN 1 END) AS epc_c, + COUNT(CASE WHEN building.epc_rating = 'D' THEN 1 END) AS epc_d, + COUNT(CASE WHEN building.epc_rating = 'E' THEN 1 END) AS epc_e, + COUNT(CASE WHEN building.epc_rating = 'F' THEN 1 END) AS epc_f, + COUNT(CASE WHEN building.epc_rating = 'G' THEN 1 END) AS epc_g, + COUNT(CASE WHEN building.epc_rating IS NULL THEN 1 END) AS epc_null, + district.geometry + FROM iris.building_epc AS building + LEFT JOIN iris.district_borough_unitary AS district + ON ST_Intersects(district.geometry, building.point) + GROUP BY district.name, district.geometry + WITH NO DATA; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.district_borough_unitary_epc + AS + SELECT jsonb_build_object( + 'type', 'FeatureCollection', + 'features', + jsonb_agg( + jsonb_build_object( + 'type', 'Feature', + 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, + 'properties', to_jsonb(t.*) - 'geometry'::text + ) + ) + ) AS geojson + FROM iris.district_borough_unitary_epc_data AS t + WITH NO DATA; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.boundary_line_ceremonial_counties_epc_data + AS + SELECT + county.name, + COUNT(building.point) AS total, + COUNT(CASE WHEN building.epc_rating = 'A' THEN 1 END) AS epc_a, + COUNT(CASE WHEN building.epc_rating = 'B' THEN 1 END) AS epc_b, + COUNT(CASE WHEN building.epc_rating = 'C' THEN 1 END) AS epc_c, + COUNT(CASE WHEN building.epc_rating = 'D' THEN 1 END) AS epc_d, + COUNT(CASE WHEN building.epc_rating = 'E' THEN 1 END) AS epc_e, + COUNT(CASE WHEN building.epc_rating = 'F' 
THEN 1 END) AS epc_f, + COUNT(CASE WHEN building.epc_rating = 'G' THEN 1 END) AS epc_g, + COUNT(CASE WHEN building.epc_rating IS NULL THEN 1 END) AS epc_null, + county.geometry + FROM iris.building_epc AS building + LEFT JOIN iris.boundary_line_ceremonial_counties AS county + ON ST_Intersects(county.geometry, building.point) + GROUP BY county.name, county.geometry + WITH NO DATA; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW IF NOT EXISTS iris.boundary_line_ceremonial_counties_epc + AS + SELECT jsonb_build_object( + 'type', 'FeatureCollection', + 'features', + jsonb_agg( + jsonb_build_object( + 'type', 'Feature', + 'geometry', st_asgeojson(ST_SIMPLIFY(geometry, 0.0001))::json, + 'properties', to_jsonb(t.*) - 'geometry'::text + ) + ) + ) AS geojson + FROM iris.boundary_line_ceremonial_counties_epc_data AS t + WITH NO DATA; + """ + ) + + +def _set_application_name() -> None: + op.execute( + sa.text("SET LOCAL application_name = 'alembic_019_update_building_epc_view';") + ) + + +def upgrade() -> None: + """Upgrade schema.""" + + _set_application_name() + _drop_epc_dependants() + op.execute( + """ + DROP INDEX IF EXISTS building_epc_point_idx; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW iris.building_epc AS + SELECT + bld.uprn, + latest.epc_rating, + bld.point + FROM iris.building AS bld + LEFT JOIN LATERAL ( + SELECT + assess.id, + assess.uprn, + assess.epc_rating, + assess.lodgement_date + FROM iris.epc_assessment AS assess + WHERE assess.uprn = bld.uprn + ORDER BY + assess.lodgement_date DESC NULLS LAST, + assess.id DESC + LIMIT 1 + ) AS latest ON TRUE + WHERE bld.is_residential IS TRUE + WITH NO DATA; + """ + ) + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_point_idx + ON iris.building_epc + USING GIST (point); + """ + ) + _recreate_epc_dependants() + + +def downgrade() -> None: + """Downgrade schema.""" + + _set_application_name() + 
_drop_epc_dependants() + op.execute( + """ + DROP INDEX IF EXISTS building_epc_point_idx; + """ + ) + op.execute( + """ + DROP MATERIALIZED VIEW IF EXISTS iris.building_epc; + """ + ) + op.execute( + """ + CREATE MATERIALIZED VIEW iris.building_epc AS + SELECT + building.uprn, + assessment.epc_rating, + building.point + FROM iris.building AS building + LEFT JOIN iris.epc_assessment AS assessment + ON building.uprn = assessment.uprn + WHERE building.is_residential IS TRUE + WITH NO DATA; + """ + ) + op.execute( + """ + CREATE INDEX IF NOT EXISTS building_epc_point_idx + ON iris.building_epc + USING GIST (point); + """ + ) + _recreate_epc_dependants() diff --git a/api/config.py b/api/config.py index d564946..4db6a63 100644 --- a/api/config.py +++ b/api/config.py @@ -33,6 +33,7 @@ class Settings(BaseSettings): LANDING_PAGE_URL: str = "http://localhost:5173" BOOTSTRAP_SERVERS: str = "localhost:9092" IES_TOPIC: str = "knowledge" + DB_QUERY_TIMEOUT: int = 29 def get_db_connection_string(self): if self.ENVIRONMENT == "TEST": diff --git a/api/db.py b/api/db.py index 54ad8fa..b6388e7 100644 --- a/api/db.py +++ b/api/db.py @@ -3,28 +3,60 @@ # and is legally attributed to the Department for Business and Trade (UK) as the governing entity. 
-from typing import Annotated +import logging +from typing import Optional -from config import Settings, get_settings -from fastapi import Depends -from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine +from config import get_settings +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine from sqlalchemy.orm import sessionmaker +logger = logging.getLogger(__name__) -async def get_db(settings: Annotated[Settings, Depends(get_settings)]): - db_connection_string = settings.get_db_connection_string() +settings = get_settings() +db_connection_string = settings.get_db_connection_string() - if db_connection_string.startswith("postgres"): - engine = create_async_engine( - db_connection_string, connect_args={}, pool_pre_ping=True - ) - elif db_connection_string.startswith("sqlite"): - engine = create_async_engine( - db_connection_string, connect_args={"check_same_thread": False} - ) +if db_connection_string and db_connection_string.startswith("postgres"): + statement_timeout_ms = str(settings.DB_QUERY_TIMEOUT * 1000) + logger.info(f"Connecting to PostgreSQL database with query timeout: ({statement_timeout_ms}ms)") + engine: Optional[AsyncEngine] = create_async_engine( + db_connection_string, + connect_args={"server_settings": {"statement_timeout": statement_timeout_ms}}, + pool_pre_ping=True, + ) +elif db_connection_string and db_connection_string.startswith("sqlite"): + engine: Optional[AsyncEngine] = create_async_engine( + db_connection_string, connect_args={"check_same_thread": False} + ) +else: + engine = None + +if engine: async_session_maker = sessionmaker( engine, class_=AsyncSession, expire_on_commit=False ) +else: + async_session_maker = None + +async def get_db(): + if async_session_maker is None: + raise RuntimeError("Database not configured") async with async_session_maker() as session: yield session + + +async def execute_with_timeout( + session: AsyncSession, + query: text, + 
timeout_seconds: int, + params: Optional[dict] = None, +): + """Execute a query with a timeout different from the global query timeout. + The timeout is set to the given timeout_seconds for the duration of the query. + After the query is executed, the timeout is reset to the global query timeout. + """ + await session.execute(text(f"SET LOCAL statement_timeout = '{timeout_seconds * 1000}'")) + result = await session.execute(query, params) + await session.execute(text(f"SET LOCAL statement_timeout = '{settings.DB_QUERY_TIMEOUT * 1000}'")) + return result diff --git a/api/main.py b/api/main.py index 87b562f..9355633 100644 --- a/api/main.py +++ b/api/main.py @@ -3,10 +3,13 @@ # and is legally attributed to the Department for Business and Trade (UK) as the governing entity. +import asyncpg.exceptions +import sqlalchemy.exc import uvicorn from config import get_settings -from fastapi import FastAPI +from fastapi import FastAPI, Request, status from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse from routes import router config_settings = get_settings() @@ -34,6 +37,21 @@ ) +@app.exception_handler(sqlalchemy.exc.DBAPIError) +async def query_timeout_handler(request: Request, exc: sqlalchemy.exc.DBAPIError): + sqlstate = getattr(exc.orig, "sqlstate", None) + if sqlstate == asyncpg.exceptions.QueryCanceledError.sqlstate: + return JSONResponse( + status_code=status.HTTP_504_GATEWAY_TIMEOUT, + content={ + "detail": "The request took too long to complete.", + "error": "QueryCanceledError", + }, + ) + + raise exc + + app.include_router(router) if __name__ == "__main__": diff --git a/api/mappers.py b/api/mappers.py index c33358b..664c909 100644 --- a/api/mappers.py +++ b/api/mappers.py @@ -2,9 +2,12 @@ # © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme # and is legally attributed to the Department for Business and Trade (UK) as the governing entity. 
+import datetime import re from models.dto_models import ( + BuildingAttributePercentage, + BuildingAttributePercentagesPerRegion, DetailedBuilding, EpcAndOsBuildingSchema, EpcStatistics, @@ -148,7 +151,8 @@ def map_wall_window_results(building: DetailedBuilding, results: dict) -> None: building.wall_insulation = get_value_from_result(result, "wallInsulation") building.window_glazing = get_value_from_result(result, "windowGlazing") -def map_fueltype_results(building: DetailedBuilding, results:dict) -> None: + +def map_fueltype_results(building: DetailedBuilding, results: dict) -> None: """ Maps fuel type data to a `SingleBuilding` instance. @@ -161,9 +165,78 @@ def map_fueltype_results(building: DetailedBuilding, results:dict) -> None: """ if results and results["results"] and results["results"]["bindings"]: for result in results["results"]["bindings"]: - building.fueltype = get_value_from_result( - result, "fuelType" + building.fueltype = get_value_from_result(result, "fuelType") + + +def map_ngd_roof_material_results(building: DetailedBuilding, results: dict) -> None: + + if results and results.get("results") and results["results"].get("bindings"): # check if results come from Fuseki or from PostGIS + for result in results["results"]["bindings"]: + building.roof_material = get_value_from_result(result, "roofMaterial") + else: + if 'roof_material' in results.keys(): + building.roof_material = results['roof_material'].replace(' ', '') if results['roof_material'] else results['roof_material'] + +def map_ngd_solar_panel_presence_results( + building: DetailedBuilding, results: dict +) -> None: + if results and results.get("results") and results["results"].get("bindings"): + for result in results["results"]["bindings"]: + building.solar_panel_presence = get_value_from_result( + result, "solarPanelPresence" ) + else: + if 'solar_panel_presence' in results.keys(): + building.solar_panel_presence = 'HasSolarPanels' if results['solar_panel_presence']=='True' else 
'NoSolarPanels' + +def map_ngd_roof_shape_results(building: DetailedBuilding, results: dict) -> None: + if results and results.get("results") and results["results"].get("bindings"): + for result in results["results"]["bindings"]: + building.roof_shape = get_value_from_result(result, "roofShape") + else: + sag_alignment = { + 'Pitched': 'PitchedRoofShape', + 'Flat' : 'FlatRoofShape', + 'Mixed' : 'MixedRoofShape', + 'Unknown': 'UnknownRoofShape', + None: None + } + if 'roof_shape' in results.keys(): + building.roof_shape = sag_alignment[results['roof_shape']] + + +def map_ngd_roof_aspect_area_facings_results( + building: DetailedBuilding, results: dict +) -> None: + direction_to_field = { + "NorthFacingRoofSectionSum": "roof_aspect_area_facing_north_m2", + "NorthEastFacingRoofSectionSum": "roof_aspect_area_facing_north_east_m2", + "EastFacingRoofSectionSum": "roof_aspect_area_facing_east_m2", + "SouthEastFacingRoofSectionSum": "roof_aspect_area_facing_south_east_m2", + "SouthFacingRoofSectionSum": "roof_aspect_area_facing_south_m2", + "SouthWestFacingRoofSectionSum": "roof_aspect_area_facing_south_west_m2", + "WestFacingRoofSectionSum": "roof_aspect_area_facing_west_m2", + "NorthWestFacingRoofSectionSum": "roof_aspect_area_facing_north_west_m2", + "AreaIndeterminableRoofSectionSum": "roof_aspect_area_indeterminable_m2", + } + + def assign(field: str | None, m2) -> None: + if field and m2: + setattr(building, field, float(m2)) + + if results and results.get("results") and results["results"].get("bindings"): + for result in results["results"]["bindings"]: + assign( + direction_to_field.get(get_value_from_result(result, "direction")), + get_value_from_result(result, "m2"), + ) + return + + if "roof_aspect_area_facing_north_m2" in results: + for field, m2 in results.items(): + assign(field, m2) + + def map_single_building_response( uprn: str, @@ -171,7 +244,11 @@ def map_single_building_response( roof_results: dict, floor_results: dict, wall_window_results: dict, - 
fueltype_results: dict + fueltype_results: dict, + ngd_roof_material_results: dict | None = None, + ngd_solar_panel_presence_results: dict | None = None, + ngd_roof_shape_results: dict | None = None, + ngd_roof_aspect_area_facings_results: dict | None = None, ) -> DetailedBuilding: """ Maps a `DetailedBuilding` response from SPARQL queries for generic, roof, floor, wall and window data. @@ -193,6 +270,13 @@ def map_single_building_response( map_floor_results(building, floor_results) map_wall_window_results(building, wall_window_results) map_fueltype_results(building, fueltype_results) + + # check if roof material exists + map_ngd_roof_material_results(building, ngd_roof_material_results) + map_ngd_solar_panel_presence_results(building, ngd_solar_panel_presence_results) + map_ngd_roof_shape_results(building, ngd_roof_shape_results) + map_ngd_roof_aspect_area_facings_results(building, ngd_roof_aspect_area_facings_results) + return building @@ -257,6 +341,36 @@ def map_bounded_filterable_buildings_response( building.roof_construction = result.roof_construction building.roof_insulation_location = result.roof_insulation building.roof_insulation_thickness = result.roof_insulation_thickness + building.roof_material = ( + result.roof_material.replace(" ", "") + if result.roof_material + else result.roof_material + ) + building.has_roof_solar_panels = result.has_roof_solar_panels + building.roof_aspect_area_facing_north = ( + result.roof_aspect_area_facing_north_m2 + ) + building.roof_aspect_area_facing_north_east = ( + result.roof_aspect_area_facing_north_east_m2 + ) + building.roof_aspect_area_facing_north_west = ( + result.roof_aspect_area_facing_north_west_m2 + ) + building.roof_aspect_area_facing_east = ( + result.roof_aspect_area_facing_east_m2 + ) + building.roof_aspect_area_facing_south = ( + result.roof_aspect_area_facing_south_m2 + ) + building.roof_aspect_area_facing_south_east = ( + result.roof_aspect_area_facing_south_east_m2 + ) + 
building.roof_aspect_area_facing_south_west = ( + result.roof_aspect_area_facing_south_west_m2 + ) + building.roof_aspect_area_facing_west = ( + result.roof_aspect_area_facing_west_m2 + ) buildings.append(building) return buildings @@ -335,6 +449,159 @@ def map_flagged_buildings_response(results: dict) -> list[FlaggedBuilding]: return flags +def _add_postcode_filter(filter_summary: FilterSummary, post_code: str): + post_code_matches = re.search(r"^[0-9A-Z]{3,4}", post_code) + if post_code_matches: + transformed_post_code = post_code_matches.group() + filter_summary.postcode.add(transformed_post_code) + + +def _add_built_form_filter(filter_summary: FilterSummary, built_form: str): + if built_form and len(built_form) > 0: + filter_summary.built_form.add(built_form) + + +def _add_lodgment_date_filter( + filter_summary: FilterSummary, lodgement_date: datetime.date +): + if lodgement_date: + inspection_year = str(lodgement_date.year) + filter_summary.inspection_year.add(inspection_year) + + +def _add_fuel_type_filter(filter_summary: FilterSummary, fuel_type: str): + if fuel_type and len(fuel_type) > 0: + filter_summary.fuel_type.add(fuel_type) + + +def _add_window_glazing_filter(filter_summary: FilterSummary, window_glazing: str): + if window_glazing and len(window_glazing) > 0: + filter_summary.window_glazing.add(window_glazing) + + +def _add_wall_construction_filter( + filter_summary: FilterSummary, wall_construction: str +): + if wall_construction and len(wall_construction) > 0: + filter_summary.wall_construction.add(wall_construction) + + +def _add_wall_insulation_filter(filter_summary: FilterSummary, wall_insulation: str): + if wall_insulation and len(wall_insulation) > 0: + filter_summary.wall_insulation.add(wall_insulation) + + +def _add_floor_construction_filter( + filter_summary: FilterSummary, floor_construction: str +): + if floor_construction and len(floor_construction) > 0: + filter_summary.floor_construction.add(floor_construction) + + +def 
_add_floor_insulation_filter(filter_summary: FilterSummary, floor_insulation: str): + if floor_insulation and len(floor_insulation) > 0: + filter_summary.floor_insulation.add(floor_insulation) + + +def _add_roof_construction_filter( + filter_summary: FilterSummary, roof_construction: str +): + if roof_construction and len(roof_construction) > 0: + filter_summary.roof_construction.add(roof_construction) + + +def _add_roof_insulation_location_filter( + filter_summary: FilterSummary, roof_insulation_location: str +): + if roof_insulation_location and len(roof_insulation_location) > 0: + filter_summary.roof_insulation_location.add(roof_insulation_location) + + +def _add_roof_insulation_thickness_filter( + filter_summary: FilterSummary, roof_insulation_thickness: str +): + if roof_insulation_thickness and len(roof_insulation_thickness) > 0: + filter_summary.roof_insulation_thickness.add(roof_insulation_thickness) + + +def _add_has_roof_solar_panels_filter( + filter_summary: FilterSummary, has_roof_solar_panels: bool +): + if ( + has_roof_solar_panels is not None + and len(filter_summary.has_roof_solar_panels) < 2 + ): + filter_summary.has_roof_solar_panels.add( + "HasSolarPanels" if has_roof_solar_panels else "NoSolarPanels" + ) + + +def _add_roof_material_filter(filter_summary: FilterSummary, roof_material: str): + if roof_material and len(roof_material) > 0: + filter_summary.roof_material.add(roof_material.replace(" ", "")) + + +def _add_roof_aspect_area_direction_filter( + filter_summary: FilterSummary, + roof_aspect_area_facing_north_m2: float, + roof_aspect_area_facing_north_east_m2: float, + roof_aspect_area_facing_east_m2: float, + roof_aspect_area_facing_south_east_m2: float, + roof_aspect_area_facing_south_m2: float, + roof_aspect_area_facing_south_west_m2: float, + roof_aspect_area_facing_west_m2: float, + roof_aspect_area_facing_north_west_m2: float, +): + if ( + roof_aspect_area_facing_north_m2 + and roof_aspect_area_facing_north_m2 > 0 + and "North" not in 
filter_summary.roof_aspect_area_direction + ): + filter_summary.roof_aspect_area_direction.add("North") + if ( + roof_aspect_area_facing_north_east_m2 + and roof_aspect_area_facing_north_east_m2 > 0 + and "NorthEast" not in filter_summary.roof_aspect_area_direction + ): + filter_summary.roof_aspect_area_direction.add("NorthEast") + if ( + roof_aspect_area_facing_north_west_m2 + and roof_aspect_area_facing_north_west_m2 > 0 + and "NorthWest" not in filter_summary.roof_aspect_area_direction + ): + filter_summary.roof_aspect_area_direction.add("NorthWest") + if ( + roof_aspect_area_facing_south_m2 + and roof_aspect_area_facing_south_m2 > 0 + and "South" not in filter_summary.roof_aspect_area_direction + ): + filter_summary.roof_aspect_area_direction.add("South") + if ( + roof_aspect_area_facing_south_east_m2 + and roof_aspect_area_facing_south_east_m2 > 0 + and "SouthEast" not in filter_summary.roof_aspect_area_direction + ): + filter_summary.roof_aspect_area_direction.add("SouthEast") + if ( + roof_aspect_area_facing_south_west_m2 + and roof_aspect_area_facing_south_west_m2 > 0 + and "SouthWest" not in filter_summary.roof_aspect_area_direction + ): + filter_summary.roof_aspect_area_direction.add("SouthWest") + if ( + roof_aspect_area_facing_east_m2 + and roof_aspect_area_facing_east_m2 > 0 + and "East" not in filter_summary.roof_aspect_area_direction + ): + filter_summary.roof_aspect_area_direction.add("East") + if ( + roof_aspect_area_facing_west_m2 + and roof_aspect_area_facing_west_m2 > 0 + and "West" not in filter_summary.roof_aspect_area_direction + ): + filter_summary.roof_aspect_area_direction.add("West") + + def map_filter_summary_response(results: [FilterableBuildingSchema]) -> FilterSummary: """ Maps a `list[FilterableBuildingSchema]` to a `FilterSummary` @@ -347,36 +614,64 @@ def map_filter_summary_response(results: [FilterableBuildingSchema]) -> FilterSu """ mapped_result: FilterSummary = FilterSummary() for result in results: - post_code_matches = 
re.search(r"^[0-9A-Z]{3,4}", result.post_code) - if post_code_matches: - transformed_post_code = post_code_matches.group() - mapped_result.postcode.add(transformed_post_code) - if result.built_form and len(result.built_form) > 0: - mapped_result.built_form.add(result.built_form) - if result.lodgement_date: - inspection_year = str(result.lodgement_date.year) - mapped_result.inspection_year.add(inspection_year) - if result.fuel_type and len(result.fuel_type) > 0: - mapped_result.fuel_type.add(result.fuel_type) - if result.window_glazing and len(result.window_glazing) > 0: - mapped_result.window_glazing.add(result.window_glazing) - if result.wall_construction and len(result.wall_construction) > 0: - mapped_result.wall_construction.add(result.wall_construction) - if result.wall_insulation and len(result.wall_insulation) > 0: - mapped_result.wall_insulation.add(result.wall_insulation) - if result.floor_construction and len(result.floor_construction) > 0: - mapped_result.floor_construction.add(result.floor_construction) - if result.floor_insulation and len(result.floor_insulation) > 0: - mapped_result.floor_insulation.add(result.floor_insulation) - if result.roof_construction and len(result.roof_construction) > 0: - mapped_result.roof_construction.add(result.roof_construction) - if result.roof_insulation and len(result.roof_insulation) > 0: - mapped_result.roof_insulation_location.add(result.roof_insulation) - if ( - result.roof_insulation_thickness - and len(result.roof_insulation_thickness) > 0 - ): - mapped_result.roof_insulation_thickness.add( - result.roof_insulation_thickness - ) + _add_postcode_filter(mapped_result, result.post_code) + _add_built_form_filter(mapped_result, result.built_form) + _add_lodgment_date_filter(mapped_result, result.lodgement_date) + _add_fuel_type_filter(mapped_result, result.fuel_type) + _add_window_glazing_filter(mapped_result, result.window_glazing) + _add_wall_construction_filter(mapped_result, result.wall_construction) + 
_add_wall_insulation_filter(mapped_result, result.wall_insulation) + _add_floor_construction_filter(mapped_result, result.floor_construction) + _add_floor_insulation_filter(mapped_result, result.floor_insulation) + _add_has_roof_solar_panels_filter(mapped_result, result.has_roof_solar_panels) + _add_roof_material_filter(mapped_result, result.roof_material) + _add_roof_aspect_area_direction_filter( + mapped_result, + result.roof_aspect_area_facing_north_m2, + result.roof_aspect_area_facing_north_east_m2, + result.roof_aspect_area_facing_east_m2, + result.roof_aspect_area_facing_south_east_m2, + result.roof_aspect_area_facing_south_m2, + result.roof_aspect_area_facing_south_west_m2, + result.roof_aspect_area_facing_west_m2, + result.roof_aspect_area_facing_north_west_m2, + ) + _add_roof_construction_filter(mapped_result, result.roof_construction) + _add_roof_insulation_location_filter(mapped_result, result.roof_insulation) + _add_roof_insulation_thickness_filter( + mapped_result, result.roof_insulation_thickness + ) return mapped_result + + +def map_percentage_building_attributes_per_region_response(results) -> list[BuildingAttributePercentagesPerRegion]: + attribute_mappings = [ + ("percentage_single_glazing", "Single glazing"), + ("percentage_double_glazing", "Double glazing"), + ("percentage_triple_glazing", "Triple glazing"), + ("percentage_no_insulation", "No insulation"), + ("percentage_insulation_1_100mm", "1-100mm insulation"), + ("percentage_insulation_101_200mm", "101-200mm insulation"), + ("percentage_insulation_201_300mm", "201-300mm insulation"), + ("percentage_insulation_over_300mm", ">300mm insulation"), + ("percentage_suspended_flooring", "Suspended flooring"), + ("percentage_pitched_roof", "Pitched roofs"), + ("percentage_cavity_wall", "Cavity walls"), + ("percentage_roof_solar_panels", "Solar panels"), + ] + + mapped_results = [] + for row in results: + attributes = [] + for column_name, label in attribute_mappings: + value = getattr(row, 
column_name, 0.0) + attributes.append(BuildingAttributePercentage(label=label, value=float(value))) + + mapped_results.append( + BuildingAttributePercentagesPerRegion( + region_name=row.region_name, + attributes=attributes + ) + ) + + return mapped_results diff --git a/api/models/dto_models.py b/api/models/dto_models.py index 3296f71..229fac1 100644 --- a/api/models/dto_models.py +++ b/api/models/dto_models.py @@ -10,6 +10,7 @@ from pydantic import BaseModel from .ies_models import IesThing +from .utils import get_nullable_float print(pydantic.VERSION) @@ -47,6 +48,57 @@ class DetailedBuilding(Building): wall_insulation: Optional[str] = None window_glazing: Optional[str] = None fueltype: Optional[str] = None + # OS NGD Buildings attributes + roof_material: Optional[str] = None + solar_panel_presence: Optional[str] = None + roof_shape: Optional[str] = None + # Roof aspect areas (square meters) by direction + roof_aspect_area_facing_north_m2: Optional[float] = None + roof_aspect_area_facing_north_east_m2: Optional[float] = None + roof_aspect_area_facing_east_m2: Optional[float] = None + roof_aspect_area_facing_south_east_m2: Optional[float] = None + roof_aspect_area_facing_south_m2: Optional[float] = None + roof_aspect_area_facing_south_west_m2: Optional[float] = None + roof_aspect_area_facing_west_m2: Optional[float] = None + roof_aspect_area_facing_north_west_m2: Optional[float] = None + roof_aspect_area_indeterminable_m2: Optional[float] = None + + +class DetailedBuildingSchema(DetailedBuilding): + @classmethod + def from_orm(cls, obj): + return cls( + solar_panel_presence=str(obj.has_roof_solar_panels), + roof_material=obj.roof_material, + roof_shape=obj.roof_shape, + roof_aspect_area_facing_north_m2=get_nullable_float( + obj.roof_aspect_area_facing_north_m2 + ), + roof_aspect_area_facing_north_east_m2=get_nullable_float( + obj.roof_aspect_area_facing_north_east_m2 + ), + roof_aspect_area_facing_east_m2=get_nullable_float( + obj.roof_aspect_area_facing_east_m2 + 
), + roof_aspect_area_facing_south_east_m2=get_nullable_float( + obj.roof_aspect_area_facing_south_east_m2 + ), + roof_aspect_area_facing_south_m2=get_nullable_float( + obj.roof_aspect_area_facing_south_m2 + ), + roof_aspect_area_facing_south_west_m2=get_nullable_float( + obj.roof_aspect_area_facing_south_west_m2 + ), + roof_aspect_area_facing_west_m2=get_nullable_float( + obj.roof_aspect_area_facing_west_m2 + ), + roof_aspect_area_facing_north_west_m2=get_nullable_float( + obj.roof_aspect_area_facing_north_west_m2 + ), + roof_aspect_area_indeterminable_m2=get_nullable_float( + obj.roof_aspect_area_indeterminable_m2 + ), + ) class FilterableBuilding(BaseModel): @@ -63,6 +115,16 @@ class FilterableBuilding(BaseModel): wall_construction: Optional[str] = None wall_insulation: Optional[str] = None window_glazing: Optional[str] = None + has_roof_solar_panels: Optional[bool] = None + roof_material: Optional[str] = None + roof_aspect_area_facing_north: Optional[float] = None + roof_aspect_area_facing_north_east: Optional[float] = None + roof_aspect_area_facing_east: Optional[float] = None + roof_aspect_area_facing_south_east: Optional[float] = None + roof_aspect_area_facing_south: Optional[float] = None + roof_aspect_area_facing_south_west: Optional[float] = None + roof_aspect_area_facing_west: Optional[float] = None + roof_aspect_area_facing_north_west: Optional[float] = None class EpcStatistics(IesThing): @@ -148,6 +210,16 @@ class FilterableBuildingSchema(BaseModel): wall_insulation: Optional[str] floor_construction: Optional[str] floor_insulation: Optional[str] + has_roof_solar_panels: Optional[bool] + roof_material: Optional[str] + roof_aspect_area_facing_north_m2: Optional[float] + roof_aspect_area_facing_north_east_m2: Optional[float] + roof_aspect_area_facing_east_m2: Optional[float] + roof_aspect_area_facing_south_east_m2: Optional[float] + roof_aspect_area_facing_south_m2: Optional[float] + roof_aspect_area_facing_south_west_m2: Optional[float] + 
roof_aspect_area_facing_west_m2: Optional[float] + roof_aspect_area_facing_north_west_m2: Optional[float] roof_construction: Optional[str] roof_insulation: Optional[str] roof_insulation_thickness: Optional[str] @@ -168,8 +240,21 @@ def from_orm(cls, obj): roof_construction=obj.roof_construction, roof_insulation=obj.roof_insulation, roof_insulation_thickness=obj.roof_insulation_thickness, + has_roof_solar_panels=obj.has_roof_solar_panels, + roof_material=obj.roof_material, + roof_aspect_area_facing_north_m2=obj.roof_aspect_area_facing_north_m2, + roof_aspect_area_facing_north_east_m2=obj.roof_aspect_area_facing_north_east_m2, + roof_aspect_area_facing_east_m2=obj.roof_aspect_area_facing_east_m2, + roof_aspect_area_facing_south_east_m2=obj.roof_aspect_area_facing_south_east_m2, + roof_aspect_area_facing_south_m2=obj.roof_aspect_area_facing_south_m2, + roof_aspect_area_facing_south_west_m2=obj.roof_aspect_area_facing_south_west_m2, + roof_aspect_area_facing_west_m2=obj.roof_aspect_area_facing_west_m2, + roof_aspect_area_facing_north_west_m2=obj.roof_aspect_area_facing_north_west_m2, ) + class Config: + from_orm = True + class FilterSummary(BaseModel): postcode: set[str] = set() @@ -182,6 +267,191 @@ class FilterSummary(BaseModel): wall_insulation: set[str] = set() floor_construction: set[str] = set() floor_insulation: set[str] = set() + has_roof_solar_panels: set[bool] = set() + roof_material: set[str] = set() + roof_aspect_area_direction: set[str] = set() roof_construction: set[str] = set() roof_insulation_location: set[str] = set() roof_insulation_thickness: set[str] = set() + + +class CountOfEpcRatings(BaseModel): + epc_a: int + epc_b: int + epc_c: int + epc_d: int + epc_e: int + epc_f: int + epc_g: int + + @classmethod + def from_orm(cls, obj): + return cls( + epc_a=obj.epc_a, + epc_b=obj.epc_b, + epc_c=obj.epc_c, + epc_d=obj.epc_d, + epc_e=obj.epc_e, + epc_f=obj.epc_f, + epc_g=obj.epc_g, + ) + + +class CountOfEpcRatingsPerRegion(CountOfEpcRatings): + region_name: 
str + + @classmethod + def from_orm(cls, obj): + return cls( + region_name=obj.region_name, + epc_a=obj.epc_a, + epc_b=obj.epc_b, + epc_c=obj.epc_c, + epc_d=obj.epc_d, + epc_e=obj.epc_e, + epc_f=obj.epc_f, + epc_g=obj.epc_g, + ) + + +class EPCRatingsByCategory(BaseModel): + name: str + epc_a: int + epc_b: int + epc_c: int + epc_d: int + epc_e: int + epc_f: int + epc_g: int + total: int + + @classmethod + def from_orm(cls, obj): + name = getattr(obj, "name", None) or getattr(obj, "area_name", None) + return cls( + name=name, + epc_a=obj.epc_a, + epc_b=obj.epc_b, + epc_c=obj.epc_c, + epc_d=obj.epc_d, + epc_e=obj.epc_e, + epc_f=obj.epc_f, + epc_g=obj.epc_g, + total=sum( + [ + obj.epc_a, + obj.epc_b, + obj.epc_c, + obj.epc_d, + obj.epc_e, + obj.epc_f, + obj.epc_g, + ] + ), + ) + + +class FuelTypesByBuildingType(BaseModel): + building_type: str + fuel_type: str + count: int + + @classmethod + def from_orm(cls, obj): + return cls( + building_type=obj.building_type, + fuel_type=obj.fuel_type, + count=obj.count, + ) + + +class AverageSapRatingPerLodgementDate(BaseModel): + date: datetime.date + national_avg_sap_rating: float + filtered_avg_sap_rating: Optional[float] + + @classmethod + def from_orm(cls, obj): + return cls( + date=obj.date, + national_avg_sap_rating=obj.national_avg_sap_rating, + filtered_avg_sap_rating=obj.filtered_avg_sap_rating, + ) + + +class SapRatingTimelineDataPoint(BaseModel): + date: datetime.date + name: str + avg_sap_rating: float + + @classmethod + def from_orm(cls, obj): + return cls( + date=obj.date, + name=obj.name, + avg_sap_rating=obj.avg_sap_rating, + ) + + +class EpcRatingCountsOvertime(BaseModel): + date: datetime.date + epc_a: int + epc_b: int + epc_c: int + epc_d: int + epc_e: int + epc_f: int + epc_g: int + + @classmethod + def from_orm(cls, obj): + return cls( + date=obj.date, + epc_a=obj.epc_a, + epc_b=obj.epc_b, + epc_c=obj.epc_c, + epc_d=obj.epc_d, + epc_e=obj.epc_e, + epc_f=obj.epc_f, + epc_g=obj.epc_g, + ) + + +class 
BuildingsAffectedByExtremeWeather(BaseModel): + number_of_buildings: int + filtered_number_of_buildings: Optional[int] = None + affected_by_icing_days: Optional[bool] + affected_by_hsds: Optional[bool] + affected_by_wdr: Optional[bool] + + @classmethod + def from_orm(cls, obj, has_filter: bool = True): + return cls( + number_of_buildings=obj.number_of_buildings, + filtered_number_of_buildings=obj.filtered_number_of_buildings + if has_filter + else None, + affected_by_icing_days=obj.affected_by_icing_days, + affected_by_hsds=obj.affected_by_hsds, + affected_by_wdr=obj.affected_by_wdr, + ) + + +class NumberOfInDateAndExpiredEpcs(BaseModel): + year: datetime.date + expired: int + active: int + + @classmethod + def from_orm(cls, obj): + return cls(year=obj.year, expired=obj.expired, active=obj.active) + + +class BuildingAttributePercentage(BaseModel): + label: str + value: float + + +class BuildingAttributePercentagesPerRegion(BaseModel): + region_name: str + attributes: List[BuildingAttributePercentage] diff --git a/api/models/utils.py b/api/models/utils.py new file mode 100644 index 0000000..cd9b472 --- /dev/null +++ b/api/models/utils.py @@ -0,0 +1,2 @@ +def get_nullable_float(value): + float(value) if value != None else None diff --git a/api/query.py b/api/query.py index 46fb052..075c7e3 100644 --- a/api/query.py +++ b/api/query.py @@ -2,6 +2,11 @@ # © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme # and is legally attributed to the Department for Business and Trade (UK) as the governing entity. +from utils import expand_wales_region, WELSH_REGIONS + +EPC_ACTIVE_TRUE = "epc_active = true" +WELSH_REGIONS_SQL = ", ".join(f"'{region}'" for region in sorted(WELSH_REGIONS)) + def get_building(uprn: str) -> str: return f""" @@ -21,14 +26,18 @@ def get_building(uprn: str) -> str: ?epc_result ies:isParticipantIn ?epc_assessment . ?epc_assessment building:assessedStateForEnergyPerformance ?structureUnitState . - ?_bf a ?builtForm . 
- ?builtForm a building:BuiltForm . - ?_bf ies:isStateOf ?structureUnit . + OPTIONAL {{ + ?_bf a ?builtForm . + ?builtForm a building:BuiltForm . + ?_bf ies:isStateOf ?structureUnit . + }} - ?_sut a ?structureUnitType . - ?structureUnitType a building:StructureUnitType . - ?_sut ies:isStateOf ?structureUnit . - }} + OPTIONAL {{ + ?_sut a ?structureUnitType . + ?structureUnitType a building:StructureUnitType . + ?_sut ies:isStateOf ?structureUnit . + }} + }} ORDER BY DESC(?lodgementDate) LIMIT 1 """ @@ -41,27 +50,35 @@ def get_roof_for_building(uprn: str) -> str: PREFIX building: PREFIX data: - SELECT ?uprn ?roofConstruction ?roofInsulation ?roofInsulationThickness + SELECT ?uprn ?roofConstruction ?roofInsulation ?roofInsulationThickness WHERE {{ ?structureUnit ies:isIdentifiedBy data:UPRN_{uprn} . ?structureUnit a building:StructureUnit . ?structureUnitState a building:StructureUnitState . ?structureUnitState ies:isStateOf ?structureUnit . - ?_rc a ?roofConstruction . - ?roofConstruction a building:RoofConstruction . - ?_rc ies:isPartOf ?structureUnitState . + ?epc_result building:lodgementDate ?lodgementDate . + ?epc_result ies:isParticipantIn ?epc_assessment . + ?epc_assessment building:assessedStateForEnergyPerformance ?structureUnitState . + + OPTIONAL {{ + ?_rc a ?roofConstruction . + ?roofConstruction a building:RoofConstruction . + ?_rc ies:isPartOf ?structureUnitState . + }} - ?_ri a ?roofInsulation . - ?roofInsulation a building:RoofInsulationLocation . - ?_ri ies:isPartOf ?structureUnitState . + OPTIONAL {{ + ?_ri a ?roofInsulation . + ?roofInsulation a building:RoofInsulationLocation . + ?_ri ies:isPartOf ?structureUnitState . + }} OPTIONAL {{ ?_rit a ?roofInsulationThickness . ?roofInsulationThickness a building:RoofInsulationThickness . ?_rit ies:isPartOf ?structureUnitState . }} - }} + }} ORDER BY DESC(?lodgementDate) LIMIT 1 """ @@ -81,15 +98,23 @@ def get_floor_for_building(uprn: str) -> str: ?structureUnitState a building:StructureUnitState . 
?structureUnitState ies:isStateOf ?structureUnit . - ?_fc a ?floorConstruction . - ?floorConstruction a building:FloorConstruction . - ?_fc ies:isPartOf ?structureUnitState . + ?epc_result building:lodgementDate ?lodgementDate . + ?epc_result ies:isParticipantIn ?epc_assessment . + ?epc_assessment building:assessedStateForEnergyPerformance ?structureUnitState . - ?_fi a ?floorInsulation . - ?floorInsulation a building:FloorInsulation . - ?_fi ies:isPartOf ?structureUnitState . + OPTIONAL {{ + ?_fc a ?floorConstruction . + ?floorConstruction a building:FloorConstruction . + ?_fc ies:isPartOf ?structureUnitState . + }} + OPTIONAL {{ + ?_fi a ?floorInsulation . + ?floorInsulation a building:FloorInsulation . + ?_fi ies:isPartOf ?structureUnitState . }} + + }} ORDER BY DESC(?lodgementDate) LIMIT 1 """ @@ -109,37 +134,222 @@ def get_walls_and_windows_for_building(uprn: str) -> str: ?structureUnitState a building:StructureUnitState . ?structureUnitState ies:isStateOf ?structureUnit . - ?_wc a ?wallConstruction . - ?wallConstruction a building:WallConstruction . - ?_wc ies:isPartOf ?structureUnitState . + ?epc_result building:lodgementDate ?lodgementDate . + ?epc_result ies:isParticipantIn ?epc_assessment . + ?epc_assessment building:assessedStateForEnergyPerformance ?structureUnitState . - ?_wi a ?wallInsulation . - ?wallInsulation a building:WallInsulation . - ?_wi ies:isPartOf ?structureUnitState . + OPTIONAL {{ + ?_wc a ?wallConstruction . + ?wallConstruction a building:WallConstruction . + ?_wc ies:isPartOf ?structureUnitState . + }} - ?_wg a ?windowGlazing . - ?windowGlazing a building:GlazingType . - ?_wg ies:isPartOf ?structureUnitState . + OPTIONAL {{ + ?_wi a ?wallInsulation . + ?wallInsulation a building:WallInsulation . + ?_wi ies:isPartOf ?structureUnitState . + }} - }} + OPTIONAL {{ + ?_wg a ?windowGlazing . + ?windowGlazing a building:GlazingType . + ?_wg ies:isPartOf ?structureUnitState . 
+ }} + + }} ORDER BY DESC(?lodgementDate) LIMIT 1 """ + def get_fueltype_for_building(uprn: str) -> str: return f""" PREFIX ies: PREFIX building: PREFIX data: + PREFIX xsd: SELECT ?fuelType WHERE {{ - ?structureUnit ies:isIdentifiedBy data:UPRN_{uprn} . - ?structureUnitState ies:isStateOf ?structureUnit . + {{ + SELECT ?structureUnit ?structureUnitState ?lodgement + WHERE {{ + ?structureUnit ies:isIdentifiedBy data:UPRN_{uprn} ; + a building:StructureUnit . + ?structureUnitState a building:StructureUnitState ; + ies:isStateOf ?structureUnit . + + BIND(STR(?structureUnitState) AS ?s) # cast structure unit state URI to a string + BIND(REPLACE(?s, ".*_([0-9]{{8}})$", "$1") AS ?yyyymmdd) # get last 8 digits + + # check if we managed to pull the last 8 digits if the regex pattern matched + FILTER( ?yyyymmdd != ?s ) + + # split into year, month and day components + BIND(SUBSTR(?yyyymmdd,1,4) AS ?yyyy) + BIND(SUBSTR(?yyyymmdd,5,2) AS ?mm) + BIND(SUBSTR(?yyyymmdd,7,2) AS ?dd) + + # check whether month and day are within expected intervals + FILTER ( xsd:integer(?mm) >= 1 && xsd:integer(?mm) <= 12 ) + FILTER ( xsd:integer(?dd) >= 1 && xsd:integer(?dd) <= 31 ) + + # create the final date component + BIND( xsd:date(CONCAT(?yyyy, "-", ?mm, "-", ?dd)) AS ?lodgement ) + }} ORDER BY DESC(?lodgement) LIMIT 1 + }} + GRAPH {{ + ?structureUnitState building:isServicedBy ?heatingSystem . + ?heatingSystem building:isOperableWithFuel ?fuelType . 
+ }} + }} + """ + + +def get_all_ngd_attributes_pg() -> str: + return """ + SELECT + su.has_roof_solar_panels, + su.roof_material, + su.roof_shape, + su.roof_aspect_area_facing_north_m2, + su.roof_aspect_area_facing_north_east_m2, + su.roof_aspect_area_facing_east_m2, + su.roof_aspect_area_facing_south_east_m2, + su.roof_aspect_area_facing_south_m2, + su.roof_aspect_area_facing_south_west_m2, + su.roof_aspect_area_facing_west_m2, + su.roof_aspect_area_facing_north_west_m2, + su.roof_aspect_area_indeterminable_m2 + FROM iris.structure_unit su + WHERE su.uprn = :uprn + UNION + SELECT + su.has_roof_solar_panels, + su.roof_material, + su.roof_shape, + su.roof_aspect_area_facing_north_m2, + su.roof_aspect_area_facing_north_east_m2, + su.roof_aspect_area_facing_east_m2, + su.roof_aspect_area_facing_south_east_m2, + su.roof_aspect_area_facing_south_m2, + su.roof_aspect_area_facing_south_west_m2, + su.roof_aspect_area_facing_west_m2, + su.roof_aspect_area_facing_north_west_m2, + su.roof_aspect_area_indeterminable_m2 + FROM ( + SELECT + * + FROM + iris.epc_assessment + WHERE uprn = :uprn + ORDER BY + lodgement_date desc nulls last, + id desc + LIMIT 1) ea + JOIN iris.structure_unit su ON su.epc_assessment_id = ea.id + LIMIT 1; + """ + + +def get_ngd_roof_material_for_building(uprn: str) -> str: + return f""" + PREFIX building: + PREFIX data: + PREFIX ies: + + SELECT ?roofMaterial + WHERE {{ + data:StructureUnit_{uprn} ies:isPartOf ?building . + ?roof ies:isPartOf ?building . + ?roofState a building:RoofState ; + ies:isStateOf ?roof ; + building:isMadeOf ?roofMaterial . + }} + LIMIT 1 + """ + + +def get_ngd_solar_panel_presence_for_building(uprn: str) -> str: + return f""" + PREFIX building: + PREFIX data: + PREFIX ies: + + SELECT ?solarPanelPresence + WHERE {{ + data:StructureUnit_{uprn} ies:isPartOf ?building . + ?state ies:isStateOf ?building ; + a ?solarPanelPresence . 
+ VALUES ?solarPanelPresence {{ + building:NoSolarPanels + building:HasSolarPanels + building:UnknownSolarPanelPresence + }} + }} + LIMIT 1 + """ + + +def get_ngd_roof_shape_for_building(uprn: str) -> str: + return f""" + PREFIX building: + PREFIX data: + PREFIX ies: + + SELECT DISTINCT ?roofShape + WHERE {{ + data:StructureUnit_{uprn} ies:isPartOf ?building . + ?shapeState ies:isStateOf ?building ; + a building:RoofState ; + a ?roofShape . + VALUES ?roofShape {{ + building:PitchedRoofShape + building:FlatRoofShape + building:MixedRoofShape + building:UnknownRoofShape + }} + }} + LIMIT 1 + """ + + +def get_ngd_roof_aspect_areas_for_building(uprn: str) -> str: + return f""" + PREFIX building: + PREFIX data: + PREFIX ies: + PREFIX qudt: + PREFIX unit: + PREFIX quantitykind: + + SELECT ?direction ?m2 + WHERE {{ + data:StructureUnit_{uprn} ies:isPartOf ?building . + ?roof ies:isPartOf ?building . + ?roofState a building:RoofState ; ies:isStateOf ?roof . + + ?aspect a ?directionClass ; + ies:isPartOf ?roofState ; + building:hasCombinedSurfaceArea [ + building:hasQuantity [ + qudt:hasQuantityKind quantitykind:Area ; + qudt:unit unit:M2 ; + qudt:value ?m2 + ] + ] . - GRAPH {{ - ?structureUnitState building:isServicedBy ?heatingSystem . - ?heatingSystem building:isOperableWithFuel ?fuelType . 
+ VALUES ?directionClass {{ + building:NorthFacingRoofSectionSum + building:NorthEastFacingRoofSectionSum + building:EastFacingRoofSectionSum + building:SouthEastFacingRoofSectionSum + building:SouthFacingRoofSectionSum + building:SouthWestFacingRoofSectionSum + building:WestFacingRoofSectionSum + building:NorthWestFacingRoofSectionSum + building:AreaIndeterminableRoofSectionSum }} + BIND(STRAFTER(STR(?directionClass), "#") AS ?direction) }} """ @@ -149,15 +359,38 @@ def get_buildings_in_bounding_box_query() -> str: WITH filtered_buildings AS ( SELECT uprn, first_line_of_address, toid, point FROM iris.building - WHERE point && ST_MakeEnvelope(:min_long, :min_lat, :max_long, :max_lat, :srid) + WHERE is_residential = true AND + point && ST_MakeEnvelope(:min_long, :min_lat, :max_long, :max_lat, :srid) AND ST_Intersects(point, ST_MakeEnvelope(:min_long, :min_lat, :max_long, :max_lat, :srid)) ) - SELECT fb.uprn, fb.first_line_of_address, - fb.toid, fb.point, ea.epc_rating, - su.type AS structure_unit_type - FROM filtered_buildings fb - LEFT JOIN iris.epc_assessment ea ON fb.uprn = ea.uprn - LEFT JOIN iris.structure_unit su ON ea.id = su.epc_assessment_id; + SELECT + fb.uprn, + fb.first_line_of_address, + fb.toid, + fb.point, + ea.epc_rating, + su.type as structure_unit_type + FROM + filtered_buildings fb + LEFT JOIN LATERAL ( + SELECT + id, + uprn, + epc_rating, + lodgement_date + FROM + iris.epc_assessment + WHERE + uprn = fb.uprn + ORDER BY + lodgement_date desc nulls last, + id desc + LIMIT 1 + ) ea ON + TRUE + LEFT JOIN iris.structure_unit su + ON + su.epc_assessment_id = ea.id; """ @@ -166,42 +399,71 @@ def get_filterable_buildings_in_bounding_box_query() -> str: WITH filtered_buildings AS ( SELECT uprn, toid, post_code, point FROM iris.building - WHERE point && ST_MakeEnvelope(:min_long, :min_lat, :max_long, :max_lat, :srid) + WHERE is_residential = true + AND point && ST_MakeEnvelope(:min_long, :min_lat, :max_long, :max_lat, :srid) AND ST_Intersects(point, 
ST_MakeEnvelope(:min_long, :min_lat, :max_long, :max_lat, :srid)) ) - SELECT fb.uprn, fb.toid, fb.post_code, + SELECT + fb.uprn, fb.toid, fb.post_code, su.built_form, su.fuel_type, ea.lodgement_date, su.window_glazing, su.wall_construction, su.wall_insulation, su.floor_construction, su.floor_insulation, + su.has_roof_solar_panels, su.roof_material, + su.roof_aspect_area_facing_north_m2, + su.roof_aspect_area_facing_north_east_m2, + su.roof_aspect_area_facing_north_west_m2, + su.roof_aspect_area_facing_east_m2, + su.roof_aspect_area_facing_south_m2, + su.roof_aspect_area_facing_south_east_m2, + su.roof_aspect_area_facing_south_west_m2, + su.roof_aspect_area_facing_west_m2, su.roof_construction, su.roof_insulation, su.roof_insulation_thickness - FROM filtered_buildings fb - LEFT JOIN iris.epc_assessment ea ON fb.uprn = ea.uprn - LEFT JOIN iris.structure_unit su ON ea.id = su.epc_assessment_id; + FROM + filtered_buildings fb + LEFT JOIN LATERAL ( + SELECT + id, + uprn, + epc_rating, + lodgement_date + FROM + iris.epc_assessment + WHERE + uprn = fb.uprn + ORDER BY + lodgement_date desc nulls last, + id desc + LIMIT 1 + ) ea ON + TRUE + LEFT JOIN iris.structure_unit su + ON + su.epc_assessment_id = ea.id; """ def get_statistics_for_wards() -> str: return """ - PREFIX stats: - - SELECT ?wardName ?EPC_Rating_A ?EPC_Rating_B ?EPC_Rating_C ?EPC_Rating_D ?EPC_Rating_E ?EPC_Rating_F ?EPC_Rating_G ?No_EPC_Rating - WHERE { - GRAPH { - ?stats a stats:EPCWardStats ; - stats:wardName ?wardName ; - stats:EPC_Rating_A ?EPC_Rating_A ; - stats:EPC_Rating_B ?EPC_Rating_B ; - stats:EPC_Rating_C ?EPC_Rating_C ; - stats:EPC_Rating_D ?EPC_Rating_D ; - stats:EPC_Rating_E ?EPC_Rating_E ; - stats:EPC_Rating_F ?EPC_Rating_F ; - stats:EPC_Rating_G ?EPC_Rating_G ; - stats:No_EPC_Rating ?No_EPC_Rating . 
- } - } - ORDER BY ?wardName + PREFIX stats: + + SELECT ?wardName ?EPC_Rating_A ?EPC_Rating_B ?EPC_Rating_C ?EPC_Rating_D ?EPC_Rating_E ?EPC_Rating_F ?EPC_Rating_G ?No_EPC_Rating + WHERE { + GRAPH { + ?stats a stats:EPCWardStats ; + stats:wardName ?wardName ; + stats:EPC_Rating_A ?EPC_Rating_A ; + stats:EPC_Rating_B ?EPC_Rating_B ; + stats:EPC_Rating_C ?EPC_Rating_C ; + stats:EPC_Rating_D ?EPC_Rating_D ; + stats:EPC_Rating_E ?EPC_Rating_E ; + stats:EPC_Rating_F ?EPC_Rating_F ; + stats:EPC_Rating_G ?EPC_Rating_G ; + stats:No_EPC_Rating ?No_EPC_Rating . + } + } + ORDER BY ?wardName """ @@ -262,3 +524,653 @@ def get_flag_history(uprn: str) -> str: }} }} """ + + +def area_level_to_column(area_level: str) -> str: + try: + return { + "region": "region_name", + "county": "county_name", + "district": "district_name", + "ward": "ward_name", + }[area_level] + except KeyError: + raise ValueError( + f"Invalid area_level '{area_level}'. Must be one of region, county, district, ward." + ) + + +def _wales_grouped_column(column: str) -> str: + """Returns a CASE expression that groups Welsh regions into 'Wales'.""" + return f"""CASE + WHEN {column} IN ({WELSH_REGIONS_SQL}) + THEN 'Wales' + ELSE {column} + END""" + + +def _get_epc_rating_query_with_polygon(per_region: bool, polygon: str): + """Build EPC rating query for polygon filter using building_epc_analytics.""" + params = {"polygon": polygon} + where_conditions = [ + EPC_ACTIVE_TRUE, + "ST_Within(point, ST_GeomFromGeoJSON(:polygon))", + ] + if per_region: + where_conditions.append("region_name IS NOT NULL AND region_name != ''") + + region_select = ( + _wales_grouped_column("region_name") + " AS region_name," if per_region else "" + ) + group_by = "GROUP BY " + _wales_grouped_column("region_name") if per_region else "" + + query = f""" + SELECT {region_select} + COUNT(*) FILTER (WHERE epc_rating = 'A') AS epc_a, + COUNT(*) FILTER (WHERE epc_rating = 'B') AS epc_b, + COUNT(*) FILTER (WHERE epc_rating = 'C') AS epc_c, + COUNT(*) 
FILTER (WHERE epc_rating = 'D') AS epc_d, + COUNT(*) FILTER (WHERE epc_rating = 'E') AS epc_e, + COUNT(*) FILTER (WHERE epc_rating = 'F') AS epc_f, + COUNT(*) FILTER (WHERE epc_rating = 'G') AS epc_g + FROM iris.building_epc_analytics + WHERE {" AND ".join(where_conditions)} + {group_by}; + """ + return query, params + + +def _get_epc_rating_query_from_aggregates( + per_region: bool, area_level: str, area_names: list +): + """Build EPC rating query from pre-aggregated data.""" + params = {} + where_conditions = [ + "snapshot_date = (SELECT MAX(snapshot_date) FROM iris.building_epc_analytics_aggregates)" + ] + + if area_level and area_names: + area_names = expand_wales_region(area_names) + where_conditions.append( + f"{area_level_to_column(area_level)} = ANY(:area_names)" + ) + params["area_names"] = area_names + + if per_region: + where_conditions.append("region_name IS NOT NULL AND region_name != ''") + + region_select = ( + _wales_grouped_column("region_name") + " AS region_name," if per_region else "" + ) + group_by = "GROUP BY " + _wales_grouped_column("region_name") if per_region else "" + + query = f""" + SELECT {region_select} + SUM(count_rating_a) AS epc_a, + SUM(count_rating_b) AS epc_b, + SUM(count_rating_c) AS epc_c, + SUM(count_rating_d) AS epc_d, + SUM(count_rating_e) AS epc_e, + SUM(count_rating_f) AS epc_f, + SUM(count_rating_g) AS epc_g + FROM iris.building_epc_analytics_aggregates + WHERE {" AND ".join(where_conditions)} + {group_by}; + """ + return query, params + + +def get_count_of_epc_rating_query( + per_region: bool = False, + polygon: str = None, + area_level: str = None, + area_names: list = None, +): + if polygon: + return _get_epc_rating_query_with_polygon(per_region, polygon) + return _get_epc_rating_query_from_aggregates(per_region, area_level, area_names) + + +def get_percentage_of_buildings_attributes_per_region_query( + polygon: str = None, area_level: str = None, area_names: list = None +): + where_conditions = [] + params = {} + + 
def percentage_column(filter, alias): + return f""" + ROUND( + 100.0 * AVG(CASE WHEN {filter} THEN 1 ELSE 0 END)::numeric, + 2 + ) AS {alias} + """ + + where_conditions.append(EPC_ACTIVE_TRUE) + + if polygon: + where_conditions.append("ST_Within(point, ST_GeomFromGeoJSON(:polygon))") + params["polygon"] = polygon + elif area_level and area_names: + area_names = expand_wales_region(area_names) + where_conditions.append( + f"{area_level_to_column(area_level)} = ANY(:area_names)" + ) + params["area_names"] = area_names + + where_clause = "WHERE " + " AND ".join(where_conditions) + + query = f""" + SELECT {_wales_grouped_column("region_name")} AS region_name, + {percentage_column("window_glazing = 'SingleGlazing'", "percentage_single_glazing")}, + {percentage_column("window_glazing IN ('DoubleGlazing', 'DoubleGlazingBefore2002', 'DoubleGlazingAfter2002')", "percentage_double_glazing")}, + {percentage_column("window_glazing = 'TripleGlazing'", "percentage_triple_glazing")}, + {percentage_column("roof_insulation_thickness = '0mm'", "percentage_no_insulation")}, + {percentage_column("roof_insulation_thickness IN ('12mm', '25mm', '50mm', '75mm', '100mm')", "percentage_insulation_1_100mm")}, + {percentage_column("roof_insulation_thickness IN ('125mm', '150mm', '150+mm', '175mm', '200mm')", "percentage_insulation_101_200mm")}, + {percentage_column("roof_insulation_thickness IN ('225mm', '250mm', '270mm', '300mm')", "percentage_insulation_201_300mm")}, + {percentage_column("roof_insulation_thickness IN ('350mm', '400mm', '400+mm')", "percentage_insulation_over_300mm")}, + {percentage_column("floor_construction = 'Suspended'", "percentage_suspended_flooring")}, + {percentage_column("roof_construction = 'PitchedRoof'", "percentage_pitched_roof")}, + {percentage_column("wall_construction = 'CavityWall'", "percentage_cavity_wall")}, + {percentage_column("has_roof_solar_panels", "percentage_roof_solar_panels")} + FROM iris.building_epc_analytics + {where_clause} + GROUP BY 
{_wales_grouped_column("region_name")}; + """ + + return query, params + + +def get_fuel_types_by_building_type_query( + polygon: str = None, area_level: str = None, area_names: list = None +): + where_conditions = [] + params = {} + + where_conditions.append(EPC_ACTIVE_TRUE) + + if polygon: + where_conditions.append("ST_Within(point, ST_GeomFromGeoJSON(:polygon))") + params["polygon"] = polygon + elif area_level and area_names: + area_names = expand_wales_region(area_names) + where_conditions.append( + f"{area_level_to_column(area_level)} = ANY(:area_names)" + ) + params["area_names"] = area_names + + where_conditions.append("type IS NOT NULL") + where_conditions.append("fuel_type IS NOT NULL") + + where_clause = "WHERE " + " AND ".join(where_conditions) + + query = f""" + SELECT type AS building_type, + fuel_type, + COUNT(*) as count + FROM iris.building_epc_analytics + {where_clause} + GROUP BY type, fuel_type + ORDER BY type, count DESC; + """ + + return query, params + + +def get_national_avg_sap_rating_overtime_query(): + """Get national average SAP rating over time using pre-calculated aggregates.""" + query = """ + SELECT + snapshot_date as date, + SUM(sum_sap_rating) / NULLIF(SUM(active_epc_count), 0) as avg_sap_rating + FROM iris.building_epc_analytics_aggregates + GROUP BY snapshot_date + ORDER BY snapshot_date ASC; + """ + return query + + +def get_filtered_avg_sap_rating_overtime_query( + polygon: str = None, area_level: str = None, area_names: list = None +): + """Get filtered average SAP rating over time for a specific polygon area or named areas.""" + + if not polygon and not (area_level and area_names): + raise ValueError( + "either polygon or area_level/area_names filter must be provided" + ) + + if polygon: + query = """ + SELECT + unnest(active_snapshots) as date, + AVG(sap_rating) as avg_sap_rating + FROM iris.building_epc_analytics + WHERE active_snapshots IS NOT NULL + AND ST_Within(point, ST_GeomFromGeoJSON(:polygon)) + GROUP BY date + ORDER 
BY date ASC; + """ + return query, {"polygon": polygon} + + area_names = expand_wales_region(area_names) + query = f""" + SELECT + snapshot_date as date, + SUM(sum_sap_rating) / NULLIF(SUM(active_epc_count), 0) as avg_sap_rating + FROM iris.building_epc_analytics_aggregates + WHERE {area_level_to_column(area_level)} = ANY(:area_names) + GROUP BY snapshot_date + ORDER BY snapshot_date ASC; + """ + return query, {"area_names": area_names} + + +def get_buildings_affected_by_extreme_weather_data_query( + polygon: str = None, area_level: str = None, area_names: list = None +): + """Get buildings affected by extreme weather, optionally filtered by area.""" + params = {} + + if polygon: + params["polygon"] = polygon + filter_condition = "ST_Within(point, ST_GeomFromGeoJSON(:polygon))" + elif area_level and area_names: + area_names = expand_wales_region(area_names) + params["area_names"] = area_names + filter_condition = f"{area_level_to_column(area_level)} = ANY(:area_names)" + else: + filter_condition = "FALSE" + + query = f""" + SELECT + COUNT(*) AS number_of_buildings, + COUNT(*) FILTER (WHERE {filter_condition}) AS filtered_number_of_buildings, + affected_by_icing_days, + affected_by_hsds, + affected_by_wdr + FROM iris.building_extreme_weather_analytics + GROUP BY affected_by_icing_days, affected_by_hsds, affected_by_wdr + """ + + return query, params + + +def get_number_of_in_date_and_expired_epcs_query( + polygon: str = None, area_level: str = None, area_names: list = None +): + """Get in-date and expired EPC counts over time, optionally filtered by area.""" + params = {} + + # For polygon filters, calculate dynamically from building_epc_analytics (spatial query required) + if polygon: + params["polygon"] = polygon + + query = """ + WITH snapshot_dates AS ( + SELECT generate_series( + DATE_TRUNC('year', (SELECT MIN(lodgement_date) FROM iris.building_epc_analytics WHERE lodgement_date IS NOT NULL))::date + interval '1 year' - interval '1 day', + DATE_TRUNC('year', 
CURRENT_DATE)::date + interval '1 year' - interval '1 day', + interval '1 year' + )::date as snapshot_date + ), + filtered_buildings AS ( + SELECT uprn, lodgement_date, active_snapshots + FROM iris.building_epc_analytics + WHERE active_snapshots IS NOT NULL + AND ST_Within(point, ST_GeomFromGeoJSON(:polygon)) + ), + issued_counts AS ( + SELECT + sd.snapshot_date, + COUNT(DISTINCT fb.uprn) as total_issued_count + FROM snapshot_dates sd + CROSS JOIN filtered_buildings fb + WHERE fb.lodgement_date <= sd.snapshot_date + GROUP BY sd.snapshot_date + ), + active_counts AS ( + SELECT + unnest(active_snapshots) as snapshot_date, + COUNT(*) as active_epc_count + FROM filtered_buildings + GROUP BY unnest(active_snapshots) + ) + SELECT + ic.snapshot_date AS year, + COALESCE(ac.active_epc_count, 0) AS active, + (ic.total_issued_count - COALESCE(ac.active_epc_count, 0)) AS expired + FROM issued_counts ic + LEFT JOIN active_counts ac ON ic.snapshot_date = ac.snapshot_date + ORDER BY ic.snapshot_date; + """ + return query, params + + where_conditions = [] + if area_level and area_names: + area_names = expand_wales_region(area_names) + where_conditions.append( + f"{area_level_to_column(area_level)} = ANY(:area_names)" + ) + params["area_names"] = area_names + + where_clause = "WHERE " + " AND ".join(where_conditions) if where_conditions else "" + + query = f""" + SELECT + snapshot_date AS year, + SUM(active_epc_count) AS active, + SUM(expired_epc_count) AS expired + FROM iris.building_epc_analytics_aggregates + {where_clause} + GROUP BY snapshot_date + ORDER BY snapshot_date; + """ + + return query, params + + +def get_region_names_query() -> str: + return f""" + SELECT DISTINCT {_wales_grouped_column("region_name")} AS region_name + FROM iris.building_epc_analytics + WHERE region_name IS NOT NULL + ORDER BY region_name + """ + + +def get_county_names_query() -> str: + return """ + SELECT DISTINCT county_name + FROM iris.building_epc_analytics + WHERE county_name IS NOT NULL + 
ORDER BY county_name + """ + + +def get_district_names_query() -> str: + return """ + SELECT DISTINCT district_name + FROM iris.building_epc_analytics + WHERE district_name IS NOT NULL + ORDER BY district_name + """ + + +def get_ward_names_query() -> str: + return """ + SELECT DISTINCT ward_name + FROM iris.building_epc_analytics + WHERE ward_name IS NOT NULL + ORDER BY ward_name + """ + + +def get_count_of_epc_rating_by_area_level_query( + group_by_level: str, + filter_area_level: str = None, + filter_area_names: list = None, +): + params = {} + where_conditions = [] + + group_column = area_level_to_column(group_by_level) + + if filter_area_level and filter_area_names: + filter_area_names = expand_wales_region(filter_area_names) + filter_column = area_level_to_column(filter_area_level) + where_conditions.append(f"{filter_column} = ANY(:filter_area_names)") + params["filter_area_names"] = filter_area_names + + where_conditions.append(f"{group_column} IS NOT NULL AND {group_column} != ''") + + where_conditions.append( + "snapshot_date = (SELECT MAX(snapshot_date) FROM iris.building_epc_analytics_aggregates)" + ) + + where_clause = "WHERE " + " AND ".join(where_conditions) + + if group_by_level == "region": + area_select = _wales_grouped_column(group_column) + " AS area_name" + group_by = "GROUP BY " + _wales_grouped_column(group_column) + else: + area_select = f"{group_column} AS area_name" + group_by = f"GROUP BY {group_column}" + + query = f""" + SELECT + {area_select}, + SUM(count_rating_a) AS epc_a, + SUM(count_rating_b) AS epc_b, + SUM(count_rating_c) AS epc_c, + SUM(count_rating_d) AS epc_d, + SUM(count_rating_e) AS epc_e, + SUM(count_rating_f) AS epc_f, + SUM(count_rating_g) AS epc_g + FROM iris.building_epc_analytics_aggregates + {where_clause} + {group_by}; + """ + + return query, params + + +def _get_feature_query_config(feature: str) -> dict: + """Returns select clause and where condition for a given feature.""" + configs = { + "glazing_types": { + 
"select": """CASE + WHEN window_glazing IN ('DoubleGlazing', 'DoubleGlazingAfter2002', 'DoubleGlazingBefore2002') + THEN 'DoubleGlazing' + ELSE window_glazing + END""", + "where": "window_glazing IS NOT NULL AND window_glazing != ''", + }, + "fuel_types": { + "select": "fuel_type", + "where": "fuel_type IS NOT NULL AND fuel_type != ''", + }, + "wall_construction": { + "select": "wall_construction", + "where": "wall_construction IS NOT NULL AND wall_construction != ''", + }, + "wall_insulation": { + "select": "wall_insulation", + "where": "wall_insulation IS NOT NULL AND wall_insulation != ''", + }, + "floor_construction": { + "select": "floor_construction", + "where": "floor_construction IS NOT NULL AND floor_construction != ''", + }, + "floor_insulation": { + "select": "floor_insulation", + "where": "floor_insulation IS NOT NULL AND floor_insulation != ''", + }, + "roof_construction": { + "select": "roof_construction", + "where": "roof_construction IS NOT NULL AND roof_construction != ''", + }, + "roof_material": { + "select": "roof_material", + "where": "roof_material IS NOT NULL AND roof_material != ''", + }, + "roof_insulation": { + "select": "roof_insulation", + "where": "roof_insulation IS NOT NULL AND roof_insulation != ''", + }, + "roof_insulation_thickness": { + "select": "roof_insulation_thickness", + "where": "roof_insulation_thickness IS NOT NULL AND roof_insulation_thickness != ''", + }, + "solar_panels": { + "select": "CASE WHEN has_roof_solar_panels THEN 'Yes' ELSE 'No' END", + "where": "has_roof_solar_panels IS NOT NULL", + }, + "roof_aspect": { + "select": "direction", + "where": "direction IS NOT NULL", + "from_clause": """iris.building_epc_analytics + CROSS JOIN LATERAL ( + VALUES + (CASE WHEN roof_aspect_area_facing_north_m2 > 0 THEN 'North' END), + (CASE WHEN roof_aspect_area_facing_north_east_m2 > 0 THEN 'NorthEast' END), + (CASE WHEN roof_aspect_area_facing_east_m2 > 0 THEN 'East' END), + (CASE WHEN roof_aspect_area_facing_south_east_m2 > 0 
THEN 'SouthEast' END), + (CASE WHEN roof_aspect_area_facing_south_m2 > 0 THEN 'South' END), + (CASE WHEN roof_aspect_area_facing_south_west_m2 > 0 THEN 'SouthWest' END), + (CASE WHEN roof_aspect_area_facing_west_m2 > 0 THEN 'West' END), + (CASE WHEN roof_aspect_area_facing_north_west_m2 > 0 THEN 'NorthWest' END) + ) AS directions(direction)""", + }, + } + if feature not in configs: + raise ValueError(f"Invalid feature: {feature}") + return configs[feature] + + +def get_count_of_epc_rating_by_features_query( + feature: str, polygon: str = None, area_level: str = None, area_names: list = None +): + config = _get_feature_query_config(feature) + params = {} + where_conditions = [EPC_ACTIVE_TRUE, "epc_rating IS NOT NULL", config["where"]] + + if polygon: + where_conditions.append("ST_Within(point, ST_GeomFromGeoJSON(:polygon))") + params["polygon"] = polygon + elif area_level and area_names: + area_names = expand_wales_region(area_names) + filter_column = area_level_to_column(area_level) + where_conditions.append(f"{filter_column} = ANY(:area_names)") + params["area_names"] = area_names + + where_clause = "WHERE " + " AND ".join(where_conditions) + from_clause = config.get("from_clause", "iris.building_epc_analytics") + + query = f""" + SELECT + {config["select"]} as name, + COUNT(*) FILTER (WHERE epc_rating = 'A') as epc_a, + COUNT(*) FILTER (WHERE epc_rating = 'B') as epc_b, + COUNT(*) FILTER (WHERE epc_rating = 'C') as epc_c, + COUNT(*) FILTER (WHERE epc_rating = 'D') as epc_d, + COUNT(*) FILTER (WHERE epc_rating = 'E') as epc_e, + COUNT(*) FILTER (WHERE epc_rating = 'F') as epc_f, + COUNT(*) FILTER (WHERE epc_rating = 'G') as epc_g + FROM {from_clause} + {where_clause} + GROUP BY name; + """ + + return query, params + + +def get_epc_ratings_overtime_query( + polygon: str = None, area_level: str = None, area_names: list = None +): + params = {} + + if polygon: + query = """ + SELECT + unnest(active_snapshots) as date, + COUNT(*) FILTER (WHERE epc_rating = 'A') AS 
epc_a, + COUNT(*) FILTER (WHERE epc_rating = 'B') AS epc_b, + COUNT(*) FILTER (WHERE epc_rating = 'C') AS epc_c, + COUNT(*) FILTER (WHERE epc_rating = 'D') AS epc_d, + COUNT(*) FILTER (WHERE epc_rating = 'E') AS epc_e, + COUNT(*) FILTER (WHERE epc_rating = 'F') AS epc_f, + COUNT(*) FILTER (WHERE epc_rating = 'G') AS epc_g + FROM iris.building_epc_analytics + WHERE active_snapshots IS NOT NULL + AND ST_Within(point, ST_GeomFromGeoJSON(:polygon)) + GROUP BY date + ORDER BY date ASC; + """ + params["polygon"] = polygon + else: + where_clause = "" + if area_level and area_names: + area_names = expand_wales_region(area_names) + where_clause = ( + f"WHERE {area_level_to_column(area_level)} = ANY(:area_names)" + ) + params["area_names"] = area_names + + query = f""" + SELECT + snapshot_date as date, + SUM(count_rating_a) as epc_a, + SUM(count_rating_b) as epc_b, + SUM(count_rating_c) as epc_c, + SUM(count_rating_d) as epc_d, + SUM(count_rating_e) as epc_e, + SUM(count_rating_f) as epc_f, + SUM(count_rating_g) as epc_g + FROM iris.building_epc_analytics_aggregates + {where_clause} + GROUP BY snapshot_date + ORDER BY snapshot_date ASC; + """ + + return query, params + + +def get_sap_rating_overtime_by_property_type_query(polygon: str): + """Get average SAP rating over time grouped by property type for a polygon area.""" + query = """ + SELECT + unnest(active_snapshots) as date, + type as name, + AVG(sap_rating) as avg_sap_rating + FROM iris.building_epc_analytics + WHERE active_snapshots IS NOT NULL + AND type IS NOT NULL + AND type != '' + AND ST_Within(point, ST_GeomFromGeoJSON(:polygon)) + GROUP BY date, type + ORDER BY date ASC, type ASC; + """ + return query, {"polygon": polygon} + + +def get_sap_rating_overtime_by_area_query( + group_by_level: str, + filter_area_level: str = None, + filter_area_names: list = None, +): + """Get average SAP rating over time for each area at the specified grouping level.""" + params = {} + where_conditions = [] + + group_column = 
area_level_to_column(group_by_level) + + if filter_area_level and filter_area_names: + filter_area_names = expand_wales_region(filter_area_names) + filter_column = area_level_to_column(filter_area_level) + where_conditions.append(f"{filter_column} = ANY(:filter_area_names)") + params["filter_area_names"] = filter_area_names + + where_conditions.append(f"{group_column} IS NOT NULL AND {group_column} != ''") + + where_clause = "WHERE " + " AND ".join(where_conditions) + + if group_by_level == "region": + area_select = _wales_grouped_column(group_column) + " AS name" + group_by = f"snapshot_date, {_wales_grouped_column(group_column)}" + else: + area_select = f"{group_column} AS name" + group_by = f"snapshot_date, {group_column}" + + query = f""" + SELECT + snapshot_date as date, + {area_select}, + SUM(sum_sap_rating) / NULLIF(SUM(active_epc_count), 0) as avg_sap_rating + FROM iris.building_epc_analytics_aggregates + {where_clause} + GROUP BY {group_by} + ORDER BY date ASC, name ASC; + """ + + return query, params diff --git a/api/routes.py b/api/routes.py index a26bb31..49e34a0 100644 --- a/api/routes.py +++ b/api/routes.py @@ -5,40 +5,113 @@ import configparser import uuid from datetime import datetime -from typing import List +from typing import Annotated, List, Optional import requests from access import AccessClient from config import get_settings -from db import get_db -from fastapi import APIRouter, Depends, HTTPException, Request, Response -from mappers import (map_bounded_buildings_response, - map_bounded_filterable_buildings_response, - map_epc_statistics_response, map_filter_summary_response, - map_flagged_buildings_response, - map_single_building_response, - map_structure_unit_flag_history_response) -from models.dto_models import (DetailedBuilding, EpcAndOsBuildingSchema, - EpcStatistics, FilterableBuilding, - FilterableBuildingSchema, FilterSummary, - FlaggedBuilding, FlagHistory, SimpleBuilding) -from models.ies_models import (EDH, ClassificationEmum, 
IesAccount, - IesAssessment, IesAssessToBeFalse, - IesAssessToBeTrue, IesClass, IesEntity, - IesPerson, IesState, IesThing, ies) -from pydantic import BaseModel -from query import (get_building, get_buildings_in_bounding_box_query, - get_filterable_buildings_in_bounding_box_query, - get_flag_history, get_flagged_buildings, - get_floor_for_building, get_roof_for_building, - get_statistics_for_wards, - get_walls_and_windows_for_building, - get_fueltype_for_building) +from db import execute_with_timeout, get_db +from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response +from mappers import ( + map_bounded_buildings_response, + map_bounded_filterable_buildings_response, + map_percentage_building_attributes_per_region_response, + map_epc_statistics_response, + map_filter_summary_response, + map_flagged_buildings_response, + map_single_building_response, + map_structure_unit_flag_history_response, +) +from models.dto_models import ( + AverageSapRatingPerLodgementDate, + BuildingAttributePercentagesPerRegion, + BuildingsAffectedByExtremeWeather, + CountOfEpcRatings, + CountOfEpcRatingsPerRegion, + DetailedBuilding, + DetailedBuildingSchema, + EPCRatingsByCategory, + EpcAndOsBuildingSchema, + EpcRatingCountsOvertime, + EpcStatistics, + FilterableBuilding, + FilterableBuildingSchema, + FilterSummary, + FlaggedBuilding, + FlagHistory, + FuelTypesByBuildingType, + NumberOfInDateAndExpiredEpcs, + SapRatingTimelineDataPoint, + SimpleBuilding, +) +from models.ies_models import ( + EDH, + ClassificationEmum, + IesAccount, + IesAssessment, + IesAssessToBeFalse, + IesAssessToBeTrue, + IesClass, + IesEntity, + IesPerson, + IesState, + IesThing, + ies, +) +from pydantic import AfterValidator, BaseModel +from query import ( + get_all_ngd_attributes_pg, + get_building, + get_buildings_affected_by_extreme_weather_data_query, + get_buildings_in_bounding_box_query, + get_count_of_epc_rating_by_area_level_query, + get_count_of_epc_rating_by_features_query, + 
get_count_of_epc_rating_query, + get_county_names_query, + get_district_names_query, + get_epc_ratings_overtime_query, + get_filterable_buildings_in_bounding_box_query, + get_filtered_avg_sap_rating_overtime_query, + get_flag_history, + get_flagged_buildings, + get_floor_for_building, + get_fuel_types_by_building_type_query, + get_fueltype_for_building, + get_national_avg_sap_rating_overtime_query, + get_ngd_roof_aspect_areas_for_building, + get_ngd_roof_material_for_building, + get_ngd_roof_shape_for_building, + get_ngd_solar_panel_presence_for_building, + get_number_of_in_date_and_expired_epcs_query, + get_percentage_of_buildings_attributes_per_region_query, + get_region_names_query, + get_roof_for_building, + get_sap_rating_overtime_by_area_query, + get_sap_rating_overtime_by_property_type_query, + get_statistics_for_wards, + get_walls_and_windows_for_building, + get_ward_names_query, +) from rdflib import Graph from requests import codes, exceptions +from services.climate_service import ( + fetch_geojson_for_hot_summer_days, + fetch_geojson_for_icing_days, + fetch_geojson_for_wind_driven_rain, +) +from services.energy_performance_service import ( + fetch_geojson_for_energy_performance_by_counties, + fetch_geojson_for_energy_performance_by_districts, + fetch_geojson_for_energy_performance_by_regions, + fetch_geojson_for_energy_performance_by_wards, +) from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncSession from utils import get_headers as get_forwarding_headers +from utils import has_bindings, validate_geojson_polygon + +AREA_LEVEL_PATTERN = "^(region|county|district|ward)$" +FEATURE_PATTERN = "^(glazing_types|fuel_types|wall_construction|wall_insulation|floor_construction|floor_insulation|roof_construction|roof_material|roof_insulation|roof_insulation_thickness|solar_panels|roof_aspect)$" router = APIRouter() @@ -50,7 +123,9 @@ ACCESS_API_CALL_ERROR = "Error calling Access, Internal Server Error" IDENTITY_API_CALL_ERROR = "Error calling 
Identity API, Internal Server Error" ISO_8601_URL = "http://iso.org/iso8601#" +APPLICATION_JSON = "application/json" +GeoJSONPolygon = Annotated[str, AfterValidator(validate_geojson_polygon)] if config_settings.UPDATE_MODE == "KAFKA": from ia_map_lib import Adapter, Record, RecordUtils @@ -351,6 +426,243 @@ def post_person(per: IesPerson): return per.uri +@router.get("/dashboard/epc-ratings", response_model=List[CountOfEpcRatings]) +async def get_epc_ratings_for_dashboard( + db: AsyncSession = Depends(get_db), + polygon: Optional[GeoJSONPolygon] = Query(None), + area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN), + area_names: Optional[List[str]] = Query(None), +): + query, params = get_count_of_epc_rating_query( + polygon=polygon, area_level=area_level, area_names=area_names + ) + results = await db.execute(text(query), params) + mapped_results = [CountOfEpcRatings.from_orm(row) for row in results] + + return mapped_results + + +@router.get( + "/dashboard/epc-ratings-per-region", response_model=List[CountOfEpcRatingsPerRegion] +) +async def get_epc_ratings_per_region_for_dashboard( + db: AsyncSession = Depends(get_db), + polygon: Optional[GeoJSONPolygon] = Query(None), + area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN), + area_names: Optional[List[str]] = Query(None), +): + query, params = get_count_of_epc_rating_query( + per_region=True, polygon=polygon, area_level=area_level, area_names=area_names + ) + results = await db.execute(text(query), params) + mapped_results = [CountOfEpcRatingsPerRegion.from_orm(row) for row in results] + + return mapped_results + + +@router.get( + "/dashboard/epc-ratings-by-area-level", response_model=List[EPCRatingsByCategory] +) +async def get_epc_ratings_by_area_level_for_dashboard( + db: AsyncSession = Depends(get_db), + group_by_level: str = Query(..., pattern=AREA_LEVEL_PATTERN), + filter_area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN), + filter_area_names: 
Optional[List[str]] = Query(None), +): + query, params = get_count_of_epc_rating_by_area_level_query( + group_by_level=group_by_level, + filter_area_level=filter_area_level, + filter_area_names=filter_area_names, + ) + results = await db.execute(text(query), params) + + return [EPCRatingsByCategory.from_orm(row) for row in results] + + +@router.get( + "/dashboard/epc-ratings-by-feature", response_model=List[EPCRatingsByCategory] +) +async def get_epc_ratings_by_feature_for_dashboard( + db: AsyncSession = Depends(get_db), + feature: str = Query(..., pattern=FEATURE_PATTERN), + polygon: Optional[GeoJSONPolygon] = Query(None), + area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN), + area_names: Optional[List[str]] = Query(None), +): + query, params = get_count_of_epc_rating_by_features_query( + feature=feature, polygon=polygon, area_level=area_level, area_names=area_names + ) + results = await db.execute(text(query), params) + + return [EPCRatingsByCategory.from_orm(row) for row in results] + + +@router.get( + "/dashboard/building-attributes-percentage-per-region", + response_model=List[BuildingAttributePercentagesPerRegion], +) +async def get_percentage_building_attributes_per_region( + db: AsyncSession = Depends(get_db), + polygon: Optional[GeoJSONPolygon] = Query(None), + area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN), + area_names: Optional[List[str]] = Query(None), +): + query, params = get_percentage_of_buildings_attributes_per_region_query( + polygon=polygon, area_level=area_level, area_names=area_names + ) + results = await db.execute(text(query), params) + + return map_percentage_building_attributes_per_region_response(results) + + +@router.get( + "/dashboard/sap-rating-overtime", + response_model=List[AverageSapRatingPerLodgementDate], +) +async def get_sap_rating_overtime( + db: AsyncSession = Depends(get_db), + polygon: Optional[GeoJSONPolygon] = Query(None), + area_level: Optional[str] = Query(None, 
pattern=AREA_LEVEL_PATTERN), + area_names: Optional[List[str]] = Query(None), +): + national_query = get_national_avg_sap_rating_overtime_query() + national_results = await db.execute(text(national_query)) + + results_by_date = { + row.date: AverageSapRatingPerLodgementDate( + date=row.date, + national_avg_sap_rating=row.avg_sap_rating, + filtered_avg_sap_rating=None, + ) + for row in national_results + } + + if polygon or (area_level and area_names): + filtered_query, params = get_filtered_avg_sap_rating_overtime_query( + polygon=polygon, area_level=area_level, area_names=area_names + ) + filtered_results = await db.execute(text(filtered_query), params) + + for row in filtered_results: + if row.date in results_by_date: + results_by_date[row.date].filtered_avg_sap_rating = row.avg_sap_rating + + return list(results_by_date.values()) + + +@router.get( + "/dashboard/sap-rating-overtime-by-property-type", + response_model=List[SapRatingTimelineDataPoint], +) +async def get_sap_rating_overtime_by_property_type( + db: AsyncSession = Depends(get_db), + polygon: GeoJSONPolygon = Query(...), +): + query, params = get_sap_rating_overtime_by_property_type_query(polygon=polygon) + results = await db.execute(text(query), params) + return [SapRatingTimelineDataPoint.from_orm(row) for row in results] + + +@router.get( + "/dashboard/sap-rating-overtime-by-area", + response_model=List[SapRatingTimelineDataPoint], +) +async def get_sap_rating_overtime_by_area( + db: AsyncSession = Depends(get_db), + group_by_level: str = Query(..., pattern=AREA_LEVEL_PATTERN), + filter_area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN), + filter_area_names: Optional[List[str]] = Query(None), +): + query, params = get_sap_rating_overtime_by_area_query( + group_by_level=group_by_level, + filter_area_level=filter_area_level, + filter_area_names=filter_area_names, + ) + results = await db.execute(text(query), params) + return [SapRatingTimelineDataPoint.from_orm(row) for row in 
results]
+
+
+@router.get(
+    "/dashboard/epc-ratings-overtime",
+    response_model=List[EpcRatingCountsOvertime],
+)
+async def get_epc_ratings_overtime(
+    db: AsyncSession = Depends(get_db),
+    polygon: Optional[GeoJSONPolygon] = Query(None),
+    area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN),
+    area_names: Optional[List[str]] = Query(None),
+):
+    query, params = get_epc_ratings_overtime_query(
+        polygon=polygon, area_level=area_level, area_names=area_names
+    )
+    results = await db.execute(text(query), params)
+    mapped_results = [EpcRatingCountsOvertime.from_orm(row) for row in results]
+
+    return mapped_results
+
+
+@router.get(
+    "/dashboard/fuel-types-by-building-type",
+    response_model=List[FuelTypesByBuildingType],
+)
+async def get_fuel_types_by_building_type(
+    db: AsyncSession = Depends(get_db),
+    polygon: Optional[GeoJSONPolygon] = Query(None),
+    area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN),
+    area_names: Optional[List[str]] = Query(None),
+):
+    query, params = get_fuel_types_by_building_type_query(
+        polygon=polygon, area_level=area_level, area_names=area_names
+    )
+    results = await db.execute(text(query), params)
+    mapped_results = [FuelTypesByBuildingType.from_orm(row) for row in results]
+
+    return mapped_results
+
+
+@router.get(
+    "/dashboard/buildings-affected-by-extreme-weather",
+    response_model=List[BuildingsAffectedByExtremeWeather],
+)
+async def get_buildings_affected_by_extreme_weather(
+    db: AsyncSession = Depends(get_db),
+    polygon: Optional[GeoJSONPolygon] = Query(None),
+    area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN),
+    area_names: Optional[List[str]] = Query(None),
+):
+    has_filter = bool(polygon or (area_level and area_names))
+    query, params = get_buildings_affected_by_extreme_weather_data_query(
+        polygon=polygon, area_level=area_level, area_names=area_names
+    )
+
+    results = await db.execute(text(query), params)
+    mapped_results = [
+        BuildingsAffectedByExtremeWeather.from_orm(row, 
has_filter=has_filter)
+        for row in results
+    ]
+
+    return mapped_results
+
+
+@router.get(
+    "/dashboard/no-of-in-date-and-expired-epcs",
+    response_model=List[NumberOfInDateAndExpiredEpcs],
+)
+async def get_number_of_in_date_and_expired_epcs(
+    db: AsyncSession = Depends(get_db),
+    polygon: Optional[GeoJSONPolygon] = Query(None),
+    area_level: Optional[str] = Query(None, pattern=AREA_LEVEL_PATTERN),
+    area_names: Optional[List[str]] = Query(None),
+):
+    query, params = get_number_of_in_date_and_expired_epcs_query(
+        polygon=polygon, area_level=area_level, area_names=area_names
+    )
+    results = await db.execute(text(query), params)
+    mapped_results = [NumberOfInDateAndExpiredEpcs.from_orm(row) for row in results]
+
+    return mapped_results
+
+
 @router.get(
     "/buildings",
     response_model=List[SimpleBuilding],
@@ -366,7 +678,13 @@ async def get_buildings_in_bounding_box(
 ):
     buildings_in_bounding_box_results = await db.execute(
         text(get_buildings_in_bounding_box_query()),
-        {"min_long": min_long, "max_long": max_long, "min_lat": min_lat, "max_lat": max_lat, "srid": 4326}
+        {
+            "min_long": min_long,
+            "max_long": max_long,
+            "min_lat": min_lat,
+            "max_lat": max_lat,
+            "srid": 4326,
+        },
     )
     results = [
         EpcAndOsBuildingSchema.from_orm(result)
@@ -388,10 +706,19 @@ async def get_filter_summary(
     req: Request,
     db: AsyncSession = Depends(get_db),
 ):
-    detailed_buildings_in_bounding_box_results = await db.execute(
+    detailed_buildings_in_bounding_box_results = await execute_with_timeout(
+        db,
         text(get_filterable_buildings_in_bounding_box_query()),
-        {"min_long": min_long, "max_long": max_long, "min_lat": min_lat, "max_lat": max_lat, "srid": 4326},
+        timeout_seconds=60,
+        params={
+            "min_long": min_long,
+            "max_long": max_long,
+            "min_lat": min_lat,
+            "max_lat": max_lat,
+            "srid": 4326,
+        },
+    )
+
     results = [
         FilterableBuildingSchema.from_orm(result)
         for result in detailed_buildings_in_bounding_box_results
@@ -414,7 +741,13 @@ async def get_filterable_buildings_in_bounding_box(
 ):
     
filterable_buildings_in_bounding_box_results = await db.execute( text(get_filterable_buildings_in_bounding_box_query()), - {"min_long": min_long, "max_long": max_long, "min_lat": min_lat, "max_lat": max_lat, "srid": 4326}, + { + "min_long": min_long, + "max_long": max_long, + "min_lat": min_lat, + "max_lat": max_lat, + "srid": 4326, + }, ) results = [ FilterableBuildingSchema.from_orm(result) @@ -489,7 +822,9 @@ def invalidate_flag(request: Request, invalid: InvalidateFlag): response_model=DetailedBuilding, description="returns the building that corresponds to the provided UPRN", ) -def get_building_by_uprn(uprn: str, req: Request): +async def get_building_by_uprn( + uprn: str, req: Request, db: AsyncSession = Depends(get_db) +): building_results = run_sparql_query( get_building(uprn), get_forwarding_headers(req.headers) ) @@ -505,8 +840,77 @@ def get_building_by_uprn(uprn: str, req: Request): fueltype_results = run_sparql_query( get_fueltype_for_building(uprn), get_forwarding_headers(req.headers) ) + ngd_roof_material_results = run_sparql_query( + get_ngd_roof_material_for_building(uprn), get_forwarding_headers(req.headers) + ) + ngd_solar_panel_presence_results = run_sparql_query( + get_ngd_solar_panel_presence_for_building(uprn), + get_forwarding_headers(req.headers), + ) + ngd_roof_shape_results = run_sparql_query( + get_ngd_roof_shape_for_building(uprn), get_forwarding_headers(req.headers) + ) + ngd_roof_aspect_areas_results = run_sparql_query( + get_ngd_roof_aspect_areas_for_building(uprn), + get_forwarding_headers(req.headers), + ) + + # OS NGD Buildings PG fallback + fallback_required = any( + not has_bindings(r) + for r in ( + ngd_roof_material_results, + ngd_solar_panel_presence_results, + ngd_roof_shape_results, + ngd_roof_aspect_areas_results, + ) + ) + if fallback_required: + data = await db.execute(text(get_all_ngd_attributes_pg()), {"uprn": uprn}) + rows = [DetailedBuildingSchema.from_orm(row) for row in data] + + if len(rows) == 1: + pg = rows[0] + 
+
+            def or_pg(current, builder):
+                return current if has_bindings(current) else builder()
+
+            ngd_roof_material_results = or_pg(
+                ngd_roof_material_results, lambda: {"roof_material": pg.roof_material}
+            )
+            ngd_solar_panel_presence_results = or_pg(
+                ngd_solar_panel_presence_results,
+                lambda: {"solar_panel_presence": pg.solar_panel_presence},
+            )
+            ngd_roof_shape_results = or_pg(
+                ngd_roof_shape_results, lambda: {"roof_shape": pg.roof_shape}
+            )
+            ngd_roof_aspect_areas_results = or_pg(
+                ngd_roof_aspect_areas_results,
+                lambda: {
+                    "roof_aspect_area_facing_north_m2": pg.roof_aspect_area_facing_north_m2,
+                    "roof_aspect_area_facing_north_east_m2": pg.roof_aspect_area_facing_north_east_m2,
+                    "roof_aspect_area_facing_east_m2": pg.roof_aspect_area_facing_east_m2,
+                    "roof_aspect_area_facing_south_east_m2": pg.roof_aspect_area_facing_south_east_m2,
+                    "roof_aspect_area_facing_south_m2": pg.roof_aspect_area_facing_south_m2,
+                    "roof_aspect_area_facing_south_west_m2": pg.roof_aspect_area_facing_south_west_m2,
+                    "roof_aspect_area_facing_west_m2": pg.roof_aspect_area_facing_west_m2,
+                    "roof_aspect_area_facing_north_west_m2": pg.roof_aspect_area_facing_north_west_m2,
+                    "roof_aspect_area_indeterminable_m2": pg.roof_aspect_area_indeterminable_m2,
+                },
+            )
+
     return map_single_building_response(
-        uprn, building_results, roof_results, floor_results, wall_window_results, fueltype_results
+        uprn,
+        building_results,
+        roof_results,
+        floor_results,
+        wall_window_results,
+        fueltype_results,
+        ngd_roof_material_results,
+        ngd_solar_panel_presence_results,
+        ngd_roof_shape_results,
+        ngd_roof_aspect_areas_results,
     )
 
 
@@ -572,6 +976,83 @@ def post_flag_investigate(request: Request, visited: IesEntity):
     return flag_state
 
 
+@router.get("/data/climate/wind-driven-rain")
+async def get_wind_driven_rain_data(
+    geojson=Depends(fetch_geojson_for_wind_driven_rain),
+):
+    return Response(content=geojson, media_type=APPLICATION_JSON)
+
+
+@router.get("/data/climate/icing-days")
+async def 
get_icing_days_data(geojson=Depends(fetch_geojson_for_icing_days)): + return Response(content=geojson, media_type=APPLICATION_JSON) + + +@router.get("/data/climate/hot-summer-days") +async def get_hot_summer_days_data(geojson=Depends(fetch_geojson_for_hot_summer_days)): + return Response(content=geojson, media_type=APPLICATION_JSON) + + +@router.get("/data/energy-performance/wards") +async def get_energy_performance_data_by_wards( + geojson=Depends(fetch_geojson_for_energy_performance_by_wards), +): + return Response(content=geojson, media_type=APPLICATION_JSON) + + +@router.get("/data/energy-performance/districts") +async def get_energy_performance_data_by_districts( + geojson=Depends(fetch_geojson_for_energy_performance_by_districts), +): + return Response(content=geojson, media_type=APPLICATION_JSON) + + +@router.get("/data/energy-performance/counties") +async def get_energy_performance_data_by_counties( + geojson=Depends(fetch_geojson_for_energy_performance_by_counties), +): + return Response(content=geojson, media_type=APPLICATION_JSON) + + +@router.get("/data/energy-performance/regions") +async def get_energy_performance_data_by_regions( + geojson=Depends(fetch_geojson_for_energy_performance_by_regions), +): + return Response(content=geojson, media_type=APPLICATION_JSON) + + +@router.get("/areas/regions", response_model=List[str]) +async def get_regions(db: AsyncSession = Depends(get_db)): + """Get list of all distinct region names.""" + query = get_region_names_query() + result = await db.execute(text(query)) + return [row[0] for row in result] + + +@router.get("/areas/counties", response_model=List[str]) +async def get_counties(db: AsyncSession = Depends(get_db)): + """Get list of all distinct county names.""" + query = get_county_names_query() + result = await db.execute(text(query)) + return [row[0] for row in result] + + +@router.get("/areas/districts", response_model=List[str]) +async def get_districts(db: AsyncSession = Depends(get_db)): + """Get list 
of all distinct district names.""" + query = get_district_names_query() + result = await db.execute(text(query)) + return [row[0] for row in result] + + +@router.get("/areas/wards", response_model=List[str]) +async def get_wards(db: AsyncSession = Depends(get_db)): + """Get list of all distinct ward names.""" + query = get_ward_names_query() + result = await db.execute(text(query)) + return [row[0] for row in result] + + # @app.post("/buildings/states",description="Add a new state to a building") def post_building_state(bs: IesState): if bs.stateType not in building_state_classes: diff --git a/api/services/__init__.py b/api/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/api/services/climate_service.py b/api/services/climate_service.py new file mode 100644 index 0000000..6b632d9 --- /dev/null +++ b/api/services/climate_service.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + + +from sqlalchemy import text +from db import get_db +from fastapi import Depends +from sqlalchemy.ext.asyncio import AsyncSession + +async def fetch_geojson_for_wind_driven_rain( + db: AsyncSession = Depends(get_db),): + """ Query the database to fetch wind-driven rain data in GeoJSON format. + + Keyword arguments: + db -- an AsyncSession for sql alchemy + """ + result = await db.execute(text("SELECT geojson::text AS geojson FROM iris.wind_driven_rain_projections_geojson;")) + row = result.fetchone() + return row[0] + +async def fetch_geojson_for_icing_days( + db: AsyncSession = Depends(get_db),): + """ Query the database to fetch icing days data in GeoJSON format. 
+ + Keyword arguments: + db -- an AsyncSession for sql alchemy + """ + result = await db.execute(text("SELECT geojson::text AS geojson FROM iris.icing_days_geojson;")) + row = result.fetchone() + return row[0] + +async def fetch_geojson_for_hot_summer_days( + db: AsyncSession = Depends(get_db),): + """ Query the database to fetch hot summer days data in GeoJSON format. + + Keyword arguments: + db -- an AsyncSession for sql alchemy + """ + result = await db.execute(text("SELECT geojson::text AS geojson FROM iris.hot_summer_days_geojson;")) + row = result.fetchone() + return row[0] + \ No newline at end of file diff --git a/api/services/energy_performance_service.py b/api/services/energy_performance_service.py new file mode 100644 index 0000000..dfcf8b2 --- /dev/null +++ b/api/services/energy_performance_service.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + + +from sqlalchemy import text +from db import get_db +from fastapi import Depends +from sqlalchemy.ext.asyncio import AsyncSession + +async def fetch_geojson_for_energy_performance_by_wards( + db: AsyncSession = Depends(get_db),): + """ Query the database to fetch EPC data for wards in GeoJSON format. + + Keyword arguments: + db -- an AsyncSession for sql alchemy + """ + result = await db.execute(text("SELECT geojson::text AS geojson FROM iris.uk_ward_epc;")) + row = result.fetchone() + return row[0] + +async def fetch_geojson_for_energy_performance_by_districts( + db: AsyncSession = Depends(get_db),): + """ Query the database to fetch EPC data for districts in GeoJSON format. 
+ + Keyword arguments: + db -- an AsyncSession for sql alchemy + """ + result = await db.execute(text("SELECT geojson::text AS geojson FROM iris.district_borough_unitary_epc;")) + row = result.fetchone() + return row[0] + +async def fetch_geojson_for_energy_performance_by_counties( + db: AsyncSession = Depends(get_db),): + """ Query the database to fetch EPC data for counties in GeoJSON format. + + Keyword arguments: + db -- an AsyncSession for sql alchemy + """ + result = await db.execute(text("SELECT geojson::text AS geojson FROM iris.boundary_line_ceremonial_counties_epc;")) + row = result.fetchone() + return row[0] + +async def fetch_geojson_for_energy_performance_by_regions( + db: AsyncSession = Depends(get_db),): + """ Query the database to fetch EPC data for regions in GeoJSON format. + + Keyword arguments: + db -- an AsyncSession for sql alchemy + """ + result = await db.execute(text("SELECT geojson::text AS geojson FROM iris.uk_region_epc;")) + row = result.fetchone() + return row[0] + \ No newline at end of file diff --git a/api/utils.py b/api/utils.py index 530bb4d..aadea00 100644 --- a/api/utils.py +++ b/api/utils.py @@ -7,10 +7,122 @@ "Authorization", ] + def get_headers(headers): forward_headers = {} for header in pass_through_headers: hv = headers.get(header) if hv is not None: forward_headers[header] = hv - return forward_headers \ No newline at end of file + return forward_headers + + +def has_bindings(r): + return bool(r and r.get("results") and r.get("results").get("bindings")) + + +def validate_geojson_polygon(geojson_str: str) -> str: + """ + Validates that a string is valid GeoJSON Polygon or MultiPolygon compatible with ST_GeomFromGeoJSON. 
+ + Raises: + ValueError: If the GeoJSON is invalid or not a Polygon/MultiPolygon type + + Returns: + str: The validated GeoJSON string + """ + import json + + try: + geojson = json.loads(geojson_str) + except json.JSONDecodeError as e: + raise ValueError(f"Invalid JSON: {str(e)}") + + if not isinstance(geojson, dict): + raise ValueError("GeoJSON must be a JSON object") + + if "type" not in geojson: + raise ValueError("GeoJSON must have a 'type' field") + + valid_types = {"Polygon", "MultiPolygon"} + if geojson["type"] not in valid_types: + raise ValueError( + f"GeoJSON type must be 'Polygon' or 'MultiPolygon', got '{geojson['type']}'" + ) + + if "coordinates" not in geojson: + raise ValueError("GeoJSON must have a 'coordinates' field") + + if not isinstance(geojson["coordinates"], list): + raise ValueError("GeoJSON 'coordinates' must be an array") + + return geojson_str + + +WELSH_REGIONS = { + "South Wales East PER", + "North Wales PER", + "Mid and West Wales PER", + "South Wales West PER", + "South Wales Central PER", +} + + +def is_welsh_region(region_name: str) -> bool: + return region_name in WELSH_REGIONS + + +def expand_wales_region(area_names: list[str] | None) -> list[str] | None: + """ + Expand 'Wales' to individual Welsh regions for database queries. + + If the list contains 'Wales', it is replaced with all individual Welsh regions. + Other region names are passed through unchanged. + + Args: + area_names: List of area names, potentially including 'Wales' + + Returns: + list[str] | None: List with 'Wales' expanded to individual regions, or None if input is None + + Examples: + >>> expand_wales_region(['Wales']) + ['South Wales East PER', 'North Wales PER', ...] + >>> expand_wales_region(['Wales', 'London']) + ['London', 'South Wales East PER', 'North Wales PER', ...] 
+ >>> expand_wales_region(['London'])
+ ['London']
+ """
+ if area_names is None or "Wales" not in area_names:
+ return area_names
+
+ return [name for name in area_names if name != "Wales"] + sorted(WELSH_REGIONS)
+
+
+def collapse_welsh_regions(regions: list[str]) -> list[str]:
+ """
+ Collapse individual Welsh regions to 'Wales' in a region list.
+
+ If any Welsh regions are present in the list, they are replaced with a single 'Wales' entry.
+ Other region names are passed through unchanged.
+
+ Args:
+ regions: List of region names, potentially including Welsh regions
+
+ Returns:
+ list[str]: List with the Welsh regions collapsed to a single 'Wales' entry
+
+ Examples:
+ >>> collapse_welsh_regions(['South Wales East PER', 'North Wales PER', 'London'])
+ ['London', 'Wales']
+ >>> collapse_welsh_regions(['London', 'North East'])
+ ['London', 'North East']
+ """
+ if not regions:
+ return regions
+
+ non_welsh = [r for r in regions if r not in WELSH_REGIONS]
+ if len(non_welsh) == len(regions):
+ return regions
+
+ return non_welsh + ["Wales"]
diff --git a/developer-resources/docker-compose.yml b/developer-resources/docker-compose.yml
index 24959fa..22e025d 100644
--- a/developer-resources/docker-compose.yml
+++ b/developer-resources/docker-compose.yml
@@ -13,3 +13,9 @@ services:
 POSTGRES_DB: iris
 ports:
 - "5432:5432"
+ networks:
+ - iris
+
+networks:
+ iris:
+ driver: bridge
diff --git a/developer-resources/epc-backfill/README.md b/developer-resources/epc-backfill/README.md
new file mode 100644
index 0000000..45feb3d
--- /dev/null
+++ b/developer-resources/epc-backfill/README.md
@@ -0,0 +1,55 @@
+# EPC Data Backfill
+
+Bulk backfill SAP ratings and expiry date EPC data from CSV files (local or S3) into PostgreSQL `iris.epc_assessment`.
+
+The CSV files used are from a previous import; they must contain the columns:
+
+| Column | Description | Example |
+| --------------- | --------------------------------------- | ------------- |
+| `UPRN` | Unique Property Reference Number | `10033219288` |
+| `SAPBand` | EPC rating band (A-G) | `C` |
+| `SAPRating` | SAP numeric score | `72` |
+| `LodgementDate` | Certificate lodgement date (YYYY-MM-DD) | `2008-10-01` |
+
+**Note:** Rows with missing or empty values for `SAPBand`, `SAPRating`, `UPRN`, or `LodgementDate` will be skipped, as will rows whose `CertificateType` is not `domestic`.
+
+## Installation
+
+```bash
+pip install -r requirements.txt
+```
+
+## Usage
+
+### From S3
+
+```bash
+CSV_PATH=s3://bucket/prefix/ python update_from_csv.py
+```
+
+### From Local Folder
+
+```bash
+CSV_PATH=/path/to/csv/files/ python update_from_csv.py
+```
+
+### From Local File
+
+```bash
+CSV_PATH=/path/to/file.csv python update_from_csv.py
+```
+
+## Environment Variables
+
+| Variable | Required | Default | Description |
+| ------------------- | -------- | ----------- | ------------------------------------------------------------- |
+| `CSV_PATH` | Yes | - | S3 path (s3://bucket/prefix), local directory, or single file |
+| `DB_HOST` | No | `localhost` | PostgreSQL host |
+| `DB_USERNAME` | No | `postgres` | PostgreSQL username |
+| `DB_PASSWORD` | No | `postgres` | PostgreSQL password |
+| `DB_NAME` | No | `iris` | Database name |
+| `DB_PORT` | No | `5432` | PostgreSQL port |
+
+### AWS Credentials
+
+For S3 access, configure [AWS environment variables for boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#environment-variables).
diff --git a/developer-resources/epc-backfill/infrastructure/Dockerfile b/developer-resources/epc-backfill/infrastructure/Dockerfile new file mode 100644 index 0000000..491ccdf --- /dev/null +++ b/developer-resources/epc-backfill/infrastructure/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.9.24 + +# Create a non-root user and group +RUN groupadd -r appuser && useradd -r -g appuser appuser + +WORKDIR /usr/src/app + +COPY ../requirements.txt . +COPY ../update_from_csv.py . + +RUN pip install -r requirements.txt && chown -R appuser:appuser update_from_csv.py + +# Switch to the non-root user +USER appuser + +CMD ["python", "update_from_csv.py"] diff --git a/developer-resources/epc-backfill/infrastructure/epc-backfill-job.yaml b/developer-resources/epc-backfill/infrastructure/epc-backfill-job.yaml new file mode 100644 index 0000000..f310bc9 --- /dev/null +++ b/developer-resources/epc-backfill/infrastructure/epc-backfill-job.yaml @@ -0,0 +1,38 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: epc-backfill-job + namespace: iris +spec: + template: + spec: + serviceAccountName: s3-csv-postgres-importer-sa + containers: + - name: epc-backfill-python + image: 537124944113.dkr.ecr.eu-west-2.amazonaws.com/iris/epc-backfill-python:latest + resources: + requests: + ephemeral-storage: "2Gi" + memory: 512Mi + cpu: 0.5 + limits: + memory: 1Gi + env: + - name: AWS_ROLE_ARN + value: "arn:aws:iam::537124944113:role/s3-csv-postgres-importer-role" + - name: AWS_WEB_IDENTITY_TOKEN_FILE + value: "/var/run/secrets/eks.amazonaws.com/serviceaccount/token" + - name: DB_HOST + value: "{REPLACE ME}" + - name: DB_NAME + value: "{REPLACE ME}" + - name: DB_USERNAME + value: "{REPLACE ME}" + - name: DB_PASSWORD + value: "{REPLACE ME}" + - name: CSV_PATH + value: "{REPLACE ME}" + restartPolicy: Never + securityContext: + fsGroup: 999 + backoffLimit: 4 diff --git a/developer-resources/epc-backfill/requirements.txt b/developer-resources/epc-backfill/requirements.txt new file mode 100644 index 
0000000..01c4a51 --- /dev/null +++ b/developer-resources/epc-backfill/requirements.txt @@ -0,0 +1,4 @@ +boto3>=1.34.0 +psycopg2-binary>=2.9.0 +sqlalchemy >= 2.0.0 +smart-open[s3]>=7.0.0 diff --git a/developer-resources/epc-backfill/update_from_csv.py b/developer-resources/epc-backfill/update_from_csv.py new file mode 100644 index 0000000..8733f0e --- /dev/null +++ b/developer-resources/epc-backfill/update_from_csv.py @@ -0,0 +1,255 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +import csv +import io +import logging +import os +import sys +from datetime import datetime +from pathlib import Path + +import boto3 +import sqlalchemy as sa +from smart_open import open as smart_open +from sqlalchemy import text + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + handlers=[logging.StreamHandler(sys.stdout)], +) +logger = logging.getLogger(__name__) + +BATCH_SIZE = 50000 + + +def parse_date(date_str): + if not date_str or not date_str.strip(): + return None + try: + return datetime.strptime(date_str.strip(), "%Y-%m-%d").date() + except ValueError: + return None + + +def parse_int(int_str): + if not int_str or not int_str.strip(): + return None + try: + return int(float(int_str.strip())) + except ValueError: + return None + + +def list_s3_csv_files(path): + """List CSV files from S3 bucket.""" + path_parts = path[5:].split("/", 1) + bucket = path_parts[0] + prefix = path_parts[1] if len(path_parts) > 1 else "" + + s3 = boto3.client("s3") + paginator = s3.get_paginator("list_objects_v2") + csv_files = [] + + for page in paginator.paginate(Bucket=bucket, Prefix=prefix): + if "Contents" not in page: + continue + for obj in page["Contents"]: + if obj["Key"].lower().endswith(".csv"): + csv_files.append(f"s3://{bucket}/{obj['Key']}") + + 
logger.info(f"Found {len(csv_files)} CSV files in {path}") + return csv_files + + +def list_local_csv_files(path): + """List CSV files from local filesystem.""" + p = Path(path) + if not p.exists(): + logger.error(f"Path does not exist: {path}") + return [] + + csv_files = [] + if p.is_file() and p.suffix.lower() == ".csv": + csv_files.append(str(p)) + elif p.is_dir(): + csv_files = [str(f) for f in p.glob("**/*.csv")] + + logger.info(f"Found {len(csv_files)} CSV files in {path}") + return csv_files + + +def list_csv_files(path): + """List CSV files from S3 bucket or local filesystem.""" + if path.startswith("s3://"): + return list_s3_csv_files(path) + else: + return list_local_csv_files(path) + + +def process_batch(engine, batch): + """Process a batch of EPC data.""" + if not batch: + return 0 + + with engine.begin() as conn: + raw_conn = conn.connection + + conn.execute( + text( + """ + CREATE TEMP TABLE epc_temp ( + uprn TEXT, + lodgement_date DATE, + sap_rating INTEGER + ) ON COMMIT DROP; + """ + ) + ) + + buffer = io.StringIO() + for row in batch: + buffer.write( + f"{row['uprn']}\t{row['lodgement_date']}\t{row['sap_rating']}\n" + ) + buffer.seek(0) + + raw_conn.cursor().copy_from( + buffer, + "epc_temp", + columns=["uprn", "lodgement_date", "sap_rating"], + ) + + conn.execute( + text( + """ + UPDATE iris.epc_assessment ea + SET sap_rating = et.sap_rating + FROM epc_temp et + WHERE ea.uprn = et.uprn AND ea.lodgement_date = et.lodgement_date; + """ + ) + ) + + +def process_csv_file(csv_path, engine): + logger.info(f"Processing {csv_path}") + + batch = [] + file_rows = 0 + file_stats = {"processed": 0, "skipped": 0} + + with smart_open(csv_path, "r", encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter=",") + + for row in reader: + file_rows += 1 + + certificate_type = row.get("CertificateType", "").strip() + + if certificate_type != "domestic": + file_stats["skipped"] += 1 + continue + + sap_band = row.get("SAPBand", "").strip() + if not sap_band: + 
file_stats["skipped"] += 1 + continue + + uprn = row.get("UPRN", "").strip() + lodgement_date = parse_date(row.get("LodgementDate", "")) + sap_rating = parse_int(row.get("SAPRating", "")) + if not uprn or not lodgement_date or sap_rating is None: + file_stats["skipped"] += 1 + continue + + batch.append( + { + "uprn": uprn, + "lodgement_date": lodgement_date, + "sap_rating": sap_rating, + } + ) + + if len(batch) >= BATCH_SIZE: + process_batch(engine, batch) + file_stats["processed"] += len(batch) + logger.info( + f" Batch: {file_stats['processed']:,} processed, {file_stats['skipped']:,} skipped" + ) + batch = [] + + if batch: + process_batch(engine, batch) + file_stats["processed"] += len(batch) + + logger.info( + f"Completed: {file_rows:,} scanned, " + f"{file_stats['processed']:,} processed, " + f"{file_stats['skipped']:,} skipped" + ) + + return file_stats + + +def main(): + # CSV_PATH can be: + # - s3://bucket/prefix (S3 bucket) + # - /path/to/folder (local directory) + # - /path/to/file.csv (single local file) + csv_path = os.getenv("CSV_PATH") + db_host = os.getenv("DB_HOST", "localhost") + db_port = os.getenv("DB_PORT", "5432") + db_user = os.getenv("DB_USERNAME", "postgres") + db_password = os.getenv("DB_PASSWORD", "postgres") + db_name = os.getenv("DB_NAME", "iris") + + if not all([csv_path, db_host, db_user, db_password]): + logger.error( + "Missing one or more required environment variables: CSV_PATH, DB_HOST, DB_USERNAME, DB_PASSWORD" + ) + sys.exit(1) + + engine = sa.create_engine( + f"postgresql+psycopg2://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}" + ) + + csv_files = list_csv_files(csv_path) + if not csv_files: + logger.warning("No CSV files found") + return + + stats = {"processed": 0, "skipped": 0} + start_time = datetime.now() + logger.info(f"Starting import of {len(csv_files)} file(s)") + + for csv_file in csv_files: + file_start = datetime.now() + try: + file_stats = process_csv_file(csv_file, engine) + stats["processed"] += 
file_stats["processed"] + stats["skipped"] += file_stats["skipped"] + + file_elapsed = (datetime.now() - file_start).total_seconds() + total_elapsed = (datetime.now() - start_time).total_seconds() + logger.info( + f"File took {file_elapsed:.2f}s | Total time so far: {total_elapsed:.2f}s" + ) + except Exception as e: + logger.error(f"Failed to process {csv_file}: {e}") + continue + + elapsed = (datetime.now() - start_time).total_seconds() + logger.info("=" * 60) + logger.info("IMPORT SUMMARY") + logger.info("=" * 60) + logger.info(f"Rows processed: {stats['processed']:,}") + logger.info(f"Rows skipped: {stats['skipped']:,}") + logger.info(f"Total time: {elapsed:.2f} seconds") + logger.info("=" * 60) + + +if __name__ == "__main__": + main() diff --git a/developer-resources/load_gpkg_to_postgis.py b/developer-resources/load_gpkg_to_postgis.py new file mode 100644 index 0000000..8cdb086 --- /dev/null +++ b/developer-resources/load_gpkg_to_postgis.py @@ -0,0 +1,210 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. 
+ +import os +import shutil +import subprocess +import tempfile +import urllib.request +import zipfile +from pathlib import Path + +import psycopg2 + +GPKG_SOURCE = os.getenv("GPKG_SOURCE") +DB_HOST = os.getenv("DB_HOST", "localhost") +DB_PORT = os.getenv("DB_PORT", "5432") +DB_NAME = os.getenv("DB_NAME", "iris") +DB_USERNAME = os.getenv("DB_USERNAME", "postgres") +DB_PASSWORD = os.getenv("DB_PASSWORD", "postgres") +GPKG_TABLE = os.getenv("GPKG_TABLE") +TARGET_SCHEMA = os.getenv("TARGET_SCHEMA", "iris") +TARGET_TABLE = os.getenv("TARGET_TABLE") +MATERIALIZED_VIEW = os.getenv("MATERIALIZED_VIEW") +JOIN_VIEW = os.getenv("JOIN_VIEW") +DATA_VIEW = os.getenv("DATA_VIEW") +GPKG_EXTENSION = ".gpkg" + + +def download_file(url: str, dest: Path): + """Download a file from URL to destination.""" + print(f"Downloading {url} → {dest}") + if url.endswith(GPKG_EXTENSION): + with urllib.request.urlopen(url) as response, open(dest, "wb") as out_file: + out_file.write(response.read()) + print("Download complete.") + else: + zip_file = os.path.join(dest, "data.zip") + unzip_file = os.path.join(dest, "data") + urllib.request.urlretrieve(url, zip_file) + print(f"Zip file downloaded: {zip_file}") + with zipfile.ZipFile(zip_file, "r") as zip_ref: + zip_ref.extractall(unzip_file) + print(f"Extracted to : {unzip_file}") + for root, dirs, files in os.walk(unzip_file): + for file in files: + if file.endswith(GPKG_EXTENSION): + file_gpkg = os.path.join(root, file) + print(f"GeoPackage available: {file_gpkg}") + shutil.copyfile(file_gpkg, f"{dest}/data{GPKG_EXTENSION}") + outfile = f"{dest}/data.gpkg" + print(f"Download complete: {outfile}") + else: + print(f"No GeoPAckage found {file}") + + +def run_db_command(command: str, fetchone=False): + conn = psycopg2.connect( + host=DB_HOST, + port=DB_PORT, + dbname=DB_NAME, + user=DB_USERNAME, + password=DB_PASSWORD, + ) + conn.autocommit = True + with conn.cursor() as cur: + cur.execute(command) + if fetchone: + row = cur.fetchone() + return row[0] 
if row else None + conn.close() + + +def is_table_populated(): + count = run_db_command( + f"SELECT COUNT(*) FROM {TARGET_SCHEMA}.{TARGET_TABLE};", fetchone=True + ) + return count > 0 + + +def run_ogr2ogr(gpkg_path: Path): + """Run ogr2ogr to import GPKG into PostGIS.""" + pg_conn_str = ( + f"PG:host={DB_HOST} port={DB_PORT} " + f"dbname={DB_NAME} user={DB_USERNAME} password={DB_PASSWORD}" + ) + run_db_command(f"TRUNCATE {TARGET_SCHEMA}.{TARGET_TABLE};") + cmd = [ + "ogr2ogr", + "-f", + "PostgreSQL", + pg_conn_str, + str(gpkg_path), + "-nln", + f"{TARGET_SCHEMA}.{TARGET_TABLE}", + "-lco", + "SCHEMA=" + TARGET_SCHEMA, + "-append", + "-t_srs", + "EPSG:4326", + ] + # Redact password in the connection string for logging + redacted_cmd = " ".join(cmd.copy()) + if "password=" in redacted_cmd: + import re + + redacted_cmd = re.sub(r"password=[^ ]+", "password=****", redacted_cmd) + print("Running:", redacted_cmd) + subprocess.run(cmd, check=True) + print("ogr2ogr import complete.") + + +def run_ogr2ogr_table(gpkg_path: Path): + """Run ogr2ogr_table to import a single table GPKG into PostGIS.""" + pg_conn_str = ( + f"PG:host={DB_HOST} port={DB_PORT} " + f"dbname={DB_NAME} user={DB_USERNAME} password={DB_PASSWORD}" + ) + run_db_command(f"TRUNCATE {TARGET_SCHEMA}.{TARGET_TABLE} CASCADE;") + cmd = [ + "ogr2ogr", + "-f", + "PostgreSQL", + pg_conn_str, + str(gpkg_path), + "-nln", + f"{TARGET_SCHEMA}.{TARGET_TABLE}", + "-lco", + "SCHEMA=" + TARGET_SCHEMA, + "-sql", + f"SELECT * FROM {GPKG_TABLE}", + "-append", + "-t_srs", + "EPSG:4326", + ] + # Redact password in the connection string for logging + redacted_cmd = " ".join(cmd.copy()) + if "password=" in redacted_cmd: + import re + + redacted_cmd = re.sub(r"password=[^ ]+", "password=****", redacted_cmd) + print("Running:", redacted_cmd) + subprocess.run(cmd, check=True) + print("ogr2ogr import complete.") + + +def refresh_materialized_view(): + if MATERIALIZED_VIEW is not None and MATERIALIZED_VIEW != "": + print(f"Refreshing 
materialized view {MATERIALIZED_VIEW}") + run_db_command(f"REFRESH MATERIALIZED VIEW {MATERIALIZED_VIEW};") + print("Materialized view refresh complete.") + else: + print("No materialized view given to refresh.") + + +def refresh_join_view(): + if JOIN_VIEW is not None and JOIN_VIEW != "": + print(f"Refreshing materialized view {JOIN_VIEW}") + run_db_command(f"REFRESH MATERIALIZED VIEW {JOIN_VIEW};") + print("Materialized view refresh complete.") + else: + print("No join view given to refresh.") + + +def refresh_data_view(): + if DATA_VIEW is not None and DATA_VIEW != "": + print(f"Refreshing materialized view {DATA_VIEW}") + run_db_command(f"REFRESH MATERIALIZED VIEW {DATA_VIEW};") + print("Materialized view refresh complete.") + else: + print("No data view given to refresh.") + + +def handle_geopackage(tmpdir): + gpkg_file = Path(tmpdir) / f"data{GPKG_EXTENSION}" + download_file(GPKG_SOURCE, gpkg_file) + if GPKG_TABLE == "none": + run_ogr2ogr(gpkg_file) + else: + run_ogr2ogr_table(gpkg_file) + + +def handle_zip(tmpdir): + zip_file = Path(tmpdir) + gpkg_file = Path(tmpdir) / f"data{GPKG_EXTENSION}" + download_file(GPKG_SOURCE, zip_file) + if GPKG_TABLE == "none": + run_ogr2ogr(gpkg_file) + else: + run_ogr2ogr_table(gpkg_file) + refresh_join_view() + refresh_data_view() + + +def main(): + if not is_table_populated(): + with tempfile.TemporaryDirectory() as tmpdir: + if GPKG_SOURCE.endswith(GPKG_EXTENSION): + handle_geopackage(tmpdir) + else: + handle_zip(tmpdir) + refresh_materialized_view() + else: + print( + f"Table {TARGET_SCHEMA}.{TARGET_TABLE} already populated. Skipping data load." 
+ ) + + +if __name__ == "__main__": + main() diff --git a/developer-resources/run-query-job.yaml b/developer-resources/run-query-job.yaml deleted file mode 100644 index 11fc886..0000000 --- a/developer-resources/run-query-job.yaml +++ /dev/null @@ -1,56 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: run-query-job - namespace: iris -spec: - template: - spec: - containers: - - name: postgres-import-data-clearer - image: 537124944113.dkr.ecr.eu-west-2.amazonaws.com/iris/s3-csv-postgres-importer:1.0 - resources: - limits: - memory: "2GiB" - requests: - memory: "2GiB" - cpu: 2 - env: - - name: DB_HOST - value: REPLACE_ME - - name: DB_PORT - value: "5432" - - name: DB_NAME - value: "iris" - - name: DB_USERNAME - value: REPLACE_ME - - name: DB_PASSWORD - value: REPLACE_ME - command: ["sh", "-c"] - args: - - | - # Wait for PostgreSQL to be ready - while ! pg_isready -h ${DB_HOST} -p ${DB_PORT}; do - echo "Waiting for database connection..." - sleep 2 - done - - echo "Running provided query..." - - PGPASSWORD=${DB_PASSWORD} psql \ - -h ${DB_HOST} \ - -p ${DB_PORT} \ - -U ${DB_USERNAME} \ - -d ${DB_NAME} \ - -t -c "REPLACE ME WITH ACTUAL QUERY" - - # Check for errors - if [ $? -ne 0 ]; then - echo "Failed to run provided query!" - exit 1 - fi - - echo "Successfully ran provided query!" 
- restartPolicy: OnFailure - securityContext: - fsGroup: 999 diff --git a/developer-resources/sync_region_fks_dbu.py b/developer-resources/sync_region_fks_dbu.py new file mode 100644 index 0000000..a093beb --- /dev/null +++ b/developer-resources/sync_region_fks_dbu.py @@ -0,0 +1,92 @@ +import os + +import psycopg2 + +DB_HOST = os.getenv("DB_HOST", "localhost") +DB_PORT = os.getenv("DB_PORT", "5432") +DB_NAME = os.getenv("DB_NAME", "iris") +DB_USERNAME = os.getenv("DB_USERNAME", "postgres") +DB_PASSWORD = os.getenv("DB_PASSWORD", "postgres") + +UNSYNCED_CHECK_SCRIPT = """ + SELECT COUNT(*) FROM iris.district_borough_unitary + WHERE english_region_fid IS NULL AND scotland_and_wales_region_fid IS NULL; +""" + +SYNCED_CHECK_SCRIPT = """ + SELECT COUNT(*) FROM iris.district_borough_unitary + WHERE english_region_fid IS NOT NULL AND scotland_and_wales_region_fid IS NULL + OR english_region_fid IS NULL AND scotland_and_wales_region_fid IS NOT NULL; +""" + +ENGLISH_REGION_UPDATE_SCRIPT = """ + CREATE TEMP TABLE temp_english_region( + fid INT, + geometry geometry(MultiPolygon, 4326) + ); + + INSERT INTO temp_english_region + SELECT fid, ST_TRANSFORM(ST_BUFFER(ST_TRANSFORM(geometry, 27700), 5000::double precision), 4326) as geometry + FROM iris.english_region; + + UPDATE iris.district_borough_unitary dbu + SET english_region_fid = r.fid + FROM ( + SELECT fid, geometry + FROM temp_english_region + ) r + WHERE ST_CONTAINS(r.geometry, ST_SIMPLIFY(dbu.geometry, 0.0001::double precision)); +""" + +SCOTLAND_AND_WALES_REGION_UPDATE_SCRIPT = """ + CREATE TEMP TABLE temp_scotland_and_wales_region( + fid INT, + geometry geometry(MultiPolygon, 4326) + ); + + INSERT INTO temp_scotland_and_wales_region + SELECT fid, ST_TRANSFORM(ST_BUFFER(ST_TRANSFORM(geometry, 27700), 5000::double precision), 4326) as geometry + FROM iris.scotland_and_wales_region; + + UPDATE iris.district_borough_unitary dbu + SET scotland_and_wales_region_fid = r.fid + FROM ( + SELECT fid, geometry + FROM 
temp_scotland_and_wales_region + ) r + WHERE ST_CONTAINS(r.geometry, ST_SIMPLIFY(dbu.geometry, 0.0001::double precision)); +""" + + +def run_db_command(command: str, fetchone=False): + conn = psycopg2.connect( + host=DB_HOST, + port=DB_PORT, + dbname=DB_NAME, + user=DB_USERNAME, + password=DB_PASSWORD, + ) + conn.autocommit = True + with conn.cursor() as cur: + cur.execute(command) + if fetchone: + row = cur.fetchone() + return row[0] if row else None + conn.close() + + +if __name__ == "__main__": + print("Syncing region foriegn keys on district_borough_unitary table...") + unsynced_records = run_db_command(UNSYNCED_CHECK_SCRIPT, fetchone=True) + + print(f"{unsynced_records} records to sync.") + + if unsynced_records > 0: + run_db_command(ENGLISH_REGION_UPDATE_SCRIPT) + run_db_command(SCOTLAND_AND_WALES_REGION_UPDATE_SCRIPT) + + synced_records = run_db_command(SYNCED_CHECK_SCRIPT, fetchone=True) + + print(f"Synced region foriegn keys for {synced_records} records.") + else: + print("No records to sync.") diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 0000000..e635361 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,16 @@ +#!/bin/bash +set -e + +alembic upgrade head +MATERIALIZED_VIEW=iris.wind_driven_rain_projections_geojson TARGET_TABLE=wind_driven_rain_projections GPKG_SOURCE=https://services.arcgis.com/Lq3V5RFuTBC9I7kv/arcgis/rest/services/Annual_Index_of_Wind_Driven_Rain_Projections_5km/FeatureServer/replicafilescache/Annual_Index_of_Wind_Driven_Rain_Projections_5km_-6134910210859057092.gpkg GPKG_TABLE=Annual_Index_of_Wind_Driven_Rain___Projections__5km_ python load_gpkg_to_postgis.py +MATERIALIZED_VIEW=iris.icing_days_geojson TARGET_TABLE=annual_count_of_icing_days_1991_2020 GPKG_SOURCE=https://services.arcgis.com/Lq3V5RFuTBC9I7kv/arcgis/rest/services/Annual_Count_of_Icing_Days_1991_2020/FeatureServer/replicafilescache/Annual_Count_of_Icing_Days_1991_2020_5977951113111576455.gpkg GPKG_TABLE=annual_count_of_icing_days_1991_2020 python 
load_gpkg_to_postgis.py +MATERIALIZED_VIEW=iris.hot_summer_days_geojson TARGET_TABLE=annual_count_of_hot_summer_days_projections_12km GPKG_SOURCE=https://services.arcgis.com/Lq3V5RFuTBC9I7kv/arcgis/rest/services/Annual_Count_of_Hot_Days___Projections__12km_grid_/FeatureServer/replicafilescache/Annual_Count_of_Hot_Days___Projections__12km_grid__5151054028377652076.gpkg GPKG_TABLE=annual_count_of_hot_summer_days_projections_12km python developer-resources/load_gpkg_to_postgis.py +JOIN_VIEW=iris.uk_ward DATA_VIEW=iris.uk_ward_epc_data MATERIALIZED_VIEW=iris.uk_ward_epc TARGET_TABLE=district_borough_unitary_ward GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=district_borough_unitary_ward python developer-resources/load_gpkg_to_postgis.py +JOIN_VIEW=iris.uk_ward DATA_VIEW=iris.uk_ward_epc_data MATERIALIZED_VIEW=iris.uk_ward_epc TARGET_TABLE=unitary_electoral_division GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=unitary_electoral_division python developer-resources/load_gpkg_to_postgis.py +JOIN_VIEW=iris.uk_region DATA_VIEW=iris.uk_region_epc_data MATERIALIZED_VIEW=iris.uk_region_epc TARGET_TABLE=scotland_and_wales_region GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=scotland_and_wales_region python developer-resources/load_gpkg_to_postgis.py +JOIN_VIEW=iris.uk_region DATA_VIEW=iris.uk_region_epc_data MATERIALIZED_VIEW=iris.uk_region_epc TARGET_TABLE=english_region GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=english_region python developer-resources/load_gpkg_to_postgis.py +DATA_VIEW=iris.district_borough_unitary_epc_data MATERIALIZED_VIEW=iris.district_borough_unitary_epc TARGET_TABLE=district_borough_unitary 
GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=district_borough_unitary python developer-resources/load_gpkg_to_postgis.py +DATA_VIEW=iris.boundary_line_ceremonial_counties_epc_data MATERIALIZED_VIEW=iris.boundary_line_ceremonial_counties_epc TARGET_TABLE=boundary_line_ceremonial_counties GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=boundary_line_ceremonial_counties python developer-resources/load_gpkg_to_postgis.py +TARGET_TABLE=country_region GPKG_SOURCE='https://api.os.uk/downloads/v1/products/BoundaryLine/downloads?area=GB&format=GeoPackage&redirect' GPKG_TABLE=country_region python developer-resources/load_gpkg_to_postgis.py +python developer-resources/sync_region_fks_dbu.py +python api/main.py --host 0.0.0.0 diff --git a/requirements.txt b/requirements.txt index 9fb5c50..8ce8d3c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ fastapi==0.115.11 mock>=4.0.3 pydantic==2.5.0 pytest>=7.0.0 +pytest-asyncio>=0.21.0 pytest-mock>=3.10.0 python-dotenv==1.0.0 rdflib==7.1.3 diff --git a/unit_tests/conftest.py b/unit_tests/conftest.py index e4e0047..6d8a128 100644 --- a/unit_tests/conftest.py +++ b/unit_tests/conftest.py @@ -6,6 +6,10 @@ import sys os.environ["IDENTITY_API_URL"] = "https://test.com" +os.environ["ENVIRONMENT"] = "TEST" +os.environ["DB_USERNAME"] = "test" +os.environ["DB_PASSWORD"] = "test" +os.environ["DB_HOST"] = "localhost" api_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../api")) if api_dir not in sys.path: diff --git a/unit_tests/query_response_mocks.py b/unit_tests/query_response_mocks.py index 5c8d82d..a8c1bf7 100644 --- a/unit_tests/query_response_mocks.py +++ b/unit_tests/query_response_mocks.py @@ -408,5 +408,5 @@ def mock_known_building(query, headers): if query == get_walls_and_windows_for_building(uprn): return wall_window_query_response(uprn) else: - 
return "default response" + return empty_query_response() diff --git a/unit_tests/test_building_retrieval.py b/unit_tests/test_building_retrieval.py index 0b2b188..245e7f8 100644 --- a/unit_tests/test_building_retrieval.py +++ b/unit_tests/test_building_retrieval.py @@ -10,14 +10,19 @@ from api.query import ( get_building, - get_buildings_in_bounding_box_query, - get_filterable_buildings_in_bounding_box_query, get_floor_for_building, + get_fueltype_for_building, + get_ngd_roof_aspect_areas_for_building, + get_ngd_roof_material_for_building, + get_ngd_roof_shape_for_building, + get_ngd_solar_panel_presence_for_building, get_roof_for_building, get_walls_and_windows_for_building, ) from api.routes import router from unit_tests.query_response_mocks import empty_query_response, mock_known_building +from unittest.mock import AsyncMock +import db as db_module @pytest.fixture(autouse=True) @@ -33,9 +38,16 @@ def set_identity_api_url(monkeypatch): @pytest.fixture def client(): - """Create a test client with the router mounted on a FastAPI app.""" + async def mock_get_db(): + mock_db_session = AsyncMock() + mock_db_result = AsyncMock() + mock_db_result.__iter__ = lambda self: iter([]) + mock_db_session.execute.return_value = mock_db_result + yield mock_db_session + app = FastAPI() app.include_router(router) + app.dependency_overrides[db_module.get_db] = mock_get_db return TestClient(app) @@ -140,6 +152,7 @@ def verify_building_data( assert result["longitude"] == exp_long assert result["latitude"] == exp_lat + ##TODO: reinstate this test with bounding box params # def verify_query_run_with_correct_args(self, mock_query): # polygon = "POLYGON((-1.1835 50.6445, -1.1507 50.6445, -1.1507 50.7261, -1.1835 50.7261, -1.1835 50.6445))" @@ -196,6 +209,7 @@ def test_empty_results(self, client, monkeypatch): # # self.verify_query_run_with_correct_args(mock_query) + ##TODO: reinstate this test with bounding box params # def verify_query_run_with_correct_args(self, mock_query): # polygon 
= "POLYGON((-1.1835 50.6445, -1.1507 50.6445, -1.1507 50.7261, -1.1835 50.7261, -1.1835 50.6445))" @@ -208,8 +222,6 @@ def test_empty_results(self, client, monkeypatch): class TestGetBuildingByUprn: def test_successful_get_building(self, client, monkeypatch): - """Test successful retrieval of a building by UPRN""" - # Mock the run_sparql_query function uprn = 10023456789 mock_query = MagicMock() mock_query.side_effect = mock_known_building @@ -220,7 +232,6 @@ def test_successful_get_building(self, client, monkeypatch): assert response.status_code == 200 data = response.json() - # Check the data assert data["uprn"] == f"{uprn}" assert data["lodgement_date"] == "2024-03-30" assert data["built_form"] == "SemiDetached" @@ -234,26 +245,28 @@ def test_successful_get_building(self, client, monkeypatch): assert data["wall_insulation"] == "InsulatedWall" assert data["window_glazing"] == "DoubleGlazingBefore2002" - # Verify run_sparql_query was called with the correct params - assert mock_query.call_count == 4 + assert mock_query.call_count == 9 mock_query.assert_any_call(get_building(uprn), ANY) mock_query.assert_any_call(get_roof_for_building(uprn), ANY) mock_query.assert_any_call(get_floor_for_building(uprn), ANY) mock_query.assert_any_call(get_walls_and_windows_for_building(uprn), ANY) - call_args = mock_query.call_args[0] - assert str(uprn) in call_args[0] + mock_query.assert_any_call(get_fueltype_for_building(uprn), ANY) + mock_query.assert_any_call(get_ngd_roof_material_for_building(uprn), ANY) + mock_query.assert_any_call(get_ngd_solar_panel_presence_for_building(uprn), ANY) + mock_query.assert_any_call(get_ngd_roof_shape_for_building(uprn), ANY) + mock_query.assert_any_call(get_ngd_roof_aspect_areas_for_building(uprn), ANY) def test_building_not_found(self, client, monkeypatch): - """Test when building is not found""" - # Mock the run_sparql_query function to return empty results mock_query = MagicMock(return_value=empty_query_response()) 
monkeypatch.setattr("api.routes.run_sparql_query", mock_query) uprn = 99999999999 response = client.get(f"/buildings/{uprn}") - assert response.status_code == 404 - assert response.json() == {"detail": f"Building with UPRN {uprn} not found"} + assert response.status_code == 200 + data = response.json() + assert data["uprn"] == str(uprn) + assert data["lodgement_date"] is None if __name__ == "__main__": diff --git a/unit_tests/test_db.py b/unit_tests/test_db.py new file mode 100644 index 0000000..02cc30c --- /dev/null +++ b/unit_tests/test_db.py @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +import pytest +from unittest.mock import AsyncMock, patch +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession +from db import execute_with_timeout + + +@pytest.mark.asyncio +async def test_execute_with_timeout_sets_and_resets_timeout(): + """Test that execute_with_timeout sets LOCAL timeout before query and resets after success""" + mock_session = AsyncMock(spec=AsyncSession) + mock_result = AsyncMock() + mock_session.execute.return_value = mock_result + + query = text("SELECT * FROM buildings") + timeout_seconds = 60 + params = {"uprn": "12345"} + + with patch("db.settings.DB_QUERY_TIMEOUT", 10): + result = await execute_with_timeout(mock_session, query, timeout_seconds, params) + + # Verify timeout was set before query execution + assert mock_session.execute.call_count == 3 + + # First call: SET LOCAL statement_timeout + first_call = mock_session.execute.call_args_list[0] + set_timeout_query = first_call[0][0] + assert set_timeout_query.text == "SET LOCAL statement_timeout = '60000'" + + # Second call: the actual query with params + second_call = mock_session.execute.call_args_list[1] + assert second_call[0][0] == query + assert second_call[0][1] 
== params + + # Third call: reset timeout - should reset to query_timeout value + third_call = mock_session.execute.call_args_list[2] + reset_timeout_query = third_call[0][0] + assert reset_timeout_query.text == "SET LOCAL statement_timeout = '10000'" + + assert result == mock_result + + +@pytest.mark.asyncio +async def test_execute_with_timeout_does_not_reset_on_error(): + """Test that execute_with_timeout does NOT try to reset timeout when query fails. + + This is important because when a query times out, the transaction is in a failed state + and cannot execute any more SQL (including the reset). By not attempting the reset, + we avoid InFailedSQLTransactionError. + """ + mock_session = AsyncMock(spec=AsyncSession) + + # Make the second execute call (the actual query) raise an exception + mock_session.execute.side_effect = [ + AsyncMock(), # First call (SET LOCAL timeout) succeeds + Exception("Query failed"), # Second call (query) fails + ] + + query = text("SELECT * FROM buildings") + timeout_seconds = 30 + + with pytest.raises(Exception, match="Query failed"): + await execute_with_timeout(mock_session, query, timeout_seconds) + + # Verify reset was NOT attempted after error (only 2 calls, not 3) + assert mock_session.execute.call_count == 2 diff --git a/unit_tests/test_main.py b/unit_tests/test_main.py new file mode 100644 index 0000000..956a30b --- /dev/null +++ b/unit_tests/test_main.py @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. 
+ +import asyncio +import json + +import asyncpg.exceptions +import pytest +import sqlalchemy.exc +from fastapi import APIRouter, Depends, FastAPI, status +from fastapi.testclient import TestClient +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession +from unittest.mock import AsyncMock, MagicMock + +import db as db_module +from main import app, query_timeout_handler + + +class MockRequest: + pass + + +def test_query_timeout_handler_returns_504_for_query_canceled(): + original_db_error = MagicMock() + original_db_error.sqlstate = asyncpg.exceptions.QueryCanceledError.sqlstate + + ex = sqlalchemy.exc.DBAPIError("statement", {}, original_db_error, None) + request = MockRequest() + + response = asyncio.run(query_timeout_handler(request, ex)) + + assert response.status_code == status.HTTP_504_GATEWAY_TIMEOUT + assert response.body == b'{"detail":"The request took too long to complete.","error":"QueryCanceledError"}' + + +def test_query_timeout_handler_returns_json_error_response(): + original_db_error = MagicMock() + original_db_error.sqlstate = asyncpg.exceptions.QueryCanceledError.sqlstate + + ex = sqlalchemy.exc.DBAPIError("statement", {}, original_db_error, None) + request = MockRequest() + + response = asyncio.run(query_timeout_handler(request, ex)) + + body = json.loads(response.body) + + assert "detail" in body + assert "error" in body + assert body["detail"] == "The request took too long to complete." 
+ assert body["error"] == "QueryCanceledError" + + +def test_query_timeout_handler_reraises_other_dbapi_errors(): + original_db_error = MagicMock() + original_db_error.sqlstate = "23505" + + ex = sqlalchemy.exc.DBAPIError("statement", {}, original_db_error, None) + request = MockRequest() + + with pytest.raises(sqlalchemy.exc.DBAPIError): + asyncio.run(query_timeout_handler(request, ex)) + + +def test_query_timeout_handler_reraises_when_no_sqlstate(): + original_db_error = MagicMock(spec=[]) + delattr(original_db_error, "sqlstate") if hasattr(original_db_error, "sqlstate") else None + + ex = sqlalchemy.exc.DBAPIError("statement", {}, original_db_error, None) + request = MockRequest() + + with pytest.raises(sqlalchemy.exc.DBAPIError): + asyncio.run(query_timeout_handler(request, ex)) + + +def test_query_timeout_handler_reraises_when_sqlstate_is_none(): + original_db_error = MagicMock() + original_db_error.sqlstate = None + + ex = sqlalchemy.exc.DBAPIError("statement", {}, original_db_error, None) + request = MockRequest() + + with pytest.raises(sqlalchemy.exc.DBAPIError): + asyncio.run(query_timeout_handler(request, ex)) + + +def test_exception_handler_is_registered(): + assert sqlalchemy.exc.DBAPIError in app.exception_handlers + assert app.exception_handlers[sqlalchemy.exc.DBAPIError] == query_timeout_handler + + +def test_query_timeout_integration(): + async def mock_get_db_that_fails(): + mock_session = AsyncMock() + + original_db_error = MagicMock() + original_db_error.sqlstate = asyncpg.exceptions.QueryCanceledError.sqlstate + ex = sqlalchemy.exc.DBAPIError("statement", {}, original_db_error, None) + + mock_session.execute.side_effect = ex + yield mock_session + + test_router = APIRouter() + + @test_router.get("/test-timeout") + async def test_timeout_endpoint(db: AsyncSession = Depends(db_module.get_db)): + await db.execute(text("SELECT * FROM buildings")) + return {"status": "ok"} + + test_app = FastAPI() + test_app.include_router(test_router) + 
test_app.add_exception_handler(sqlalchemy.exc.DBAPIError, query_timeout_handler) + test_app.dependency_overrides[db_module.get_db] = mock_get_db_that_fails + + client = TestClient(test_app) + + response = client.get("/test-timeout") + + assert response.status_code == status.HTTP_504_GATEWAY_TIMEOUT + data = response.json() + assert data["error"] == "QueryCanceledError" + assert "took too long" in data["detail"] diff --git a/unit_tests/test_routes.py b/unit_tests/test_routes.py index f86007c..269db92 100644 --- a/unit_tests/test_routes.py +++ b/unit_tests/test_routes.py @@ -12,6 +12,11 @@ import api.routes as routes from api.config import get_settings +from fastapi import FastAPI +from unittest.mock import AsyncMock +import db as db_module + + # --- Dummy classes and helper functions for testing --- @@ -67,10 +72,16 @@ def send(self, record): # --- Fixtures for FastAPI endpoints --- @pytest.fixture def client(): - from fastapi import FastAPI + async def mock_get_db(): + mock_db_session = AsyncMock() + mock_db_result = AsyncMock() + mock_db_result.__iter__ = lambda self: iter([]) + mock_db_session.execute.return_value = mock_db_result + yield mock_db_session app = FastAPI() app.include_router(routes.router) + app.dependency_overrides[db_module.get_db] = mock_get_db return TestClient(app) @@ -585,3 +596,183 @@ def __init__( with pytest.raises(HTTPException) as excinfo: routes.post_assessment(dummy_ass) assert excinfo.value.status_code == 404 + + +def test_epc_ratings_invalid_area_level(client): + response = client.get( + "/dashboard/epc-ratings", + params={"area_level": "invalid", "area_names": ["Test"]} + ) + assert response.status_code == 422 + + +def test_epc_ratings_valid_area_level(client, monkeypatch): + mock_result = AsyncMock() + mock_result.__iter__ = lambda self: iter([]) + mock_db = AsyncMock() + mock_db.execute.return_value = mock_result + + async def mock_get_db(): + yield mock_db + + monkeypatch.setattr(db_module, "get_db", mock_get_db) + + response = 
client.get( + "/dashboard/epc-ratings", + params={"area_level": "region", "area_names": ["Test"]} + ) + assert response.status_code == 200 + + +def test_sap_rating_overtime_invalid_area_level(client, monkeypatch): + mock_result = AsyncMock() + mock_result.__iter__ = lambda self: iter([]) + mock_db = AsyncMock() + mock_db.execute.return_value = mock_result + + async def mock_get_db(): + yield mock_db + + monkeypatch.setattr(db_module, "get_db", mock_get_db) + + response = client.get( + "/dashboard/sap-rating-overtime", + params={"area_level": "invalid", "area_names": ["Test"]} + ) + assert response.status_code == 422 + + +def test_fuel_types_invalid_area_level(client): + response = client.get( + "/dashboard/fuel-types-by-building-type", + params={"area_level": "invalid", "area_names": ["Test"]} + ) + assert response.status_code == 422 + + +def test_building_attributes_invalid_area_level(client): + response = client.get( + "/dashboard/building-attributes-percentage-per-region", + params={"area_level": "invalid", "area_names": ["Test"]} + ) + assert response.status_code == 422 + + +def test_epc_ratings_by_feature_valid_feature(client, monkeypatch): + mock_result = AsyncMock() + mock_result.__iter__ = lambda self: iter([]) + mock_db = AsyncMock() + mock_db.execute.return_value = mock_result + + async def mock_get_db(): + yield mock_db + + monkeypatch.setattr(db_module, "get_db", mock_get_db) + + response = client.get( + "/dashboard/epc-ratings-by-feature", + params={"feature": "glazing_types"} + ) + assert response.status_code == 200 + + +def test_epc_ratings_by_feature_invalid_feature(client): + response = client.get( + "/dashboard/epc-ratings-by-feature", + params={"feature": "invalid_feature"} + ) + assert response.status_code == 422 + + +def test_epc_ratings_by_feature_with_area_filter(client, monkeypatch): + mock_result = AsyncMock() + mock_result.__iter__ = lambda self: iter([]) + mock_db = AsyncMock() + mock_db.execute.return_value = mock_result + + async def 
mock_get_db(): + yield mock_db + + monkeypatch.setattr(db_module, "get_db", mock_get_db) + + response = client.get( + "/dashboard/epc-ratings-by-feature", + params={ + "feature": "fuel_types", + "area_level": "region", + "area_names": ["East Midlands", "Eastern"] + } + ) + assert response.status_code == 200 + + +def test_epc_ratings_by_feature_invalid_area_level(client): + response = client.get( + "/dashboard/epc-ratings-by-feature", + params={ + "feature": "glazing_types", + "area_level": "invalid", + "area_names": ["Test"] + } + ) + assert response.status_code == 422 + + +def test_epc_ratings_by_area_level_valid(client, monkeypatch): + mock_result = AsyncMock() + mock_result.__iter__ = lambda self: iter([]) + mock_db = AsyncMock() + mock_db.execute.return_value = mock_result + + async def mock_get_db(): + yield mock_db + + monkeypatch.setattr(db_module, "get_db", mock_get_db) + + response = client.get( + "/dashboard/epc-ratings-by-area-level", + params={"group_by_level": "region"} + ) + assert response.status_code == 200 + + +def test_epc_ratings_by_area_level_invalid_group_by(client): + response = client.get( + "/dashboard/epc-ratings-by-area-level", + params={"group_by_level": "invalid"} + ) + assert response.status_code == 422 + + +def test_epc_ratings_by_area_level_with_filter(client, monkeypatch): + mock_result = AsyncMock() + mock_result.__iter__ = lambda self: iter([]) + mock_db = AsyncMock() + mock_db.execute.return_value = mock_result + + async def mock_get_db(): + yield mock_db + + monkeypatch.setattr(db_module, "get_db", mock_get_db) + + response = client.get( + "/dashboard/epc-ratings-by-area-level", + params={ + "group_by_level": "county", + "filter_area_level": "region", + "filter_area_names": ["East Midlands"] + } + ) + assert response.status_code == 200 + + +def test_epc_ratings_by_area_level_invalid_filter_level(client): + response = client.get( + "/dashboard/epc-ratings-by-area-level", + params={ + "group_by_level": "county", + 
"filter_area_level": "invalid", + "filter_area_names": ["Test"] + } + ) + assert response.status_code == 422 diff --git a/unit_tests/test_utils.py b/unit_tests/test_utils.py new file mode 100644 index 0000000..8984217 --- /dev/null +++ b/unit_tests/test_utils.py @@ -0,0 +1,179 @@ +# SPDX-License-Identifier: Apache-2.0 +# © Crown Copyright 2025. This work has been developed by the National Digital Twin Programme +# and is legally attributed to the Department for Business and Trade (UK) as the governing entity. + +import pytest +from api.utils import ( + validate_geojson_polygon, + is_welsh_region, + expand_wales_region, + collapse_welsh_regions, + WELSH_REGIONS, +) + + +class TestValidateGeojsonPolygon: + def test_valid_polygon(self): + valid_polygon = '{"type": "Polygon", "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]}' + validate_geojson_polygon(valid_polygon) + + def test_valid_multipolygon(self): + valid_multipolygon = '{"type": "MultiPolygon", "coordinates": [[[[0, 0], [1, 0], [1, 1], [0, 0]]]]}' + validate_geojson_polygon(valid_multipolygon) + + def test_invalid_json(self): + with pytest.raises(ValueError, match="Invalid JSON"): + validate_geojson_polygon("not valid json") + + def test_json_not_object(self): + with pytest.raises(ValueError, match="GeoJSON must be a JSON object"): + validate_geojson_polygon('["array", "not", "object"]') + + def test_missing_type_field(self): + with pytest.raises(ValueError, match="GeoJSON must have a 'type' field"): + validate_geojson_polygon('{"coordinates": [[0, 0]]}') + + def test_invalid_type(self): + with pytest.raises( + ValueError, match="GeoJSON type must be 'Polygon' or 'MultiPolygon'" + ): + validate_geojson_polygon('{"type": "InvalidType", "coordinates": [[0, 0]]}') + + def test_point_not_allowed(self): + with pytest.raises( + ValueError, match="GeoJSON type must be 'Polygon' or 'MultiPolygon'" + ): + validate_geojson_polygon('{"type": "Point", "coordinates": [0, 0]}') + + def 
test_linestring_not_allowed(self): + with pytest.raises( + ValueError, match="GeoJSON type must be 'Polygon' or 'MultiPolygon'" + ): + validate_geojson_polygon( + '{"type": "LineString", "coordinates": [[0, 0], [1, 1]]}' + ) + + def test_missing_coordinates_field(self): + with pytest.raises(ValueError, match="GeoJSON must have a 'coordinates' field"): + validate_geojson_polygon('{"type": "Polygon"}') + + def test_coordinates_not_array(self): + with pytest.raises(ValueError, match="GeoJSON 'coordinates' must be an array"): + validate_geojson_polygon( + '{"type": "Polygon", "coordinates": "not an array"}' + ) + + +class TestIsWelshRegion: + def test_identifies_welsh_regions(self): + """Test that all Welsh regions are correctly identified.""" + for region in WELSH_REGIONS: + assert is_welsh_region(region) is True + + def test_rejects_english_regions(self): + """Test that English regions are not identified as Welsh.""" + english_regions = [ + "London English Region", + "North East English Region", + "South East English Region", + ] + for region in english_regions: + assert is_welsh_region(region) is False + + def test_rejects_invalid_names(self): + """Test that invalid region names are not identified as Welsh.""" + assert is_welsh_region("Invalid Region") is False + assert is_welsh_region("") is False + assert is_welsh_region("Wales") is False + + +class TestExpandWalesRegion: + def test_expands_wales_to_all_regions(self): + """Test that 'Wales' is expanded to all 5 Welsh regions.""" + result = expand_wales_region(["Wales"]) + assert len(result) == 5 + assert set(result) == WELSH_REGIONS + + def test_expands_wales_with_other_regions(self): + """Test that 'Wales' is expanded while keeping other regions.""" + result = expand_wales_region(["Wales", "London English Region"]) + assert len(result) == 6 + assert "London English Region" in result + assert "Wales" not in result + assert all(region in result for region in WELSH_REGIONS) + + def 
test_passes_through_non_wales_regions(self): + """Test that non-Wales regions are passed through unchanged.""" + input_regions = ["London English Region", "North East English Region"] + result = expand_wales_region(input_regions) + assert result == input_regions + + def test_handles_none_input(self): + """Test that None input returns None.""" + assert expand_wales_region(None) is None + + def test_handles_empty_list(self): + """Test that empty list is passed through.""" + assert expand_wales_region([]) == [] + + def test_expands_multiple_wales_instances(self): + """Test that multiple 'Wales' entries are handled correctly.""" + result = expand_wales_region(["Wales", "Wales", "London English Region"]) + # Should only add Welsh regions once + assert len(result) == 6 + assert result.count("London English Region") == 1 + + +class TestCollapseWelshRegions: + def test_collapses_all_welsh_regions_to_wales(self): + """Test that all 5 Welsh regions are collapsed to 'Wales'.""" + input_regions = list(WELSH_REGIONS) + result = collapse_welsh_regions(input_regions) + assert len(result) == 1 + assert result == ["Wales"] + + def test_collapses_partial_welsh_regions(self): + """Test that even partial Welsh regions are collapsed to 'Wales'.""" + input_regions = ["South Wales East PER", "North Wales PER"] + result = collapse_welsh_regions(input_regions) + assert len(result) == 1 + assert result == ["Wales"] + + def test_collapses_welsh_with_english_regions(self): + """Test that Welsh regions are collapsed while keeping English regions.""" + input_regions = list(WELSH_REGIONS) + [ + "London English Region", + "North East English Region", + ] + result = collapse_welsh_regions(input_regions) + assert len(result) == 3 + assert "Wales" in result + assert "London English Region" in result + assert "North East English Region" in result + # Ensure all Welsh regions are removed + assert all(region not in result for region in WELSH_REGIONS) + + def test_returns_sorted_list(self): + """Test 
that the result is sorted alphabetically.""" + input_regions = [ + "South Wales East PER", + "London English Region", + "North East English Region", + ] + result = collapse_welsh_regions(input_regions) + assert result == sorted(result) + assert result == ["London English Region", "North East English Region", "Wales"] + + def test_passes_through_non_welsh_regions(self): + """Test that lists without Welsh regions are passed through.""" + input_regions = ["London English Region", "North East English Region"] + result = collapse_welsh_regions(input_regions) + assert result == input_regions + + def test_handles_empty_list(self): + """Test that empty list is passed through.""" + assert collapse_welsh_regions([]) == [] + + def test_handles_none_input(self): + """Test that None input returns None.""" + assert collapse_welsh_regions(None) is None