diff --git a/definitions/output/reports/tech_crux.js b/definitions/output/reports/tech_crux.js
index 4f20a01..6b16ae6 100644
--- a/definitions/output/reports/tech_crux.js
+++ b/definitions/output/reports/tech_crux.js
@@ -30,6 +30,31 @@ CREATE TEMP FUNCTION IS_NON_ZERO(
 ) RETURNS BOOL AS (
   good + needs_improvement + poor > 0
 );
+
+CREATE TEMP FUNCTION get_passed_audits(lighthouse JSON)
+RETURNS ARRAY<STRUCT<
+  category STRING,
+  id STRING
+>>
+LANGUAGE js AS """
+const results = []
+// Collect each audit whose score is exactly 1, skipping the 'metrics' and 'hidden' groups.
+for (const category of Object.keys(lighthouse?.categories ? lighthouse.categories : {})) {
+  for (const audit of lighthouse.categories[category]?.auditRefs || []) { // a category without auditRefs yields nothing
+    if (
+      lighthouse.audits?.[audit.id]?.score === 1 && // an auditRef with no matching audit entry counts as not passed
+      !['metrics', 'hidden'].includes(audit.group)
+    ) {
+      results.push({
+        category,
+        id: audit.id
+      })
+    }
+  }
+}
+
+return results;
+""";
 `).query(ctx => `
 WITH pages AS (
   SELECT
@@ -172,7 +197,6 @@ technologies AS (
   WHERE
     tech.technology IS NOT NULL
 
-
   UNION ALL
 
   SELECT
@@ -183,26 +207,7 @@ technologies AS (
   FROM pages
 ),
 
-categories AS (
-  SELECT
-    tech.technology,
-    ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
-  FROM pages,
-    UNNEST(technologies) AS tech,
-    UNNEST(tech.categories) AS category
-  GROUP BY technology
-
-  UNION ALL
-
-  SELECT
-    'ALL' AS technology,
-    ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
-  FROM pages,
-    UNNEST(technologies) AS tech,
-    UNNEST(tech.categories) AS category
-),
-
-lab_metrics AS (
+lab_data AS (
   SELECT
     client,
     page,
@@ -213,36 +218,168 @@ lab_metrics AS (
     SAFE.FLOAT64(lighthouse.categories.accessibility.score) AS accessibility,
     SAFE.FLOAT64(lighthouse.categories['best-practices'].score) AS best_practices,
     SAFE.FLOAT64(lighthouse.categories.performance.score) AS performance,
-    SAFE.FLOAT64(lighthouse.categories.pwa.score) AS pwa,
     SAFE.FLOAT64(lighthouse.categories.seo.score) AS seo
   FROM pages
 ),
 
-lab_data AS (
+audits AS (
+  SELECT DISTINCT
+    client,
+    root_page,
+    technology,
+    version,
+    audit_category,
+    audit_id
+  FROM (
+    SELECT
+      client,
+      page,
+      root_page,
+      audits.category AS audit_category,
+      audits.id AS audit_id
+    FROM pages
+    INNER JOIN UNNEST(get_passed_audits(pages.lighthouse)) AS audits
+  ) AS audits_data
+  INNER JOIN technologies
+    USING (client, page)
+),
+
+lab_metrics AS (
   SELECT
     client,
     root_page,
     technology,
     version,
-    ANY_VALUE(category) AS category,
     AVG(bytesTotal) AS bytesTotal,
     AVG(bytesJS) AS bytesJS,
     AVG(bytesImg) AS bytesImg,
     AVG(accessibility) AS accessibility,
     AVG(best_practices) AS best_practices,
     AVG(performance) AS performance,
-    AVG(pwa) AS pwa,
     AVG(seo) AS seo
-  FROM lab_metrics
+  FROM lab_data
   INNER JOIN technologies
     USING (client, page)
-  INNER JOIN categories
-    USING (technology)
   GROUP BY
     client,
     root_page,
     technology,
     version
+),
+
+origins_summary AS (
+  SELECT
+    geo,
+    client,
+    rank,
+    technology,
+    version,
+    COUNT(DISTINCT root_page) AS origins
+  FROM lab_metrics
+  INNER JOIN crux
+    USING (client, root_page)
+  GROUP BY
+    geo,
+    client,
+    rank,
+    technology,
+    version
+
+),
+
+# Per segment: each passed audit with pass_rate = origins passing it / origins counted in origins_summary.
+audits_summary AS (
+  SELECT
+    geo,
+    client,
+    rank,
+    technology,
+    version,
+    ARRAY_AGG(STRUCT(
+      audit_category AS category,
+      audit_id AS id,
+      SAFE_DIVIDE(audits.origins, origins_summary.origins) AS pass_rate
+    )) AS audits
+  FROM (
+    SELECT
+      geo,
+      client,
+      rank,
+      technology,
+      version,
+      audit_category,
+      audit_id,
+      COUNT(DISTINCT root_page) AS origins
+    FROM audits
+    INNER JOIN crux
+      USING (client, root_page)
+    GROUP BY
+      geo,
+      client,
+      rank,
+      technology,
+      version,
+      audit_category,
+      audit_id
+  ) AS audits
+  LEFT JOIN origins_summary
+    USING (geo, client, rank, technology, version)
+  GROUP BY
+    geo,
+    client,
+    rank,
+    technology,
+    version
+),
+
+other_summary AS (
+  SELECT
+    geo,
+    client,
+    rank,
+    technology,
+    version,
+
+    STRUCT(
+      COUNTIF(good_fid) AS origins_with_good_fid,
+      COUNTIF(good_cls) AS origins_with_good_cls,
+      COUNTIF(good_lcp) AS origins_with_good_lcp,
+      COUNTIF(good_fcp) AS origins_with_good_fcp,
+      COUNTIF(good_ttfb) AS origins_with_good_ttfb,
+      COUNTIF(good_inp) AS origins_with_good_inp,
+      COUNTIF(any_fid) AS origins_with_any_fid,
+      COUNTIF(any_cls) AS origins_with_any_cls,
+      COUNTIF(any_lcp) AS origins_with_any_lcp,
+      COUNTIF(any_fcp) AS origins_with_any_fcp,
+      COUNTIF(any_ttfb) AS origins_with_any_ttfb,
+      COUNTIF(any_inp) AS origins_with_any_inp,
+      COUNTIF(good_cwv) AS origins_with_good_cwv,
+      COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv,
+      SAFE_DIVIDE(COUNTIF(good_cwv), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv
+    ) AS crux,
+
+    STRUCT(
+      SAFE_CAST(APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS NUMERIC) AS accessibility,
+      SAFE_CAST(APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS NUMERIC) AS best_practices, # read as median_lighthouse_score.best_practices by tech_report_lighthouse
+      SAFE_CAST(APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS NUMERIC) AS performance,
+      SAFE_CAST(APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS NUMERIC) AS seo
+    ) AS median_lighthouse_score,
+
+    STRUCT(
+      SAFE_CAST(APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS INT64) AS total,
+      SAFE_CAST(APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS INT64) AS js,
+      SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS images
+    ) AS median_page_weight_bytes
+
+  FROM lab_metrics
+  INNER JOIN crux
+    USING (client, root_page)
+  GROUP BY
+    geo,
+    client,
+    rank,
+    technology,
+    version
 )
 
 SELECT
@@ -252,44 +389,16 @@ SELECT
   rank,
   technology,
   version,
-  COUNT(DISTINCT root_page) AS origins,
-
-  # CrUX data
-  COUNTIF(good_fid) AS origins_with_good_fid,
-  COUNTIF(good_cls) AS origins_with_good_cls,
-  COUNTIF(good_lcp) AS origins_with_good_lcp,
-  COUNTIF(good_fcp) AS origins_with_good_fcp,
-  COUNTIF(good_ttfb) AS origins_with_good_ttfb,
-  COUNTIF(good_inp) AS origins_with_good_inp,
-  COUNTIF(any_fid) AS origins_with_any_fid,
-  COUNTIF(any_cls) AS origins_with_any_cls,
-  COUNTIF(any_lcp) AS origins_with_any_lcp,
-  COUNTIF(any_fcp) AS origins_with_any_fcp,
-  COUNTIF(any_ttfb) AS origins_with_any_ttfb,
-  COUNTIF(any_inp) AS origins_with_any_inp,
-  COUNTIF(good_cwv) AS origins_with_good_cwv,
-  COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv,
-  SAFE_DIVIDE(COUNTIF(good_cwv), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv,
-
-  # Lighthouse data
-  SAFE_CAST(APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_accessibility,
-  SAFE_CAST(APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_best_practices,
-  SAFE_CAST(APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_performance,
-  SAFE_CAST(APPROX_QUANTILES(pwa, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_pwa,
-  SAFE_CAST(APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_seo,
 
-  # Page weight stats
-  SAFE_CAST(APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS INT64) AS median_bytes_total,
-  SAFE_CAST(APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS INT64) AS median_bytes_js,
-  SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS median_bytes_image
-
-FROM lab_data
-INNER JOIN crux
-USING (client, root_page)
-GROUP BY
-  geo,
-  client,
-  rank,
-  technology,
-  version
+  # Metrics
+  origins,
+  crux,
+  median_lighthouse_score,
+  median_page_weight_bytes,
+  audits
+FROM origins_summary
+LEFT JOIN other_summary
+USING (geo, client, rank, technology, version)
+LEFT JOIN audits_summary
+USING (geo, client, rank, technology, version)
 `)
diff --git a/definitions/output/reports/tech_report_audits.js b/definitions/output/reports/tech_report_audits.js
new file mode 100644
index 0000000..87fde69
--- /dev/null
+++ b/definitions/output/reports/tech_report_audits.js
@@ -0,0 +1,89 @@
+const pastMonth = constants.fnPastMonth(constants.currentMonth)
+
+publish('tech_report_audits', {
+  schema: 'reports',
+  type: 'incremental',
+  protected: true,
+  bigquery: {
+    partitionBy: 'date',
+    clusterBy: ['rank', 'geo']
+  },
+  tags: ['tech_report']
+}).preOps(ctx => `
+CREATE TEMP FUNCTION GET_AUDITS(
+  records ARRAY<STRUCT<
+    client STRING,
+    audits ARRAY<STRUCT<
+      category STRING,
+      id STRING,
+      pass_rate FLOAT64
+    >>
+  >>
+)
+RETURNS ARRAY<STRUCT<
+  category STRING,
+  id STRING,
+  mobile STRUCT<
+    pass_rate FLOAT64
+  >,
+  desktop STRUCT<
+    pass_rate FLOAT64
+  >
+>>
+LANGUAGE js AS '''
+// Create a map to accumulate audits based on a unique key (category + id).
+var auditMap = {};
+
+// Loop over each record.
+records.forEach(function(record) {
+  // Loop over each audit in the record; a missing (NULL) audits array is treated as empty.
+  (record.audits || []).forEach(function(audit) {
+    // Create a unique key for combining audits.
+    var key = audit.category + '|' + audit.id;
+    // Initialize the audit in the map if not present.
+    if (!auditMap[key]) {
+      auditMap[key] = {
+        category: audit.category,
+        id: audit.id,
+        mobile: { pass_rate: 0 },
+        desktop: { pass_rate: 0 }
+      };
+    }
+    // Add the pass_rate to the proper client type.
+    if (record.client === 'mobile') {
+      auditMap[key].mobile.pass_rate += audit.pass_rate;
+    } else if (record.client === 'desktop') {
+      auditMap[key].desktop.pass_rate += audit.pass_rate;
+    }
+  });
+});
+
+// Convert the map into an array of audits.
+return Object.keys(auditMap).map(function(key) {
+  return auditMap[key];
+});
+''';
+
+DELETE FROM ${ctx.self()}
+WHERE date = '${pastMonth}';
+`).query(ctx => `
+/* {"dataform_trigger": "tech_report_complete", "date": "${pastMonth}", "name": "audits", "type": "report"} */
+SELECT
+  date,
+  geo,
+  rank,
+  technology,
+  version,
+  GET_AUDITS(ARRAY_AGG(STRUCT(
+    client,
+    audits
+  ))) AS audits
+FROM ${ctx.ref('reports', 'tech_crux')}
+WHERE date = '${pastMonth}'
+GROUP BY
+  date,
+  geo,
+  rank,
+  technology,
+  version
+`)
diff --git a/definitions/output/reports/tech_report_core_web_vitals.js b/definitions/output/reports/tech_report_core_web_vitals.js
index 2d26f90..cb663d1 100644
--- a/definitions/output/reports/tech_report_core_web_vitals.js
+++ b/definitions/output/reports/tech_report_core_web_vitals.js
@@ -77,20 +77,20 @@ SELECT
   version,
   GET_VITALS(ARRAY_AGG(STRUCT(
     client,
-    origins_with_good_fid,
-    origins_with_good_cls,
-    origins_with_good_lcp,
-    origins_with_good_fcp,
-    origins_with_good_ttfb,
-    origins_with_good_inp,
-    origins_with_any_fid,
-    origins_with_any_cls,
-    origins_with_any_lcp,
-    origins_with_any_fcp,
-    origins_with_any_ttfb,
-    origins_with_any_inp,
-    origins_with_good_cwv,
-    origins_eligible_for_cwv
+    crux.origins_with_good_fid,
+    crux.origins_with_good_cls,
+    crux.origins_with_good_lcp,
+    crux.origins_with_good_fcp,
+    crux.origins_with_good_ttfb,
+    crux.origins_with_good_inp,
+    crux.origins_with_any_fid,
+    crux.origins_with_any_cls,
+    crux.origins_with_any_lcp,
+    crux.origins_with_any_fcp,
+    crux.origins_with_any_ttfb,
+    crux.origins_with_any_inp,
+    crux.origins_with_good_cwv,
+    crux.origins_eligible_for_cwv
   ))) AS vitals
 FROM ${ctx.ref('reports', 'tech_crux')}
 WHERE date = '${pastMonth}'
diff --git a/definitions/output/reports/tech_report_lighthouse.js b/definitions/output/reports/tech_report_lighthouse.js
index 504686c..8d438ec 100644
--- a/definitions/output/reports/tech_report_lighthouse.js
+++ b/definitions/output/reports/tech_report_lighthouse.js
@@ -16,9 +16,9 @@ CREATE TEMPORARY FUNCTION GET_LIGHTHOUSE(
     median_lighthouse_score_accessibility NUMERIC,
     median_lighthouse_score_best_practices NUMERIC,
     median_lighthouse_score_performance NUMERIC,
-    median_lighthouse_score_pwa NUMERIC,
     median_lighthouse_score_seo NUMERIC
->>)
+  >>
+)
 RETURNS ARRAY<STRUCT<
   name STRING,
   desktop STRUCT<
@@ -26,13 +26,13 @@ RETURNS ARRAY<STRUCT<
   >,
   mobile STRUCT<
     median_score FLOAT64
->>>
+  >
+>>
 LANGUAGE js AS '''
 const METRIC_MAP = {
   accessibility: 'median_lighthouse_score_accessibility',
   best_practices: 'median_lighthouse_score_best_practices',
   performance: 'median_lighthouse_score_performance',
-  pwa: 'median_lighthouse_score_pwa',
   seo: 'median_lighthouse_score_seo',
 }
 
@@ -63,11 +63,10 @@ SELECT
   version,
   GET_LIGHTHOUSE(ARRAY_AGG(STRUCT(
     client,
-    median_lighthouse_score_accessibility,
-    median_lighthouse_score_best_practices,
-    median_lighthouse_score_performance,
-    median_lighthouse_score_pwa,
-    median_lighthouse_score_seo
+    median_lighthouse_score.accessibility,
+    median_lighthouse_score.best_practices,
+    median_lighthouse_score.performance,
+    median_lighthouse_score.seo
   ))) AS lighthouse
 FROM ${ctx.ref('reports', 'tech_crux')}
 WHERE date = '${pastMonth}'
diff --git a/definitions/output/reports/tech_report_page_weight.js b/definitions/output/reports/tech_report_page_weight.js
index 50b1ef2..b115097 100644
--- a/definitions/output/reports/tech_report_page_weight.js
+++ b/definitions/output/reports/tech_report_page_weight.js
@@ -57,10 +57,10 @@ SELECT
   version,
   GET_PAGE_WEIGHT(ARRAY_AGG(STRUCT(
     client,
-    median_bytes_total,
-    median_bytes_js,
-    median_bytes_image
-  ))) AS pageWeight
+    median_page_weight_bytes.total,
+    median_page_weight_bytes.js,
+    median_page_weight_bytes.images
+  ))) AS page_weight
 FROM ${ctx.ref('reports', 'tech_crux')}
 WHERE date = '${pastMonth}'
 GROUP BY