diff --git a/cmd/workforce_absence.go b/cmd/workforce_absence.go new file mode 100644 index 0000000..c8046e1 --- /dev/null +++ b/cmd/workforce_absence.go @@ -0,0 +1,24 @@ +package cmd + +import ( + "github.com/lukecarr/dfe-attendance/internal/scraper" + "github.com/spf13/cobra" +) + +func MakeWorkforceAbsenceCmd() *cobra.Command { + return &cobra.Command{ + Use: "workforce", + Short: "Scrapes (daily) workforce absence data for schools", + Run: func(cmd *cobra.Command, args []string) { + url, _ := cmd.Root().Flags().GetString("dfe-url") + out, _ := cmd.Flags().GetString("output") + scraper.WorkforceAbsence(url, out) + }, + } +} + +func init() { + cmd := MakeWorkforceAbsenceCmd() + cmd.PersistentFlags().String("output", "workforce_absence.csv", "The output CSV file for workforce absence data") + rootCmd.AddCommand(cmd) +} diff --git a/internal/scraper/workforce_absence.go b/internal/scraper/workforce_absence.go new file mode 100644 index 0000000..da2d555 --- /dev/null +++ b/internal/scraper/workforce_absence.go @@ -0,0 +1,5 @@ +package scraper + +func WorkforceAbsence(url, out string) { + Generic(url, "data/table_1d_daily_workforce_absence_in_education_settings_during_covid_19_.csv", out) +} diff --git a/web/pages/api/daily.ts b/web/pages/api/daily.ts index acb7c98..3d04915 100644 --- a/web/pages/api/daily.ts +++ b/web/pages/api/daily.ts @@ -20,9 +20,10 @@ const getCsvText = async () => { export default async function handler(_: NextApiRequest, res: NextApiResponse) { const csvText = await getCsvText() + if (process.env.NODE_ENV === 'production') res.setHeader('Cache-Control', `public,max-age=${60 * 60 * 24},immutable`) + res.status(200) .setHeader('Content-Type', 'text/csv') .setHeader('Content-Disposition', 'attachment;filename=daily_attendance.csv') - .setHeader('Cache-Control', `public,max-age=${60 * 60 * 24},immutable`) .send(csvText) } diff --git a/web/pages/api/workforce.ts b/web/pages/api/workforce.ts new file mode 100644 index 0000000..cd4a835 --- /dev/null +++ b/web/pages/api/workforce.ts @@ -0,0 +1,29 @@ +import type { NextApiRequest, NextApiResponse } from 'next' +import axios from 'axios' +import * as cheerio from 'cheerio' +import AdmZip from 'adm-zip' + +const getCsvText = async () => { + const { data } = await axios.get('https://explore-education-statistics.service.gov.uk/find-statistics/attendance-in-education-and-early-years-settings-during-the-coronavirus-covid-19-outbreak') + + const $ = cheerio.load(data) + + const downloadUrl = $('a[href]').filter(function () { return $(this).text() === 'Download all data' }).first().attr('href') + + const { data: zipData } = await axios.get(downloadUrl, { responseType: 'arraybuffer' }) + const zip = new AdmZip(zipData) + const csvText = zip.readAsText('data/table_1d_daily_workforce_absence_in_education_settings_during_covid_19_.csv', 'utf8') + + return csvText +} + +export default async function handler(_: NextApiRequest, res: NextApiResponse) { + const csvText = await getCsvText() + + if (process.env.NODE_ENV === 'production') res.setHeader('Cache-Control', `public,max-age=${60 * 60 * 24},immutable`) + + res.status(200) + .setHeader('Content-Type', 'text/csv') + .setHeader('Content-Disposition', 'attachment;filename=daily_workforce_absence.csv') + .send(csvText) +} diff --git a/web/pages/index.tsx b/web/pages/index.tsx index ae16d48..7b58854 100644 --- a/web/pages/index.tsx +++ b/web/pages/index.tsx @@ -24,9 +24,12 @@ const Home: FunctionComponent & { title?: string } = () => {
- + Daily attendance data for schools from Sep 2020 to present! + + Daily workforce absence data for schools from Sep 2020 to present! +