Skip to content

Commit

Permalink
✨ Added scraper for daily workforce absence data
Browse files Browse the repository at this point in the history
  • Loading branch information
lukecarr committed Dec 17, 2021
1 parent f6cb38e commit a7035f2
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 2 deletions.
24 changes: 24 additions & 0 deletions cmd/workforce_absence.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package cmd

import (
"github.com/lukecarr/dfe-attendance/internal/scraper"
"github.com/spf13/cobra"
)

// MakeWorkforceAbsenceCmd builds the "workforce" subcommand.
//
// When run, the command reads the "dfe-url" flag from the root command's flag
// set and the "output" flag from its own flag set, then passes both values to
// scraper.WorkforceAbsence.
//
// A failed flag lookup (for example, a flag that was never registered or has a
// non-string type) is returned from RunE instead of being discarded, so the
// scraper is never invoked with a silently-empty URL or output path.
func MakeWorkforceAbsenceCmd() *cobra.Command {
	return &cobra.Command{
		Use:   "workforce",
		Short: "Scrapes (daily) workforce absence data for schools",
		RunE: func(cmd *cobra.Command, args []string) error {
			url, err := cmd.Root().Flags().GetString("dfe-url")
			if err != nil {
				return err
			}

			out, err := cmd.Flags().GetString("output")
			if err != nil {
				return err
			}

			scraper.WorkforceAbsence(url, out)
			return nil
		},
	}
}

// init wires the workforce absence subcommand into the CLI: it builds the
// command, declares its persistent "output" string flag (defaulting to
// "workforce_absence.csv"), and attaches the command to rootCmd.
func init() {
	const (
		flagName    = "output"
		defaultPath = "workforce_absence.csv"
		flagUsage   = "The output CSV file for workforce absence data"
	)

	workforceCmd := MakeWorkforceAbsenceCmd()
	workforceCmd.PersistentFlags().String(flagName, defaultPath, flagUsage)
	rootCmd.AddCommand(workforceCmd)
}
5 changes: 5 additions & 0 deletions internal/scraper/workforce_absence.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package scraper

// WorkforceAbsence delegates to Generic, supplying the fixed path of the daily
// workforce absence table CSV alongside the caller's url and out values.
//
// NOTE(review): url is presumably the DfE page to scrape and out the
// destination CSV file; both are forwarded unchanged, so confirm their exact
// meaning against Generic.
func WorkforceAbsence(url, out string) {
	const tablePath = "data/table_1d_daily_workforce_absence_in_education_settings_during_covid_19_.csv"

	Generic(url, tablePath, out)
}
3 changes: 2 additions & 1 deletion web/pages/api/daily.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ const getCsvText = async () => {
/**
 * API route handler that responds with the CSV text from `getCsvText` as a
 * file download named `daily_attendance.csv`.
 *
 * The `Cache-Control` header (public, one day, immutable) is only set when
 * `NODE_ENV` is `production`. It must not also be set unconditionally in the
 * response chain, otherwise the production-only guard has no effect and
 * non-production responses get cached for a day as well.
 *
 * @param _ - Incoming request (unused).
 * @param res - Response used to send the CSV.
 */
export default async function handler(_: NextApiRequest, res: NextApiResponse) {
  const csvText = await getCsvText()

  if (process.env.NODE_ENV === 'production') {
    res.setHeader('Cache-Control', `public,max-age=${60 * 60 * 24},immutable`)
  }

  res.status(200)
    .setHeader('Content-Type', 'text/csv')
    .setHeader('Content-Disposition', 'attachment;filename=daily_attendance.csv')
    .send(csvText)
}
29 changes: 29 additions & 0 deletions web/pages/api/workforce.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import type { NextApiRequest, NextApiResponse } from 'next'
import axios from 'axios'
import * as cheerio from 'cheerio'
import AdmZip from 'adm-zip'

/**
 * Fetches the DfE statistics page, follows its "Download all data" link to a
 * ZIP archive, and returns the daily workforce absence table from that archive
 * as UTF-8 text.
 *
 * @returns The CSV contents of the workforce absence table.
 * @throws Error if the page has no "Download all data" link, or if the
 *   downloaded archive does not contain the expected CSV entry. Network errors
 *   from axios propagate unchanged.
 */
const getCsvText = async (): Promise<string> => {
  const pageUrl = 'https://explore-education-statistics.service.gov.uk/find-statistics/attendance-in-education-and-early-years-settings-during-the-coronavirus-covid-19-outbreak'
  const csvEntryName = 'data/table_1d_daily_workforce_absence_in_education_settings_during_covid_19_.csv'

  const { data } = await axios.get(pageUrl)

  const $ = cheerio.load(data)

  // A regular function (not an arrow) is required so cheerio can bind `this`
  // to the element currently being filtered.
  const href = $('a[href]').filter(function () { return $(this).text() === 'Download all data' }).first().attr('href')

  // `attr` yields undefined when no anchor matched; fail with a clear message
  // rather than handing undefined to axios.
  if (!href) {
    throw new Error('Could not find a "Download all data" link on the DfE statistics page')
  }

  // Resolve against the page URL so a relative href still produces a fetchable
  // absolute URL; absolute hrefs pass through.
  const downloadUrl = new URL(href, pageUrl).toString()

  const { data: zipData } = await axios.get(downloadUrl, { responseType: 'arraybuffer' })
  const zip = new AdmZip(zipData)

  // Look the entry up explicitly: reading a missing entry as text would give an
  // empty string, which callers would then serve as a valid (empty) CSV.
  const entry = zip.getEntry(csvEntryName)
  if (!entry) {
    throw new Error(`Downloaded archive does not contain "${csvEntryName}"`)
  }

  return zip.readAsText(entry, 'utf8')
}

/**
 * API route handler that responds with the CSV text from `getCsvText` as a
 * file download named `daily_workforce_absence.csv`.
 *
 * When `NODE_ENV` is `production`, the response also carries a public,
 * one-day, immutable `Cache-Control` header.
 *
 * @param _ - Incoming request (unused).
 * @param res - Response used to send the CSV.
 */
export default async function handler(_: NextApiRequest, res: NextApiResponse) {
  const oneDayInSeconds = 60 * 60 * 24
  const body = await getCsvText()

  const isProduction = process.env.NODE_ENV === 'production'
  if (isProduction) {
    res.setHeader('Cache-Control', `public,max-age=${oneDayInSeconds},immutable`)
  }

  res.status(200)
  res.setHeader('Content-Type', 'text/csv')
  res.setHeader('Content-Disposition', 'attachment;filename=daily_workforce_absence.csv')
  res.send(body)
}
5 changes: 4 additions & 1 deletion web/pages/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@ const Home: FunctionComponent & { title?: string } = () => {
</section>
<div className="mx-auto w-full px-4 sm:px-6 md:px-8 max-w-screen-xl" id="endpoints">
<div className="space-y-6 md:grid md:space-y-0 grid-cols-6 grid-rows-3 gap-6">
<Endpoint className="bg-gradient-to-tr from-blue-400 to-blue-700" title="Daily Attendance" href="daily" width={2} height={2} icon={faCalendarDay}>
<Endpoint className="bg-gradient-to-tr from-blue-400 to-blue-700" title="Daily Attendance" href="daily" width={3} height={2} icon={faCalendarDay}>
Daily attendance data for schools from Sep 2020 to present!
</Endpoint>
<Endpoint className="bg-gradient-to-tr from-orange-400 to-red-700" title="Workforce Absence" href="workforce" width={3} height={2} icon={faCalendarDay}>
Daily workforce absence data for schools from Sep 2020 to present!
</Endpoint>
</div>
</div>
</>
Expand Down

1 comment on commit a7035f2

@vercel
Copy link

@vercel vercel bot commented on a7035f2 Dec 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.