Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions db/scripts/importer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Scratch script for exercising the appropriate body import against CSV
# exports placed under tmp/import.
#
# The number trailing each path is presumably the row count of the export the
# script was last run against -- TODO confirm.
require "csv"

appropriate_body_csv = Rails.root.join("tmp/import/appropriatebody.csv") # 537
teachers_csv = Rails.root.join("tmp/import/teachers.csv") # 1_799_170
induction_period_csv = Rails.root.join("tmp/import/inductionperiods.csv") # 829_189
dfe_sign_in_mapping_csv = Rails.root.join("tmp/import/dfe-sign-in-mappings.csv") # 86
admin_csv = Rails.root.join("tmp/import/admins.csv") # 30
cutoff_csv = Rails.root.join("tmp/import/old-abs.csv") # 433

# AppropriateBodies::Importers::Importer.new(
# appropriate_body_csv:,
# teachers_csv:,
# induction_period_csv:,
# dfe_sign_in_mapping_csv:,
# admin_csv:,
# cutoff_csv:
# ).import!

# https://teacher-cpd.design-history.education.gov.uk/ecf-v2/fixing-dqt-data/

# AppropriateBody.count # 532 (5 omitted)
# InductionPeriod.count # 87_271 (741_918 omitted)
# Teacher.count # 75_082 (1_724_088 omitted)
# Event.count # 126_485

# Need to import all inductions that have an end date before `18 Feb 2025`

# import_boundary = Date.parse("2025-02-18")

# InductionPeriod.count #=> 87271
# InductionPeriod.finished_before(import_boundary).count #=> 37643
# InductionPeriod.ongoing.count #=> 49628

# 49628 + 37643

# Compare the names in the CSV export with the names already in the database.
ab_csv = CSV.read(appropriate_body_csv, headers: true)
csv_ab_names = ab_csv.map { |r| r["name"] }

# pluck reads only the name column instead of instantiating every record.
current_ab_names = AppropriateBody.pluck(:name)

# Report the difference instead of stopping at a breakpoint, so the script
# also works when it is run non-interactively.
missing_ab_names = csv_ab_names - current_ab_names
puts "Appropriate bodies in the CSV but not in the database: #{missing_ab_names.count}"
puts missing_ab_names
122 changes: 122 additions & 0 deletions lib/appropriate_bodies/importers/appropriate_body_importer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
require "csv"

module AppropriateBodies::Importers
class AppropriateBodyImporter
Copy link
Contributor

@craigmdavidson craigmdavidson Jan 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This isn't really an Importer, as it doesn't write anything? The import is done exclusively by the Importer class.

This class just provides the data to be imported; as such, it might be more intention-revealing if it were named something like AppropriateBodyData or, if we wanted to be totally explicit, AppropriateBodyFilteredImportData.

# Path (relative to the working directory) of the log file this class resets
# and logs to by default.
IMPORT_ERROR_LOG = "tmp/appropriate_body_import.log"

# One appropriate body, in the shape produced by #build and handed to callers
# of #rows.
Row = Struct.new(:dqt_id, :name, :dfe_sign_in_organisation_id, :local_authority_code, :establishment_number) do
  # Attributes exposed for persistence, in insertion order.
  #
  # establishment_number and local_authority_code are members of the struct
  # but are deliberately left out of the hash for now.
  #
  # @return [Hash{Symbol => Object}]
  def to_h
    exported_members = %i[name dqt_id dfe_sign_in_organisation_id]

    exported_members.to_h { |member| [member, self[member]] }
  end
end

# Logger used to report values that cannot be parsed. It is set in the
# initializer and can be swapped out afterwards through the writer.
attr_accessor :logger

# @param filename [String, Pathname] appropriate bodies CSV; only read when +csv+ is not given
# @param wanted_legacy_ids [Array<String>] legacy ids of the appropriate bodies to keep (see #rows)
# @param dfe_sign_in_mapping_filename [String, Pathname] DfE Sign-in mapping CSV; only read when
#   +dfe_sign_in_mapping_csv+ is not given
# @param csv [Enumerable, nil] pre-parsed appropriate body rows (takes precedence over +filename+)
# @param dfe_sign_in_mapping_csv [Enumerable, nil] pre-parsed mapping rows
# @param logger [#error, nil] logger to use instead of the default file logger
def initialize(filename, wanted_legacy_ids, dfe_sign_in_mapping_filename, csv: nil, dfe_sign_in_mapping_csv: nil, logger: nil)
  @csv = csv || CSV.read(filename, headers: true)
  @wanted_legacy_ids = wanted_legacy_ids

  @mapping_csv = dfe_sign_in_mapping_csv || CSV.read(dfe_sign_in_mapping_filename, headers: true)

  # Only touch the default log file when we are actually going to log to it;
  # an injected logger must not have the side effect of wiping that file.
  @logger = logger || begin
    # Start every run with an empty log.
    File.write(IMPORT_ERROR_LOG, "")

    # Logger.new's second positional argument is shift_age (log rotation),
    # not a file mode, so no flag is passed: Logger creates the file itself.
    Logger.new(IMPORT_ERROR_LOG)
  end
end

# Builds a Row for every CSV row whose "id" is one of the wanted legacy ids;
# all other CSV rows are dropped.
#
# @return [Array<Row>]
def rows
  @csv.filter_map do |csv_row|
    Row.new(**build(csv_row)) if csv_row["id"].in?(@wanted_legacy_ids)
  end
end

private

# Turns one CSV row into the keyword arguments for Row.
#
# Source columns, with two sample rows:
#
# id , name , dfe_sign_in_organisation_id, local_authority_code, establishment_number
# 025e61e7-ec32-eb11-a813-000d3a228dfc, Test Appropriate Body, , 1000 ,
# 69748633-ed32-eb11-a813-000d3a228dfc, ETS Test Organisation, 1234 , ,
#
# @return [Hash{Symbol => Object}]
def build(row)
  name = select_name(row).strip
  dfe_sign_in_organisation_id = select_dfe_sign_in_organsation_id(row)

  # Not merged in (yet):
  # **extract_local_authority_code_and_establishment_number(row)
  { name: name, dfe_sign_in_organisation_id: dfe_sign_in_organisation_id, dqt_id: row["id"] }
end

# Name to import for the row: the "appropriate_body_name" from the DfE
# Sign-in mapping when the row's id has a mapping, otherwise the row's own
# "name".
#
# @raise [KeyError] when a mapping exists but has no "appropriate_body_name"
def select_name(row)
  legacy_id = row["id"]
  return row["name"] unless mappings_by_legacy_id.key?(legacy_id)

  mappings_by_legacy_id[legacy_id].fetch("appropriate_body_name")
end

# DfE Sign-in organisation id taken from the mapping for the row's id, or nil
# when the id has no mapping.
#
# @raise [KeyError] when a mapping exists but has no "dfe_sign_in_organisation_id"
def select_dfe_sign_in_organsation_id(row)
  legacy_id = row["id"]
  return unless mappings_by_legacy_id.key?(legacy_id)

  mappings_by_legacy_id[legacy_id].fetch("dfe_sign_in_organisation_id")
end

# Mapping rows (as hashes) keyed by their "dqt_id"; built once and memoised.
# When several rows share a dqt_id the last one wins.
def mappings_by_legacy_id
  @mappings_by_legacy_id ||= @mapping_csv.each_with_object({}) do |mapping_row, lookup|
    mapping = mapping_row.to_h
    lookup[mapping["dqt_id"]] = mapping
  end
end

# Mapping rows (as hashes) keyed by their "appropriate_body_name"; built once
# and memoised. When several rows share a name the last one wins.
def mappings_by_ab_name
  @mappings_by_ab_name ||= @mapping_csv.each_with_object({}) do |mapping_row, lookup|
    mapping = mapping_row.to_h
    lookup[mapping["appropriate_body_name"]] = mapping
  end
end

# Splits the "local_authority_code" column into its numeric parts.
#
# The column holds a mix of formats (e.g. 51, 052, 101//101, 202, 885/5403).
# The recognised ones are:
#
# * 3 digits          - a local authority code
#                       (https://www.get-information-schools.service.gov.uk/Guidance/LaNameCodes)
# * 4 digits          - an establishment number
# * 3 digits/4 digits - local authority code and establishment number, which
#                       together form the establishment ID (aka the 'DfE number')
# * 7 digits          - the same pair written without the slash
#
# Anything else (including a missing value) is reported through the logger
# and yields an empty hash.
#
# @return [Hash{Symbol => Integer}] any of :local_authority_code and :establishment_number
def extract_local_authority_code_and_establishment_number(row)
  raw_value = row["local_authority_code"]

  extracted =
    case raw_value
    when %r{\A\d{3}\z}
      { local_authority_code: raw_value }
    when %r{\A\d{4}\z}
      { establishment_number: raw_value }
    when %r{\A(?<la_code>\d{3})/?(?<establishment>\d{4})\z}
      {
        local_authority_code: Regexp.last_match(:la_code),
        establishment_number: Regexp.last_match(:establishment)
      }
    else
      [
        "#########################",
        "Invalid local authority code",
        "Value: #{raw_value}"
      ].each { |line| logger.error line }

      {}
    end

  extracted.transform_values(&:to_i)
end
end
end
124 changes: 124 additions & 0 deletions lib/appropriate_bodies/importers/importer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
module AppropriateBodies::Importers
class Importer
# Loads and pre-filters everything that #import! will write. Nothing is
# written to the database here.
#
# Each argument is passed straight to the matching *Importer class or to
# CSV.read, so each is presumably a path to a CSV file -- TODO confirm against
# InductionPeriodImporter and TeacherImporter.
def initialize(appropriate_body_csv:, teachers_csv:, induction_period_csv:, dfe_sign_in_mapping_csv:, admin_csv:, cutoff_csv:)
# Induction periods come first: their keys and appropriate body ids decide
# which teachers and appropriate bodies are loaded below.
@induction_periods_grouped_by_trn = InductionPeriodImporter.new(induction_period_csv, cutoff_csv).periods_by_trn

@trns_with_induction_periods = @induction_periods_grouped_by_trn.keys
@teacher_importer_rows = TeacherImporter.new(teachers_csv, @trns_with_induction_periods).rows_with_wanted_statuses

# Distinct legacy appropriate body ids referenced by any induction period.
@active_abs = @induction_periods_grouped_by_trn.flat_map { |_trn, ips| ips.map(&:legacy_appropriate_body_id) }.uniq
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like @active_absis @unique_legacy_appropriate_bodies?

@ab_importer_rows = AppropriateBodyImporter.new(appropriate_body_csv, @active_abs, dfe_sign_in_mapping_csv).rows

# Only read by the commented-out insert_admins at the bottom of this class.
@admin_csv = CSV.read(admin_csv, headers: true)
end

# Runs the whole import.
#
# The order matters: induction periods and extensions look up the ids of
# appropriate bodies and teachers in the database, and the event headings are
# filled in from the teachers and appropriate_bodies tables.
#
# NOTE(review): the steps are not wrapped in a transaction here, so a failure
# part-way through presumably leaves the earlier inserts in place -- confirm
# whether the caller provides one.
def import!
import_ab_rows
import_teacher_rows
import_induction_periods_rows
import_induction_extensions

update_event_titles
# insert_admins
end

private

# Lookup of TRN => teachers.id for every teacher in the database, memoised
# after the first call (so it must not be called before teachers are inserted).
#
# pluck reads just the two columns instead of instantiating a model per row.
#
# @return [Hash{String => Integer}]
def teacher_trn_to_id
  @teacher_trn_to_id ||= Teacher.pluck(:trn, :id).to_h
end

# Lookup of legacy (DQT) id => appropriate_bodies.id for every appropriate
# body in the database, memoised after the first call (so it must not be
# called before appropriate bodies are inserted).
#
# pluck reads just the two columns instead of instantiating a model per row.
#
# @return [Hash{String => Integer}]
def ab_legacy_id_to_id
  @ab_legacy_id_to_id ||= AppropriateBody.pluck(:dqt_id, :id).to_h
end

# Bulk-inserts the appropriate bodies that are referenced by at least one
# induction period, logging the counts before and after.
def import_ab_rows
Rails.logger.info("Active appropriate bodies: #{@active_abs.count}")
# NOTE(review): @ab_importer_rows was built by passing @active_abs to
# AppropriateBodyImporter, whose #rows already keeps only those ids, so this
# select looks redundant -- confirm before removing.
AppropriateBody.insert_all!(@ab_importer_rows.select { |r| r.dqt_id.in?(@active_abs) }.map(&:to_h))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we not be filtering the @ab_importer_rows in the AppropriateBodyImporter as it already has the@active_abs to filter on?

Rails.logger.info("Appropriate bodies inserted: #{AppropriateBody.count}")
end

# Bulk-inserts every prepared teacher row, logging how many rows were
# prepared and how many teachers exist afterwards.
def import_teacher_rows
  log = Rails.logger
  log.info("Active Teachers: #{@teacher_importer_rows.count}")

  teacher_attributes = @teacher_importer_rows.map(&:to_h)
  Teacher.insert_all!(teacher_attributes)

  log.info("Teachers inserted: #{Teacher.count}")
end

# Bulk-inserts the induction periods of every imported teacher, followed by
# the events that belong to them.
#
# Induction periods whose TRN has no teacher in the database are skipped
# silently. Induction periods whose appropriate body is not in the database
# are skipped with an error log.
def import_induction_periods_rows
  induction_period_rows = []

  @induction_periods_grouped_by_trn.each do |trn, induction_periods|
    # A plain lookup replaces slice(*keys) + fetch/rescue: no need to splat
    # every known TRN into one call or to use exceptions for control flow.
    teacher_id = teacher_trn_to_id[trn]
    next if teacher_id.nil?

    induction_periods.each do |ip|
      ip.teacher_id = teacher_id

      appropriate_body_id = ab_legacy_id_to_id[ip.legacy_appropriate_body_id]
      if appropriate_body_id.nil?
        Rails.logger.error("No appropriate body found with legacy_id: #{ip.legacy_appropriate_body_id}")
        next
      end

      ip.appropriate_body_id = appropriate_body_id
      induction_period_rows << ip
    end
  end

  induction_period_ids = InductionPeriod.insert_all!(induction_period_rows.map(&:to_record), returning: [:id])
  Rails.logger.info("Induction periods inserted: #{InductionPeriod.count}")

  # FIXME: how do we set titles?
  # can do it by executing a single line of SQL after insert

  # The events need the database id of their induction period. This relies
  # on the returned ids being in the same order as the inserted rows.
  induction_period_rows.each_with_index { |row, i| row.id = induction_period_ids[i]["id"] }

  events = induction_period_rows.flat_map(&:events).flatten

  # NOTE(review): this is the only non-bang insert_all in the class --
  # confirm that skipping duplicate events is intended.
  Event.insert_all(events)
end

# Bulk-inserts one induction extension for every prepared teacher row that
# has extension terms, then logs the resulting total.
#
# @raise [KeyError] when a teacher row's TRN has no teacher in the database
def import_induction_extensions
  extension_attributes = @teacher_importer_rows.filter_map do |teacher_row|
    next unless teacher_row.extension_terms.present?

    {
      teacher_id: teacher_trn_to_id.fetch(teacher_row.trn),
      number_of_terms: teacher_row.extension_terms
    }
  end

  InductionExtension.insert_all!(extension_attributes)
  Rails.logger.info("Induction extensions inserted: #{InductionExtension.count}")
end

# Fills in the heading of every 'induction_period_opened' and
# 'induction_period_closed' event from the teacher's name and the
# appropriate body's name. Both updates are sent to the database in a single
# execute call.
def update_event_titles
  claimed_sql = <<~CLAIM
    update events e
    set heading = t.trs_first_name || ' ' || t.trs_last_name || ' was claimed by ' || ab.name
    from teachers t, appropriate_bodies ab
    where e.event_type = 'induction_period_opened'
    and e.teacher_id = t.id
    and e.appropriate_body_id = ab.id;
  CLAIM

  released_sql = <<~RELEASE
    update events e
    set heading = t.trs_first_name || ' ' || t.trs_last_name || ' was released by ' || ab.name
    from teachers t, appropriate_bodies ab
    where e.event_type = 'induction_period_closed'
    and e.teacher_id = t.id
    and e.appropriate_body_id = ab.id;
  RELEASE

  ActiveRecord::Base.connection.execute("#{claimed_sql};#{released_sql}")
end

# def insert_admins
# @admin_csv.each do |admin|
# User.create(email: admin["email"], name: admin["name"])
# end
# end
end
end
Loading
Loading