diff --git a/db/postgres.go b/db/postgres.go index 0ef3add2..d4a6f045 100644 --- a/db/postgres.go +++ b/db/postgres.go @@ -112,16 +112,6 @@ func (p *PostgreSQL) CreateCompanies(batch [][]any) error { return nil } -// CreateIndex runs after all the data is creates. It drops duplicates and -// create a primary key on the ID field. -func (p *PostgreSQL) CreateIndex() error { - log.Output(1, "Creating indexes…") - if _, err := p.pool.Exec(context.Background(), p.sql["create_index"]); err != nil { - return fmt.Errorf("error creating index with: %s\n%w", p.sql["create_index"], err) - } - return nil -} - // GetCompany returns the JSON of a company based on a CNPJ number. func (p *PostgreSQL) GetCompany(id string) (string, error) { n, err := strconv.ParseInt(id, 10, 0) @@ -151,7 +141,7 @@ func (p *PostgreSQL) GetCompany(id string) (string, error) { // disables autovacuum on PostgreSQL. func (p *PostgreSQL) PreLoad() error { if _, err := p.pool.Exec(context.Background(), p.sql["pre_load"]); err != nil { - return fmt.Errorf("error disabling autovacuum with: %s\n%w", p.sql["autovacuum"], err) + return fmt.Errorf("error during pre load: %s\n%w", p.sql["pre_load"], err) } return nil } @@ -160,7 +150,7 @@ func (p *PostgreSQL) PreLoad() error { // autovacuum on PostgreSQL. 
func (p *PostgreSQL) PostLoad() error { if _, err := p.pool.Exec(context.Background(), p.sql["post_load"]); err != nil { - return fmt.Errorf("error re-renabling autovacuum with: %s\n%w", p.sql["autovacuum"], err) + return fmt.Errorf("error during post load: %s\n%w", p.sql["post_load"], err) } return nil } diff --git a/db/postgres/create.sql b/db/postgres/create.sql index 876c865a..c034d2d8 100644 --- a/db/postgres/create.sql +++ b/db/postgres/create.sql @@ -1,4 +1,5 @@ -CREATE UNLOGGED TABLE IF NOT EXISTS {{ .CompanyTableFullName }} ( +CREATE TABLE IF NOT EXISTS {{ .CompanyTableFullName }} ( + tmp_pk SERIAL PRIMARY KEY, {{ .IDFieldName }} bigint NOT NULL, {{ .JSONFieldName }} jsonb NOT NULL ); diff --git a/db/postgres/create_index.sql b/db/postgres/post_load.sql similarity index 57% rename from db/postgres/create_index.sql rename to db/postgres/post_load.sql index 2f6a0a05..93060d2f 100644 --- a/db/postgres/create_index.sql +++ b/db/postgres/post_load.sql @@ -16,5 +16,7 @@ WHERE ctid IN ( ); DROP INDEX idx_remove_duplicates; - -ALTER TABLE cnpj ADD PRIMARY KEY (id); +ALTER TABLE {{ .CompanyTableFullName }} DROP COLUMN tmp_pk CASCADE; +CREATE UNIQUE INDEX {{ .CompanyTableName }}_pk ON {{ .CompanyTableFullName }} ({{ .IDFieldName }}); +ALTER TABLE {{ .CompanyTableFullName }} ADD PRIMARY KEY USING INDEX {{ .CompanyTableName }}_pk; +ALTER TABLE {{ .CompanyTableFullName }} SET LOGGED; diff --git a/db/postgres/pre_load.sql b/db/postgres/pre_load.sql new file mode 100644 index 00000000..41dc8f32 --- /dev/null +++ b/db/postgres/pre_load.sql @@ -0,0 +1 @@ +ALTER TABLE {{ .CompanyTableFullName }} SET UNLOGGED; diff --git a/db/postgres_test.go b/db/postgres_test.go index 9e5967f8..aee1a631 100644 --- a/db/postgres_test.go +++ b/db/postgres_test.go @@ -32,14 +32,17 @@ func TestPostgresDB(t *testing.T) { if err := pg.CreateTable(); err != nil { t.Errorf("expected no error creating the table, got %s", err) } + if err := pg.PreLoad(); err != nil { + t.Errorf("expected no error pre load, got %s", err) 
+ } if err := pg.CreateCompanies([][]any{{id, json}}); err != nil { t.Errorf("expected no error saving a company, got %s", err) } if err := pg.CreateCompanies([][]any{{id, json}}); err != nil { t.Errorf("expected no error saving a duplicated company, got %s", err) } - if err := pg.CreateIndex(); err != nil { - t.Errorf("expected no error creating index, got %s", err) + if err := pg.PostLoad(); err != nil { + t.Errorf("expected no error post load, got %s", err) } got, err := pg.GetCompany("33683111000280") if err != nil { diff --git a/transform/transform.go b/transform/transform.go index 18cfce65..495d66b5 100644 --- a/transform/transform.go +++ b/transform/transform.go @@ -18,8 +18,9 @@ const MaxParallelDBQueries = 8 const BatchSize = 8192 type database interface { + PreLoad() error CreateCompanies([][]any) error - CreateIndex() error + PostLoad() error MetaSave(string, string) error } diff --git a/transform/venues.go b/transform/venues.go index f2454fce..9f33c583 100644 --- a/transform/venues.go +++ b/transform/venues.go @@ -123,6 +123,9 @@ func (t *venuesTask) run(m int) error { if err := t.bar.RenderBlank(); err != nil { return fmt.Errorf("error rendering the progress bar: %w", err) } + if err := t.db.PreLoad(); err != nil { + return fmt.Errorf("error preparing the database: %w", err) + } t.produceRows() for i := 0; i < m; i++ { t.shutdownWaitGroup.Add(1) @@ -145,7 +148,7 @@ func (t *venuesTask) run(m int) error { case n := <-t.saved: t.bar.Add(n) if t.bar.IsFinished() { - return t.db.CreateIndex() + return t.db.PostLoad() } } }