Skip to content

Commit

Permalink
Refactor into affiliation abstraction
Browse files (browse the repository at this point in the history)
  • Loading branch information
jmduke committed Sep 9, 2024
1 parent dcfbd0a commit 4fa2080
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 36 deletions.
36 changes: 0 additions & 36 deletions app/scripts/populate_tranco.ts

This file was deleted.

Binary file modified bun.lockb
Binary file not shown.
15 changes: 15 additions & 0 deletions lib/affiliations/loaders/tranco.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { Affiliation } from "../types";

const TRANCO_URL = "https://tranco-list.eu/download/KJ94W/1000000";

/**
 * Downloads the list served at `TRANCO_URL` and converts every `rank,domain`
 * row into an `Affiliation` whose identifier is the domain.
 *
 * @returns One affiliation per well-formed row, in file order, with the rank
 *   kept as a string under `metadata.rank`.
 * @throws Error when the server answers with a non-2xx status. Network
 *   failures raised by `fetch` propagate unchanged.
 */
export default async function load(): Promise<Affiliation[]> {
  const response = await fetch(TRANCO_URL);
  if (!response.ok) {
    // Without this check an error page would be parsed as if it were the list.
    throw new Error(`Failed to download Tranco list: HTTP ${response.status}`);
  }
  const data = await response.text();

  const affiliations: Affiliation[] = [];
  // Accept both LF and CRLF line endings so a trailing "\r" never leaks into
  // the domain.
  for (const line of data.split(/\r?\n/)) {
    const [rank, domain] = line.split(",").map((field) => field.trim());
    // Skip blank lines and rows that are missing either column.
    if (!rank || !domain) {
      continue;
    }
    affiliations.push({ identifier: domain, metadata: { rank } });
  }
  return affiliations;
}
34 changes: 34 additions & 0 deletions lib/affiliations/loaders/ycombinator.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { XMLParser } from "fast-xml-parser";
import { Affiliation } from "../types";

const SITEMAP_URL = "https://www.ycombinator.com/companies/sitemap.xml";

// Captures the href of a tag whose class attribute contains `mb-2` followed
// by `whitespace-nowrap`. Compiled once instead of on every loop iteration.
const COMPANY_LINK_PATTERN = /href="([^"]*)"[^>]*class="[^"]*mb-2[^"]*whitespace-nowrap[^"]*"/;

/**
 * Reads the sitemap at `SITEMAP_URL`, fetches each selected page one at a
 * time, and records the first href matched by `COMPANY_LINK_PATTERN` on that
 * page as an affiliation.
 *
 * Pages that fail to download, answer with a non-2xx status, or contain no
 * matching link are skipped; failures are logged and do not abort the run.
 *
 * @returns The affiliations collected from the pages that yielded a link.
 * @throws Error when the sitemap itself answers with a non-2xx status.
 */
export default async function load(): Promise<Affiliation[]> {
  const response = await fetch(SITEMAP_URL);
  if (!response.ok) {
    throw new Error(`Failed to download sitemap: HTTP ${response.status}`);
  }
  const data = await response.text();
  const parser = new XMLParser();
  const result = parser.parse(data);

  // The parser yields a bare object (not an array) when there is exactly one
  // <url> element, and nothing at all when the element is absent.
  const rawEntries: unknown = result?.urlset?.url ?? [];
  const entries: unknown[] = Array.isArray(rawEntries) ? rawEntries : [rawEntries];

  // NOTE(review): the `.com` suffix / `/industry/` filter is carried over
  // unchanged from the previous implementation; confirm it selects the
  // intended set of pages.
  const relevantURLs = entries.filter((entry): entry is { loc: string } => {
    if (typeof entry !== "object" || entry === null) {
      return false;
    }
    const loc = (entry as { loc?: unknown }).loc;
    return typeof loc === "string" && loc.endsWith(".com") && !loc.includes("/industry/");
  });

  const affiliations: Affiliation[] = [];
  for (const url of relevantURLs) {
    try {
      const companyResponse = await fetch(url.loc);
      if (!companyResponse.ok) {
        // Do not scrape error pages; report and move on to the next URL.
        console.error(`Error fetching ${url.loc}:`, new Error(`HTTP ${companyResponse.status}`));
        continue;
      }
      const companyHtml = await companyResponse.text();
      const hrefMatch = companyHtml.match(COMPANY_LINK_PATTERN);

      if (hrefMatch && hrefMatch[1]) {
        affiliations.push({
          identifier: hrefMatch[1],
          metadata: {
            source: 'ycombinator',
            originalUrl: url.loc
          }
        });
      }
    } catch (error) {
      // Best-effort: one failing page must not discard the rest.
      console.error(`Error fetching ${url.loc}:`, error);
    }
  }

  return affiliations;
}
16 changes: 16 additions & 0 deletions lib/affiliations/registry.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import tranco from "./loaders/tranco";
import { Affiliation } from "./types";

/**
 * Shape of one entry in the affiliation registry: two descriptive strings
 * plus the function that produces the affiliations.
 */
interface RegisteredAffiliation {
  /** String identifier of the entry. */
  identifier: string;
  /** Name of the entry (presumably the human-readable label — confirm with callers). */
  name: string;
  /** Asynchronously produces the list of affiliations for this entry. */
  load: () => Promise<Affiliation[]>;
}

/**
 * Lookup table of registered affiliation sources, indexed by string key.
 * The single entry defined here is stored under the same string as its
 * `identifier`.
 */
export const REGISTRY: Record<string, RegisteredAffiliation> = {
  tranco: { identifier: "tranco", name: "Tranco", load: tranco },
};
4 changes: 4 additions & 0 deletions lib/affiliations/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/**
 * A single affiliation record: a string identifier together with a bag of
 * string-valued metadata.
 */
export type Affiliation = {
  /** Value that identifies this affiliation. */
  identifier: string;
  /** Arbitrary string key/value pairs attached to the affiliation. */
  metadata: { [key: string]: string };
};
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"@radix-ui/react-dialog": "^1.1.1",
"@sentry/nextjs": "^8",
"@vercel/analytics": "^1.3.1",
"fast-xml-parser": "^4.5.0",
"fetch-h2": "^3.0.2",
"kysely": "^0.27.4",
"kysely-neon": "^1.3.0",
Expand Down

0 comments on commit 4fa2080

Please sign in to comment.