Skip to content

Commit

Permalink
Fix bug with subdomain parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
jmduke committed Sep 2, 2024
1 parent 95b1bfb commit f00c41c
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 3 deletions.
4 changes: 2 additions & 2 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ install:
# Run the Python bootstrap script at scripts/bootstrap.py.
bootstrap:
python3 scripts/bootstrap.py

test:
PINO_LEVEL=silent DISABLE_DATABASE=true DISABLE_PUPPETEER=true bun test
# Run `bun test` with PINO_LEVEL, DISABLE_DATABASE and DISABLE_PUPPETEER set
# as below; any arguments passed to this recipe are forwarded to `bun test`.
test *args:
PINO_LEVEL=silent DISABLE_DATABASE=true DISABLE_PUPPETEER=true bun test {{args}}
20 changes: 20 additions & 0 deletions lib/data.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@ import fetch from "@/lib/data";
import { describe, expect, test } from "vitest";
import { DetectedTechnology } from "./parsers/types";

// Negative fixtures: maps a domain to detected-technology records that must
// NOT appear in the fetch results for that domain. The single entry guards
// against reporting "op3.dev" as a subdomain of "changelog.com".
const DOMAIN_TO_UNEXPECTED_DATA: Record<string, DetectedTechnology[]> = {
"changelog.com": [
{
identifier: "subdomain",
metadata: {
value: "op3.dev",
},
},
],
};

const DOMAIN_TO_EXPECTED_DATA: Record<string, DetectedTechnology[]> = {
"formkeep.com": [
{
Expand Down Expand Up @@ -69,6 +80,15 @@ describe("fetching", () => {
});
});

// Register one negative test per (domain, record) pair: the record must be
// absent from the technologies detected for that domain.
for (const [domain, unexpectedData] of Object.entries(DOMAIN_TO_UNEXPECTED_DATA)) {
  for (const unwanted of unexpectedData) {
    test(`does not fetch ${unwanted.identifier} for ${domain}`, async () => {
      const result = await fetch(domain);
      expect(result.detected_technologies).not.toContainEqual(unwanted);
    });
  }
}

test("deduping identical records", async () => {
const { detected_technologies } = await fetch("zed.dev");
expect(detected_technologies.filter((tech) => tech.identifier === "twitter")).toHaveLength(1);
Expand Down
2 changes: 1 addition & 1 deletion lib/parsers/html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ const SUBDOMAIN_RULE = (html: string, domain: string) => {
}))
.filter(
(v) =>
v.value && v.value.startsWith("http") && v.value.includes(`.${domain}`)
v.value && v.value.startsWith("http") && new URL(v.value).hostname.includes(domain)
)
.map((v) => ({
value: new URL(v.value || "").hostname,
Expand Down

0 comments on commit f00c41c

Please sign in to comment.