Skip to content

Commit f703188

Browse files
committed
feat(workers): Adds publisher and author og:meta tags to Bookmark
1 parent 7bffa02 commit f703188

12 files changed

+523
-5
lines changed

apps/workers/crawlerWorker.ts

+10
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,12 @@ import { JSDOM } from "jsdom";
1414
import { DequeuedJob, Runner } from "liteque";
1515
import metascraper from "metascraper";
1616
import metascraperAmazon from "metascraper-amazon";
17+
import metascraperAuthor from "metascraper-author";
18+
import metascraperDate from "metascraper-date";
1719
import metascraperDescription from "metascraper-description";
1820
import metascraperImage from "metascraper-image";
1921
import metascraperLogo from "metascraper-logo-favicon";
22+
import metascraperPublisher from "metascraper-publisher";
2023
import metascraperReadability from "metascraper-readability";
2124
import metascraperTitle from "metascraper-title";
2225
import metascraperTwitter from "metascraper-twitter";
@@ -61,8 +64,11 @@ import {
6164
import { BookmarkTypes } from "@hoarder/shared/types/bookmarks";
6265

6366
const metascraperParser = metascraper([
67+
metascraperDate(),
6468
metascraperAmazon(),
6569
metascraperReadability(),
70+
metascraperAuthor(),
71+
metascraperPublisher(),
6672
metascraperTitle(),
6773
metascraperDescription(),
6874
metascraperTwitter(),
@@ -677,6 +683,10 @@ async function crawlAndParseUrl(
677683
htmlContent: readableContent?.content,
678684
crawledAt: new Date(),
679685
crawlStatusCode: statusCode,
686+
author: meta.author,
687+
publisher: meta.publisher,
688+
datePublished: meta.datePublished,
689+
dateModified: meta.dateModified,
680690
})
681691
.where(eq(bookmarkLinks.id, bookmarkId));
682692

apps/workers/package.json

+4-1
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,13 @@
2020
"liteque": "^0.3.2",
2121
"metascraper": "^5.45.24",
2222
"metascraper-amazon": "^5.45.22",
23+
"metascraper-author": "5.46.5",
24+
"metascraper-date": "^5.46.5",
2325
"metascraper-description": "^5.45.22",
2426
"metascraper-image": "^5.45.22",
2527
"metascraper-logo": "^5.45.22",
2628
"metascraper-logo-favicon": "^5.45.22",
29+
"metascraper-publisher": "5.45.22",
2730
"metascraper-readability": "^5.45.22",
2831
"metascraper-title": "^5.45.22",
2932
"metascraper-twitter": "^5.45.6",
@@ -66,4 +69,4 @@
6669
]
6770
},
6871
"prettier": "@hoarder/prettier-config"
69-
}
72+
}

apps/workers/videoWorker.ts

+1
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ function prepareYtDlpArguments(url: string, assetPath: string) {
7171
`best[filesize<${serverConfig.crawler.maxVideoDownloadSize}M]`,
7272
);
7373
}
74+
7475
ytDlpArguments.push("-o", assetPath);
7576
ytDlpArguments.push("--no-playlist");
7677
return ytDlpArguments;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
ALTER TABLE `bookmarkLinks` ADD `author` text;--> statement-breakpoint
2+
ALTER TABLE `bookmarkLinks` ADD `publisher` text;--> statement-breakpoint
3+
ALTER TABLE `bookmarkLinks` ADD `datePublished` text;--> statement-breakpoint
4+
ALTER TABLE `bookmarkLinks` ADD `dateModified` text;--> statement-breakpoint
5+
ALTER TABLE `bookmarks` ADD `modifiedAt` integer;

packages/db/drizzle/meta/0041_snapshot.json → packages/db/drizzle/meta/0042_snapshot.json

+29-1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
{
22
"version": "6",
33
"dialect": "sqlite",
4-
"id": "4219851a-7eed-421f-b52e-5d9cd045ff85",
4+
"id": "103f5256-2aa0-4ff7-98f1-f3c897f7cc33",
55
"prevId": "943b28e9-4b89-48c9-9e00-65314a09bd92",
66
"tables": {
77
"account": {
@@ -408,6 +408,34 @@
408408
"notNull": false,
409409
"autoincrement": false
410410
},
411+
"author": {
412+
"name": "author",
413+
"type": "text",
414+
"primaryKey": false,
415+
"notNull": false,
416+
"autoincrement": false
417+
},
418+
"publisher": {
419+
"name": "publisher",
420+
"type": "text",
421+
"primaryKey": false,
422+
"notNull": false,
423+
"autoincrement": false
424+
},
425+
"datePublished": {
426+
"name": "datePublished",
427+
"type": "text",
428+
"primaryKey": false,
429+
"notNull": false,
430+
"autoincrement": false
431+
},
432+
"dateModified": {
433+
"name": "dateModified",
434+
"type": "text",
435+
"primaryKey": false,
436+
"notNull": false,
437+
"autoincrement": false
438+
},
411439
"imageUrl": {
412440
"name": "imageUrl",
413441
"type": "text",

packages/db/drizzle/meta/_journal.json

+7
Original file line number | Diff line number | Diff line change
@@ -295,6 +295,13 @@
295295
"when": 1738424745186,
296296
"tag": "0041_fat_bloodstrike",
297297
"breakpoints": true
298+
},
299+
{
300+
"idx": 42,
301+
"version": "6",
302+
"when": 1741820150134,
303+
"tag": "0042_flawless_whistler",
304+
"breakpoints": true
298305
}
299306
]
300307
}

packages/db/schema.ts

+4
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,10 @@ export const bookmarkLinks = sqliteTable(
148148
// Crawled info
149149
title: text("title"),
150150
description: text("description"),
151+
author: text("author"),
152+
publisher: text("publisher"),
153+
datePublished: text("datePublished"),
154+
dateModified: text("dateModified"),
151155
imageUrl: text("imageUrl"),
152156
favicon: text("favicon"),
153157
content: text("content"),

packages/open-api/hoarder-openapi-spec.json

+16
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,22 @@
168168
"crawledAt": {
169169
"type": "string",
170170
"nullable": true
171+
},
172+
"author": {
173+
"type": "string",
174+
"nullable": true
175+
},
176+
"publisher": {
177+
"type": "string",
178+
"nullable": true
179+
},
180+
"datePublished": {
181+
"type": "string",
182+
"nullable": true
183+
},
184+
"dateModified": {
185+
"type": "string",
186+
"nullable": true
171187
}
172188
},
173189
"required": [

packages/shared/search.ts

+4
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,10 @@ export const zBookmarkIdxSchema = z.object({
1818
note: z.string().nullish(),
1919
summary: z.string().nullish(),
2020
tags: z.array(z.string()).default([]),
21+
publisher: z.string().nullish(),
22+
author: z.string().nullish(),
23+
datePublished: z.string().nullish(),
24+
dateModified: z.string().nullish(),
2125
});
2226

2327
export type ZBookmarkIdx = z.infer<typeof zBookmarkIdxSchema>;

packages/shared/types/bookmarks.ts

+4
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,10 @@ export const zBookmarkedLinkSchema = z.object({
4646
favicon: z.string().nullish(),
4747
htmlContent: z.string().nullish(),
4848
crawledAt: z.date().nullish(),
49+
author: z.string().nullish(),
50+
publisher: z.string().nullish(),
51+
datePublished: z.string().nullish(),
52+
dateModified: z.string().nullish(),
4953
});
5054
export type ZBookmarkedLink = z.infer<typeof zBookmarkedLinkSchema>;
5155

packages/trpc/routers/bookmarks.ts

+2
Original file line number | Diff line number | Diff line change
@@ -1083,6 +1083,8 @@ export const bookmarksAppRouter = router({
10831083
Title: ${bookmark.title ?? ""}
10841084
Description: ${bookmark.description ?? ""}
10851085
Content: ${bookmark.content ?? ""}
1086+
Publisher: ${bookmark.publisher ?? ""}
1087+
Author: ${bookmark.author ?? ""}
10861088
`;
10871089

10881090
const prompts = await ctx.db.query.customPrompts.findMany({

0 commit comments

Comments
 (0)