Skip to content

Commit e9ae8db

Browse files
committed
Gita Scraper
0 parents  commit e9ae8db

File tree

6 files changed

+4377
-0
lines changed

6 files changed

+4377
-0
lines changed

.DS_Store

6 KB
Binary file not shown.

.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
node_modules

data/bg.json

+3,944
Large diffs are not rendered by default.

package.json

+9
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
{
2+
"name": "gita-scraper",
3+
"version": "1.0.0",
4+
"main": "index.js",
5+
"license": "MIT",
6+
"dependencies": {
7+
"puppeteer": "^16.2.0"
8+
}
9+
}

scripts/index.js

+73
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,73 @@
1+
const puppeteer = require("puppeteer");
2+
const fs = require("fs");
3+
4+
// Accumulates every scraped verse record ({ chapter, verse, sanskrit,
// translation }); main() appends to it and serializes the whole array to
// data/bg.json after each successful scrape.
const bg = [];
5+
6+
/**
 * Scrapes Bhagavad-gita verses from vedabase.io and checkpoints them to
 * data/bg.json.
 *
 * For each chapter it walks verse numbers upward from 1. When a single-verse
 * page is missing either text block, it probes combined-verse pages
 * (`<verse>-<verse + k>`) before concluding that the chapter has ended.
 * Every record is appended to the module-level `bg` array and the whole array
 * is rewritten to disk immediately, so an interrupted run keeps what it has
 * collected so far.
 *
 * @returns {Promise<void>} Resolves once all chapters are processed and the
 *   browser has been closed.
 * @throws Rejects with whatever error navigation, page evaluation or the file
 *   write raised; the browser is closed before the rejection propagates.
 */
const main = async () => {
  // How many verses past the current one a combined-verse page may span.
  const MAX_EXTRA_VERSES = 4;

  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();

    // Loads one verse page; returns its texts, or null when either is absent.
    const scrapeVerse = async (chapter, slug) => {
      await page.goto(`https://vedabase.io/en/library/bg/${chapter}/${slug}/`);
      // Read both fields in one round-trip to the page.
      const { sanskrit, translation } = await page.evaluate(() => ({
        sanskrit: document.querySelector(".r-verse-text")?.innerText,
        translation: document.querySelector(".r-translation")?.innerText,
      }));
      return sanskrit && translation ? { sanskrit, translation } : null;
    };

    // Appends a record and rewrites the output file as a checkpoint.
    const saveVerse = (chapter, verse, texts) => {
      bg.push({ chapter, verse, ...texts });
      fs.writeFileSync("data/bg.json", JSON.stringify(bg));
    };

    // NOTE(review): the range starts at chapter 12 while `bg` starts empty, so
    // the written file only ever holds chapters 12-18 — confirm this is
    // intended and not a leftover from resuming an earlier run.
    for (let chapter = 12; chapter <= 18; chapter++) {
      let verse = 1;

      // `verse` becomes null once neither a single nor a combined page exists.
      while (verse !== null) {
        const single = await scrapeVerse(chapter, verse);
        if (single) {
          console.log("Chapter ", chapter, " ,verse ", verse);
          saveVerse(chapter, verse, single);
          verse++;
          continue;
        }

        let nextVerse = null;
        for (let extra = 1; extra <= MAX_EXTRA_VERSES; extra++) {
          const lastVerse = verse + extra;
          const range = `${verse}-${lastVerse}`;
          const combined = await scrapeVerse(chapter, range);
          if (combined) {
            console.log("Chapter ", chapter, " ,verse ", verse, "-", lastVerse);
            saveVerse(chapter, range, combined);
            nextVerse = lastVerse + 1;
            // Stop probing: longer ranges from this start cannot add anything
            // and would only cost extra page loads.
            break;
          }
        }
        verse = nextVerse;
      }
    }
  } finally {
    // Always release the browser process, even when a page load fails.
    await browser.close();
  }
};
72+
73+
// Run the scraper. On failure, log the error and mark the process as failed
// so shells and CI do not treat an aborted scrape as a success.
main().catch((e) => {
  console.error(e);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)