-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy pathurl_to_markdown_processor.js
executable file
·44 lines (43 loc) · 1.63 KB
/
url_to_markdown_processor.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
const formatter = require('./url_to_markdown_formatters.js');
const common_filters = require('./url_to_markdown_common_filters');
const { Readability } = require('@mozilla/readability');
const turndown = require('turndown');
const JSDOM = require('jsdom').JSDOM;
// Single converter instance, constructed with no options when this module loads
// and reused by every process_dom call.
const service = new turndown();
module.exports = {
process_dom: function (url, document, res, id = "", options) {
let inline_title = options.inline_title ?? true;
let ignore_links = options.ignore_links ?? false;
let improve_readability = options.improve_readability ?? true;
let title = document.window.document.querySelector('title');
if (title)
res.header("X-Title", encodeURIComponent(title.textContent));
if (id) {
let el = document.window.document.querySelector("#"+id);
if (el) document = new JSDOM('<!DOCTYPE html>'+ el.innerHTML);
}
let readable = null;
if (improve_readability) {
let reader = new Readability(document.window.document);
readable_obj = reader.parse();
if (readable_obj) {
readable = readable_obj.content;
}
}
if (!readable) {
readable = document.window.document.documentElement.outerHTML;
}
let replacements = [];
readable = formatter.format_codeblocks(readable, replacements);
readable = formatter.format_tables(readable, replacements);
let markdown = service.turndown(readable);
for (let i=0;i<replacements.length;i++) {
markdown = markdown.replace(replacements[i].placeholder, replacements[i].replacement);
}
let result = (url) ? common_filters.filter(url, markdown, ignore_links) : markdown;
if (inline_title && title) {
result = "# " + title.textContent + "\n" + result;
}
return result;
}
}