Replies: 1 comment
-
I'm a PDF & I agree it would be a nice feature offering, since it's an XSS protection for any app accepting user-uploaded PDFs.
const {
PDFDocument, PDFName, PDFArray, PDFDict,
} = require('pdf-lib');
/**
 * Removes JavaScript action payloads from a PDF and returns the re-serialized
 * document.
 *
 * Every object reachable from the document catalog and from each page node is
 * visited, and any `/JS` entry found in a dictionary is deleted. Indirect
 * references are resolved through the document context, so scripts are found
 * even when `/OpenAction`, `/AA`, `/Annots`, individual annotations, or the
 * `/Names /JavaScript` name tree are stored as indirect objects or nested
 * inside arrays.
 *
 * Only the `/JS` entry is dropped; the surrounding action dictionary is left
 * in place, and a script stored as a separate stream is merely unreferenced,
 * not deleted from the file.
 *
 * @param {Uint8Array|ArrayBuffer|string} pdfBytes - PDF data in any form
 *   accepted by `PDFDocument.load`.
 * @returns {Promise<Uint8Array>} Bytes of the sanitized PDF.
 * @throws Propagates any error raised by `PDFDocument.load` or `save`.
 */
async function sanitizePDF(pdfBytes) {
  const pdfDoc = await PDFDocument.load(pdfBytes);
  const { context, catalog } = pdfDoc;
  const jsKey = PDFName.of('JS');

  // Explicit work list instead of recursion: the object graph of a PDF can be
  // deep, and it is cyclic (/Parent, /P, /Prev, ...), so every resolved object
  // is tracked and visited at most once.
  const visited = new Set();
  const pending = [];

  const enqueue = (value) => {
    // `context.lookup` resolves a PDFRef to its target and returns any other
    // value unchanged; dangling references resolve to `undefined`.
    const target = context.lookup(value);
    if (target && typeof target === 'object' && !visited.has(target)) {
      visited.add(target);
      pending.push(target);
    }
  };

  enqueue(catalog);
  pdfDoc.getPages().forEach((page) => enqueue(page.node));

  while (pending.length > 0) {
    const node = pending.pop();

    if (node instanceof PDFDict) {
      if (node.has(jsKey)) {
        node.delete(jsKey);
      }
      // `entries()` returns a snapshot array, so deleting above is safe.
      for (const [, value] of node.entries()) {
        enqueue(value);
      }
    } else if (node instanceof PDFArray) {
      node.asArray().forEach((item) => enqueue(item));
    } else if (node.dict instanceof PDFDict) {
      // Stream objects expose their dictionary as `.dict`.
      enqueue(node.dict);
    }
  }

  return pdfDoc.save();
}
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
I noticed there is an
addjavascript
method, but is there a method to remove all javascript actions? How would I go about doing that? Link: https://pdf-lib.js.org/docs/api/classes/pdfdocument#addjavascript
I need it to avoid malicious code from users' uploaded content.
Beta Was this translation helpful? Give feedback.
All reactions