sublime-security · keaton-sublime · Dec 5, 2025 · Dec 5, 2025 · rw-access · Dec 16, 2025
diff --git a/build/configs/scanners.yaml b/build/configs/scanners.yaml
@@ -382,6 +382,14 @@ scanners:
         limit: 2000
         pdf_to_png: True
         no_object_extraction: True
+  'ScanPdfObjHash':
+    - positive:
+        flavors:
+          - 'application/pdf'
+          - 'pdf_file'
+      priority: 5
+      options:
+        scanner_timeout: 10 # in seconds
   'ScanOnenote':
     - positive:
         flavors:

diff --git a/build/python/backend/requirements.txt b/build/python/backend/requirements.txt
@@ -30,6 +30,7 @@ olefile==0.46
 oletools==0.60.1
 opencv-python==4.8.1.78
 opencv-contrib-python==4.8.1.78
+pdf-object-hashing @ git+https://github.com/0xkyle/pdf_object_hashing.git
 Pillow>=11.2.1
 pi-heif>=0.16.0
 idna==3.10
@@ -54,4 +55,4 @@ signify==0.3.0
 ssdeep==3.4
 tldextract==5.1.3
 tnefparse==1.4.0
-xmltodict==0.12.0
+xmltodict==0.12.0
diff --git a/src/python/strelka/scanners/scan_pdf_obj_hash.py b/src/python/strelka/scanners/scan_pdf_obj_hash.py
@@ -0,0 +1,30 @@
+from strelka import strelka
+from hashlib import md5
+from pdf_object_hashing import pdf_object as po
+
+class ScanPdfObjHash(strelka.Scanner):
+    """Fingerprints a PDF by MD5-hashing the ordered types of its in-use objects."""
+
+    def scan(self, data, file, options, expire_at):
+        """Sets event["hash_string"] to the "|"-terminated object types in file order
+        and event["object_hash"] to its MD5 hex digest. Both are False when no
+        objects are found; object_hash is "error" when parsing fails."""
+        try:
+            pdf_object = po(fdata=data)
+            if not pdf_object:
+                return
+            pdf_object.check_pdf_header()
+            pdf_object.trailer_process()
+            pdf_object.start_object_parsing()
+            pdf_object.pull_objects_xref_aware()
+            ordered = pdf_object.get_objects_by_file_order(in_use_only=True)
+            obj_hash_str = "".join(item["object_type"] + "|" for item in ordered or ())
+        except strelka.ScannerTimeout:
+            raise  # never swallow the framework's timeout signal
+        except Exception:  # malformed PDFs are expected input: record and stop
+            self.event["object_hash"] = "error"
+            return
+        digest = md5(obj_hash_str.encode()).hexdigest() if obj_hash_str else False
+        self.event["object_hash"] = digest
+        self.event["hash_string"] = obj_hash_str or False
+