a-sync
diff --git a/‎.chalice/config.json
+17 b/‎.chalice/config.json
+17
diff --git a/‎.chalice/deployed/dev.json
+23 b/‎.chalice/deployed/dev.json
+23
diff --git a/‎.chalice/deployments/32ed5daabd094ca1cae6fc575cef3f28-python3.9.zip
18.4 MB b/‎.chalice/deployments/32ed5daabd094ca1cae6fc575cef3f28-python3.9.zip
18.4 MB
diff --git a/‎.chalice/deployments/585fa049a970f22e444a883bcf037917-python3.9.zip
34.1 MB b/‎.chalice/deployments/585fa049a970f22e444a883bcf037917-python3.9.zip
34.1 MB
diff --git a/‎.chalice/deployments/f264a3476453952d17bf4bf46323682b-python3.9.zip
34.9 MB b/‎.chalice/deployments/f264a3476453952d17bf4bf46323682b-python3.9.zip
34.9 MB
diff --git a/‎.gitignore
+2 b/‎.gitignore
+2
diff --git a/‎app.py
+108 b/‎app.py
+108
diff --git a/‎requirements.txt
+3 b/‎requirements.txt
+3
diff --git a/‎vendor/PIL/BdfFontFile.py
+111 b/‎vendor/PIL/BdfFontFile.py
+111
@@ -0,0 +1,17 @@
+{
+  "version": "2.0",
+  "app_name": "pdf2image",
+  "stages": {
+    "dev": {
+      "api_gateway_stage": "api",
+      "lambda_memory_size": 2048,
+      "lambda_timeout": 900
+    }
+  },
+  "environment_variables": {
+    "ORIGIN_BUCKET": "s3-sample-input",
+    "DESTINATION_BUCKET": "s3-write-data-999",
+    "FMT": "png",
+    "DPI": "300"
+  }
+}
@@ -0,0 +1,23 @@
+{
+  "resources": [
+    {
+      "name": "default-role",
+      "resource_type": "iam_role",
+      "role_arn": "arn:aws:iam::581522787838:role/pdf2image-dev",
+      "role_name": "pdf2image-dev"
+    },
+    {
+      "name": "pdf_to_image",
+      "resource_type": "lambda_function",
+      "lambda_arn": "arn:aws:lambda:us-west-2:581522787838:function:pdf2image-dev-pdf_to_image"
+    },
+    {
+      "name": "pdf_to_image-s3event",
+      "resource_type": "s3_event",
+      "bucket": "s3-sample-input",
+      "lambda_arn": "arn:aws:lambda:us-west-2:581522787838:function:pdf2image-dev-pdf_to_image"
+    }
+  ],
+  "schema_version": "2.0",
+  "backend": "api"
+}
@@ -0,0 +1,2 @@
+__pycache__
+env
@@ -0,0 +1,108 @@
+import boto3
+import logging
+import os
+
+from chalice import Chalice, Response
+from io import BytesIO
+from pdf2image import convert_from_bytes
+
+app = Chalice(app_name='pdf2image')
+
+
+DPI = 300
+if 'DPI' in os.environ:
+    try:
+        DPI = int(os.environ['DPI'])
+    except Exception as e:
+        logging.debug(f"Couldn't process DPI environment variable: {str(e)}.  Using the default: DPI=300")
+else:
+    logging.info(f"No DPI environment variable set.  Using the default: DPI=300")
+
+_SUPPORTED_IMAGE_EXTENSIONS = ["ppm", "jpeg", "png", "tiff"]
+FMT = "png"
+if 'FMT' in os.environ:
+    environ_fmt = str(os.environ['FMT'])
+    if environ_fmt in _SUPPORTED_IMAGE_EXTENSIONS:
+        FMT = environ_fmt
+    else:
+        logging.debug(f"Couldn't process FMT variable.  "
+                      f"Only the following formats are supported: {','.join(_SUPPORTED_IMAGE_EXTENSIONS)}.  "
+                      f"Using the default: FMI='png'")
+else:
+    logging.info(f"No FMT environment variable set.  Using the default: FMT='png'")
+
+DESTINATION_BUCKET = None
+if 'DESTINATION_BUCKET' in os.environ:
+    DESTINATION_BUCKET = str(os.environ['DESTINATION_BUCKET'])
+    logging.info(f"Setting the destination bucket: {DESTINATION_BUCKET}. "
+                 f"Be sure to set the S3 bucket trigger on the Lambda's configuration")
+else:
+    raise Exception(f"Couldn't process the DESTINATION_BUCKET environment variable. "
+                    f"The DESTINATION_BUCKET needs to be set to a valid S3 bucket to which the user has full access.")
+
+ORIGIN_BUCKET = ''
+if 'ORIGIN_BUCKET' in os.environ:
+    ORIGIN_BUCKET = str(os.environ['ORIGIN_BUCKET'])
+    logging.info(f"Setting the origin bucket: {ORIGIN_BUCKET}. "
+                 f"Be sure to set the S3 bucket trigger on the Lambda's configuration")
+else:
+    logging.info(f"Couldn't process the ORIGIN_BUCKET environment variable. "
+                 f"Be sure to set the S3 bucket trigger on the Lambda's configuration.")
+
+_SUPPORTED_FILE_EXTENSION = '.pdf'
+
+
+@app.on_s3_event(bucket=ORIGIN_BUCKET,
+                 events=['s3:ObjectCreated:*'])
+def pdf_to_image(event):
+    """Take a pdf fom an S3 bucket and convert it to a list of pillow images (one for each page of the pdf).
+    :param event: A Lambda event (referring to an S3 event object created event).
+    :return:
+    """
+    if not event.key.endswith(_SUPPORTED_FILE_EXTENSION):
+        raise Exception(f"Only .pdf files are supported by this module.")
+
+    logging.info(f"Fetching item (bucket: '{event.bucket}', key: '{event.key}') from S3.")
+
+    # Fetch the image bytes
+    s3 = boto3.resource('s3')
+    obj = s3.Object(event.bucket, event.key)
+    infile = obj.get()['Body'].read()
+    logging.info("Successfully retrieved S3 object.")
+
+    # Set poppler path
+    poppler_path = "/var/task/lib/poppler-utils-0.26/usr/bin"
+    images = convert_from_bytes(infile,
+                                dpi=DPI,
+                                fmt=FMT,
+                                poppler_path=poppler_path)
+    logging.info("Successfully converted pdf to image.")
+
+    for page_num, image in enumerate(images):
+
+        # The directory is: <name of the pdf>-num_pages-<number of pages in the pdf>
+        directory = event.key.split('.')[0] + "-num_pages-" + str(len(images))
+
+        # Then save the image and name it: <name of the pdf>-page<page number>.FMT
+        location = directory + "/" + event.key.split('.')[0] + "-page" + str(page_num) + '.' + FMT
+
+        logging.info(f"Saving page number {str(page_num)} to S3 at location: {DESTINATION_BUCKET}, {location}.")
+
+        # Load it into the buffer and save the boytjie to S3
+        buffer = BytesIO()
+        image.save(buffer, FMT.upper())
+        buffer.seek(0)
+        s3.Object(
+            DESTINATION_BUCKET,
+            location
+        ).put(
+            Body=buffer,
+            Metadata={
+                'ORIGINAL_DOCUMENT_BUCKET': event.bucket,
+                'ORIGINAL_DOCUMENT_KEY': event.key,
+                'PAGE_NUMBER': str(page_num),
+                'PAGE_COUNT': str(len(images))
+            }
+        )
+
+    return Response(f"PDF document ({event.key}) successfully converted to a series of images.")
@@ -0,0 +1,3 @@
+chalice
+pdf2image
+boto3
@@ -0,0 +1,111 @@
+#
+# The Python Imaging Library
+# $Id$
+#
+# bitmap distribution font (bdf) file parser
+#
+# history:
+# 1996-05-16 fl   created (as bdf2pil)
+# 1997-08-25 fl   converted to FontFile driver
+# 2001-05-25 fl   removed bogus __init__ call
+# 2002-11-20 fl   robustification (from Kevin Cazabon, Dmitry Vasiliev)
+# 2003-04-22 fl   more robustification (from Graham Dumpleton)
+#
+# Copyright (c) 1997-2003 by Secret Labs AB.
+# Copyright (c) 1997-2003 by Fredrik Lundh.
+#
+# See the README file for information on usage and redistribution.
+#
+
+"""
+Parse X Bitmap Distribution Format (BDF)
+"""
+
+
+from . import FontFile, Image
+
+bdf_slant = {  # letter codes mapped to slant style names; not referenced by the code in this module
+    "R": "Roman",
+    "I": "Italic",
+    "O": "Oblique",
+    "RI": "Reverse Italic",
+    "RO": "Reverse Oblique",
+    "OT": "Other",
+}
+
+bdf_spacing = {"P": "Proportional", "M": "Monospaced", "C": "Cell"}  # letter codes mapped to spacing names
+
+
+def bdf_char(f):
+    # skip to STARTCHAR
+    while True:
+        s = f.readline()
+        if not s:
+            return None
+        if s[:9] == b"STARTCHAR":
+            break
+    id = s[9:].strip().decode("ascii")
+
+    # load symbol properties
+    props = {}
+    while True:
+        s = f.readline()
+        if not s or s[:6] == b"BITMAP":
+            break
+        i = s.find(b" ")
+        props[s[:i].decode("ascii")] = s[i + 1 : -1].decode("ascii")
+
+    # load bitmap
+    bitmap = []
+    while True:
+        s = f.readline()
+        if not s or s[:7] == b"ENDCHAR":
+            break
+        bitmap.append(s[:-1])
+    bitmap = b"".join(bitmap)
+
+    [x, y, l, d] = [int(p) for p in props["BBX"].split()]
+    [dx, dy] = [int(p) for p in props["DWIDTH"].split()]
+
+    bbox = (dx, dy), (l, -d - y, x + l, -d), (0, 0, x, y)
+
+    try:
+        im = Image.frombytes("1", (x, y), bitmap, "hex", "1")
+    except ValueError:
+        # deal with zero-width characters
+        im = Image.new("1", (x, y))
+
+    return id, int(props["ENCODING"]), bbox, im
+
+
+class BdfFontFile(FontFile.FontFile):
+    """Font file plugin for the X11 BDF format."""
+
+    def __init__(self, fp):
+        super().__init__()
+
+        s = fp.readline()
+        if s[:13] != b"STARTFONT 2.1":
+            msg = "not a valid BDF file"
+            raise SyntaxError(msg)
+
+        props = {}
+        comments = []
+
+        while True:
+            s = fp.readline()
+            if not s or s[:13] == b"ENDPROPERTIES":
+                break
+            i = s.find(b" ")
+            props[s[:i].decode("ascii")] = s[i + 1 : -1].decode("ascii")
+            if s[:i] in [b"COMMENT", b"COPYRIGHT"]:
+                if s.find(b"LogicalFontDescription") < 0:
+                    comments.append(s[i + 1 : -1].decode("ascii"))
+
+        while True:
+            c = bdf_char(fp)
+            if not c:
+                break
+            id, ch, (xy, dst, src), im = c
+            if 0 <= ch < len(self.glyph):
+                self.glyph[ch] = xy, dst, src, im