aws-solutions
diff --git a/‎IMPLEMENTATION_GUIDE.md
+446-5 b/‎IMPLEMENTATION_GUIDE.md
+446-5
diff --git a/‎README.md
+9-5 b/‎README.md
+9-5
diff --git a/‎deployment/build-s3-dist.sh
+32-8 b/‎deployment/build-s3-dist.sh
+32-8
diff --git a/‎deployment/media-insights-stack.yaml
+11-8 b/‎deployment/media-insights-stack.yaml
+11-8
diff --git a/‎source/consumers/elastic/lambda_handler.py
+35-32 b/‎source/consumers/elastic/lambda_handler.py
+35-32
diff --git a/‎source/dataplaneapi/.chalice/config.json
+3-1 b/‎source/dataplaneapi/.chalice/config.json
+3-1
diff --git a/‎source/operators/mediaconvert/start_media_convert.py
-36 b/‎source/operators/mediaconvert/start_media_convert.py
-36
@@ -8,8 +8,11 @@ MIE is a _serverless_ framework to accelerate the development of applications th
 2. Execute workflows and store the resulting media and analysis for later use.
 3. Query analysis extracted from media.
 4. Interactively explore some of the capabilities of MIE using the included content and analysis and search web application.
-5. Extend MIE for new applications by adding custom operators and custom data stores.
-   
+5. Extend MIE for new applications by adding custom operators and custom data stores. 
+
+# Limits
+
+This preview version of MIE supports workflows on short videos of up to 4 minutes in duration.
 
 # Architecture Overview
 
@@ -50,8 +53,9 @@ Deploy the demo architecture and application in your AWS account and start explo
 
 Region| Launch
 ------|-----
-US East (N. Virginia) | [![Launch in us-east-1](doc/images/launch-stack.png)](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/new?stackName=mie&templateURL=https://rodeolabz-us-east-1.s3.amazonaws.com/media-insights-solution/v0.1.3/cf/media-insights-stack.template)
-US West (Oregon) | [![Launch in us-west-2](doc/images/launch-stack.png)](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/new?stackName=mie&templateURL=https://rodeolabz-us-west-2.s3.amazonaws.com/media-insights-solution/v0.1.3/cf/media-insights-stack.template)
+US East (N. Virginia) | [![Launch in us-east-1](doc/images/launch-stack.png)](https://console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/new?stackName=mie&templateURL=https://rodeolabz-us-east-1.s3.amazonaws.com/media-insights-solution/v0.1.4/cf/media-insights-stack.template)
+US West (Oregon) | [![Launch in us-west-2](doc/images/launch-stack.png)](https://console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/new?stackName=mie&templateURL=https://rodeolabz-us-west-2.s3.amazonaws.com/media-insights-solution/v0.1.4/cf/media-insights-stack.template)
+
 
 The default settings for the template are configured to deploy the sample web application and all the back-end components it requires.  In addition, you must set the required parameter below.
 
@@ -90,7 +94,7 @@ After the stack successfully deploys, you can find important interface resources
 
 **MediaInsightsEnginePython37Layer** is a lambda layer required to build new operator lambdas
 
-**WebAppCloudfrontUrl** is the Url for the sample Media Insights web application
+**MediaInsightsWebAppUrl** is the URL for the sample Media Insights web application
 
 **WorkflowApiEndpoint** is the endpoint for accessing the Workflow APIs to create, update, delete and execute MIE workflows.
 
 
@@ -419,12 +419,36 @@ fi
 
 popd
 
-zip -g dist/start_media_convert.zip start_media_convert.py awsmie.py
-zip -g dist/get_media_convert.zip get_media_convert.py awsmie.py
+zip -g dist/start_media_convert.zip start_media_convert.py
+zip -g dist/get_media_convert.zip get_media_convert.py
 
 cp "./dist/start_media_convert.zip" "$dist_dir/start_media_convert.zip"
 cp "./dist/get_media_convert.zip" "$dist_dir/get_media_convert.zip"
 
+
+echo "------------------------------------------------------------------------------"
+echo "Thumbnail  Operations"
+echo "------------------------------------------------------------------------------"
+
+echo "Building Thumbnail function"
+cd "$source_dir/operators/thumbnail" || exit
+
+# Start from an empty dist directory so a stale package is never shipped.
+[ -e dist ] && rm -r dist
+mkdir -p dist
+
+# A zip archive is a regular file: test with -f (-d matches only directories).
+if ! [ -f ./dist/start_thumbnail.zip ]; then
+  zip -r9 ./dist/start_thumbnail.zip .
+else
+  echo "Package already present"
+fi
+
+# No popd here: this section enters the directory with cd (not pushd), and the
+# zip -g below must still run from the operator directory.
+zip -g dist/start_thumbnail.zip start_thumbnail.py
+cp "./dist/start_thumbnail.zip" "$dist_dir/start_thumbnail.zip"
+
 echo "------------------------------------------------------------------------------"
 echo "Transcribe  Operations"
 echo "------------------------------------------------------------------------------"
@@ -483,8 +507,8 @@ fi
 
 popd
 
-zip -g dist/start_transcribe.zip start_transcribe.py awsmie.py
-zip -g dist/get_transcribe.zip get_transcribe.py awsmie.py
+zip -g dist/start_transcribe.zip start_transcribe.py
+zip -g dist/get_transcribe.zip get_transcribe.py
 
 cp "./dist/start_transcribe.zip" "$dist_dir/start_transcribe.zip"
 cp "./dist/get_transcribe.zip" "$dist_dir/get_transcribe.zip"
@@ -540,7 +564,7 @@ fi
 
 popd
 
-zip -g dist/get_captions.zip get_captions.py awsmie.py
+zip -g dist/get_captions.zip get_captions.py
 
 cp "./dist/get_captions.zip" "$dist_dir/get_captions.zip"
 
@@ -595,7 +619,7 @@ fi
 
 popd
 
-zip -g dist/start_translate.zip start_translate.py awsmie.py
+zip -g dist/start_translate.zip start_translate.py
 
 cp "./dist/start_translate.zip" "$dist_dir/start_translate.zip"
 
@@ -658,8 +682,8 @@ fi
 
 popd
 
-zip -g dist/start_polly.zip start_polly.py awsmie.py
-zip -g dist/get_polly.zip get_polly.py awsmie.py
+zip -g dist/start_polly.zip start_polly.py
+zip -g dist/get_polly.zip get_polly.py
 
 cp "./dist/start_polly.zip" "$dist_dir/start_polly.zip"
 cp "./dist/get_polly.zip" "$dist_dir/get_polly.zip"
 
@@ -697,7 +697,7 @@ Resources:
       UserPoolId: !Ref MieUserPool
       ExplicitAuthFlows: ['ADMIN_NO_SRP_AUTH']
 
-  MieAppClient:
+  MieWebAppClient:
     Type: AWS::Cognito::UserPoolClient
     Properties:
       UserPoolId: !Ref MieUserPool
@@ -707,7 +707,7 @@ Resources:
     Properties:
       AllowUnauthenticatedIdentities: False
       CognitoIdentityProviders:
-        - ClientId: !Ref MieAppClient
+        - ClientId: !Ref MieWebAppClient
           ProviderName: !GetAtt MieUserPool.ProviderName
 
   # More hacky cfn for getting the role mapping
@@ -723,7 +723,7 @@ Resources:
           - - 'Fn::GetAtt':
                 - MieUserPool
                 - ProviderName
-            - Ref: MieAppClient
+            - Ref: MieWebAppClient
 
   CognitStandardAuthDefaultRole:
     Type: "AWS::IAM::Role"
@@ -1395,7 +1395,7 @@ Resources:
         UserPoolId: !Ref MieUserPool
         IdentityPoolId: !Ref MieIdentityPool
         AwsRegion: !Ref "AWS::Region"
-        PoolClientId: !Ref MieAppClient
+        PoolClientId: !Ref MieWebAppClient
 
 #  TranscriberWebApp:
 #    Condition: DeployTranscriberApp
@@ -1445,10 +1445,10 @@ Outputs:
     Value: !GetAtt MediaInsightsWorkflowApi.Outputs.EndpointURL
     Export:
       Name: !Join [":", [!Ref "AWS::StackName", WorkflowApiEndpoint]]
-  WebAppCloudfrontUrl:
+  MediaInsightsWebAppUrl:
     Condition: DeployDemoSiteCondition
-    Description: Url of the MIE Webapp
-    Value: !GetAtt MediaInsightsWebApp.Outputs.CloudfrontUrl
+    Description: Url of the Media Insights Engine sample web application
+    Value: !Join ["", ["https://", !GetAtt MediaInsightsWebApp.Outputs.CloudfrontUrl]]
   ElasticsearchEndpoint:
     Condition: DeployAnalyticsPipelineCondition
     Description: Endpoint for elasticsearch cluster
@@ -1466,8 +1466,11 @@ Outputs:
     Description: ID of the MIE Cognito User Pool
     Value: !Ref MieUserPool
   AdminClientId:
-    Description: ID of the Admin Cognito Client
+    Description: ID of the Admin Cognito Client. This can be used to authenticate command-line apps using boto3.
     Value: !Ref MieAdminClient
+  WebAppClientId:
+    Description: ID of the Webapp Cognito Client. This can be used to authenticate web apps using Amplify.
+    Value: !Ref MieWebAppClient
   AdminUsername:
     Description: Username of the default MIE admin
     Value: !Ref AdminEmail
@@ -324,74 +324,76 @@ def process_face_detection(asset, workflow, results):
                 extracted_items.append(item)
     bulk_index(es, asset, "face_detection", extracted_items)
 
-def process_logo_detection(asset, workflow, results):
-    # This function puts logo detection data in Elasticsearch.
-    # The logo detection raw data was in inconsistent with Confidence and BoundingBox fields in Rekognition.
-    # So, those fields are modified in this function, accordingly.
+def process_generic_data(asset, workflow, results):
+    # This function puts generic data in Elasticsearch.
     metadata = json.loads(results)
     es = connect_es(es_endpoint)
     extracted_items = []
     # We can tell if json results are paged by checking to see if the json results are an instance of the list type.
     if isinstance(metadata, list):
         # handle paged results
         for page in metadata:
-            if "Logos" in page:
-                for item in page["Logos"]:
+            if "Labels" in page:
+                for item in page["Labels"]:
                     try:
-                        item["Operator"] = "logo_detection"
+                        item["Operator"] = "generic_data_lookup"
                         item["Workflow"] = workflow
-                        if "Logo" in item:
-                            # Flatten the inner Logo array
-                            item["Confidence"] = float(item["Logo"]["Confidence"])*100
-                            item["Name"] = item["Logo"]["Name"]
+                        if "Label" in item:
+                            # Flatten the inner Label array
+                            item["Confidence"] = float(item["Label"]["Confidence"])*100
+                            item["Name"] = item["Label"]["Name"]
                             item["Instances"] = ''
-                            if 'Instances' in item["Logo"]:
-                                for box in item["Logo"]["Instances"]:
+                            if 'Instances' in item["Label"]:
+                                for box in item["Label"]["Instances"]:
                                     box["BoundingBox"]["Height"] = float(box["BoundingBox"]["Height"]) / 720
                                     box["BoundingBox"]["Top"] = float(box["BoundingBox"]["Top"]) / 720
                                     box["BoundingBox"]["Left"] = float(box["BoundingBox"]["Left"]) / 1280
                                     box["BoundingBox"]["Width"] = float(box["BoundingBox"]["Width"]) / 1280
                                     box["Confidence"] = float(box["Confidence"])*100
-                                item["Instances"] = item["Logo"]["Instances"]
+
+                                item["Instances"] = item["Label"]["Instances"]
                             item["Parents"] = ''
-                            if 'Parents' in item["Logo"]:
-                                item["Parents"] = item["Logo"]["Parents"]
+                            if 'Parents' in item["Label"]:
+                                item["Parents"] = item["Label"]["Parents"]
                             # Delete the flattened array
-                            del item["Logo"]
+                            del item["Label"]
                         extracted_items.append(item)
                     except KeyError as e:
                         print("KeyError: " + str(e))
                         print("Item: " + json.dumps(item))
     else:
         # these results are not paged
-        if "Logos" in metadata:
-            for item in metadata["Logos"]:
+
+        if "Labels" in metadata:
+            for item in metadata["Labels"]:
                 try:
-                    item["Operator"] = "logo_detection"
+                    item["Operator"] = "generic_data_lookup"
                     item["Workflow"] = workflow
-                    if "Logo" in item:
-                        # Flatten the inner Logo array
-                        item["Confidence"] = float(item["Logo"]["Confidence"])*100
-                        item["Name"] = item["Logo"]["Name"]
+                    if "Label" in item:
+                        # Flatten the inner Label array
+                        item["Confidence"] = float(item["Label"]["Confidence"])*100
+                        item["Name"] = item["Label"]["Name"]
                         item["Instances"] = ''
-                        if 'Instances' in item["Logo"]:
-                            for box in item["Logo"]["Instances"]:
+                        if 'Instances' in item["Label"]:
+                            for box in item["Label"]["Instances"]:
                                 box["BoundingBox"]["Height"] = float(box["BoundingBox"]["Height"]) / 720
                                 box["BoundingBox"]["Top"] = float(box["BoundingBox"]["Top"]) / 720
                                 box["BoundingBox"]["Left"] = float(box["BoundingBox"]["Left"]) / 1280
                                 box["BoundingBox"]["Width"] = float(box["BoundingBox"]["Width"]) / 1280
                                 box["Confidence"] = float(box["Confidence"])*100
-                            item["Instances"] = item["Logo"]["Instances"]
+                            item["Instances"] = item["Label"]["Instances"]
                         item["Parents"] = ''
-                        if 'Parents' in item["Logo"]:
-                            item["Parents"] = item["Logo"]["Parents"]
+                        if 'Parents' in item["Label"]:
+                            item["Parents"] = item["Label"]["Parents"]
                         # Delete the flattened array
-                        del item["Logo"]
+                        del item["Label"]
                     extracted_items.append(item)
                 except KeyError as e:
                     print("KeyError: " + str(e))
                     print("Item: " + json.dumps(item))
-    bulk_index(es, asset, "logos", extracted_items)
+
+    bulk_index(es, asset, "labels", extracted_items)
+
 
 def process_label_detection(asset, workflow, results):
     # Rekognition label detection puts labels on an inner array in its JSON result, but for ease of search in Elasticsearch we need those results as a top level json array. So this function does that.
@@ -674,7 +676,8 @@ def lambda_handler(event, context):
                         if operator == "translate":
                             process_translate(asset_id, workflow, metadata["Results"])
                         if operator == "genericdatalookup":
-                            process_logo_detection(asset_id, workflow, metadata["Results"])
+                            process_generic_data(asset_id, workflow, metadata["Results"])
+
                         if operator == "labeldetection":
                             process_label_detection(asset_id, workflow, metadata["Results"])
                         if operator == "celebrityrecognition":
 
@@ -10,7 +10,9 @@
     "dev": {
       "api_gateway_stage": "api",
       "autogen_policy": false,
-      "iam_policy_file": "policy-dev.json"
+      "iam_policy_file": "policy-dev.json",
+      "lambda_timeout": 900,
+      "lambda_memory_size": 2048
     }
   }
 }
@@ -34,7 +34,6 @@ def lambda_handler(event, context):
 
     file_input = "s3://" + bucket + "/" + key
     destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/workflows/" + workflow_id + "/"
-    thumbnail_destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/"
 
     try:
         response = mediaconvert.describe_endpoints()
@@ -89,41 +88,6 @@ def lambda_handler(event, context):
                     "Destination": destination
                   }
                 }
-              },
-              {
-                  "CustomName": "thumbnail",
-                  "Name": "File Group",
-                  "Outputs": [
-                      {
-                          "ContainerSettings": {
-                              "Container": "RAW"
-                          },
-                          "VideoDescription": {
-                              "ScalingBehavior": "DEFAULT",
-                              "TimecodeInsertion": "DISABLED",
-                              "AntiAlias": "ENABLED",
-                              "Sharpness": 50,
-                              "CodecSettings": {
-                                  "Codec": "FRAME_CAPTURE",
-                                  "FrameCaptureSettings": {
-                                      "FramerateNumerator": 1,
-                                      "FramerateDenominator": 5,
-                                      "MaxCaptures": 2,
-                                      "Quality": 80
-                                  }
-                              },
-                              "DropFrameTimecode": "ENABLED",
-                              "ColorMetadata": "INSERT"
-                          },
-                          "NameModifier": "_thumbnail"
-                      }
-                  ],
-                  "OutputGroupSettings": {
-                      "Type": "FILE_GROUP_SETTINGS",
-                      "FileGroupSettings": {
-                          "Destination": thumbnail_destination
-                      }
-                  }
               }],
               "AdAvailOffset": 0,
               "Inputs": [{
Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,9 @@`
`10`	`10`	`"dev": {`
`11`	`11`	`"api_gateway_stage": "api",`
`12`	`12`	`"autogen_policy": false,`
`13`		`- "iam_policy_file": "policy-dev.json"`
	`13`	`+ "iam_policy_file": "policy-dev.json",`
	`14`	`+ "lambda_timeout": 900,`
	`15`	`+ "lambda_memory_size": 2048`
`14`	`16`	`}`
`15`	`17`	`}`
`16`	`18`	`}`