Move code back from orchestrator repo and add README

GSA · Mar 6, 2024 · 60e7944 · 60e7944
1 parent 759d019
commit 60e7944
Show file tree

Hide file tree

Showing 10 changed files with 443 additions and 97 deletions.
diff --git a/.env → .env-sample b/.env → .env-sample
@@ -10,7 +10,7 @@ MDTRANSLATOR_URL=http://127.0.0.1:3000/translates
 
 DATABASE_SERVER=db
 DATABASE_PORT=5432
-DATABASE_NAME=postgres
-DATABASE_USER=postgres
-DATABASE_PASSWORD=password
+DATABASE_NAME=<place_holder>
+DATABASE_USER=<place_holder>
+DATABASE_PASSWORD=<place_holder>
 DATABASE_URI=postgresql://${DATABASE_USER}:${DATABASE_PASSWORD}@${DATABASE_SERVER}:${DATABASE_PORT}/${DATABASE_NAME}
diff --git a/.gitignore b/.gitignore
@@ -23,4 +23,5 @@ tmp/
 
 # vscode debugger
 .vscode/
+.env
 
diff --git a/Dockerfile b/Dockerfile
@@ -1,11 +1,28 @@
-FROM python:3.8-slim
+# Use an official Python runtime as a base image
+FROM python:3.11
 
-WORKDIR /app
+# Set the working directory in the container
+WORKDIR /usr/src/app
 
-COPY . /app
+# Copy the current directory contents into the container at /usr/src/app
+COPY . .
 
-RUN pip install --no-cache-dir -r requirements.txt
+# Install Poetry
+RUN pip install poetry
 
+# Avoid creating virtual environments
+RUN poetry config virtualenvs.create false
+
+# Install project dependencies
+RUN poetry install
+
+# Make port 8080 available to the world outside this container
 EXPOSE 8080
 
-CMD ["python", "app.py"]
+# Define environment variables
+ENV FLASK_APP=app.py
+ENV FLASK_RUN_HOST=0.0.0.0
+ENV FLASK_RUN_PORT=8080
+
+# Run Flask
+CMD ["poetry", "run", "flask", "run"]
diff --git a/Makefile b/Makefile
@@ -7,6 +7,12 @@ pypi-upload: build-dist  ## Uploads new package to PyPi after clean, build
 build-dist: clean-dist  ## Builds new package dist
 	poetry build --verbose
 
+build:  ## build Flask app
+	docker compose build app
+
+up:  ## up Flask app
+	docker compose up db app
+
 clean-dist:  ## Cleans dist dir
 	rm -rf dist/*
 

diff --git a/README.md b/README.md
@@ -88,3 +88,59 @@ If you followed the instructions for `CKAN load testing` and `Harvester testing`
 
 - `./test/harvest_sources/dcatus.json`
   - Represents an original harvest source prior to change occuring.
+
+
+## Flask App
+
+### Local development 
+
+1. Copy the sample environment file and set your local configurations:
+
+   ```bash
+   cp .env-sample .env
+   ```
+
+   Edit the `.env` file with your local settings.
+
+
+2. Use the Makefile to set up local Docker containers, including a PostgreSQL database and the Flask application:
+
+   ```bash
+   make build 
+   make up
+   make test
+   make clean
+   ```
+
+   This will start the necessary services and run the tests.
+
+### Deployment to cloud.gov
+
+#### Database Service Setup
+
+A database service is required for use on cloud.gov.
+
+In a given Cloud Foundry `space`, a db can be created with 
+`cf create-service <service offering> <plan> <service instance>`. 
+
+In dev, for example, the db was created with 
+`cf create-service aws-rds micro-psql harvesting-logic-db`. 
+
+Creating databases for the other spaces should follow the same pattern, though the size may need to be adjusted (see available AWS RDS service offerings with `cf marketplace -e aws-rds`).
+
+Any created service needs to be bound to an app with `cf bind-service <app> <service>`. With the above example, the db can be bound with 
+`cf bind-service harvesting-logic harvesting-logic-db`.
+
+Accessing the service can be done with service keys. They can be created with `cf create-service-key`, listed with `cf service-keys`, and shown with 
+
+`cf service-key <service-key-name>`.
+
+#### Manually Deploying the Flask Application
+
+1. Ensure you have `manifest.yml` and `vars.yml` files configured for your Flask application. The `vars.yml` file should include variables such as `FLASK_APP` and database service bindings.
+
+2. Deploy the application using Cloud Foundry's `cf push` command with the variable file:
+
+   ```bash
+   cf push --vars-file vars.yml
+   ```
diff --git a/app.py b/app.py
@@ -26,7 +26,7 @@ def add_harvest_source():
 def add_harvest_job():
     source_id = request.args.get('source_id', None)
     if source_id is None:
-        return 'Please provide source_id'
+        return 'Please provide source_id: /add_job?source_id=xxx'
     else:
         job=db.add_harvest_job(new_job, source_id)
         return(f"Added new job with ID: {job.id}")
@@ -35,52 +35,49 @@ def add_harvest_job():
 def add_harvest_error():
     job_id = request.args.get('job_id', None)
     if job_id is None:
-        return 'Please provide job_id'
+        return 'Please provide job_id: /add_error?job_id=xxx'
     else:
         err=db.add_harvest_error(new_error, job_id)
         return(f"Added new error with ID: {err.id}")
 
-@app.route('/get_all_sources', methods=['GET'])
+@app.route('/harvest_sources', methods=['GET'])
 def get_all_harvest_sources():
     result = db.get_all_harvest_sources()
     return result
 
-@app.route('/get_all_jobs', methods=['GET'])
+@app.route('/harvest_jobs', methods=['GET'])
 def get_all_harvest_jobs():
     result = db.get_all_harvest_jobs()
     return result
 
-@app.route('/get_all_errors', methods=['GET'])
+@app.route('/harvest_errors', methods=['GET'])
 def get_all_harvest_errors():
     result = db.get_all_harvest_errors()
     return result
 
-@app.route('/get_source', methods=['GET'])
-def get_harvest_source():
-    id = request.args.get('id', None)
-    if id is None:
-        return 'Please provide id'
-    else:
-        result = db.get_harvest_source(id)
+@app.route('/harvest_source/<source_id>', methods=['GET'])
+def get_harvest_source(source_id):
+    try:
+        result = db.get_harvest_source(source_id)
         return result
+    except:
+        return "provide source_id"
 
-@app.route('/get_job', methods=['GET'])
-def get_harvest_job():
-    id = request.args.get('id', None)
-    if id is None:
-        return 'Please provide id'
-    else:
-        result = db.get_harvest_job(id)
+@app.route('/harvest_job/<job_id>', methods=['GET'])
+def get_harvest_job(job_id):
+    try:
+        result = db.get_harvest_job(job_id)
         return result
+    except:
+        return "provide job_id"
 
-@app.route('/get_error', methods=['GET'])
-def get_harvest_error():
-    id = request.args.get('id', None)
-    if id is None:
-        return 'Please provide id'
-    else:
-        result = db.get_harvest_error(id)
+@app.route('/harvest_error/<error_id>', methods=['GET'])
+def get_harvest_error(error_id):
+    try:
+        result = db.get_harvest_error(error_id)
         return result
+    except:
+        return "provide error_id"
 
 @app.teardown_appcontext
 def shutdown_session(exception=None):

diff --git a/create_cloudgov_service.sh b/create_cloudgov_service.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+set -e 
+
+# If an argument was provided, use it as the service name prefix. 
+# Otherwise default to "harvesting-logic".
+app_name=${1:-harvesting-logic}
+
+# Get the current space and trim leading whitespace
+space=$(cf target | grep space | cut -d : -f 2 | xargs)
+
+# Production and staging should use bigger DB instances
+if [ "$space" = "prod" ] || [ "$space" = "staging" ]; then
+    cf service "${app_name}-db"    > /dev/null 2>&1 || cf create-service aws-rds medium-psql-redundant "${app_name}-db" --wait&
+else
+    cf service "${app_name}-db"    > /dev/null 2>&1 || cf create-service aws-rds micro-psql "${app_name}-db" --wait&
+fi
diff --git a/manifest.yml b/manifest.yml
@@ -9,5 +9,5 @@ applications:
       - ((database_name))
     instances: 1
     env:
-      FLASK_APP: app.py
-      DATABASE_NAME: ((database_name))
+      DATABASE_NAME: ((database_name))
+    command: python app.py