Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create harvest db #36

Merged
merged 7 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions .env
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
POSTGRES_HOST=0.0.0.0
POSTGRES_USER=placeholder-user
POSTGRES_PASSWORD=placeholder-pass
POSTGRES_DB=testdb
POSTGRES_TABLE=harvestjob
POSTGRES_FIELD_ID=jobid
POSTGRES_FIELD_STATUS=status

S3FILESTORE__AWS_BUCKET_NAME=test-bucket
S3FILESTORE__AWS_STORAGE_PATH=local
S3FILESTORE__REGION_NAME=us-east-1
Expand All @@ -15,3 +7,11 @@ S3FILESTORE__AWS_SECRET_ACCESS_KEY=_placeholder
S3FILESTORE__SIGNATURE_VERSION=s3v4

MDTRANSLATOR_URL=http://127.0.0.1:3000/translates

DATABASE_SERVER=db
DATABASE_PORT=5432
DATABASE_NAME=mydb
DATABASE_USER=myuser
DATABASE_PASSWORD=mypassword
DATABASE_URI=postgresql://${DATABASE_USER}:${DATABASE_PASSWORD}@${DATABASE_SERVER}:${DATABASE_PORT}/${DATABASE_NAME}

2 changes: 1 addition & 1 deletion .github/workflows/commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ jobs:
- name: deploy DHL
uses: cloud-gov/cg-cli-tools@main
with:
command: cf push datagov-harvesting-logic --strategy rolling --no-wait
command: cf push datagov-harvesting-logic --vars-file vars.yml --strategy rolling --no-wait
cf_org: gsa-datagov
cf_space: ${{vars.ENVIRONMENT_NAME}}
cf_username: ${{secrets.CF_SERVICE_USER}}
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@
- name: deploy DHL
uses: cloud-gov/cg-cli-tools@main
with:
command: cf push datagov-harvesting-logic --strategy rolling --no-wait
command: cf push --vars-file vars.yml --strategy rolling --no-wait
cf_org: gsa-datagov
cf_space: ${{vars.ENVIRONMENT_NAME}}
cf_username: ${{secrets.CF_SERVICE_USER}}
cf_password: ${{secrets.CF_SERVICE_AUTH}}
- name: smoke test
uses: cloud-gov/cg-cli-tools@main
with:
command: cf run-task datagov-harvesting-logic -c "/home/vcap/app/scripts/smoke-test.py" --name smoke-test
command: cf run-task harvesting-logic -c "/home/vcap/app/scripts/smoke-test.py" --name smoke-test
cf_org: gsa-datagov
cf_space: ${{vars.ENVIRONMENT_NAME}}
cf_username: ${{secrets.CF_SERVICE_USER}}
Expand All @@ -51,7 +51,7 @@
uses: cloud-gov/cg-cli-tools@main
with:
command: >
scripts/monitor-cf-logs.sh datagov-harvesting-logic smoke-test
scripts/monitor-cf-logs.sh harvesting-logic smoke-test
cf_org: gsa-datagov
cf_space: ${{vars.ENVIRONMENT_NAME}}
cf_username: ${{secrets.CF_SERVICE_USER}}
Expand Down Expand Up @@ -91,15 +91,15 @@
- name: deploy DHL
uses: cloud-gov/cg-cli-tools@main
with:
command: cf push datagov-harvesting-logic --strategy rolling --no-wait
command: cf push --vars-file vars.yml --strategy rolling --no-wait
cf_org: gsa-datagov
cf_space: ${{vars.ENVIRONMENT_NAME}}
cf_username: ${{secrets.CF_SERVICE_USER}}
cf_password: ${{secrets.CF_SERVICE_AUTH}}
- name: smoke test
uses: cloud-gov/cg-cli-tools@main
with:
command: cf run-task datagov-harvesting-logic -c "/home/vcap/app/scripts/smoke-test.py" --name smoke-test
command: cf run-task harvesting-logic -c "/home/vcap/app/scripts/smoke-test.py" --name smoke-test
cf_org: gsa-datagov
cf_space: ${{vars.ENVIRONMENT_NAME}}
cf_username: ${{secrets.CF_SERVICE_USER}}
Expand All @@ -108,7 +108,7 @@
uses: cloud-gov/cg-cli-tools@main
with:
command: >
scripts/monitor-cf-logs.sh datagov-harvesting-logic smoke-test
scripts/monitor-cf-logs.sh harvesting-logic smoke-test
cf_org: gsa-datagov
cf_space: ${{vars.ENVIRONMENT_NAME}}
cf_username: ${{secrets.CF_SERVICE_USER}}
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ tmp/

# vscode debugger
.vscode/
.env

17 changes: 17 additions & 0 deletions .profile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash

set -o errexit
set -o pipefail

# Print a field from the services listed in the VCAP_SERVICES JSON.
#   $1 - service name suffix; combined with APP_NAME into "<APP_NAME>-<suffix>"
#   $2 - jq path evaluated against each service entry (e.g. .credentials.uri)
# NOTE(review): service_name is handed to jq as $service_name, but the filter
# never selects on it, so the path is evaluated for every bound service.
# Confirm whether a `select(.name == $service_name)` was intended.
function vcap_get_service () {
  local path name service_name
  name="$1"
  path="$2"
  service_name="${APP_NAME}-${name}"
  # Quote the JSON so the shell neither word-splits nor glob-expands it.
  printf '%s\n' "$VCAP_SERVICES" | jq --raw-output --arg service_name "$service_name" ".[][] | $path"
}

# Application name read from the VCAP_APPLICATION JSON.
export APP_NAME=$(printf '%s\n' "$VCAP_APPLICATION" | jq -r '.application_name')

# Connection URI taken from the bound service credentials.
export URI=$(vcap_get_service aws-rds .credentials.uri)
# Rewrite the postgres:// scheme to postgresql://. The URI is quoted so it is
# passed through unchanged even if it contains whitespace or glob characters.
export DATABASE_URI=$(printf '%s\n' "$URI" | sed 's/postgres:\/\//postgresql:\/\//g')
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.10.0
11 changes: 11 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Container image for the Flask app; the entry point is app.py.
FROM python:3.10

WORKDIR /app

# Install dependencies before copying the rest of the source so this layer
# stays cached until requirements.txt itself changes.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app

# Port the application listens on (app.py runs on 8080).
EXPOSE 8080

CMD ["python", "app.py"]
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ pypi-upload: build-dist ## Uploads new package to PyPi after clean, build
build-dist: clean-dist ## Builds new package dist
poetry build --verbose

build: ## build Flask app
docker compose build app

clean-dist: ## Cleans dist dir
rm -rf dist/*

Expand Down
54 changes: 54 additions & 0 deletions README.md
Jin-Sun-tts marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,57 @@ If you followed the instructions for `CKAN load testing` and `Harvester testing`

- `./test/harvest_sources/dcatus.json`
- Represents an original harvest source prior to change occurring.


## Flask App

### Local development

1. Set your local configuration in the `.env` file.

2. Use the Makefile to set up local Docker containers, including a PostgreSQL database and the Flask application:

```bash
make build
make up
make test
make clean
```

This will start the necessary services and execute the test.

### Deployment to cloud.gov

#### Database Service Setup

A database service is required for use on cloud.gov.

In a given Cloud Foundry `space`, a db can be created with
`cf create-service <service offering> <plan> <service instance>`.

In dev, for example, the db was created with
`cf create-service aws-rds micro-psql harvesting-logic-db`.

Creating databases for the other spaces should follow the same pattern, though the size may need to be adjusted (see available AWS RDS service offerings with `cf marketplace -e aws-rds`).

Any created service needs to be bound to an app with `cf bind-service <app> <service>`. With the above example, the db can be bound with
`cf bind-service harvesting-logic harvesting-logic-db`.

Accessing the service can be done with service keys. They can be created with `cf create-service-key <service instance> <service-key-name>`, listed with `cf service-keys <service instance>`, and shown with

`cf service-key <service instance> <service-key-name>`.

#### Manually Deploying the Flask Application

1. Ensure you have a `manifest.yml` and `vars.yml` file configured for your Flask application. The `vars.yml` file may include variables:

```yaml
app_name: harvesting-logic
database_name: harvesting-logic-db
```

2. Deploy the application using Cloud Foundry's `cf push` command with the variable file:

```bash
cf push --vars-file vars.yml
```
91 changes: 91 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from flask import Flask, request
from harvester.database.interface import HarvesterDBInterface
from harvester.database import init_db
from tests.database.data import new_source, new_job, new_error

app = Flask(__name__)
db = HarvesterDBInterface()

@app.route('/', methods=['GET'])
def index():
    """Render a small HTML page: the table summary followed by route links.

    Calls ``init_db.create_tables()`` and shows its returned message in bold,
    then lists every registered URL rule whose endpoint name contains neither
    ``static`` nor ``index``.
    """
    banner = "<b>" + init_db.create_tables() + "</b>"
    links = [
        f"<li>{rule.endpoint} : "
        f"<a href='{rule.rule}'>{rule.rule}</a></li><br>"
        for rule in app.url_map.iter_rules()
        if not ('static' in rule.endpoint or 'index' in rule.endpoint)
    ]
    return banner + "<ul>" + "".join(links) + "</ul>"

@app.route('/add_source', methods=['GET'])
def add_harvest_source():
    """Add the imported ``new_source`` fixture via the DB interface.

    Returns a message containing the ``id`` of the object that
    ``db.add_harvest_source`` hands back.
    """
    created = db.add_harvest_source(new_source)
    return f"Added new source with ID: {created.id}"

@app.route('/add_job', methods=['GET'])
def add_harvest_job():
    """Add the imported ``new_job`` fixture for the given ``source_id``.

    ``source_id`` is read from the query string; when it is absent a usage
    hint is returned instead of touching the database.
    """
    source_id = request.args.get('source_id')
    if source_id is None:
        return 'Please provide source_id: /add_job?source_id=xxx'
    created = db.add_harvest_job(new_job, source_id)
    return f"Added new job with ID: {created.id}"

@app.route('/add_error', methods=['GET'])
def add_harvest_error():
    """Add the imported ``new_error`` fixture for the given ``job_id``.

    ``job_id`` is read from the query string; when it is absent a usage hint
    is returned instead of touching the database.
    """
    job_id = request.args.get('job_id')
    if job_id is None:
        return 'Please provide job_id: /add_error?job_id=xxx'
    created = db.add_harvest_error(new_error, job_id)
    return f"Added new error with ID: {created.id}"
Jin-Sun-tts marked this conversation as resolved.
Show resolved Hide resolved

@app.route('/harvest_sources', methods=['GET'])
def get_all_harvest_sources():
    """Return the result of ``db.get_all_harvest_sources()`` unchanged."""
    return db.get_all_harvest_sources()

@app.route('/harvest_jobs', methods=['GET'])
def get_all_harvest_jobs():
    """Return the result of ``db.get_all_harvest_jobs()`` unchanged."""
    return db.get_all_harvest_jobs()

@app.route('/harvest_errors_by_job/<job_id>', methods=['GET'])
def get_all_harvest_errors_by_job(job_id):
    """Return what the DB interface reports as the errors of ``job_id``.

    Any exception raised by the lookup is logged with its traceback and the
    fallback message is returned. The response is unchanged for callers, but
    the underlying cause is no longer discarded.
    """
    try:
        return db.get_all_harvest_errors_by_job(job_id)
    except Exception:
        app.logger.exception(
            "harvest errors lookup failed for job_id=%r", job_id)
        return " provide job_id"


@app.route('/harvest_source/<source_id>', methods=['GET'])
def get_harvest_source(source_id):
    """Return what the DB interface reports for the source ``source_id``.

    Any exception raised by the lookup is logged with its traceback and the
    fallback message is returned. The response is unchanged for callers, but
    the underlying cause is no longer discarded.
    """
    try:
        return db.get_harvest_source(source_id)
    except Exception:
        app.logger.exception(
            "harvest source lookup failed for source_id=%r", source_id)
        return " provide source_id"

@app.route('/harvest_job/<job_id>', methods=['GET'])
def get_harvest_job(job_id):
    """Return what the DB interface reports for the job ``job_id``.

    Any exception raised by the lookup is logged with its traceback and the
    fallback message is returned. The response is unchanged for callers, but
    the underlying cause is no longer discarded.
    """
    try:
        return db.get_harvest_job(job_id)
    except Exception:
        app.logger.exception("harvest job lookup failed for job_id=%r", job_id)
        return "provide job_id"

@app.route('/harvest_error/<error_id>', methods=['GET'])
def get_harvest_error(error_id):
    """Return what the DB interface reports for the error ``error_id``.

    Any exception raised by the lookup is logged with its traceback and the
    fallback message is returned. The response is unchanged for callers, but
    the underlying cause is no longer discarded.
    """
    try:
        return db.get_harvest_error(error_id)
    except Exception:
        app.logger.exception(
            "harvest error lookup failed for error_id=%r", error_id)
        return "provide error_id"

@app.teardown_appcontext
def shutdown_session(exception=None):
    """Close the module-level DB interface on application-context teardown.

    ``exception`` is accepted because Flask passes it to teardown handlers;
    it is not inspected here.
    """
    db.close()

if __name__ == '__main__':
    # When run as a script, listen on all interfaces on port 8080.
    app.run(port=8080, host="0.0.0.0")
17 changes: 17 additions & 0 deletions create_cloudgov_service.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/sh
# Create the aws-rds database service "<app_name>-db" in the currently
# targeted Cloud Foundry space, unless a service of that name already exists.
#
# Usage: create_cloudgov_service.sh [app_name]

set -e

# If an argument was provided, use it as the service name prefix.
# Otherwise default to "harvesting-logic".
app_name=${1:-harvesting-logic}
service_name="${app_name}-db"

# Get the current space and trim surrounding whitespace. Anchoring on the
# "space:" label avoids picking up other `cf target` lines that merely
# contain the word "space".
space=$(cf target | grep '^space:' | cut -d : -f 2 | xargs)

# Production and staging should use bigger DB instances
if [ "$space" = "prod" ] || [ "$space" = "staging" ]; then
  plan=medium-psql-redundant
else
  plan=micro-psql
fi

# Run in the foreground: a trailing `&` would let the script exit before
# provisioning finished, defeating --wait and hiding failures from `set -e`.
cf service "$service_name" > /dev/null 2>&1 ||
  cf create-service aws-rds "$plan" "$service_name" --wait
Jin-Sun-tts marked this conversation as resolved.
Show resolved Hide resolved
27 changes: 26 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,29 @@ services:
- DOCKER_HOST=unix:///var/run/docker.sock
- DEFAULT_REGION=us-east-1
volumes:
- "./tmp/localstack:/var/lib/localstack"
- "./tmp/localstack:/var/lib/localstack"
db:
image: postgres:latest
restart: always
env_file:
- .env
environment:
POSTGRES_USER: ${DATABASE_USER}
POSTGRES_PASSWORD: ${DATABASE_PASSWORD}
POSTGRES_DB: ${DATABASE_NAME}
ports:
- "${DATABASE_PORT}:5432"
volumes:
- postgres_data:/var/lib/postgresql/data

app:
build: .
depends_on:
- db
environment:
DATABASE_URI: ${DATABASE_URI}
ports:
- "8080:8080"

volumes:
postgres_data:
3 changes: 3 additions & 0 deletions harvester/database/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import os

DATABASE_URI = os.getenv('DATABASE_URI')
14 changes: 14 additions & 0 deletions harvester/database/init_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from sqlalchemy import create_engine
from harvester.database.models import Base
from sqlalchemy.engine.reflection import Inspector
from . import DATABASE_URI

def create_tables():
    """Create every table declared on ``Base`` and report what now exists.

    Returns:
        str: ``"Database tables : [...]"`` listing the table names the
        database reports after ``create_all`` has run.
    """
    # ``inspect`` is the supported replacement for the deprecated
    # ``Inspector.from_engine``; it is imported locally so this function does
    # not depend on a change to the module's import block.
    from sqlalchemy import inspect

    engine = create_engine(DATABASE_URI)
    try:
        Base.metadata.create_all(engine)
        table_names = inspect(engine).get_table_names()
    finally:
        # A new engine is built on every call, so release its connection pool.
        engine.dispose()
    return f"Database tables : {table_names}"



Loading
Loading