diff --git a/Dockerfile b/Dockerfile index 72d4487..6f178c7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,17 +2,8 @@ FROM node:8-alpine WORKDIR /home/node -RUN apk add --no-cache curl git jq && \ - git clone https://github.com/GPII/universal.git && \ - cd universal && \ - rm -f package-lock.json && \ - npm install json5 && \ - npm install fs && \ - npm install rimraf && \ - npm install mkdirp && \ - node scripts/convertPrefs.js testData/preferences/ build/dbData/ && \ - apk del git +RUN apk add --no-cache curl git jq -COPY loadData.sh /usr/local/bin +COPY deleteAndLoadSnapsets.sh /usr/local/bin/ -CMD ["/usr/local/bin/loadData.sh"] +CMD ["/usr/local/bin/deleteAndLoadSnapsets.sh"] diff --git a/README.md b/README.md index cfefebe..828d196 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,13 @@ # CouchDB Data Loader -Builds a [sidecar container](http://blog.kubernetes.io/2015/06/the-distributed-system-toolkit-patterns.html) with the CouchDB data from the GPII/universal repository baked in and a mechanism for loading them into a CouchDB database. +Builds a [sidecar container](http://blog.kubernetes.io/2015/06/the-distributed-system-toolkit-patterns.html) that contains the `git` command and a shell script for setting up a CouchDB database. When the docker image is run, this sequence is executed: +1. Clones the latest version of [GPII universal](https://github.com/gpii/universal/), +1. Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, +1. Creates a CouchDB database if none exists, +1. Optionally clears an existing database of all its records, +1. Updates the database with respect to its `design/views` document, as required, +1. Deletes any snapsets currently in the database, +1. Loads the latest snapsets created at the second step into the database. ## Building @@ -11,7 +18,7 @@ Builds a [sidecar container](http://blog.kubernetes.io/2015/06/the-distributed-s - `COUCHDB_URL`: URL of the CouchDB database. 
(required) - `CLEAR_INDEX`: If defined, the database at $COUCHDB_URL will be deleted and recreated. (optional) - `STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) -- `BUILD_DATA_DIR`: The directory where the data built from a npm step resides. (optional) +- `BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. (optional) The use of environment variables for data directories is useful if you want to mount the database data using a Docker volume and point the data loader at it. @@ -28,8 +35,14 @@ $ docker run -d -p 8081:8081 --name preferences --link couchdb -e NODE_ENV=gpii. ``` -Loading couchdb data from a different location (e.g. /home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory): +Below are two versions of loading couchdb data from a different location (e.g. /home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory). 
The first version has the optional `CLEAR_INDEX` set to erase and reset the database prior to other database changes: ``` $ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=1 gpii/gpii-dataloader ``` + +The second version has `CLEAR_INDEX` set to nothing such that any existing database is left intact prior to subsequent changes to it (e.g., deleting the snapsets): + +``` +$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX= gpii/gpii-dataloader +``` diff --git a/deleteAndLoadSnapsets.sh b/deleteAndLoadSnapsets.sh new file mode 100755 index 0000000..5c77246 --- /dev/null +++ b/deleteAndLoadSnapsets.sh @@ -0,0 +1,79 @@ +#!/bin/sh + +STATIC_DATA_DIR=${STATIC_DATA_DIR:-/home/node/universal/testData/dbData} +BUILD_DATA_DIR=${BUILD_DATA_DIR:-/home/node/universal/build/dbData/snapset} + +log() { # Print "$1" prefixed with the current date and time + echo "$(date +'%Y-%m-%d %H:%M:%S') - $1" +} + +warm_indices(){ # Request each view listed in $COUCHDB_URL/_design/views once, discarding the response body + log "Warming indices..." + + for view in $(curl -s "$COUCHDB_URL/_design/views/" | jq -r '.views | keys[]'); do + curl -fsS "$COUCHDB_URL/_design/views/_view/$view" >/dev/null + done + + log "Finished warming indices..." +} + +# Verify variables +if [ -z "$COUCHDB_URL" ]; then + echo "COUCHDB_URL environment variable must be defined" + exit 1 +fi + +if [ ! -d "$STATIC_DATA_DIR" -o ! "$(ls -A "$STATIC_DATA_DIR"/*.json)" ]; then + echo "STATIC_DATA_DIR ($STATIC_DATA_DIR) does not exist or does not contain data, using universal's 'testData/dbData' as the default" + STATIC_DATA_DIR=./testData/dbData +fi + +if [ ! -d "$BUILD_DATA_DIR" -o ! 
"$(ls -A "$BUILD_DATA_DIR"/*.json)" ]; then + echo "BUILD_DATA_DIR ($BUILD_DATA_DIR) does not exist or does not contain data, using universal's 'build/dbData/snapset' as the default" + BUILD_DATA_DIR=./build/dbData/snapset +fi + +COUCHDB_URL_SANITIZED=`echo "$COUCHDB_URL" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g'` + +log "Starting" +log "CouchDB: $COUCHDB_URL_SANITIZED" +log "Clear index: $CLEAR_INDEX" +log "Static: $STATIC_DATA_DIR" +log "Build: $BUILD_DATA_DIR" +log "Working directory: `pwd`" + +# Set up universal; stop if the checkout cannot be entered so the database is never cleared with nothing to load +git clone --depth 1 https://github.com/GPII/universal.git +cd universal || { log "Cannot change into the universal checkout, exiting"; exit 1; } + +npm install json5 +npm install fs +npm install rimraf +npm install mkdirp +npm install infusion +rm -f package-lock.json +node scripts/convertPrefs.js testData/preferences/ build/dbData/snapset/ snapset + +# Initialize (possibly clear) data base +if [ -n "$CLEAR_INDEX" ]; then + log "Deleting database at $COUCHDB_URL_SANITIZED" + if ! curl -fsS -X DELETE "$COUCHDB_URL"; then + log "Error deleting database" + fi +fi + +log "Creating database at $COUCHDB_URL_SANITIZED" +if ! curl -fsS -X PUT "$COUCHDB_URL"; then + log "Database already exists at $COUCHDB_URL_SANITIZED" +fi + +# Submit data (arguments quoted so a URL or path is passed as exactly one argument each) +node scripts/deleteAndLoadSnapsets.js "$COUCHDB_URL" "$STATIC_DATA_DIR" "$BUILD_DATA_DIR" +err=$? +if [ $err != 0 ]; then + log "deleteAndLoadSnapsets.js failed with $err, exiting" + exit $err +fi + +# Warm Data +warm_indices diff --git a/loadData.sh b/loadData.sh deleted file mode 100755 index f48b0a9..0000000 --- a/loadData.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/bin/sh - -STATIC_DATA_DIR=${STATIC_DATA_DIR:-/home/node/universal/testData/dbData} -BUILD_DATA_DIR=${BUILD_DATA_DIR:-/home/node/universal/build/dbData} - -log() { - echo "$(date +'%Y-%m-%d %H:%M:%S') - $1" -} - -warm_indices(){ - log "Warming indices..." - - for view in $(curl -s $COUCHDB_URL/_design/views/ | jq -r '.views | keys[]'); do - curl -fsS $COUCHDB_URL/_design/views/_view/$view >/dev/null - done - - log "Finished warming indices..." 
-} - -loadData() { - log "Loading data from $1" - - for file in $1/*.json; do - log "Submitting $file" - - curl -H 'Content-Type: application/json' -X POST "$COUCHDB_URL/_bulk_docs" -d @- <