From d6cb062fa69d7d2bd7c6fce52f5dbc30b7f88f27 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Thu, 18 Oct 2018 23:05:31 +0100 Subject: [PATCH 01/11] GPII-3138: Import deleteAndLoadSnapsets.sh and DataLoader.md --- documentation/DataLoader.md | 48 +++++++++++++++++++ scripts/deleteAndLoadSnapsets.sh | 79 ++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+) create mode 100644 documentation/DataLoader.md create mode 100755 scripts/deleteAndLoadSnapsets.sh diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md new file mode 100644 index 000000000..828d19699 --- /dev/null +++ b/documentation/DataLoader.md @@ -0,0 +1,48 @@ +# CouchDB Data Loader + +Builds a [sidecar container](http://blog.kubernetes.io/2015/06/the-distributed-system-toolkit-patterns.html) that contains the `git` command and a shell script for setting up a CouchDB database. When the docker image is run, this sequence is executed: +1. Clones the latest version of [GPII universal](https://github.com/gpii/universal/), +1. Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, +1. Creates a CouchDB database if none exits, +1. Optionally clears an existing database of all its records, +1. Updates the database with respect to its `design/views` document, as required, +1. Deletes any snapsets currently in the database, +1. Loads the latest snapsets created at the second step into the database. + +## Building + +- `docker build -t gpii/gpii-dataloader .` + +## Environment Variables + +- `COUCHDB_URL`: URL of the CouchDB database. (required) +- `CLEAR_INDEX`: If defined, the database at $COUCHDB_URL will be deleted and recreated. (optional) +- `STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) +- `BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. 
(optional) + +The use of environment variables for data directories is useful if you want to mount the database data using a Docker volume and point the data loader at it. + +Note that since [the docker doesn't support the environment variable type of array](https://github.com/moby/moby/issues/20169), two separate environment variables are used for inputting data directories instead of one array that holds these directories. + +## Running + +Example using containers: + +``` +$ docker run -d -p 5984:5984 --name couchdb couchdb +$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=1 gpii/gpii-dataloader +$ docker run -d -p 8081:8081 --name preferences --link couchdb -e NODE_ENV=gpii.config.preferencesServer.standalone.production -e PREFERENCESSERVER_LISTEN_PORT=8081 -e DATASOURCE_HOSTNAME=http://couchdb -e DATASOURCE_PORT=5984 vagrant-universal + +``` + +Below are two versions of loading couchdb data from a different location (e.g. /home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory). 
The first version has the optional `CLEAR_INDEX` set to erase and reset the database prior to other database changes: + +``` +$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=1 gpii/gpii-dataloader +``` + +The second version has `CLEAR_INDEX` set to nothing such that any existing database is left intact prior to subsequent changes to it (e.g., deleting the snapsets): + +``` +$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX= gpii/gpii-dataloader +``` diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh new file mode 100755 index 000000000..5c7724662 --- /dev/null +++ b/scripts/deleteAndLoadSnapsets.sh @@ -0,0 +1,79 @@ +#!/bin/sh + +STATIC_DATA_DIR=${STATIC_DATA_DIR:-/home/node/universal/testData/dbData} +BUILD_DATA_DIR=${BUILD_DATA_DIR:-/home/node/universal/build/dbData/snapset} + +log() { + echo "$(date +'%Y-%m-%d %H:%M:%S') - $1" +} + +warm_indices(){ + log "Warming indices..." + + for view in $(curl -s $COUCHDB_URL/_design/views/ | jq -r '.views | keys[]'); do + curl -fsS $COUCHDB_URL/_design/views/_view/$view >/dev/null + done + + log "Finished warming indices..." +} + +# Verify variables +if [ -z "$COUCHDB_URL" ]; then + echo "COUCHDB_URL environment variable must be defined" + exit 1 +fi + +if [ ! -d "$STATIC_DATA_DIR" -o ! "$(ls -A $STATIC_DATA_DIR/*.json)" ]; then + echo "STATIC_DATA_DIR ($STATIC_DATA_DIR) does not exist or does not contain data, using universal's 'testData/dbData' as the default" + STATIC_DATA_DIR=./testData/dbData +fi + +if [ ! -d "$BUILD_DATA_DIR" -o ! 
"$(ls -A $BUILD_DATA_DIR/*.json)" ]; then + echo "BUILD_DATA_DIR ($BUILD_DATA_DIR) does not exist or does not contain data, using universal's 'build/dbData/snapset' as the default" + BUILD_DATA_DIR=./build/dbData/snapset +fi + +COUCHDB_URL_SANITIZED=`echo "$COUCHDB_URL" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g'` + +log "Starting" +log "CouchDB: $COUCHDB_URL_SANITIZED" +log "Clear index: $CLEAR_INDEX" +log "Static: $STATIC_DATA_DIR" +log "Build: $BUILD_DATA_DIR" +log "Working directory: `pwd`" + +# Set up universal +git clone --depth 1 https://github.com/GPII/universal.git +cd universal + +npm install json5 +npm install fs +npm install rimraf +npm install mkdirp +npm install infusion +rm -f package-lock.json +node scripts/convertPrefs.js testData/preferences/ build/dbData/snapset/ snapset + +# Initialize (possibly clear) data base +if [ ! -z "$CLEAR_INDEX" ]; then + log "Deleting database at $COUCHDB_URL_SANITIZED" + if ! curl -fsS -X DELETE "$COUCHDB_URL"; then + log "Error deleting database" + fi +fi + +log "Creating database at $COUCHDB_URL_SANITIZED" +if ! curl -fsS -X PUT "$COUCHDB_URL"; then + log "Database already exists at $COUCHDB_URL_SANITIZED" +fi + +# Submit data +node scripts/deleteAndLoadSnapsets.js $COUCHDB_URL $STATIC_DATA_DIR $BUILD_DATA_DIR +err=$? +if [ $err != 0 ]; then + log "deleteAndLoadSnapsets.js failed with $err, exiting" + exit $err +fi + +# Warm Data +warm_indices From 59a4eb41ba36e3992eb60a114b8b49479f9f219e Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 13:58:30 +0100 Subject: [PATCH 02/11] GPII-3138: Add dataLoader dependencies to docker image (jq and curl) --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 024de9f2f..559ae1367 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,7 @@ WORKDIR /app COPY . /app RUN apk add --no-cache --virtual build-dependencies python make git g++ && \ + apk add --no-cache curl jq && \ npm install && \ chown -R node:node . 
&& \ npm cache clean --force && \ From 9370d184c966526cd713432c8e535f96bc2f514c Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:05:34 +0100 Subject: [PATCH 03/11] GPII-3138: Modify convertPrefs.js to expect directories without the final slash --- scripts/convertPrefs.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/convertPrefs.js b/scripts/convertPrefs.js index d3429cabc..3c6a44075 100644 --- a/scripts/convertPrefs.js +++ b/scripts/convertPrefs.js @@ -45,7 +45,7 @@ rimraf(targetDir, function () { filenames.forEach(function (filename) { if (filename.endsWith(".json5")) { var gpiiKey = filename.substr(0, filename.length - 6); - var preferences = fs.readFileSync(inputDir + filename, "utf-8"); + var preferences = fs.readFileSync(inputDir + "/" + filename, "utf-8"); var currentTime = new Date().toISOString(); var prefsSafeId = "prefsSafe-" + gpiiKey; @@ -80,11 +80,11 @@ rimraf(targetDir, function () { }); // Write the target files - var prefsSafesFile = targetDir + "prefsSafes.json"; + var prefsSafesFile = targetDir + "/prefsSafes.json"; console.log("prefsSafesFile: " + prefsSafesFile); fs.writeFileSync(prefsSafesFile, JSON.stringify(prefsSafes, null, 4)); - var gpiiKeysFile = targetDir + "gpiiKeys.json"; + var gpiiKeysFile = targetDir + "/gpiiKeys.json"; fs.writeFileSync(gpiiKeysFile, JSON.stringify(gpiiKeys, null, 4)); console.log("Finished converting preferences data in the source directory " + inputDir + " to the target directory " + targetDir); From 0808a24d7306800073b5d40cb72e46a63e2e1e83 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:15:14 +0100 Subject: [PATCH 04/11] GPII-3138: Update data loader script to reflect new location in universal docker image --- scripts/deleteAndLoadSnapsets.sh | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index 5c7724662..2e11c237d 
100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -1,7 +1,12 @@ #!/bin/sh +APP_DIR=${APP_DIR:-"/app"} -STATIC_DATA_DIR=${STATIC_DATA_DIR:-/home/node/universal/testData/dbData} -BUILD_DATA_DIR=${BUILD_DATA_DIR:-/home/node/universal/build/dbData/snapset} +STATIC_DATA_DIR=${STATIC_DATA_DIR:-"${APP_DIR}/testData/dbData"} +PREFERENCES_DATA_DIR=${PREFERENCES_DATA_DIR:-"${APP_DIR}/testData/preferences"} +BUILD_DATA_DIR=${BUILD_DATA_DIR:-'/tmp/build/dbData'} + +DATALOADER_JS="${APP_DIR}/scripts/deleteAndLoadSnapsets.js" +CONVERT_JS="${APP_DIR}/scripts/convertPrefs.js" log() { echo "$(date +'%Y-%m-%d %H:%M:%S') - $1" @@ -42,17 +47,17 @@ log "Static: $STATIC_DATA_DIR" log "Build: $BUILD_DATA_DIR" log "Working directory: `pwd`" -# Set up universal -git clone --depth 1 https://github.com/GPII/universal.git -cd universal -npm install json5 -npm install fs -npm install rimraf -npm install mkdirp -npm install infusion -rm -f package-lock.json -node scripts/convertPrefs.js testData/preferences/ build/dbData/snapset/ snapset +# Convert preferences json5 to GPII keys and preferences safes +if [ -d "${PREFERENCES_DATA_DIR}" ]; then + node "${CONVERT_JS}" "${PREFERENCES_DATA_DIR}" "${BUILD_DATA_DIR}" snapset + if [ "$?" != '0' ]; then + log "[ERROR] ${CONVERT_JS} failed (exit code: $?)" + exit 1 + fi +else + log "PREFERENCES_DATA_DIR ($PREFERENCES_DATA_DIR) does not exist, nothing to convert" +fi # Initialize (possibly clear) data base if [ ! -z "$CLEAR_INDEX" ]; then @@ -68,7 +73,7 @@ if ! curl -fsS -X PUT "$COUCHDB_URL"; then fi # Submit data -node scripts/deleteAndLoadSnapsets.js $COUCHDB_URL $STATIC_DATA_DIR $BUILD_DATA_DIR +node "${DATALOADER_JS}" "${COUCHDB_URL}" "${STATIC_DATA_DIR}" "${BUILD_DATA_DIR}" err=$? 
if [ $err != 0 ]; then log "deleteAndLoadSnapsets.js failed with $err, exiting" From 2c6c50daf6fa5f3ff61477aa1390ca7b93d9956a Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:23:58 +0100 Subject: [PATCH 05/11] GPII-3138: Update directory checks to match containerised environment --- scripts/deleteAndLoadSnapsets.sh | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index 2e11c237d..c1ea15e8f 100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -28,15 +28,6 @@ if [ -z "$COUCHDB_URL" ]; then exit 1 fi -if [ ! -d "$STATIC_DATA_DIR" -o ! "$(ls -A $STATIC_DATA_DIR/*.json)" ]; then - echo "STATIC_DATA_DIR ($STATIC_DATA_DIR) does not exist or does not contain data, using universal's 'testData/dbData' as the default" - STATIC_DATA_DIR=./testData/dbData -fi - -if [ ! -d "$BUILD_DATA_DIR" -o ! "$(ls -A $BUILD_DATA_DIR/*.json)" ]; then - echo "BUILD_DATA_DIR ($BUILD_DATA_DIR) does not exist or does not contain data, using universal's 'build/dbData/snapset' as the default" - BUILD_DATA_DIR=./build/dbData/snapset -fi COUCHDB_URL_SANITIZED=`echo "$COUCHDB_URL" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g'` @@ -47,6 +38,10 @@ log "Static: $STATIC_DATA_DIR" log "Build: $BUILD_DATA_DIR" log "Working directory: `pwd`" +# Create build dir if it does not exist +if [ ! 
-d "${BUILD_DATA_DIR}" ]; then + mkdir -p "${BUILD_DATA_DIR}" +fi # Convert preferences json5 to GPII keys and preferences safes if [ -d "${PREFERENCES_DATA_DIR}" ]; then From 776b880e7e5b120b03801e6e651c65a2ce40ebe4 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:26:44 +0100 Subject: [PATCH 06/11] GPII-3138: Minor formatting changes --- scripts/deleteAndLoadSnapsets.sh | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index c1ea15e8f..fe4192aca 100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -15,28 +15,28 @@ log() { warm_indices(){ log "Warming indices..." - for view in $(curl -s $COUCHDB_URL/_design/views/ | jq -r '.views | keys[]'); do - curl -fsS $COUCHDB_URL/_design/views/_view/$view >/dev/null + for view in $(curl -s "${COUCHDB_URL}/_design/views/" | jq -r '.views | keys[]'); do + curl -fsS "${COUCHDB_URL}/_design/views/_view/${view}" >/dev/null done log "Finished warming indices..." } # Verify variables -if [ -z "$COUCHDB_URL" ]; then +if [ -z "${COUCHDB_URL}" ]; then echo "COUCHDB_URL environment variable must be defined" exit 1 fi +COUCHDB_URL_SANITIZED=$(echo "${COUCHDB_URL}" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g') -COUCHDB_URL_SANITIZED=`echo "$COUCHDB_URL" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g'` +log 'Starting' +log "CouchDB: ${COUCHDB_URL_SANITIZED}" +log "Clear index: ${CLEAR_INDEX}" +log "Static: ${STATIC_DATA_DIR}" +log "Build: ${BUILD_DATA_DIR}" +log "Working directory: $(pwd)" -log "Starting" -log "CouchDB: $COUCHDB_URL_SANITIZED" -log "Clear index: $CLEAR_INDEX" -log "Static: $STATIC_DATA_DIR" -log "Build: $BUILD_DATA_DIR" -log "Working directory: `pwd`" # Create build dir if it does not exist if [ ! -d "${BUILD_DATA_DIR}" ]; then @@ -55,24 +55,24 @@ else fi # Initialize (possibly clear) data base -if [ ! 
-z "$CLEAR_INDEX" ]; then - log "Deleting database at $COUCHDB_URL_SANITIZED" - if ! curl -fsS -X DELETE "$COUCHDB_URL"; then +if [ "${CLEAR_INDEX}" == 'true' ]; then + log "Deleting database at ${COUCHDB_URL_SANITIZED}" + if ! curl -fsS -X DELETE "${COUCHDB_URL}"; then log "Error deleting database" fi fi -log "Creating database at $COUCHDB_URL_SANITIZED" -if ! curl -fsS -X PUT "$COUCHDB_URL"; then - log "Database already exists at $COUCHDB_URL_SANITIZED" +log "Creating database at ${COUCHDB_URL_SANITIZED}" +if ! curl -fsS -X PUT "${COUCHDB_URL}"; then + log "Database already exists at ${COUCHDB_URL_SANITIZED}" fi # Submit data node "${DATALOADER_JS}" "${COUCHDB_URL}" "${STATIC_DATA_DIR}" "${BUILD_DATA_DIR}" err=$? -if [ $err != 0 ]; then - log "deleteAndLoadSnapsets.js failed with $err, exiting" - exit $err +if [ "${err}" != '0' ]; then + log "${DATALOADER_JS} failed with ${err}, exiting" + exit "${err}" fi # Warm Data From d588bf231c4bb6ea0c48057ae87bffbf5b3c2782 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:27:16 +0100 Subject: [PATCH 07/11] GPII-3138: Add DB connectivity check to dataloader --- scripts/deleteAndLoadSnapsets.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index fe4192aca..f27730d07 100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -37,6 +37,13 @@ log "Static: ${STATIC_DATA_DIR}" log "Build: ${BUILD_DATA_DIR}" log "Working directory: $(pwd)" +# Check we can connect to CouchDB +COUCHDB_URL_ROOT=$(echo "${COUCHDB_URL}" | sed 's/[^\/]*$//g') +RET_CODE=$(curl --write-out '%{http_code}' --silent --output /dev/null "${COUCHDB_URL_ROOT}/_up") +if [ "$RET_CODE" != '200' ]; then + log "[ERROR] Failed to connect to CouchDB: ${COUCHDB_URL_SANITIZED}" + exit 1 +fi # Create build dir if it does not exist if [ ! 
-d "${BUILD_DATA_DIR}" ]; then From 6e6da162ba894e5a3159c5a061d9b2561aede48a Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:28:16 +0100 Subject: [PATCH 08/11] GPII-3138: Update vagrantCloudBasedContainers.sh to work with imported data loader --- scripts/vagrantCloudBasedContainers.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/vagrantCloudBasedContainers.sh b/scripts/vagrantCloudBasedContainers.sh index 49bcc8d4f..3e95d8bc9 100755 --- a/scripts/vagrantCloudBasedContainers.sh +++ b/scripts/vagrantCloudBasedContainers.sh @@ -6,7 +6,7 @@ # It builds a Docker image for GPII/universal and uses it to start two # components: the Preferences Server and the Flow Manager. # -# It also starts a CouchDB container and loads the CouchDB data into +# It also starts a CouchDB container and data into # it, so tests running against the GPII components will have access to the # latest test data. # @@ -35,16 +35,16 @@ COUCHDB_HEALTHCHECK_TIMEOUT=30 if [ "$NO_REBUILD" == "true" ] ; then CLEAR_INDEX= else - CLEAR_INDEX=1 + CLEAR_INDEX='true' fi UNIVERSAL_DIR="/home/vagrant/sync/universal" STATIC_DATA_DIR="$UNIVERSAL_DIR/testData/dbData" BUILD_DATA_DIR="$UNIVERSAL_DIR/build/dbData/snapset" -DATALOADER_IMAGE="herrclown/gpii-dataloader" DATALOADER_COUCHDB_URL="http://couchdb:${COUCHDB_PORT}/gpii" DATASOURCE_HOSTNAME="http://couchdb" +DATALOADER_CMD='/app/scripts/deleteAndLoadSnapsets.sh' GPII_PREFERENCES_CONFIG="gpii.config.preferencesServer.standalone.production" GPII_PREFERENCES_PORT=9081 @@ -82,7 +82,7 @@ docker run -d -p $COUCHDB_PORT:$COUCHDB_PORT --name couchdb $COUCHDB_IMAGE wget -O /dev/null --retry-connrefused --waitretry=$COUCHDB_HEALTHCHECK_DELAY --read-timeout=20 --timeout=1 --tries=$COUCHDB_HEALTHCHECK_TIMEOUT http://localhost:$COUCHDB_PORT # Load the CouchDB data -docker run --rm --link couchdb -v $STATIC_DATA_DIR:/static_data -e STATIC_DATA_DIR=/static_data -v $BUILD_DATA_DIR:/build_data -e 
BUILD_DATA_DIR=/build_data -e COUCHDB_URL=$DATALOADER_COUCHDB_URL -e CLEAR_INDEX=$CLEAR_INDEX $DATALOADER_IMAGE +docker run --rm --link couchdb -v $STATIC_DATA_DIR:/static_data -e STATIC_DATA_DIR=/static_data -v $BUILD_DATA_DIR:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=$DATALOADER_COUCHDB_URL -e CLEAR_INDEX=$CLEAR_INDEX $UNIVERSAL_IMAGE $DATALOADER_CMD # Wait for the CouchDB views become accessible. Accessing the view URL forced the view index to build which take time. # The URL returns 500 when the index is not ready, so use "--retry-on-http-error" option to continue retries at 500 response code. From e55d8c13e21dc2bc3591ba06bab600ba56968945 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:33:19 +0100 Subject: [PATCH 09/11] GPII-3138: Update DataLoader docs to reflect current state --- documentation/DataLoader.md | 29 ++++++++++++++--------------- documentation/README.md | 1 + 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md index 828d19699..da87f5c2e 100644 --- a/documentation/DataLoader.md +++ b/documentation/DataLoader.md @@ -1,22 +1,21 @@ # CouchDB Data Loader +(`scripts/deleteAndLoadSnapsets.sh`) -Builds a [sidecar container](http://blog.kubernetes.io/2015/06/the-distributed-system-toolkit-patterns.html) that contains the `git` command and a shell script for setting up a CouchDB database. When the docker image is run, this sequence is executed: -1. Clones the latest version of [GPII universal](https://github.com/gpii/universal/), +This script is used to setup CouchDB database and is executed as a Kubernetes +batch Job every time new version of the universal image is deployed to the +cluster (also when cluster is initially created). + +It does following: 1. Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, +1. Optionally deletes existing database, 1. Creates a CouchDB database if none exits, -1. 
Optionally clears an existing database of all its records, 1. Updates the database with respect to its `design/views` document, as required, -1. Deletes any snapsets currently in the database, -1. Loads the latest snapsets created at the second step into the database. - -## Building - -- `docker build -t gpii/gpii-dataloader .` +1. Loads the latest snapsets created into the database. ## Environment Variables - `COUCHDB_URL`: URL of the CouchDB database. (required) -- `CLEAR_INDEX`: If defined, the database at $COUCHDB_URL will be deleted and recreated. (optional) +- `CLEAR_INDEX`: If set to `true`, the database at $COUCHDB_URL will be deleted and recreated. (optional) - `STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) - `BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. (optional) @@ -30,19 +29,19 @@ Example using containers: ``` $ docker run -d -p 5984:5984 --name couchdb couchdb -$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=1 gpii/gpii-dataloader +$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh $ docker run -d -p 8081:8081 --name preferences --link couchdb -e NODE_ENV=gpii.config.preferencesServer.standalone.production -e PREFERENCESSERVER_LISTEN_PORT=8081 -e DATASOURCE_HOSTNAME=http://couchdb -e DATASOURCE_PORT=5984 vagrant-universal ``` -Below are two versions of loading couchdb data from a different location (e.g. /home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory). The first version has the optional `CLEAR_INDEX` set to erase and reset the database prior to other database changes: +Below are two versions of loading couchdb data from a different location (e.g. 
/home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory). The first version has the optional `CLEAR_INDEX` set to true to erase and reset the database prior to other database changes: ``` -$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=1 gpii/gpii-dataloader +$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` -The second version has `CLEAR_INDEX` set to nothing such that any existing database is left intact prior to subsequent changes to it (e.g., deleting the snapsets): +The second version does not set `CLEAR_INDEX` such that any existing database is left intact prior to subsequent changes to it (e.g., deleting the snapsets): ``` -$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX= gpii/gpii-dataloader +$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` diff --git a/documentation/README.md b/documentation/README.md index 2de699630..c3952effe 100644 --- 
a/documentation/README.md +++ b/documentation/README.md @@ -9,6 +9,7 @@ * [Preferences Server](PreferencesServer.md) * [Data Model for Preferences and OAuth Data](DataModel.md) * [Pouch Manager](PouchManager.md) + * [Data Loader](DataLoader.md) * [MatchMakerFramework](MatchMakerFramework.md) * [Flat Match Maker](FlatMatchMaker.md) * [Apptology](Apptology.md) From ab32b8b268f9e6c350aa6654452cdec97cfa1cc5 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 19:42:55 +0100 Subject: [PATCH 10/11] GPII-3138: Fix linting errors on DataLoader.md --- documentation/DataLoader.md | 59 ++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md index da87f5c2e..acee04e7a 100644 --- a/documentation/DataLoader.md +++ b/documentation/DataLoader.md @@ -1,16 +1,17 @@ # CouchDB Data Loader + (`scripts/deleteAndLoadSnapsets.sh`) -This script is used to setup CouchDB database and is executed as a Kubernetes -batch Job every time new version of the universal image is deployed to the -cluster (also when cluster is initially created). +This script is used to setup CouchDB database and is executed as a Kubernetes batch Job every time new version of the +universal image is deployed to the cluster (also when cluster is initially created). It does following: -1. Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, -1. Optionally deletes existing database, -1. Creates a CouchDB database if none exits, -1. Updates the database with respect to its `design/views` document, as required, -1. Loads the latest snapsets created into the database. + +- Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, +- Optionally deletes existing database, +- Creates a CouchDB database if none exists, +- Updates the database with respect to its `design/views` document, as required, +- Loads the latest snapsets created into the database. 
## Environment Variables @@ -19,29 +20,47 @@ It does following: - `STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) - `BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. (optional) -The use of environment variables for data directories is useful if you want to mount the database data using a Docker volume and point the data loader at it. +The use of environment variables for data directories is useful if you want to mount the database data using a Docker +volume and point the data loader at it. -Note that since [the docker doesn't support the environment variable type of array](https://github.com/moby/moby/issues/20169), two separate environment variables are used for inputting data directories instead of one array that holds these directories. +Note that since [Docker doesn't support environment variables of type +array](https://github.com/moby/moby/issues/20169), two separate environment variables are used for inputting data +directories instead of one array that holds these directories. 
## Running Example using containers: -``` +```bash $ docker run -d -p 5984:5984 --name couchdb couchdb -$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh -$ docker run -d -p 8081:8081 --name preferences --link couchdb -e NODE_ENV=gpii.config.preferencesServer.standalone.production -e PREFERENCESSERVER_LISTEN_PORT=8081 -e DATASOURCE_HOSTNAME=http://couchdb -e DATASOURCE_PORT=5984 vagrant-universal - +$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii \ + -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh +$ docker run -d -p 8081:8081 --name preferences --link couchdb \ + -e NODE_ENV=gpii.config.preferencesServer.standalone.production \ + -e PREFERENCESSERVER_LISTEN_PORT=8081 -e DATASOURCE_HOSTNAME=http://couchdb \ + -e DATASOURCE_PORT=5984 vagrant-universal ``` -Below are two versions of loading couchdb data from a different location (e.g. /home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory). The first version has the optional `CLEAR_INDEX` set to true to erase and reset the database prior to other database changes: +Below are two versions of loading couchdb data from a different location (e.g. +/home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for +build data directory). 
The first version has the optional `CLEAR_INDEX` set to true to erase and reset the database +prior to other database changes: -``` -$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh +```bash +$ docker run --name dataloader --link couchdb \ + -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data \ + -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data \ + -e COUCHDB_URL=http://couchdb:5984/gpii \ + -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` -The second version does not set `CLEAR_INDEX` such that any existing database is left intact prior to subsequent changes to it (e.g., deleting the snapsets): +The second version does not set `CLEAR_INDEX` such that any existing database is left intact prior to subsequent changes +to it (e.g., deleting the snapsets): -``` -$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii vagrant-universal scripts/deleteAndLoadSnapsets.sh +```bash +$ docker run --name dataloader --link couchdb \ + -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data \ + -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data \ + -e COUCHDB_URL=http://couchdb:5984/gpii \ + vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` From bbfabf23e437a45879ce87c78f8596f7bfb1d902 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Mon, 22 Oct 2018 13:40:45 +0100 Subject: [PATCH 11/11] GPII-3138: Revert 
unintended change in vagrantCloudBasedContainers.sh comment --- scripts/vagrantCloudBasedContainers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/vagrantCloudBasedContainers.sh b/scripts/vagrantCloudBasedContainers.sh index 3e95d8bc9..600eb4b50 100755 --- a/scripts/vagrantCloudBasedContainers.sh +++ b/scripts/vagrantCloudBasedContainers.sh @@ -6,7 +6,7 @@ # It builds a Docker image for GPII/universal and uses it to start two # components: the Preferences Server and the Flow Manager. # -# It also starts a CouchDB container and data into +# It also starts a CouchDB container and loads the CouchDB data into # it, so tests running against the GPII components will have access to the # latest test data. #