From d5562d59ca72ad7560112501251cd468c13f4b99 Mon Sep 17 00:00:00 2001 From: Joseph Scheuhammer Date: Thu, 18 Oct 2018 16:34:08 -0400 Subject: [PATCH 01/22] GPII-3138: Moved parts of gpii-dataloader code into universal Instead of building and running a docker container, move the relevant code into universal --- scripts/deleteAndLoadSnapsets.sh | 71 ++++++++++++++++++++++++++ scripts/vagrantCloudBasedContainers.sh | 7 +-- 2 files changed, 75 insertions(+), 3 deletions(-) create mode 100755 scripts/deleteAndLoadSnapsets.sh diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh new file mode 100755 index 000000000..e35da74de --- /dev/null +++ b/scripts/deleteAndLoadSnapsets.sh @@ -0,0 +1,71 @@ +#!/bin/sh + +UNIVERSAL_DIR=${UNIVERSAL_DIR:-/home/node/universal} +STATIC_DATA_DIR=${STATIC_DATA_DIR:-/home/node/universal/testData/dbData} +BUILD_DATA_DIR=${BUILD_DATA_DIR:-/home/node/universal/build/dbData/snapset} + +log() { + echo "$(date +'%Y-%m-%d %H:%M:%S') - $1" +} + +warm_indices(){ + log "Warming indices..." + + for view in $(curl -s $COUCHDB_URL/_design/views/ | jq -r '.views | keys[]'); do + curl -fsS $COUCHDB_URL/_design/views/_view/$view >/dev/null + done + + log "Finished warming indices..." +} + +# Verify variables +if [ -z "$COUCHDB_URL" ]; then + echo "COUCHDB_URL environment variable must be defined" + exit 1 +fi + +if [ ! -d "$STATIC_DATA_DIR" -o ! "$(ls -A $STATIC_DATA_DIR/*.json)" ]; then + echo "STATIC_DATA_DIR ($STATIC_DATA_DIR) does not exist or does not contain data, using universal's 'testData/dbData' as the default" + STATIC_DATA_DIR=./testData/dbData +fi + +if [ ! -d "$BUILD_DATA_DIR" -o ! 
"$(ls -A $BUILD_DATA_DIR/*.json)" ]; then + echo "BUILD_DATA_DIR ($BUILD_DATA_DIR) does not exist or does not contain data, using universal's 'build/dbData/snapset' as the default" + BUILD_DATA_DIR=./build/dbData/snapset +fi + +COUCHDB_URL_SANITIZED=`echo "$COUCHDB_URL" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g'` + +cd "$UNIVERSAL_DIR" +log "Starting" +log "CouchDB: $COUCHDB_URL_SANITIZED" +log "Clear index: $CLEAR_INDEX" +log "Static: $STATIC_DATA_DIR" +log "Build: $BUILD_DATA_DIR" +log "Working directory: `pwd`" + +node scripts/convertPrefs.js testData/preferences/ build/dbData/snapset/ snapset + +# Initialize (possibly clear) data base +if [ ! -z "$CLEAR_INDEX" ]; then + log "Deleting database at $COUCHDB_URL_SANITIZED" + if ! curl -fsS -X DELETE "$COUCHDB_URL"; then + log "Error deleting database" + fi +fi + +log "Creating database at $COUCHDB_URL_SANITIZED" +if ! curl -fsS -X PUT "$COUCHDB_URL"; then + log "Database already exists at $COUCHDB_URL_SANITIZED" +fi + +# Submit data +node scripts/deleteAndLoadSnapsets.js $COUCHDB_URL $STATIC_DATA_DIR $BUILD_DATA_DIR +err=$? 
+if [ $err != 0 ]; then + log "deleteAndLoadSnapsets.js failed with $err, exiting" + exit $err +fi + +# Warm Data +warm_indices diff --git a/scripts/vagrantCloudBasedContainers.sh b/scripts/vagrantCloudBasedContainers.sh index 49bcc8d4f..8c2dc75b3 100755 --- a/scripts/vagrantCloudBasedContainers.sh +++ b/scripts/vagrantCloudBasedContainers.sh @@ -39,11 +39,11 @@ else fi UNIVERSAL_DIR="/home/vagrant/sync/universal" +SCRIPT_DIR="$UNIVERSAL_DIR/scripts" STATIC_DATA_DIR="$UNIVERSAL_DIR/testData/dbData" BUILD_DATA_DIR="$UNIVERSAL_DIR/build/dbData/snapset" -DATALOADER_IMAGE="herrclown/gpii-dataloader" -DATALOADER_COUCHDB_URL="http://couchdb:${COUCHDB_PORT}/gpii" +COUCHDB_URL="http://localhost:${COUCHDB_PORT}/gpii" DATASOURCE_HOSTNAME="http://couchdb" GPII_PREFERENCES_CONFIG="gpii.config.preferencesServer.standalone.production" @@ -82,7 +82,8 @@ docker run -d -p $COUCHDB_PORT:$COUCHDB_PORT --name couchdb $COUCHDB_IMAGE wget -O /dev/null --retry-connrefused --waitretry=$COUCHDB_HEALTHCHECK_DELAY --read-timeout=20 --timeout=1 --tries=$COUCHDB_HEALTHCHECK_TIMEOUT http://localhost:$COUCHDB_PORT # Load the CouchDB data -docker run --rm --link couchdb -v $STATIC_DATA_DIR:/static_data -e STATIC_DATA_DIR=/static_data -v $BUILD_DATA_DIR:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=$DATALOADER_COUCHDB_URL -e CLEAR_INDEX=$CLEAR_INDEX $DATALOADER_IMAGE +export UNIVERSAL_DIR COUCHDB_URL STATIC_DATA_DIR BUILD_DATA_DIR CLEAR_INDEX +$SCRIPT_DIR/deleteAndLoadSnapsets.sh # Wait for the CouchDB views become accessible. Accessing the view URL forced the view index to build which take time. # The URL returns 500 when the index is not ready, so use "--retry-on-http-error" option to continue retries at 500 response code. 
From 9e5346ce13f80bd3eb0ec5d5cfb6406eee0da362 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Thu, 18 Oct 2018 23:05:31 +0100 Subject: [PATCH 02/22] GPII-3138: Import deleteAndLoadSnapsets.sh and DataLoader.md --- documentation/DataLoader.md | 48 ++++++++++++++++++++++++++++++++ scripts/deleteAndLoadSnapsets.sh | 12 ++++++-- 2 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 documentation/DataLoader.md diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md new file mode 100644 index 000000000..828d19699 --- /dev/null +++ b/documentation/DataLoader.md @@ -0,0 +1,48 @@ +# CouchDB Data Loader + +Builds a [sidecar container](http://blog.kubernetes.io/2015/06/the-distributed-system-toolkit-patterns.html) that contains the `git` command and a shell script for setting up a CouchDB database. When the docker image is run, this sequence is executed: +1. Clones the latest version of [GPII universal](https://github.com/gpii/universal/), +1. Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, +1. Creates a CouchDB database if none exits, +1. Optionally clears an existing database of all its records, +1. Updates the database with respect to its `design/views` document, as required, +1. Deletes any snapsets currently in the database, +1. Loads the latest snapsets created at the second step into the database. + +## Building + +- `docker build -t gpii/gpii-dataloader .` + +## Environment Variables + +- `COUCHDB_URL`: URL of the CouchDB database. (required) +- `CLEAR_INDEX`: If defined, the database at $COUCHDB_URL will be deleted and recreated. (optional) +- `STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) +- `BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. (optional) + +The use of environment variables for data directories is useful if you want to mount the database data using a Docker volume and point the data loader at it. 
+ +Note that since [the docker doesn't support the environment variable type of array](https://github.com/moby/moby/issues/20169), two separate environment variables are used for inputting data directories instead of one array that holds these directories. + +## Running + +Example using containers: + +``` +$ docker run -d -p 5984:5984 --name couchdb couchdb +$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=1 gpii/gpii-dataloader +$ docker run -d -p 8081:8081 --name preferences --link couchdb -e NODE_ENV=gpii.config.preferencesServer.standalone.production -e PREFERENCESSERVER_LISTEN_PORT=8081 -e DATASOURCE_HOSTNAME=http://couchdb -e DATASOURCE_PORT=5984 vagrant-universal + +``` + +Below are two versions of loading couchdb data from a different location (e.g. /home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory). The first version has the optional `CLEAR_INDEX` set to erase and reset the database prior to other database changes: + +``` +$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=1 gpii/gpii-dataloader +``` + +The second version has `CLEAR_INDEX` set to nothing such that any existing database is left intact prior to subsequent changes to it (e.g., deleting the snapsets): + +``` +$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX= gpii/gpii-dataloader +``` diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index e35da74de..5c7724662 100755 --- 
a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -1,6 +1,5 @@ #!/bin/sh -UNIVERSAL_DIR=${UNIVERSAL_DIR:-/home/node/universal} STATIC_DATA_DIR=${STATIC_DATA_DIR:-/home/node/universal/testData/dbData} BUILD_DATA_DIR=${BUILD_DATA_DIR:-/home/node/universal/build/dbData/snapset} @@ -36,7 +35,6 @@ fi COUCHDB_URL_SANITIZED=`echo "$COUCHDB_URL" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g'` -cd "$UNIVERSAL_DIR" log "Starting" log "CouchDB: $COUCHDB_URL_SANITIZED" log "Clear index: $CLEAR_INDEX" @@ -44,6 +42,16 @@ log "Static: $STATIC_DATA_DIR" log "Build: $BUILD_DATA_DIR" log "Working directory: `pwd`" +# Set up universal +git clone --depth 1 https://github.com/GPII/universal.git +cd universal + +npm install json5 +npm install fs +npm install rimraf +npm install mkdirp +npm install infusion +rm -f package-lock.json node scripts/convertPrefs.js testData/preferences/ build/dbData/snapset/ snapset # Initialize (possibly clear) data base From 5c28281ad9a92583706e096c9bd6cc6a45a67147 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 13:58:30 +0100 Subject: [PATCH 03/22] GPII-3138: Add dataLoader dependencies to docker image (jq and curl) --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 024de9f2f..559ae1367 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,7 @@ WORKDIR /app COPY . /app RUN apk add --no-cache --virtual build-dependencies python make git g++ && \ + apk add --no-cache curl jq && \ npm install && \ chown -R node:node . 
&& \ npm cache clean --force && \ From 5f3921a193becad7c1bc20d8c93ceaa15ea8d264 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:05:34 +0100 Subject: [PATCH 04/22] GPII-3138: Modify convertPrefs.js to expect directories without the final slash --- scripts/convertPrefs.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/convertPrefs.js b/scripts/convertPrefs.js index d3429cabc..3c6a44075 100644 --- a/scripts/convertPrefs.js +++ b/scripts/convertPrefs.js @@ -45,7 +45,7 @@ rimraf(targetDir, function () { filenames.forEach(function (filename) { if (filename.endsWith(".json5")) { var gpiiKey = filename.substr(0, filename.length - 6); - var preferences = fs.readFileSync(inputDir + filename, "utf-8"); + var preferences = fs.readFileSync(inputDir + "/" + filename, "utf-8"); var currentTime = new Date().toISOString(); var prefsSafeId = "prefsSafe-" + gpiiKey; @@ -80,11 +80,11 @@ rimraf(targetDir, function () { }); // Write the target files - var prefsSafesFile = targetDir + "prefsSafes.json"; + var prefsSafesFile = targetDir + "/prefsSafes.json"; console.log("prefsSafesFile: " + prefsSafesFile); fs.writeFileSync(prefsSafesFile, JSON.stringify(prefsSafes, null, 4)); - var gpiiKeysFile = targetDir + "gpiiKeys.json"; + var gpiiKeysFile = targetDir + "/gpiiKeys.json"; fs.writeFileSync(gpiiKeysFile, JSON.stringify(gpiiKeys, null, 4)); console.log("Finished converting preferences data in the source directory " + inputDir + " to the target directory " + targetDir); From 044aa1067df5feda5e342bceafb23e77b57af3d3 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:15:14 +0100 Subject: [PATCH 05/22] GPII-3138: Update data loader script to reflect new location in universal docker image --- scripts/deleteAndLoadSnapsets.sh | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index 5c7724662..2e11c237d 
100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -1,7 +1,12 @@ #!/bin/sh +APP_DIR=${APP_DIR:-"/app"} -STATIC_DATA_DIR=${STATIC_DATA_DIR:-/home/node/universal/testData/dbData} -BUILD_DATA_DIR=${BUILD_DATA_DIR:-/home/node/universal/build/dbData/snapset} +STATIC_DATA_DIR=${STATIC_DATA_DIR:-"${APP_DIR}/testData/dbData"} +PREFERENCES_DATA_DIR=${PREFERENCES_DATA_DIR:-"${APP_DIR}/testData/preferences"} +BUILD_DATA_DIR=${BUILD_DATA_DIR:-'/tmp/build/dbData'} + +DATALOADER_JS="${APP_DIR}/scripts/deleteAndLoadSnapsets.js" +CONVERT_JS="${APP_DIR}/scripts/convertPrefs.js" log() { echo "$(date +'%Y-%m-%d %H:%M:%S') - $1" @@ -42,17 +47,17 @@ log "Static: $STATIC_DATA_DIR" log "Build: $BUILD_DATA_DIR" log "Working directory: `pwd`" -# Set up universal -git clone --depth 1 https://github.com/GPII/universal.git -cd universal -npm install json5 -npm install fs -npm install rimraf -npm install mkdirp -npm install infusion -rm -f package-lock.json -node scripts/convertPrefs.js testData/preferences/ build/dbData/snapset/ snapset +# Convert preferences json5 to GPII keys and preferences safes +if [ -d "${PREFERENCES_DATA_DIR}" ]; then + node "${CONVERT_JS}" "${PREFERENCES_DATA_DIR}" "${BUILD_DATA_DIR}" snapset + if [ "$?" != '0' ]; then + log "[ERROR] ${CONVERT_JS} failed (exit code: $?)" + exit 1 + fi +else + log "PREFERENCES_DATA_DIR ($PREFERENCES_DATA_DIR) does not exist, nothing to convert" +fi # Initialize (possibly clear) data base if [ ! -z "$CLEAR_INDEX" ]; then @@ -68,7 +73,7 @@ if ! curl -fsS -X PUT "$COUCHDB_URL"; then fi # Submit data -node scripts/deleteAndLoadSnapsets.js $COUCHDB_URL $STATIC_DATA_DIR $BUILD_DATA_DIR +node "${DATALOADER_JS}" "${COUCHDB_URL}" "${STATIC_DATA_DIR}" "${BUILD_DATA_DIR}" err=$? 
if [ $err != 0 ]; then log "deleteAndLoadSnapsets.js failed with $err, exiting" From d59bf09f5997c6e82b7f281795fdd838d308b059 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:23:58 +0100 Subject: [PATCH 06/22] GPII-3138: Update directory checks to match containerised environment --- scripts/deleteAndLoadSnapsets.sh | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index 2e11c237d..c1ea15e8f 100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -28,15 +28,6 @@ if [ -z "$COUCHDB_URL" ]; then exit 1 fi -if [ ! -d "$STATIC_DATA_DIR" -o ! "$(ls -A $STATIC_DATA_DIR/*.json)" ]; then - echo "STATIC_DATA_DIR ($STATIC_DATA_DIR) does not exist or does not contain data, using universal's 'testData/dbData' as the default" - STATIC_DATA_DIR=./testData/dbData -fi - -if [ ! -d "$BUILD_DATA_DIR" -o ! "$(ls -A $BUILD_DATA_DIR/*.json)" ]; then - echo "BUILD_DATA_DIR ($BUILD_DATA_DIR) does not exist or does not contain data, using universal's 'build/dbData/snapset' as the default" - BUILD_DATA_DIR=./build/dbData/snapset -fi COUCHDB_URL_SANITIZED=`echo "$COUCHDB_URL" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g'` @@ -47,6 +38,10 @@ log "Static: $STATIC_DATA_DIR" log "Build: $BUILD_DATA_DIR" log "Working directory: `pwd`" +# Create build dir if it does not exist +if [ ! 
-d "${BUILD_DATA_DIR}" ]; then + mkdir -p "${BUILD_DATA_DIR}" +fi # Convert preferences json5 to GPII keys and preferences safes if [ -d "${PREFERENCES_DATA_DIR}" ]; then From 54d75f7f1c531f0912e02afd7cbbeedd2919a0c9 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:26:44 +0100 Subject: [PATCH 07/22] GPII-3138: Minor formatting changes --- scripts/deleteAndLoadSnapsets.sh | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index c1ea15e8f..fe4192aca 100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -15,28 +15,28 @@ log() { warm_indices(){ log "Warming indices..." - for view in $(curl -s $COUCHDB_URL/_design/views/ | jq -r '.views | keys[]'); do - curl -fsS $COUCHDB_URL/_design/views/_view/$view >/dev/null + for view in $(curl -s "${COUCHDB_URL}/_design/views/" | jq -r '.views | keys[]'); do + curl -fsS "${COUCHDB_URL}/_design/views/_view/${view}" >/dev/null done log "Finished warming indices..." } # Verify variables -if [ -z "$COUCHDB_URL" ]; then +if [ -z "${COUCHDB_URL}" ]; then echo "COUCHDB_URL environment variable must be defined" exit 1 fi +COUCHDB_URL_SANITIZED=$(echo "${COUCHDB_URL}" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g') -COUCHDB_URL_SANITIZED=`echo "$COUCHDB_URL" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g'` +log 'Starting' +log "CouchDB: ${COUCHDB_URL_SANITIZED}" +log "Clear index: ${CLEAR_INDEX}" +log "Static: ${STATIC_DATA_DIR}" +log "Build: ${BUILD_DATA_DIR}" +log "Working directory: $(pwd)" -log "Starting" -log "CouchDB: $COUCHDB_URL_SANITIZED" -log "Clear index: $CLEAR_INDEX" -log "Static: $STATIC_DATA_DIR" -log "Build: $BUILD_DATA_DIR" -log "Working directory: `pwd`" # Create build dir if it does not exist if [ ! -d "${BUILD_DATA_DIR}" ]; then @@ -55,24 +55,24 @@ else fi # Initialize (possibly clear) data base -if [ ! 
-z "$CLEAR_INDEX" ]; then - log "Deleting database at $COUCHDB_URL_SANITIZED" - if ! curl -fsS -X DELETE "$COUCHDB_URL"; then +if [ "${CLEAR_INDEX}" == 'true' ]; then + log "Deleting database at ${COUCHDB_URL_SANITIZED}" + if ! curl -fsS -X DELETE "${COUCHDB_URL}"; then log "Error deleting database" fi fi -log "Creating database at $COUCHDB_URL_SANITIZED" -if ! curl -fsS -X PUT "$COUCHDB_URL"; then - log "Database already exists at $COUCHDB_URL_SANITIZED" +log "Creating database at ${COUCHDB_URL_SANITIZED}" +if ! curl -fsS -X PUT "${COUCHDB_URL}"; then + log "Database already exists at ${COUCHDB_URL_SANITIZED}" fi # Submit data node "${DATALOADER_JS}" "${COUCHDB_URL}" "${STATIC_DATA_DIR}" "${BUILD_DATA_DIR}" err=$? -if [ $err != 0 ]; then - log "deleteAndLoadSnapsets.js failed with $err, exiting" - exit $err +if [ "${err}" != '0' ]; then + log "${DATALOADER_JS} failed with ${err}, exiting" + exit "${err}" fi # Warm Data From 68a961b5283b02920c924f99b5b24c097197869f Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:27:16 +0100 Subject: [PATCH 08/22] GPII-3138: Add DB connectivity check to dataloader --- scripts/deleteAndLoadSnapsets.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index fe4192aca..f27730d07 100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -37,6 +37,13 @@ log "Static: ${STATIC_DATA_DIR}" log "Build: ${BUILD_DATA_DIR}" log "Working directory: $(pwd)" +# Check we can connect to CouchDB +COUCHDB_URL_ROOT=$(echo "${COUCHDB_URL}" | sed 's/[^\/]*$//g') +RET_CODE=$(curl --write-out '%{http_code}' --silent --output /dev/null "${COUCHDB_URL_ROOT}/_up") +if [ "$RET_CODE" != '200' ]; then + log "[ERROR] Failed to connect to CouchDB: ${COUCHDB_URL_SANITIZED}" + exit 1 +fi # Create build dir if it does not exist if [ ! 
-d "${BUILD_DATA_DIR}" ]; then From fb3d435ab7e97940b50b470360d711e2b5d80586 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:28:16 +0100 Subject: [PATCH 09/22] GPII-3138: Update vagrantCloudBasedContainers.sh to work with imported data loader --- scripts/vagrantCloudBasedContainers.sh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scripts/vagrantCloudBasedContainers.sh b/scripts/vagrantCloudBasedContainers.sh index 8c2dc75b3..d03924d57 100755 --- a/scripts/vagrantCloudBasedContainers.sh +++ b/scripts/vagrantCloudBasedContainers.sh @@ -6,7 +6,7 @@ # It builds a Docker image for GPII/universal and uses it to start two # components: the Preferences Server and the Flow Manager. # -# It also starts a CouchDB container and loads the CouchDB data into +# It also starts a CouchDB container and data into # it, so tests running against the GPII components will have access to the # latest test data. # @@ -35,7 +35,7 @@ COUCHDB_HEALTHCHECK_TIMEOUT=30 if [ "$NO_REBUILD" == "true" ] ; then CLEAR_INDEX= else - CLEAR_INDEX=1 + CLEAR_INDEX='true' fi UNIVERSAL_DIR="/home/vagrant/sync/universal" @@ -45,6 +45,7 @@ BUILD_DATA_DIR="$UNIVERSAL_DIR/build/dbData/snapset" COUCHDB_URL="http://localhost:${COUCHDB_PORT}/gpii" DATASOURCE_HOSTNAME="http://couchdb" +DATALOADER_CMD='/app/scripts/deleteAndLoadSnapsets.sh' GPII_PREFERENCES_CONFIG="gpii.config.preferencesServer.standalone.production" GPII_PREFERENCES_PORT=9081 @@ -81,9 +82,7 @@ docker run -d -p $COUCHDB_PORT:$COUCHDB_PORT --name couchdb $COUCHDB_IMAGE # Wait for CouchDB wget -O /dev/null --retry-connrefused --waitretry=$COUCHDB_HEALTHCHECK_DELAY --read-timeout=20 --timeout=1 --tries=$COUCHDB_HEALTHCHECK_TIMEOUT http://localhost:$COUCHDB_PORT -# Load the CouchDB data -export UNIVERSAL_DIR COUCHDB_URL STATIC_DATA_DIR BUILD_DATA_DIR CLEAR_INDEX -$SCRIPT_DIR/deleteAndLoadSnapsets.sh +docker run --rm --link couchdb -v $STATIC_DATA_DIR:/static_data -e STATIC_DATA_DIR=/static_data -v 
$BUILD_DATA_DIR:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=$DATALOADER_COUCHDB_URL -e CLEAR_INDEX=$CLEAR_INDEX $UNIVERSAL_IMAGE $DATALOADER_CMD # Wait for the CouchDB views become accessible. Accessing the view URL forced the view index to build which take time. # The URL returns 500 when the index is not ready, so use "--retry-on-http-error" option to continue retries at 500 response code. From 26995c2abeed8fccf0485de4a025ed323538dad7 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 17:33:19 +0100 Subject: [PATCH 10/22] GPII-3138: Update DataLoader docs to reflect current state --- documentation/DataLoader.md | 29 ++++++++++++++--------------- documentation/README.md | 1 + 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md index 828d19699..da87f5c2e 100644 --- a/documentation/DataLoader.md +++ b/documentation/DataLoader.md @@ -1,22 +1,21 @@ # CouchDB Data Loader +(`scripts/deleteAndLoadSnapsets.sh`) -Builds a [sidecar container](http://blog.kubernetes.io/2015/06/the-distributed-system-toolkit-patterns.html) that contains the `git` command and a shell script for setting up a CouchDB database. When the docker image is run, this sequence is executed: -1. Clones the latest version of [GPII universal](https://github.com/gpii/universal/), +This script is used to setup CouchDB database and is executed as a Kubernetes +batch Job every time new version of the universal image is deployed to the +cluster (also when cluster is initially created). + +It does following: 1. Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, +1. Optionally deletes existing database, 1. Creates a CouchDB database if none exits, -1. Optionally clears an existing database of all its records, 1. Updates the database with respect to its `design/views` document, as required, -1. Deletes any snapsets currently in the database, -1. 
Loads the latest snapsets created at the second step into the database. - -## Building - -- `docker build -t gpii/gpii-dataloader .` +1. Loads the latest snapsets created into the database. ## Environment Variables - `COUCHDB_URL`: URL of the CouchDB database. (required) -- `CLEAR_INDEX`: If defined, the database at $COUCHDB_URL will be deleted and recreated. (optional) +- `CLEAR_INDEX`: If set to `true`, the database at $COUCHDB_URL will be deleted and recreated. (optional) - `STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) - `BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. (optional) @@ -30,19 +29,19 @@ Example using containers: ``` $ docker run -d -p 5984:5984 --name couchdb couchdb -$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=1 gpii/gpii-dataloader +$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh $ docker run -d -p 8081:8081 --name preferences --link couchdb -e NODE_ENV=gpii.config.preferencesServer.standalone.production -e PREFERENCESSERVER_LISTEN_PORT=8081 -e DATASOURCE_HOSTNAME=http://couchdb -e DATASOURCE_PORT=5984 vagrant-universal ``` -Below are two versions of loading couchdb data from a different location (e.g. /home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory). The first version has the optional `CLEAR_INDEX` set to erase and reset the database prior to other database changes: +Below are two versions of loading couchdb data from a different location (e.g. /home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory). 
The first version has the optional `CLEAR_INDEX` set to true to erase and reset the database prior to other database changes: ``` -$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=1 gpii/gpii-dataloader +$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` -The second version has `CLEAR_INDEX` set to nothing such that any existing database is left intact prior to subsequent changes to it (e.g., deleting the snapsets): +The second version does not set `CLEAR_INDEX` such that any existing database is left intact prior to subsequent changes to it (e.g., deleting the snapsets): ``` -$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX= gpii/gpii-dataloader +$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` diff --git a/documentation/README.md b/documentation/README.md index 2de699630..c3952effe 100644 --- a/documentation/README.md +++ b/documentation/README.md @@ -9,6 +9,7 @@ * [Preferences Server](PreferencesServer.md) * [Data Model for Preferences and OAuth 
Data](DataModel.md) * [Pouch Manager](PouchManager.md) + * [Data Loader](DataLoader.md) * [MatchMakerFramework](MatchMakerFramework.md) * [Flat Match Maker](FlatMatchMaker.md) * [Apptology](Apptology.md) From d44b584dfa7ef7422a08272f955514e943f991ca Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Fri, 19 Oct 2018 19:42:55 +0100 Subject: [PATCH 11/22] GPII-3138: Fix linting errors on DataLoader.md --- documentation/DataLoader.md | 59 ++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md index da87f5c2e..acee04e7a 100644 --- a/documentation/DataLoader.md +++ b/documentation/DataLoader.md @@ -1,16 +1,17 @@ # CouchDB Data Loader + (`scripts/deleteAndLoadSnapsets.sh`) -This script is used to setup CouchDB database and is executed as a Kubernetes -batch Job every time new version of the universal image is deployed to the -cluster (also when cluster is initially created). +This script is used to setup CouchDB database and is executed as a Kubernetes batch Job every time new version of the +universal image is deployed to the cluster (also when cluster is initially created). It does following: -1. Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, -1. Optionally deletes existing database, -1. Creates a CouchDB database if none exits, -1. Updates the database with respect to its `design/views` document, as required, -1. Loads the latest snapsets created into the database. + +- Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, +- Optionally deletes existing database, +- Creates a CouchDB database if none exits, +- Updates the database with respect to its `design/views` document, as required, +- Loads the latest snapsets created into the database. ## Environment Variables @@ -19,29 +20,47 @@ It does following: - `STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. 
(optional) - `BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. (optional) -The use of environment variables for data directories is useful if you want to mount the database data using a Docker volume and point the data loader at it. +The use of environment variables for data directories is useful if you want to mount the database data using a Docker +volume and point the data loader at it. -Note that since [the docker doesn't support the environment variable type of array](https://github.com/moby/moby/issues/20169), two separate environment variables are used for inputting data directories instead of one array that holds these directories. +Note that since [the docker doesn't support the environment variable type of +array](https://github.com/moby/moby/issues/20169), two separate environment variables are used for inputting data +directories instead of one array that holds these directories. ## Running Example using containers: -``` +```bash $ docker run -d -p 5984:5984 --name couchdb couchdb -$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh -$ docker run -d -p 8081:8081 --name preferences --link couchdb -e NODE_ENV=gpii.config.preferencesServer.standalone.production -e PREFERENCESSERVER_LISTEN_PORT=8081 -e DATASOURCE_HOSTNAME=http://couchdb -e DATASOURCE_PORT=5984 vagrant-universal - +$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii \ + -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh +$ docker run -d -p 8081:8081 --name preferences --link couchdb \ + -e NODE_ENV=gpii.config.preferencesServer.standalone.production \ + -e PREFERENCESSERVER_LISTEN_PORT=8081 -e DATASOURCE_HOSTNAME=http://couchdb \ + -e DATASOURCE_PORT=5984 vagrant-universal ``` -Below are two versions of loading couchdb data from a different location (e.g. 
/home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for build data directory). The first version has the optional `CLEAR_INDEX` set to true to erase and reset the database prior to other database changes: +Below are two versions of loading couchdb data from a different location (e.g. +/home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for +build data directory). The first version has the optional `CLEAR_INDEX` set to true to erase and reset the database +prior to other database changes: -``` -$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh +```bash +$ docker run --name dataloader --link couchdb \ + -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data \ + -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data \ + -e COUCHDB_URL=http://couchdb:5984/gpii \ + -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` -The second version does not set `CLEAR_INDEX` such that any existing database is left intact prior to subsequent changes to it (e.g., deleting the snapsets): +The second version does not set `CLEAR_INDEX` such that any existing database is left intact prior to subsequent changes +to it (e.g., deleting the snapsets): -``` -$ docker run --name dataloader --link couchdb -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=http://couchdb:5984/gpii vagrant-universal scripts/deleteAndLoadSnapsets.sh +```bash +$ docker run 
--name dataloader --link couchdb \ + -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data \ + -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data \ + -e COUCHDB_URL=http://couchdb:5984/gpii \ + vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` From 07ace4c913427c88190ba5c5b0f9d1503505cc33 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Mon, 22 Oct 2018 13:40:45 +0100 Subject: [PATCH 12/22] GPII-3138: Revert unintended change in DataLoader readme --- scripts/vagrantCloudBasedContainers.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/vagrantCloudBasedContainers.sh b/scripts/vagrantCloudBasedContainers.sh index d03924d57..85c0f2b52 100755 --- a/scripts/vagrantCloudBasedContainers.sh +++ b/scripts/vagrantCloudBasedContainers.sh @@ -6,7 +6,7 @@ # It builds a Docker image for GPII/universal and uses it to start two # components: the Preferences Server and the Flow Manager. # -# It also starts a CouchDB container and data into +# It also starts a CouchDB container and loads the CouchDB data into # it, so tests running against the GPII components will have access to the # latest test data. # From eb92c4e13994bdfb9b38cee8243ea49ea12b4cff Mon Sep 17 00:00:00 2001 From: Joseph Scheuhammer Date: Mon, 22 Oct 2018 12:53:36 -0400 Subject: [PATCH 13/22] GPII-3138: Improved check for proper usage of convertPrefs.js Added check for the required number of command line arguments. 
--- scripts/convertPrefs.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/convertPrefs.js b/scripts/convertPrefs.js index d3429cabc..d0f05d9ba 100644 --- a/scripts/convertPrefs.js +++ b/scripts/convertPrefs.js @@ -27,7 +27,7 @@ var inputDir = process.argv[2]; var targetDir = process.argv[3]; var prefsSafeType = process.argv[4] || "user"; -if (prefsSafeType !== "snapset" && prefsSafeType !== "user") { +if (process.argv.length < 4 || (prefsSafeType !== "snapset" && prefsSafeType !== "user")) { console.log("Usage: node scripts/convertPrefs.js InputFolder OutputFolder PrefsSafeType"); console.log(" where PrefsSafeType, is one of 'snapset' or 'user' (defaults to 'user')"); process.exit(1); From f28c79a067d06d3ff9455da5f4b7b6af7e623eb5 Mon Sep 17 00:00:00 2001 From: Joseph Scheuhammer Date: Mon, 22 Oct 2018 16:39:13 -0400 Subject: [PATCH 14/22] GPII-3138: Fixed merge issues. Fixed minor problems from previous merge of Stepan's pull request. --- scripts/vagrantCloudBasedContainers.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/vagrantCloudBasedContainers.sh b/scripts/vagrantCloudBasedContainers.sh index 85c0f2b52..18a5683de 100755 --- a/scripts/vagrantCloudBasedContainers.sh +++ b/scripts/vagrantCloudBasedContainers.sh @@ -39,13 +39,12 @@ else fi UNIVERSAL_DIR="/home/vagrant/sync/universal" -SCRIPT_DIR="$UNIVERSAL_DIR/scripts" STATIC_DATA_DIR="$UNIVERSAL_DIR/testData/dbData" BUILD_DATA_DIR="$UNIVERSAL_DIR/build/dbData/snapset" -COUCHDB_URL="http://localhost:${COUCHDB_PORT}/gpii" +DATALOADER_COUCHDB_URL="http://couchdb:${COUCHDB_PORT}/gpii" DATASOURCE_HOSTNAME="http://couchdb" -DATALOADER_CMD='/app/scripts/deleteAndLoadSnapsets.sh' +DATALOADER_CMD="/app/scripts/deleteAndLoadSnapsets.sh" GPII_PREFERENCES_CONFIG="gpii.config.preferencesServer.standalone.production" GPII_PREFERENCES_PORT=9081 @@ -82,6 +81,7 @@ docker run -d -p $COUCHDB_PORT:$COUCHDB_PORT --name couchdb $COUCHDB_IMAGE # Wait for CouchDB wget -O 
/dev/null --retry-connrefused --waitretry=$COUCHDB_HEALTHCHECK_DELAY --read-timeout=20 --timeout=1 --tries=$COUCHDB_HEALTHCHECK_TIMEOUT http://localhost:$COUCHDB_PORT +# Load the CouchDB data docker run --rm --link couchdb -v $STATIC_DATA_DIR:/static_data -e STATIC_DATA_DIR=/static_data -v $BUILD_DATA_DIR:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=$DATALOADER_COUCHDB_URL -e CLEAR_INDEX=$CLEAR_INDEX $UNIVERSAL_IMAGE $DATALOADER_CMD # Wait for the CouchDB views become accessible. Accessing the view URL forced the view index to build which take time. From 9c42fbe0b7522adafe2655fd83c624f145b9d7fb Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Tue, 23 Oct 2018 15:13:12 +0100 Subject: [PATCH 15/22] GPII-3138: Fix typo in DataLoader.md --- documentation/DataLoader.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md index acee04e7a..63cd906e5 100644 --- a/documentation/DataLoader.md +++ b/documentation/DataLoader.md @@ -9,7 +9,7 @@ It does following: - Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, - Optionally deletes existing database, -- Creates a CouchDB database if none exits, +- Creates a CouchDB database if none exists, - Updates the database with respect to its `design/views` document, as required, - Loads the latest snapsets created into the database. 
From ee8417b05e74402b0bc2b07dff6e4a5f985a441f Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Tue, 23 Oct 2018 15:14:14 +0100 Subject: [PATCH 16/22] GPII-3138: Add GPII prefix to dataloader env variables --- documentation/DataLoader.md | 34 +++++++++++++------------- scripts/vagrantCloudBasedContainers.sh | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md index 63cd906e5..ea782665b 100644 --- a/documentation/DataLoader.md +++ b/documentation/DataLoader.md @@ -15,10 +15,10 @@ It does following: ## Environment Variables -- `COUCHDB_URL`: URL of the CouchDB database. (required) -- `CLEAR_INDEX`: If set to `true`, the database at $COUCHDB_URL will be deleted and recreated. (optional) -- `STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) -- `BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. (optional) +- `GPII_COUCHDB_URL`: URL of the CouchDB database. (required) +- `GPII_CLEAR_INDEX`: If set to `true`, the database at $GPII_COUCHDB_URL will be deleted and recreated. (optional) +- `GPII_STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) +- `GPII_BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. (optional) The use of environment variables for data directories is useful if you want to mount the database data using a Docker volume and point the data loader at it. 
@@ -33,8 +33,8 @@ Example using containers: ```bash $ docker run -d -p 5984:5984 --name couchdb couchdb -$ docker run --rm --link couchdb -e COUCHDB_URL=http://couchdb:5984/gpii \ - -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh +$ docker run --rm --link couchdb -e GPII_COUCHDB_URL=http://couchdb:5984/gpii \ + -e GPII_CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh $ docker run -d -p 8081:8081 --name preferences --link couchdb \ -e NODE_ENV=gpii.config.preferencesServer.standalone.production \ -e PREFERENCESSERVER_LISTEN_PORT=8081 -e DATASOURCE_HOSTNAME=http://couchdb \ @@ -43,24 +43,24 @@ $ docker run -d -p 8081:8081 --name preferences --link couchdb \ Below are two versions of loading couchdb data from a different location (e.g. /home/vagrant/sync/universal/testData/dbData for static data directory and /home/vagrant/sync/universal/build/dbData for -build data directory). The first version has the optional `CLEAR_INDEX` set to true to erase and reset the database -prior to other database changes: +build data directory). 
The first version has the optional `GPII_CLEAR_INDEX` set to true to erase and reset the +database prior to other database changes: ```bash $ docker run --name dataloader --link couchdb \ - -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data \ - -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data \ - -e COUCHDB_URL=http://couchdb:5984/gpii \ - -e CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh + -v /home/vagrant/sync/universal/testData/dbData:/static_data -e GPII_STATIC_DATA_DIR=/static_data \ + -v /home/vagrant/sync/universal/build/dbData:/build_data -e GPII_BUILD_DATA_DIR=/build_data \ + -e GPII_COUCHDB_URL=http://couchdb:5984/gpii \ + -e GPII_CLEAR_INDEX=true vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` -The second version does not set `CLEAR_INDEX` such that any existing database is left intact prior to subsequent changes -to it (e.g., deleting the snapsets): +The second version does not set `GPII_CLEAR_INDEX` such that any existing database is left intact prior to subsequent +changes to it (e.g., deleting the snapsets): ```bash $ docker run --name dataloader --link couchdb \ - -v /home/vagrant/sync/universal/testData/dbData:/static_data -e STATIC_DATA_DIR=/static_data \ - -v /home/vagrant/sync/universal/build/dbData:/build_data -e BUILD_DATA_DIR=/build_data \ - -e COUCHDB_URL=http://couchdb:5984/gpii \ + -v /home/vagrant/sync/universal/testData/dbData:/static_data -e GPII_STATIC_DATA_DIR=/static_data \ + -v /home/vagrant/sync/universal/build/dbData:/build_data -e GPII_BUILD_DATA_DIR=/build_data \ + -e GPII_COUCHDB_URL=http://couchdb:5984/gpii \ vagrant-universal scripts/deleteAndLoadSnapsets.sh ``` diff --git a/scripts/vagrantCloudBasedContainers.sh b/scripts/vagrantCloudBasedContainers.sh index 18a5683de..2a9c1eb37 100755 --- a/scripts/vagrantCloudBasedContainers.sh +++ b/scripts/vagrantCloudBasedContainers.sh @@ -82,7 +82,7 @@ docker run -d -p 
$COUCHDB_PORT:$COUCHDB_PORT --name couchdb $COUCHDB_IMAGE wget -O /dev/null --retry-connrefused --waitretry=$COUCHDB_HEALTHCHECK_DELAY --read-timeout=20 --timeout=1 --tries=$COUCHDB_HEALTHCHECK_TIMEOUT http://localhost:$COUCHDB_PORT # Load the CouchDB data -docker run --rm --link couchdb -v $STATIC_DATA_DIR:/static_data -e STATIC_DATA_DIR=/static_data -v $BUILD_DATA_DIR:/build_data -e BUILD_DATA_DIR=/build_data -e COUCHDB_URL=$DATALOADER_COUCHDB_URL -e CLEAR_INDEX=$CLEAR_INDEX $UNIVERSAL_IMAGE $DATALOADER_CMD +docker run --rm --link couchdb -v $STATIC_DATA_DIR:/static_data -e GPII_STATIC_DATA_DIR=/static_data -v $BUILD_DATA_DIR:/build_data -e GPII_BUILD_DATA_DIR=/build_data -e GPII_COUCHDB_URL=$DATALOADER_COUCHDB_URL -e GPII_CLEAR_INDEX=$CLEAR_INDEX $UNIVERSAL_IMAGE $DATALOADER_CMD # Wait for the CouchDB views become accessible. Accessing the view URL forced the view index to build which take time. # The URL returns 500 when the index is not ready, so use "--retry-on-http-error" option to continue retries at 500 response code. From 28fc6354494fdcc4869e5f5000c330e306476e47 Mon Sep 17 00:00:00 2001 From: Joseph Scheuhammer Date: Tue, 23 Oct 2018 10:48:14 -0400 Subject: [PATCH 17/22] GPII-3138: Added to description of dataloader script. --- documentation/DataLoader.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md index ea782665b..342aca50d 100644 --- a/documentation/DataLoader.md +++ b/documentation/DataLoader.md @@ -11,7 +11,8 @@ It does following: - Optionally deletes existing database, - Creates a CouchDB database if none exists, - Updates the database with respect to its `design/views` document, as required, -- Loads the latest snapsets created into the database. +- Deletes the snapsets and keys, if any, from the database, +- Loads the latest snapsets and keys created created above into the database. 
## Environment Variables From 6794a0ff95ac8fcf91216f92b5969bd32c1e71fe Mon Sep 17 00:00:00 2001 From: Joseph Scheuhammer Date: Tue, 23 Oct 2018 11:23:23 -0400 Subject: [PATCH 18/22] GPII-3138: Added "GPII_" prefix to environment variables --- scripts/deleteAndLoadSnapsets.sh | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index f27730d07..dc3a97734 100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -15,33 +15,33 @@ log() { warm_indices(){ log "Warming indices..." - for view in $(curl -s "${COUCHDB_URL}/_design/views/" | jq -r '.views | keys[]'); do - curl -fsS "${COUCHDB_URL}/_design/views/_view/${view}" >/dev/null + for view in $(curl -s "${GPII_COUCHDB_URL}/_design/views/" | jq -r '.views | keys[]'); do + curl -fsS "${GPII_COUCHDB_URL}/_design/views/_view/${view}" >/dev/null done log "Finished warming indices..." } # Verify variables -if [ -z "${COUCHDB_URL}" ]; then - echo "COUCHDB_URL environment variable must be defined" +if [ -z "${GPII_COUCHDB_URL}" ]; then + echo "GPII_COUCHDB_URL environment variable must be defined" exit 1 fi -COUCHDB_URL_SANITIZED=$(echo "${COUCHDB_URL}" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g') +GPII_COUCHDB_URL_SANITIZED=$(echo "${GPII_COUCHDB_URL}" | sed -e 's,\(://\)[^/]*\(@\),\1\2,g') log 'Starting' -log "CouchDB: ${COUCHDB_URL_SANITIZED}" +log "CouchDB: ${GPII_COUCHDB_URL_SANITIZED}" log "Clear index: ${CLEAR_INDEX}" log "Static: ${STATIC_DATA_DIR}" log "Build: ${BUILD_DATA_DIR}" log "Working directory: $(pwd)" # Check we can connect to CouchDB -COUCHDB_URL_ROOT=$(echo "${COUCHDB_URL}" | sed 's/[^\/]*$//g') -RET_CODE=$(curl --write-out '%{http_code}' --silent --output /dev/null "${COUCHDB_URL_ROOT}/_up") +GPII_COUCHDB_URL_ROOT=$(echo "${GPII_COUCHDB_URL}" | sed 's/[^\/]*$//g') +RET_CODE=$(curl --write-out '%{http_code}' --silent --output /dev/null "${GPII_COUCHDB_URL_ROOT}/_up") if 
[ "$RET_CODE" != '200' ]; then - log "[ERROR] Failed to connect to CouchDB: ${COUCHDB_URL_SANITIZED}" + log "[ERROR] Failed to connect to CouchDB: ${GPII_COUCHDB_URL_SANITIZED}" exit 1 fi @@ -63,19 +63,19 @@ fi # Initialize (possibly clear) data base if [ "${CLEAR_INDEX}" == 'true' ]; then - log "Deleting database at ${COUCHDB_URL_SANITIZED}" - if ! curl -fsS -X DELETE "${COUCHDB_URL}"; then + log "Deleting database at ${GPII_COUCHDB_URL_SANITIZED}" + if ! curl -fsS -X DELETE "${GPII_COUCHDB_URL}"; then log "Error deleting database" fi fi -log "Creating database at ${COUCHDB_URL_SANITIZED}" -if ! curl -fsS -X PUT "${COUCHDB_URL}"; then - log "Database already exists at ${COUCHDB_URL_SANITIZED}" +log "Creating database at ${GPII_COUCHDB_URL_SANITIZED}" +if ! curl -fsS -X PUT "${GPII_COUCHDB_URL}"; then + log "Database already exists at ${GPII_COUCHDB_URL_SANITIZED}" fi # Submit data -node "${DATALOADER_JS}" "${COUCHDB_URL}" "${STATIC_DATA_DIR}" "${BUILD_DATA_DIR}" +node "${DATALOADER_JS}" "${GPII_COUCHDB_URL}" "${STATIC_DATA_DIR}" "${BUILD_DATA_DIR}" err=$? 
if [ "${err}" != '0' ]; then log "${DATALOADER_JS} failed with ${err}, exiting" From af1660b7b23669d971b2428924a01b4f3886cd93 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Tue, 23 Oct 2018 16:38:34 +0100 Subject: [PATCH 19/22] GPII-3138: Add GPII prefix to dataloader env variables --- scripts/deleteAndLoadSnapsets.sh | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index dc3a97734..c886d3290 100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -1,9 +1,9 @@ #!/bin/sh APP_DIR=${APP_DIR:-"/app"} -STATIC_DATA_DIR=${STATIC_DATA_DIR:-"${APP_DIR}/testData/dbData"} -PREFERENCES_DATA_DIR=${PREFERENCES_DATA_DIR:-"${APP_DIR}/testData/preferences"} -BUILD_DATA_DIR=${BUILD_DATA_DIR:-'/tmp/build/dbData'} +GPII_STATIC_DATA_DIR=${GPII_STATIC_DATA_DIR:-"${APP_DIR}/testData/dbData"} +GPII_PREFERENCES_DATA_DIR=${GPII_PREFERENCES_DATA_DIR:-"${APP_DIR}/testData/preferences"} +GPII_BUILD_DATA_DIR=${GPII_BUILD_DATA_DIR:-'/tmp/build/dbData'} DATALOADER_JS="${APP_DIR}/scripts/deleteAndLoadSnapsets.js" CONVERT_JS="${APP_DIR}/scripts/convertPrefs.js" @@ -32,9 +32,9 @@ GPII_COUCHDB_URL_SANITIZED=$(echo "${GPII_COUCHDB_URL}" | sed -e 's,\(://\)[^/]* log 'Starting' log "CouchDB: ${GPII_COUCHDB_URL_SANITIZED}" -log "Clear index: ${CLEAR_INDEX}" -log "Static: ${STATIC_DATA_DIR}" -log "Build: ${BUILD_DATA_DIR}" +log "Clear index: ${GPII_CLEAR_INDEX}" +log "Static: ${GPII_STATIC_DATA_DIR}" +log "Build: ${GPII_BUILD_DATA_DIR}" log "Working directory: $(pwd)" # Check we can connect to CouchDB @@ -46,23 +46,23 @@ if [ "$RET_CODE" != '200' ]; then fi # Create build dir if it does not exist -if [ ! -d "${BUILD_DATA_DIR}" ]; then - mkdir -p "${BUILD_DATA_DIR}" +if [ ! 
-d "${GPII_BUILD_DATA_DIR}" ]; then + mkdir -p "${GPII_BUILD_DATA_DIR}" fi # Convert preferences json5 to GPII keys and preferences safes -if [ -d "${PREFERENCES_DATA_DIR}" ]; then - node "${CONVERT_JS}" "${PREFERENCES_DATA_DIR}" "${BUILD_DATA_DIR}" snapset +if [ -d "${GPII_PREFERENCES_DATA_DIR}" ]; then + node "${CONVERT_JS}" "${GPII_PREFERENCES_DATA_DIR}" "${GPII_BUILD_DATA_DIR}" snapset if [ "$?" != '0' ]; then log "[ERROR] ${CONVERT_JS} failed (exit code: $?)" exit 1 fi else - log "PREFERENCES_DATA_DIR ($PREFERENCES_DATA_DIR) does not exist, nothing to convert" + log "GPII_PREFERENCES_DATA_DIR ($GPII_PREFERENCES_DATA_DIR) does not exist, nothing to convert" fi # Initialize (possibly clear) data base -if [ "${CLEAR_INDEX}" == 'true' ]; then +if [ "${GPII_CLEAR_INDEX}" == 'true' ]; then log "Deleting database at ${GPII_COUCHDB_URL_SANITIZED}" if ! curl -fsS -X DELETE "${GPII_COUCHDB_URL}"; then log "Error deleting database" @@ -75,7 +75,7 @@ if ! curl -fsS -X PUT "${GPII_COUCHDB_URL}"; then fi # Submit data -node "${DATALOADER_JS}" "${GPII_COUCHDB_URL}" "${STATIC_DATA_DIR}" "${BUILD_DATA_DIR}" +node "${DATALOADER_JS}" "${GPII_COUCHDB_URL}" "${GPII_STATIC_DATA_DIR}" "${GPII_BUILD_DATA_DIR}" err=$? 
if [ "${err}" != '0' ]; then log "${DATALOADER_JS} failed with ${err}, exiting" From a8598eb58ae492117839953a13e096dcbdf881cb Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Wed, 24 Oct 2018 07:46:19 +0100 Subject: [PATCH 20/22] GPII-3138: Add GPII prefix to dataloader APP_DIR variable --- scripts/deleteAndLoadSnapsets.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/deleteAndLoadSnapsets.sh b/scripts/deleteAndLoadSnapsets.sh index c886d3290..4677f7a5a 100755 --- a/scripts/deleteAndLoadSnapsets.sh +++ b/scripts/deleteAndLoadSnapsets.sh @@ -1,12 +1,12 @@ #!/bin/sh -APP_DIR=${APP_DIR:-"/app"} +GPII_APP_DIR=${GPII_APP_DIR:-"/app"} -GPII_STATIC_DATA_DIR=${GPII_STATIC_DATA_DIR:-"${APP_DIR}/testData/dbData"} -GPII_PREFERENCES_DATA_DIR=${GPII_PREFERENCES_DATA_DIR:-"${APP_DIR}/testData/preferences"} +GPII_STATIC_DATA_DIR=${GPII_STATIC_DATA_DIR:-"${GPII_APP_DIR}/testData/dbData"} +GPII_PREFERENCES_DATA_DIR=${GPII_PREFERENCES_DATA_DIR:-"${GPII_APP_DIR}/testData/preferences"} GPII_BUILD_DATA_DIR=${GPII_BUILD_DATA_DIR:-'/tmp/build/dbData'} -DATALOADER_JS="${APP_DIR}/scripts/deleteAndLoadSnapsets.js" -CONVERT_JS="${APP_DIR}/scripts/convertPrefs.js" +DATALOADER_JS="${GPII_APP_DIR}/scripts/deleteAndLoadSnapsets.js" +CONVERT_JS="${GPII_APP_DIR}/scripts/convertPrefs.js" log() { echo "$(date +'%Y-%m-%d %H:%M:%S') - $1" From 7362413a553ef16d436f2496dfdfd447765cd920 Mon Sep 17 00:00:00 2001 From: Joseph Scheuhammer Date: Wed, 24 Oct 2018 15:55:16 -0400 Subject: [PATCH 21/22] GPII-3138: Improved dataloader README - Called out that only views, and snapset PrefsSafes and their associated GPII keys are deleted/updated (steps 4, 5, and 6), - Explained usage of environment variables, - Added a warning about GPII_CLEAR_INDEX, - Explained usage difference between development vs. staging/production environments. 
--- documentation/DataLoader.md | 42 ++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/documentation/DataLoader.md b/documentation/DataLoader.md index 342aca50d..aec227398 100644 --- a/documentation/DataLoader.md +++ b/documentation/DataLoader.md @@ -2,30 +2,44 @@ (`scripts/deleteAndLoadSnapsets.sh`) -This script is used to setup CouchDB database and is executed as a Kubernetes batch Job every time new version of the +This script is used to setup CouchDB database and is executed as a Kubernetes batch Job every time a new version of the universal image is deployed to the cluster (also when cluster is initially created). -It does following: +It does the following: -- Converts the preferences in universal into `snapset` Prefs Safes and GPII Keys, -- Optionally deletes existing database, -- Creates a CouchDB database if none exists, -- Updates the database with respect to its `design/views` document, as required, -- Deletes the snapsets and keys, if any, from the database, -- Loads the latest snapsets and keys created created above into the database. +1. Converts the preferences in universal into `snapset` Prefs Safes and their associated GPII Keys, +2. Optionally deletes the existing database, +3. Creates a CouchDB database if none exists, +4. Updates the database with respect to its `_design/views` document, as required, +5. Deletes the `snapset` Prefs Safes and their associated GPII Keys, if any, currently in the database, +6. Loads the latest snapsets and associated keys created at step 1 into the database. + +Steps 4, 5, and 6 are handled by, and documented further in [`scripts/deleteAndLoadSnapsets.js`](https://github.com/GPII/universal/blob/master/scripts/deleteAndLoadSnapsets.js#L11). ## Environment Variables -- `GPII_COUCHDB_URL`: URL of the CouchDB database. (required) -- `GPII_CLEAR_INDEX`: If set to `true`, the database at $GPII_COUCHDB_URL will be deleted and recreated.
(optional) -- `GPII_STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) -- `GPII_BUILD_DATA_DIR`: The directory where the data built from the conversion step resides. (optional) +With the exception of `GPII_COUCHDB_URL`, the following environment variables have default values defined within +`scripts/deleteAndLoadSnapsets.sh`. The database, `GPII_COUCHDB_URL`, must be set outside of the script. Developers +can set these variables as needed for testing and experimentation. -The use of environment variables for data directories is useful if you want to mount the database data using a Docker +The use of environment variables for data directories is also useful if you want to mount the database data using a Docker volume and point the data loader at it. +WARNING: setting `GPII_CLEAR_INDEX` to `true` will erase all the contents of the database. Use with caution, and with +your own database for development. In a staging or production environment, these variables are set appropriately for +those contexts; in particular `GPII_CLEAR_INDEX` will not be set. + +- `GPII_COUCHDB_URL`: URL of the CouchDB database. (required) +- `GPII_CLEAR_INDEX`: If set to `true`, the database at `$GPII_COUCHDB_URL` will be deleted and replaced with an empty + database. (optional) +- `GPII_STATIC_DATA_DIR`: The directory where the static data to be loaded into CouchDB resides. (optional) +- `GPII_PREFERENCES_DATA_DIR`: The directory containing the "raw" preferences that are converted into `snapset` Prefs + Safes and their associated GPII Keys (step 1 above). (optional) +- `GPII_BUILD_DATA_DIR`: The directory where the data built from the conversion step reside. (optional) +- `GPII_APP_DIR`: The main directory, typically `universal`. 
(optional) + Note that since [the docker doesn't support the environment variable type of -array](https://github.com/moby/moby/issues/20169), two separate environment variables are used for inputting data +array](https://github.com/moby/moby/issues/20169), separate environment variables are used for inputting data directories instead of one array that holds these directories. ## Running From dd3ea150795f07e0fe77891cbfe5a77b6b349281 Mon Sep 17 00:00:00 2001 From: Stepan Stipl Date: Thu, 25 Oct 2018 13:55:03 +0100 Subject: [PATCH 22/22] GPII-3138: Add unique ID to pouchManager temp test dir --- gpii/node_modules/pouchManager/test/pouchManagerTests.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gpii/node_modules/pouchManager/test/pouchManagerTests.js b/gpii/node_modules/pouchManager/test/pouchManagerTests.js index d30b60047..9cf92c139 100644 --- a/gpii/node_modules/pouchManager/test/pouchManagerTests.js +++ b/gpii/node_modules/pouchManager/test/pouchManagerTests.js @@ -254,8 +254,9 @@ fluid.defaults("gpii.tests.pouchManager.testEnvironment", { baseDir: { expander: { funcName: "fluid.stringTemplate", - args: ["%base/pouchManagerTests", { - base: "@expand:{settingsDir}.getBaseSettingsDir()" + args: ["%base/pouchManagerTests-%id", { + base: "@expand:{settingsDir}.getBaseSettingsDir()", + id: "{that}.id" }] } },