From 1009fbc133f8b729720eb1a22ac07489a0a23c56 Mon Sep 17 00:00:00 2001
From: Alicia
Date: Fri, 3 Sep 2021 17:21:38 +0200
Subject: [PATCH 1/2] updated script with improved logic for asset deletion on
 copy/sync, and a new, more compact way of updating the sync file per dataset

---
 ResourceWatch/example_migrate_script.ipynb | 641 +++++++++++----------
 1 file changed, 333 insertions(+), 308 deletions(-)

diff --git a/ResourceWatch/example_migrate_script.ipynb b/ResourceWatch/example_migrate_script.ipynb
index 138c384..4880ff0 100644
--- a/ResourceWatch/example_migrate_script.ipynb
+++ b/ResourceWatch/example_migrate_script.ipynb
@@ -2,20 +2,21 @@
  "cells": [
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
    "# Migration and sync of assets between prod and staging"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "## Summary"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "Currently, the production API is the one that holds the latest data updated by the WRI team. \n",
    "This notebook copies assets from `production` to `staging`, maintaining the match between IDs. Optionally, it would be possible to copy assets back from `staging` to `production`. \n",
    "\n",
    "The expected workflow would be: \n",
@@ -24,31 +25,32 @@
    "1. upload/update assets on `production`\n",
    "2. make a copy of the assets from `production` to `staging` using this script\n",
    "3. synchronise the IDs of the assets.\n"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "## Instructions\n",
    "\n",
    "1. run the `Functions`.\n",
    "2. create a list with the asset URLs to copy.\n",
    "3. `Processing` has the steps to carry out the migration. "
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "## Functions\n",
    "These are the functions we need to create and synchronise assets between `production` and `staging`."
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "import getpass\n",
    "import requests as re\n",
@@ -56,25 +58,27 @@
    "from datetime import datetime\n",
    "import logging\n",
    "import time\n",
+    "import os\n",
+    "import dictdiffer\n",
    "logger = logging.getLogger()\n",
    "logger.setLevel(logging.INFO)"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "staging_server = \"https://staging-api.resourcewatch.org\"\n",
    "prod_server = \"https://api.resourcewatch.org\""
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "class bcolors:\n",
    "    HEADER = '\\033[95m'\n",
@@ -86,13 +90,13 @@
    "    ENDC = '\\033[0m'\n",
    "    BOLD = '\\033[1m'\n",
    "    UNDERLINE = '\\033[4m'"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "def auth(env='prod'):\n",
    "    serverUrl = {\n",
@@ -108,42 +112,50 @@
    "    response.raise_for_status()\n",
    "    print(f'{bcolors.OKGREEN}Successfully logged into {env}{bcolors.ENDC}')\n",
    "    return response.json().get('data').get('token')"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
-   "source": [
-    "token = {\n",
-    "    'staging': auth('staging'),\n",
-    "    'prod':auth('prod')\n",
-    "}"
-   ],
+   "execution_count": 9,
+   "metadata": {
+    "scrolled": true
+   },
   "outputs": [
    {
-     "output_type": "stream",
     "name": "stdout",
+     "output_type": "stream",
     "text": [
      "You are login into \u001b[95m\u001b[1mstaging\u001b[0m\n",
+      "Email: alicia.arenzana@vizzuality.com\n",
+      "Password: ········\n",
      "\u001b[92mSuccessfully logged into staging\u001b[0m\n",
      "You are login into \u001b[95m\u001b[1mprod\u001b[0m\n",
+      "Email: alicia.arenzana@vizzuality.com\n",
+      "Password: ········\n",
      "\u001b[92mSuccessfully logged into prod\u001b[0m\n"
     ]
    }
   ],
-   "metadata": {}
+   "source": [
+    "token = {\n",
+    "    'staging': auth('staging'),\n",
+    "    'prod':auth('prod')\n",
+    "}"
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# @TODO \n",
    "# * Migrate one day the body payloads to data model classes and refactor to classes following inheritance and recursive property copies\n",
    "# * Type function with Mypy\n",
    "# * Add proper method descriptions\n",
    "# * Refactor methods to reuse more code\n",
+    "# * https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/ \n",
+    "#   retries and calls should take the retry and backoff factor from Requests into account\n",
    "#from typing import List\n",
    "#from pydantic import BaseModel, parse_obj_as\n",
    "# class DatasetModel(BaseModel):\n",
@@ -154,14 +166,14 @@
    "\n",
    "# class metadataModel(BaseModel):\n",
    "    \n",
-    "# class vocabularyModel(BaseModel):\n"
-   ],
-   "outputs": [],
-   "metadata": {}
+    "# class vocabularyModel(BaseModel):"
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "def setTokenHeader(env, token=token):\n",
    "    '''\n",
@@ -530,7 +542,7 @@
    "    else:\n",
    "        return None\n",
    "    \n",
-    "def copyAssets(assetList, sync=False, fromEnv='prod', toEnv='staging'):\n",
+    "def copyAssets(assetList, sync=False, removeAssets=False, fromEnv='prod', toEnv='staging'):\n",
    "    '''\n",
    "    Creates a new copy or syncs the assets that we set up in the fromEnv into the destination Env \n",
    "    '''\n",
    "    \n",
    "    if not assetList:\n",
    "        raise IndexError(f'asset list is empty or not defined')\n",
    "    \n",
    "    \n",
-    "    dataAssets = [] \n",
-    "    \n",
+    "    dataAssets = [] \n",
+    "\n",
    "    if sync:\n",
    "        newDatasetList = [asset[f'{fromEnv}Id'] for asset in assetList if asset['type'] == 'dataset']\n",
    "        dataAssets = getAssetList(fromEnv, newDatasetList)\n",
-    "\n",
    "    else: \n",
    "        dataAssets = getAssetList(fromEnv, assetList)\n",
-    "    \n",
-    "    try:\n",
-    "        print(f'{bcolors.OKBLUE}Preparing to {\"sync\" if sync else \"copy\"} from {fromEnv} to {toEnv}...{bcolors.ENDC}')\n",
-    "        resources = []\n",
-    "        \n",
-    "        # @TODO:\n",
-    "        # Improve loop performance with multiprocessing\n",
-    "        # move loops into reusable function based on type\n",
-    "        # For sync only path updated data\n",
-    "        \n",
-    "        for dataset in dataAssets['data']:\n",
+    "\n",
+    "    # @TODO:\n",
+    "    # Improve loop performance with multiprocessing\n",
+    "    # move loops into reusable function based on type\n",
+    "    # For sync only patch updated data\n",
+    "\n",
+    "    for dataset in dataAssets['data']:\n",
+    "        try:\n",
+    "            print(f'{bcolors.OKBLUE}Preparing to {\"sync\" if sync else \"copy\"} from {fromEnv} to {toEnv}...{bcolors.ENDC}')\n",
+    "            \n",
+    "            resources = [] # Move this to dataset level as syncfiles are created per dataset now.\n",
+    "            \n",
    "            toDatasetId = assetIdToBeSync(sync, assetList, dataset, fromEnv, toEnv)\n",
    "            if toDatasetId:\n",
    "                logger.info(f'sync [{fromEnv}]dataset: {dataset.get(\"id\")}')\n",
@@ -608,18 +620,18 @@
    "                        'type': 'metadata',\n",
    "                        f'{fromEnv}Id':layerMetadata.get('id'),\n",
    "                        f'{toEnv}Id': newMetadata['data']\n",
-    "                    }) \n",
-    "             \n",
-    "            # remove toEnv layers that are not on fromEnv \n",
-    "            for layer in getAssetList(toEnv, [toDatasetId])['data'][0]['attributes'].get('layer'):\n",
-    "                if layer.get(\"id\") not in [asset[f'{toEnv}Id'] for asset in resources if asset['type'] == 'layer']:\n",
-    "                    headers = setTokenHeader(toEnv)\n",
-    "                    serverUrl = {\n",
-    "                        'prod': prod_server,\n",
-    "                        'staging': staging_server\n",
-    "                    }\n",
-    "                    url = f'{serverUrl[toEnv]}/v1/dataset/{toDatasetId}/layer/{layer.get(\"id\")}'\n",
-    "                    deleteAssets(url, headers) \n",
+    "                    })\n",
+    "            # remove toEnv layers that are not on fromEnv using a safe net \n",
+    "            if removeAssets: \n",
+    "                for layer in getAssetList(toEnv, [toDatasetId])['data'][0]['attributes'].get('layer'):\n",
+    "                    if layer.get(\"id\") not in [asset[f'{toEnv}Id'] for asset in resources if asset['type'] == 'layer']:\n",
+    "                        headers = setTokenHeader(toEnv)\n",
+    "                        serverUrl = {\n",
+    "                            'prod': prod_server,\n",
+    "                            'staging': staging_server\n",
+    "                        }\n",
+    "                        url = f'{serverUrl[toEnv]}/v1/dataset/{toDatasetId}/layer/{layer.get(\"id\")}'\n",
+    "                        deleteAssets(url, headers) \n",
    "            \n",
    "            # sync widgets\n",
    "            for widget in dataset['attributes'].get('widget'):\n",
@@ -648,17 +660,17 @@
    "                        f'{fromEnv}Id':widgetMetadata.get('id'),\n",
    "                        f'{toEnv}Id': newMetadata['data']\n",
    "                    })\n",
-    "\n",
-    "            # remove toEnv widgets that are not on fromEnv \n",
-    "            for widget in getAssetList(toEnv, [toDatasetId])['data'][0]['attributes'].get('widget'):\n",
-    "                if widget.get(\"id\") not in [asset[f'{toEnv}Id'] for asset in resources if asset['type'] == 'widget']:\n",
-    "                    headers = setTokenHeader(toEnv)\n",
-    "                    serverUrl = {\n",
-    "                        'prod': prod_server,\n",
-    "                        'staging': staging_server\n",
-    "                    }\n",
-    "                    url = f'{serverUrl[toEnv]}/v1/dataset/{toDatasetId}/widget/{widget.get(\"id\")}'\n",
-    "                    deleteAssets(url, headers) \n",
+    "            # remove toEnv widgets 
that are not on fromEnv using a safe net\n", + " if removeAssets: \n", + " for widget in getAssetList(toEnv, [toDatasetId])['data'][0]['attributes'].get('widget'):\n", + " if widget.get(\"id\") not in [asset[f'{toEnv}Id'] for asset in resources if asset['type'] == 'widget']:\n", + " headers = setTokenHeader(toEnv)\n", + " serverUrl = {\n", + " 'prod': prod_server,\n", + " 'staging': staging_server\n", + " }\n", + " url = f'{serverUrl[toEnv]}/v1/dataset/{toDatasetId}/widget/{widget.get(\"id\")}'\n", + " deleteAssets(url, headers) \n", "\n", " for metadata in dataset['attributes'].get('metadata'):\n", " logger.info('creating metadata')\n", @@ -670,105 +682,92 @@ " f'{toEnv}Id': newMetadata['data']\n", " })\n", " \n", - " except NameError or IndexError as e:\n", - " logger.error(e)\n", - " raise e\n", - " except:\n", - " pass\n", + " ## Here we will add the logic to create the sync files.\n", + " except NameError or IndexError as e:\n", + " logger.error(e)\n", + " raise e\n", + " except:\n", + " pass\n", " \n", - " filename = f'dataset_sync_files/RW_prod_staging_match_{resources[0][\"prodId\"]}.json'\n", - " if not sync and len(resources) > 0:\n", - " print(f'creating sync file with name: {filename}')\n", - " with open(filename, 'w') as outfile:\n", - " json.dump(resources, outfile)\n", - " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", - " return filename\n", - "\n", - " elif sync and len(resources) > 0:\n", - " if resources[-1]['type'] == 'metadata':\n", - " print(f'update sync file {filename}')\n", - " with open(filename, 'w') as outfile:\n", - " json.dump(resources, outfile)\n", + " # We are assuming that the first item in the resources is a dataset.\n", + " filename = f'dataset_sync_files/RW_prod_staging_match_{resources[0][\"prodId\"]}.json'\n", + " try:\n", + " ### The logic here is try to see if the file already exists and reads it\n", + " ### if not it will create it.\n", + " fileExists = os.path.exists(filename)\n", + " if len(resources) > 0:\n", + " with open(filename, 'w+') as outfile:\n", + " if fileExists:\n", + " oldfile = json.load(outfile) # we save here the old sync data.\n", + " # Here there are a couple of drivers: \n", + " # Do we consider that the latest version of sync file generated is the right one? \n", + " # What if there is a failure?\n", + " # Do we want to combine them? 
on the old code i'm seeing an assumption \n", + " # related metadata being the latest thing.\n", + " difference = list(dictdiffer.diff(resources, oldfile))\n", + " if difference == []:\n", + " break\n", + " else:\n", + " writeOptions = {\n", + " 'Y': resources,\n", + " 'N': oldfile,\n", + " 'M': dictdiffer.patch(difference, resources) \n", + " }\n", + " for diff in difference: \n", + " print(diff)\n", + " userConfirmation = input(f'{bcolors.WARNING} Do you want to overwrite or merge \\\n", + " {str(oldfile)} with {str(resources)}:{bcolors.ENDC} \\\n", + " Y/M/N') or \"N\"\n", + " if userConfirmation not in ('Y', 'N', 'M'):\n", + " raise NameError(f'User confirmation option not valid: {userConfirmation}')\n", + " \n", + " json.dump(writeOptions[userConfirmation], outfile, sort_keys=True)\n", + " else:\n", + " json.dump(resources, outfile, sort_keys=True)\n", + " \n", " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", " return filename\n", - " else:\n", - " with open(filename,\"r\") as oldfile:\n", - " oldfile = json.load(oldfile)\n", - " if oldfile[-1]['type'] == 'metadata' and resources[-1]['type'] != 'metadata':\n", - " print('update sync file fail, please run sync again')\n", - " if oldfile[-1]['type'] != 'metadata':\n", - " print(f'update sync file {filename}')\n", - " with open(filename, 'w') as outfile:\n", - " json.dump(resources, outfile)\n", - " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", - " return filename\n", + " except Error as e:\n", + " raise e\n", " \n", - "def syncAssets(syncList, fromEnv='prod', toEnv='staging'):\n", + "def syncAssets(syncList, remove = False, fromEnv='prod', toEnv='staging'):\n", " '''\n", " Allows sync of Assets\n", " '''\n", - " \n", - " return copyAssets(syncList, True, fromEnv, toEnv)" - ], - "outputs": [], - "metadata": {} + " return copyAssets(syncList, True, remove, fromEnv, toEnv)" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "# Processing\n", "## Get list of assets that we want to modify or sync" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### List of assets:\n", "\n", "* `datasetsProd` will contain the id of the assets in productioon that need to be migrated to `staging`. We need to make sure that this list is in sync with the document we have shared with the assets." 
- ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### For testing purposes\n", "Dummy assests to create assets in production environment" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 8, - "source": [ - "# Dummy data to test the notebook: creation of a dummy dataset with a layer in production.\n", - "toEnv = 'prod'\n", - "serverUrl = {\n", - " 'prod': prod_server,\n", - " 'staging': staging_server\n", - " }\n", - "headers = setTokenHeader(toEnv)\n", - "urlDataset = f'{serverUrl[toEnv]}/v1/dataset'\n", - "bodyDataset = {'dataset':{\n", - " 'application': ['rw'],\n", - " 'name': 'This is a test',\n", - " 'connectorType': 'rest',\n", - " 'provider': 'cartodb',\n", - " 'published': False,\n", - " 'overwrite': False,\n", - " 'protected':False,\n", - " 'env': 'production',\n", - " 'connectorUrl': \"https://wri-rw.carto.com/api/v2/sql?q=select * from air_temo_anomalies\"\n", - " }\n", - "}\n", - "\n", - "responseDataset = postAssets(urlDataset, bodyDataset, headers)\n", - "responseDataset" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': {'id': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", @@ -818,37 +817,43 @@ " 'layerRelevantProps': []}}}" ] }, + "execution_count": 8, "metadata": {}, - "execution_count": 8 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "# Dummy data to test the notebook: creation of a dummy dataset with a layer in production.\n", + "toEnv = 'prod'\n", + "serverUrl = {\n", + " 'prod': prod_server,\n", + " 'staging': staging_server\n", + " }\n", + "headers = setTokenHeader(toEnv)\n", + "urlDataset = f'{serverUrl[toEnv]}/v1/dataset'\n", + "bodyDataset = {'dataset':{\n", + " 'application': ['rw'],\n", + " 'name': 'This is a test',\n", + " 'connectorType': 'rest',\n", + " 'provider': 'cartodb',\n", + " 'published': False,\n", + " 'overwrite': False,\n", + " 'protected':False,\n", + " 'env': 'production',\n", + " 'connectorUrl': \"https://wri-rw.carto.com/api/v2/sql?q=select * from air_temo_anomalies\"\n", + " }\n", + "}\n", + "\n", + "responseDataset = postAssets(urlDataset, bodyDataset, headers)\n", + "responseDataset" + ] }, { "cell_type": "code", "execution_count": 9, - "source": [ - "urlLayer = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/layer'\n", - "bodyLayer = {\n", - " 'application': ['rw'],\n", - " 'name': 'test-121',\n", - " 'provider': 'cartodb',\n", - " 'default': True,\n", - " 'published': False,\n", - " 'env': 'production',\n", - " 'layerConfig': {\n", - " \"body\": {}\n", - " },\n", - " 'legendConfig': {},\n", - " 'interactionConfig': {},\n", - " 'applicationConfig': {}\n", - " }\n", - "responseLayer = postAssets(urlLayer, bodyLayer, headers)\n", - "responseLayer" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': {'id': 'c21dd7ab-e729-4811-9433-8333b1d7c9e9',\n", @@ -873,33 +878,37 @@ " 'updatedAt': '2021-06-07T09:36:15.327Z'}}}" ] }, + "execution_count": 9, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 10, "source": [ - "urlWidget = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/widget'\n", - "bodyWidget = {\n", + "urlLayer = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/layer'\n", + "bodyLayer = {\n", " 'application': ['rw'],\n", " 'name': 'test-121',\n", + " 'provider': 'cartodb',\n", " 'default': True,\n", " 'published': False,\n", " 
'env': 'production',\n", - " 'widgetConfig': {\n", + " 'layerConfig': {\n", " \"body\": {}\n", - " }\n", + " },\n", + " 'legendConfig': {},\n", + " 'interactionConfig': {},\n", + " 'applicationConfig': {}\n", " }\n", - "responseWidget = postAssets(urlWidget, bodyWidget, headers)\n", - "responseWidget" - ], + "responseLayer = postAssets(urlLayer, bodyLayer, headers)\n", + "responseLayer" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': {'id': '5f169df0-a293-4588-bbcd-521ee9484cd6',\n", @@ -922,27 +931,33 @@ " 'updatedAt': '2021-06-07T09:36:17.154Z'}}}" ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "urlWidget = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/widget'\n", + "bodyWidget = {\n", + " 'application': ['rw'],\n", + " 'name': 'test-121',\n", + " 'default': True,\n", + " 'published': False,\n", + " 'env': 'production',\n", + " 'widgetConfig': {\n", + " \"body\": {}\n", + " }\n", + " }\n", + "responseWidget = postAssets(urlWidget, bodyWidget, headers)\n", + "responseWidget" + ] }, { "cell_type": "code", "execution_count": 11, - "source": [ - "urlVocabulary = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/vocabulary/knowledge_graph'\n", - "bodyVocabulary = {\n", - " 'application': 'rw',\n", - " 'tags':[\"geospatial\"]\n", - " }\n", - "responseVocabulary = postAssets(urlVocabulary, bodyVocabulary, headers)\n", - "responseVocabulary" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': [{'id': 'knowledge_graph',\n", @@ -952,29 +967,27 @@ " 'application': 'rw'}}]}" ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "urlVocabulary = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/vocabulary/knowledge_graph'\n", + "bodyVocabulary = {\n", + " 'application': 'rw',\n", + " 'tags':[\"geospatial\"]\n", + " }\n", + "responseVocabulary = postAssets(urlVocabulary, bodyVocabulary, headers)\n", + "responseVocabulary" + ] }, { "cell_type": "code", "execution_count": 12, - "source": [ - "urlMetadataDataset = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/metadata'\n", - "bodyMetadataDataset = {\n", - " 'application': 'rw',\n", - " 'language':'ENG',\n", - " 'name':'this is a dummy dataset',\n", - " 'description':'Lorem Ipsum'\n", - " }\n", - "responseMetadataDataset = postAssets(urlMetadataDataset, bodyMetadataDataset, headers)\n", - "responseMetadataDataset" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': [{'id': '60bde8962852be001ba7e42b',\n", @@ -991,15 +1004,28 @@ " 'status': 'published'}}]}" ] }, + "execution_count": 12, "metadata": {}, - "execution_count": 12 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "urlMetadataDataset = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/metadata'\n", + "bodyMetadataDataset = {\n", + " 'application': 'rw',\n", + " 'language':'ENG',\n", + " 'name':'this is a dummy dataset',\n", + " 'description':'Lorem Ipsum'\n", + " }\n", + "responseMetadataDataset = postAssets(urlMetadataDataset, bodyMetadataDataset, headers)\n", + "responseMetadataDataset" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "urlMetadataLayer = 
f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/layer/{responseLayer[\"data\"].get(\"id\")}/metadata'\n", "bodyMetadataLayer = {\n", @@ -1010,27 +1036,14 @@ " }\n", "responseMetadataLayer = postAssets(urlMetadataLayer, bodyMetadataLayer, headers)\n", "responseMetadataLayer" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 13, - "source": [ - "urlMetadatawidget = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/widget/{responseWidget[\"data\"].get(\"id\")}/metadata'\n", - "bodyMetadatawidget = {\n", - " 'application': 'rw',\n", - " 'language':'ENG',\n", - " 'name':'this is a dummy widget',\n", - " 'description':'Lorem Ipsum'\n", - " }\n", - "responseMetadatawidget = postAssets(urlMetadatawidget, bodyMetadatawidget, headers)\n", - "responseMetadatawidget" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': [{'id': '60bde89a3cc064001b3675b9',\n", @@ -1047,80 +1060,92 @@ " 'status': 'published'}}]}" ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 13 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "urlMetadatawidget = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/widget/{responseWidget[\"data\"].get(\"id\")}/metadata'\n", + "bodyMetadatawidget = {\n", + " 'application': 'rw',\n", + " 'language':'ENG',\n", + " 'name':'this is a dummy widget',\n", + " 'description':'Lorem Ipsum'\n", + " }\n", + "responseMetadatawidget = postAssets(urlMetadatawidget, bodyMetadatawidget, headers)\n", + "responseMetadatawidget" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### List of assets:\n", "\n", "* we need to make sure that this list is in sync with the document we have shared with the assets" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 14, - "source": [ - "# in the future we can automate this listing based on the doc using the google sheet api both for writing and reading from\n", - "# providing a sample of the list by printing it\n", - "datasetsProd = [responseDataset['data']['id']]\n", - "datasetsProd" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['6a3aa408-b3d3-44c6-89b7-93fbfa545489']" ] }, + "execution_count": 14, "metadata": {}, - "execution_count": 14 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "# in the future we can automate this listing based on the doc using the google sheet api both for writing and reading from\n", + "# providing a sample of the list by printing it\n", + "datasetsProd = [responseDataset['data']['id']]\n", + "datasetsProd" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Backup Data in both environments" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "#backupAssets('prod')\n", "#backupAssets('staging')" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Only do this if you want to clean data in staging. \n", "* You will need to be logged in" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "#deleteDataFrom()" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Copy resources from production to staging. \n", "The running time will depend on the size of the asset. 
\n", @@ -1129,44 +1154,35 @@ "- type: this can be a \"layer\", a \"dataset\", a \"widget\", \"vocabulary\", \"metadata\"\n", "- prodId: the id of the item in `production`\n", "- stagingId: the id of the item in `staging`" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 18, + "metadata": {}, + "outputs": [], "source": [ "# enter the API ID of the dataset on production to copy/sync here\n", "prod_API_ID = ['']# ex: '79e06dd8-a2ae-45eb-8e99-e73bc87ec946'\n", "# keep the syncFile list empty\n", "syncFile = []" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 12, - "source": [ - "# copy a dataset on production to staging\n", - "for datasetId in prod_API_ID:\n", - " syncFile.append(copyAssets([datasetId], False, fromEnv='prod', toEnv = 'staging'))\n", - "for syncfile in syncFile:\n", - " with open(syncfile) as json_file:\n", - " syncList = json.load(json_file)\n", - " syncAssets(syncList, fromEnv='prod', toEnv='staging')" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[94mPreparing to copy from prod to staging...\u001b[0m\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "ERROR:root:response: \n", "ERROR:root:\n", @@ -1177,24 +1193,24 @@ ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "creating sync file with name: dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json\n", "\u001b[92mcopy process finished\u001b[0m\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]dataset: 42859b52-31f2-419c-ac14-8b0cbd6bbb6f\n", "INFO:root:with [staging]dataset: e95fe72e-eb7f-486c-ad0e-b0cc52ac3b94\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[94mPreparing to sync from prod to staging...\u001b[0m\n", "update sync file dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json\n", @@ -1202,142 +1218,154 @@ ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]dataset: 42859b52-31f2-419c-ac14-8b0cbd6bbb6f\n", "INFO:root:with [staging]dataset: e95fe72e-eb7f-486c-ad0e-b0cc52ac3b94\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[94mPreparing to sync from prod to staging...\u001b[0m\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]widget: 2cb5af4f-2bfc-49f3-9f99-ac415e98c7db\n", "INFO:root:with [staging]widget: 1804d8e0-0de5-4b9a-8ecd-b55c9ff176fb\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "update sync file dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json\n", "\u001b[92msync process finished\u001b[0m\n" ] } ], - "metadata": {} + "source": [ + "# copy a dataset on production to staging\n", + "for datasetId in prod_API_ID:\n", + " syncFile.append(copyAssets([datasetId], False, fromEnv='prod', toEnv = 'staging'))\n", + "for syncfile in syncFile:\n", + " with open(syncfile) as json_file:\n", + " syncList = json.load(json_file)\n", + " syncAssets(syncList, fromEnv='prod', toEnv='staging')" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Open sync list of assets, match items with list and update them." 
- ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 19, - "source": [ - "# sync dataset production <> staging\n", - "# use the printed json filename in the previous cell\n", - "if len(syncFile)==0:\n", - " syncFile = [f'dataset_sync_files/RW_prod_staging_match_{datasetId}.json' for datasetId in prod_API_ID]\n", - "for syncfile in syncFile:\n", - " with open(syncfile) as json_file:\n", - " syncList = json.load(json_file)\n", - "\n", - " syncAssets(syncList, fromEnv='prod', toEnv='staging')" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]dataset: 42859b52-31f2-419c-ac14-8b0cbd6bbb6f\n", "INFO:root:with [staging]dataset: 05f90e71-fef4-445c-82d9-65e77d732494\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[94mPreparing to sync from prod to staging...\u001b[0m\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]widget: 2cb5af4f-2bfc-49f3-9f99-ac415e98c7db\n", "INFO:root:with [staging]widget: eedaa69b-7d14-4541-9a0c-1033bcddd072\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "update sync file dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json\n", "\u001b[92msync process finished\u001b[0m\n" ] } ], - "metadata": {} + "source": [ + "# sync dataset production <> staging\n", + "# use the printed json filename in the previous cell\n", + "if len(syncFile)==0:\n", + " syncFile = [f'dataset_sync_files/RW_prod_staging_match_{datasetId}.json' for datasetId in prod_API_ID]\n", + "for syncfile in syncFile:\n", + " with open(syncfile) as json_file:\n", + " syncList = json.load(json_file)\n", + "\n", + " syncAssets(syncList, fromEnv='prod', toEnv='staging')" + ] }, { "cell_type": "code", "execution_count": 17, - "source": [ - "# delete testing datasets from both envs after testing:\n", - "deleteDataFrom('prod', [responseDataset['data']['id']])" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[93mAre you sure you want to delete ['6a3aa408-b3d3-44c6-89b7-93fbfa545489'] in prod:\u001b[0m Y/n Y\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:deleting https://api.resourcewatch.org/v1/dataset/6a3aa408-b3d3-44c6-89b7-93fbfa545489... \n" ] } ], - "metadata": {} + "source": [ + "# delete testing datasets from both envs after testing:\n", + "deleteDataFrom('prod', [responseDataset['data']['id']])" + ] }, { "cell_type": "code", "execution_count": 16, - "source": [ - "deleteDataFrom('staging', [syncList[0]['stagingId']])" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:deleting https://staging-api.resourcewatch.org/v1/dataset/e95fe72e-eb7f-486c-ad0e-b0cc52ac3b94... 
\n" ] } ], - "metadata": {} + "source": [ + "deleteDataFrom('staging', [syncList[0]['stagingId']])" + ] } ], "metadata": { + "interpreter": { + "hash": "1936b053440c27f530542f53326030d97194af8aa0e5ac751988556632f9c990" + }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.7.7 64-bit ('base': conda)" + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1349,12 +1377,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "interpreter": { - "hash": "1936b053440c27f530542f53326030d97194af8aa0e5ac751988556632f9c990" + "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} From 71cfb2b06b898e2755bc7cb14f43d163a388896d Mon Sep 17 00:00:00 2001 From: Alicia Date: Fri, 3 Sep 2021 18:43:10 +0200 Subject: [PATCH 2/2] make sure that we open the file on read if it has data --- ResourceWatch/example_migrate_script.ipynb | 427 ++++++++++++++++----- 1 file changed, 341 insertions(+), 86 deletions(-) diff --git a/ResourceWatch/example_migrate_script.ipynb b/ResourceWatch/example_migrate_script.ipynb index 4880ff0..0c3e0ef 100644 --- a/ResourceWatch/example_migrate_script.ipynb +++ b/ResourceWatch/example_migrate_script.ipynb @@ -171,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 75, "metadata": {}, "outputs": [], "source": [ @@ -690,51 +690,58 @@ " pass\n", " \n", " # We are assuming that the first item in the resources is a dataset.\n", - " filename = f'dataset_sync_files/RW_prod_staging_match_{resources[0][\"prodId\"]}.json'\n", - " try:\n", - " ### The logic here is try to see if the file already exists and reads it\n", - " ### if not it will create it.\n", - " fileExists = os.path.exists(filename)\n", - " if len(resources) > 0:\n", - " with open(filename, 'w+') as outfile:\n", - " if fileExists:\n", - " oldfile = json.load(outfile) # we save here the old sync data.\n", - " # Here there are a couple of drivers: \n", - " # Do we consider that the latest version of sync file generated is the right one? \n", - " # What if there is a failure?\n", - " # Do we want to combine them? 
on the old code i'm seeing an assumption \n", - " # related metadata being the latest thing.\n", - " difference = list(dictdiffer.diff(resources, oldfile))\n", - " if difference == []:\n", - " break\n", - " else:\n", - " writeOptions = {\n", - " 'Y': resources,\n", - " 'N': oldfile,\n", - " 'M': dictdiffer.patch(difference, resources) \n", - " }\n", - " for diff in difference: \n", - " print(diff)\n", - " userConfirmation = input(f'{bcolors.WARNING} Do you want to overwrite or merge \\\n", - " {str(oldfile)} with {str(resources)}:{bcolors.ENDC} \\\n", - " Y/M/N') or \"N\"\n", - " if userConfirmation not in ('Y', 'N', 'M'):\n", - " raise NameError(f'User confirmation option not valid: {userConfirmation}')\n", - " \n", - " json.dump(writeOptions[userConfirmation], outfile, sort_keys=True)\n", - " else:\n", - " json.dump(resources, outfile, sort_keys=True)\n", - " \n", - " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", - " return filename\n", - " except Error as e:\n", - " raise e\n", + " \n", + " syncFile(resources, sync)\n", + " \n", " \n", "def syncAssets(syncList, remove = False, fromEnv='prod', toEnv='staging'):\n", " '''\n", " Allows sync of Assets\n", " '''\n", - " return copyAssets(syncList, True, remove, fromEnv, toEnv)" + " return copyAssets(syncList, True, remove, fromEnv, toEnv)\n", + "\n", + "def syncFile(resources:list, sync:bool)-> str:\n", + " try:\n", + " ### The logic here is try to see if the file already exists and reads it\n", + " ### if not it will create it.\n", + " filename = f'dataset_sync_files/RW_prod_staging_match_{resources[0][\"prodId\"]}.json'\n", + " fileExists = os.path.exists(filename)\n", + " if len(resources) > 0:\n", + " if fileExists and (os.path.getsize(filename) > 0):\n", + " with open(filename, 'r') as outfile:\n", + " oldfile = json.loads(outfile.read()) # we save here the old sync data.\n", + " # Here there are a couple of drivers: \n", + " # Do we consider that the latest version of sync file generated is the right one? \n", + " # What if there is a failure?\n", + " # Do we want to combine them? 
on the old code i'm seeing an assumption \n", + " # related metadata being the latest thing.\n", + " difference = list(dictdiffer.diff(resources, oldfile))\n", + " if difference == []:\n", + " print('no change in sync file detected')\n", + " pass\n", + " else:\n", + " writeOptions = {\n", + " 'Y': resources,\n", + " 'N': oldfile,\n", + " 'M': dictdiffer.patch(difference, resources) \n", + " }\n", + " for diff in difference: \n", + " print(diff)\n", + " userConfirmation = input(f'{bcolors.WARNING} Do you want to overwrite, merge or leave the file as it is?:{bcolors.ENDC} \\\n", + " Y/M/N') or \"N\"\n", + " if userConfirmation not in ('Y', 'N', 'M'):\n", + " raise NameError(f'User confirmation option not valid: {userConfirmation}')\n", + "\n", + " with open(filename, 'w') as outfile:\n", + " json.dump(writeOptions[userConfirmation], outfile, sort_keys=True)\n", + " else:\n", + " with open(filename, 'w') as outfile:\n", + " json.dump(resources, outfile, sort_keys=True)\n", + "\n", + " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", + " return filename\n", + " except Exception as e:\n", + " raise e" ] }, { @@ -764,16 +771,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': {'id': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + "{'data': {'id': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'type': 'dataset',\n", " 'attributes': {'name': 'This is a test',\n", - " 'slug': 'This-is-a-test_9',\n", + " 'slug': 'This-is-a-test_15',\n", " 'type': None,\n", " 'subtitle': None,\n", " 'application': ['rw'],\n", @@ -810,14 +817,14 @@ " 'clonedHost': {},\n", " 'errorMessage': None,\n", " 'taskId': None,\n", - " 'createdAt': '2021-06-07T09:36:12.332Z',\n", - " 'updatedAt': '2021-06-07T09:36:12.332Z',\n", + " 'createdAt': '2021-09-03T15:47:43.364Z',\n", + " 'updatedAt': '2021-09-03T15:47:43.364Z',\n", " 'dataLastUpdated': None,\n", " 'widgetRelevantProps': [],\n", " 'layerRelevantProps': []}}}" ] }, - "execution_count": 8, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -850,17 +857,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': {'id': 'c21dd7ab-e729-4811-9433-8333b1d7c9e9',\n", + "{'data': {'id': '38051cae-72e0-4662-94e4-a8a3dc19ca85',\n", " 'type': 'layer',\n", " 'attributes': {'name': 'test-121',\n", - " 'slug': 'test-121_2',\n", - " 'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + " 'slug': 'test-121_5',\n", + " 'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'application': ['rw'],\n", " 'iso': [],\n", " 'provider': 'cartodb',\n", @@ -874,11 +881,11 @@ " 'interactionConfig': {},\n", " 'applicationConfig': {},\n", " 'staticImageConfig': {},\n", - " 'createdAt': '2021-06-07T09:36:15.327Z',\n", - " 'updatedAt': '2021-06-07T09:36:15.327Z'}}}" + " 'createdAt': '2021-09-03T15:47:44.433Z',\n", + " 'updatedAt': '2021-09-03T15:47:44.433Z'}}}" ] }, - "execution_count": 9, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -905,17 +912,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': {'id': '5f169df0-a293-4588-bbcd-521ee9484cd6',\n", + "{'data': {'id': '810de298-66b1-47ea-93f9-8593f4c6b43a',\n", " 'type': 'widget',\n", " 'attributes': {'name': 'test-121',\n", - " 'dataset': 
'6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", - " 'slug': 'test-121_2',\n", + " 'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", + " 'slug': 'test-121_5',\n", " 'userId': '57a0aa1071e394dd32ffe137',\n", " 'application': ['rw'],\n", " 'verified': False,\n", @@ -927,11 +934,11 @@ " 'env': 'production',\n", " 'widgetConfig': {'body': {}},\n", " 'template': False,\n", - " 'createdAt': '2021-06-07T09:36:17.153Z',\n", - " 'updatedAt': '2021-06-07T09:36:17.154Z'}}}" + " 'createdAt': '2021-09-03T15:47:45.139Z',\n", + " 'updatedAt': '2021-09-03T15:47:45.139Z'}}}" ] }, - "execution_count": 10, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -954,7 +961,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -967,7 +974,7 @@ " 'application': 'rw'}}]}" ] }, - "execution_count": 11, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -984,27 +991,27 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': [{'id': '60bde8962852be001ba7e42b',\n", + "{'data': [{'id': '613243a3d32fb1001aad821f',\n", " 'type': 'metadata',\n", - " 'attributes': {'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + " 'attributes': {'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'application': 'rw',\n", - " 'resource': {'id': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + " 'resource': {'id': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'type': 'dataset'},\n", " 'language': 'eng',\n", " 'name': 'this is a dummy dataset',\n", " 'description': 'Lorem Ipsum',\n", - " 'createdAt': '2021-06-07T09:36:22.304Z',\n", - " 'updatedAt': '2021-06-07T09:36:22.304Z',\n", + " 'createdAt': '2021-09-03T15:47:47.614Z',\n", + " 'updatedAt': '2021-09-03T15:47:47.614Z',\n", " 'status': 'published'}}]}" ] }, - "execution_count": 12, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1023,9 +1030,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': [{'id': '613243a45390eb001b6a592e',\n", + " 'type': 'metadata',\n", + " 'attributes': {'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", + " 'application': 'rw',\n", + " 'resource': {'id': '38051cae-72e0-4662-94e4-a8a3dc19ca85',\n", + " 'type': 'layer'},\n", + " 'language': 'eng',\n", + " 'name': 'this is a dummy Layer',\n", + " 'description': 'Lorem Ipsum',\n", + " 'createdAt': '2021-09-03T15:47:48.818Z',\n", + " 'updatedAt': '2021-09-03T15:47:48.818Z',\n", + " 'status': 'published'}}]}" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "urlMetadataLayer = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/layer/{responseLayer[\"data\"].get(\"id\")}/metadata'\n", "bodyMetadataLayer = {\n", @@ -1040,27 +1069,27 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': [{'id': '60bde89a3cc064001b3675b9',\n", + "{'data': [{'id': '613243a71614ef001a2400b3',\n", " 'type': 'metadata',\n", - " 'attributes': {'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + " 'attributes': {'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'application': 'rw',\n", - " 'resource': {'id': '5f169df0-a293-4588-bbcd-521ee9484cd6',\n", + " 'resource': {'id': 
'810de298-66b1-47ea-93f9-8593f4c6b43a',\n", " 'type': 'widget'},\n", " 'language': 'eng',\n", " 'name': 'this is a dummy widget',\n", " 'description': 'Lorem Ipsum',\n", - " 'createdAt': '2021-06-07T09:36:26.194Z',\n", - " 'updatedAt': '2021-06-07T09:36:26.194Z',\n", + " 'createdAt': '2021-09-03T15:47:51.047Z',\n", + " 'updatedAt': '2021-09-03T15:47:51.047Z',\n", " 'status': 'published'}}]}" ] }, - "execution_count": 13, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -1088,16 +1117,18 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, + "execution_count": 42, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { "text/plain": [ - "['6a3aa408-b3d3-44c6-89b7-93fbfa545489']" + "['020723f0-0238-4612-b5b7-245554ae198f']" ] }, - "execution_count": 14, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -1109,6 +1140,180 @@ "datasetsProd" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clean copy" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[94mPreparing to copy from prod to staging...\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:creating metadata for layer...\n", + "INFO:root:creating metadata for widget...\n", + "INFO:root:creating metadata\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('change', [0, 'stagingId'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [2, 'stagingId'], ('faa18045-ea7f-4750-a7a7-a8f14ca7f100', 'cde0e510-5e89-41b3-a0b1-7188581de62c'))\n", + "('change', [3, 'stagingId', 0, 'id'], ('61324e6ccbd8f4001a738054', '61324c08cbd8f4001a73804c'))\n", + "('change', [3, 'stagingId', 0, 'attributes', 'dataset'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [3, 'stagingId', 0, 'attributes', 'resource', 'id'], ('faa18045-ea7f-4750-a7a7-a8f14ca7f100', 'cde0e510-5e89-41b3-a0b1-7188581de62c'))\n", + "('change', [3, 'stagingId', 0, 'attributes', 'createdAt'], ('2021-09-03T16:33:48.361Z', '2021-09-03T16:23:36.614Z'))\n", + "('change', [3, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:33:48.361Z', '2021-09-03T16:23:36.614Z'))\n", + "('change', [4, 'stagingId'], ('126c7399-deb8-47f2-973c-0bb89dda3c1b', '39a74925-e0fe-4223-b636-112a119ad7ec'))\n", + "('change', [5, 'stagingId', 0, 'id'], ('61324e6dcbd8f4001a738055', '61324c0acbd8f4001a73804d'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'dataset'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'resource', 'id'], ('126c7399-deb8-47f2-973c-0bb89dda3c1b', '39a74925-e0fe-4223-b636-112a119ad7ec'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'createdAt'], ('2021-09-03T16:33:49.830Z', '2021-09-03T16:23:38.109Z'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:33:49.830Z', '2021-09-03T16:23:38.109Z'))\n", + "('change', [6, 'stagingId', 0, 'id'], ('61324e6ecbd8f4001a738056', '61324c0acbd8f4001a73804e'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'dataset'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'resource', 'id'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 
'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'createdAt'], ('2021-09-03T16:33:50.562Z', '2021-09-03T16:23:38.838Z'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:33:50.562Z', '2021-09-03T16:23:38.838Z'))\n", + " Do you want to overwrite, merge or leave the file as it is?: Y/M/NN\n", + "\u001b[92mcopy process finished\u001b[0m\n" + ] + } + ], + "source": [ + "copyAssets(datasetsProd, sync=False, removeAssets=False, fromEnv='prod', toEnv = 'staging')" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:sync [prod]dataset: 020723f0-0238-4612-b5b7-245554ae198f\n", + "INFO:root:with [staging]dataset: b95e88be-3a3f-40df-8648-16000da09a63\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[94mPreparing to sync from prod to staging...\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR:root:response: \n", + "ERROR:root:\n", + "ERROR:root:url: \n", + "ERROR:root:https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63/vocabulary/knowledge_graph\n", + "ERROR:root:body: \n", + "ERROR:root:{\"application\": \"rw\", \"tags\": [\"geospatial\"]}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93mPost operation was not succesfull, trying to update instead\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:sync [prod]layer: 38051cae-72e0-4662-94e4-a8a3dc19ca85\n", + "INFO:root:with [staging]layer: cde0e510-5e89-41b3-a0b1-7188581de62c\n", + "INFO:root:creating metadata for layer...\n", + "ERROR:root:response: \n", + "ERROR:root:\n", + "ERROR:root:url: \n", + "ERROR:root:https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63/layer/cde0e510-5e89-41b3-a0b1-7188581de62c/metadata\n", + "ERROR:root:body: \n", + "ERROR:root:{\"application\": \"rw\", \"resource\": {\"id\": \"38051cae-72e0-4662-94e4-a8a3dc19ca85\", \"type\": \"layer\"}, \"language\": \"eng\", \"name\": \"this is a dummy Layer\", \"description\": \"Lorem Ipsum\"}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93mPost operation was not succesfull, trying to update instead\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:sync [prod]widget: 810de298-66b1-47ea-93f9-8593f4c6b43a\n", + "INFO:root:with [staging]widget: 39a74925-e0fe-4223-b636-112a119ad7ec\n", + "INFO:root:creating metadata for widget...\n", + "ERROR:root:response: \n", + "ERROR:root:\n", + "ERROR:root:url: \n", + "ERROR:root:https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63/widget/39a74925-e0fe-4223-b636-112a119ad7ec/metadata\n", + "ERROR:root:body: \n", + "ERROR:root:{\"application\": \"rw\", \"resource\": {\"id\": \"810de298-66b1-47ea-93f9-8593f4c6b43a\", \"type\": \"widget\"}, \"language\": \"eng\", \"name\": \"this is a dummy widget\", \"description\": \"Lorem Ipsum\"}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93mPost operation was not succesfull, trying to update instead\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:creating metadata\n", + "ERROR:root:response: \n", + "ERROR:root:\n", + "ERROR:root:url: \n", + 
"ERROR:root:https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63/metadata\n", + "ERROR:root:body: \n", + "ERROR:root:{\"application\": \"rw\", \"resource\": {\"id\": \"020723f0-0238-4612-b5b7-245554ae198f\", \"type\": \"dataset\"}, \"language\": \"eng\", \"name\": \"this is a dummy dataset\", \"description\": \"Lorem Ipsum\"}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93mPost operation was not succesfull, trying to update instead\u001b[0m\n", + "('change', [3, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:34:02.697Z', '2021-09-03T16:23:36.614Z'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:34:04.751Z', '2021-09-03T16:23:38.109Z'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:34:05.814Z', '2021-09-03T16:23:38.838Z'))\n", + " Do you want to overwrite, merge or leave the file as it is?: Y/M/NN\n", + "\u001b[92msync process finished\u001b[0m\n" + ] + } + ], + "source": [ + "with open('dataset_sync_files/RW_prod_staging_match_020723f0-0238-4612-b5b7-245554ae198f.json') as json_file:\n", + " syncList = json.load(json_file)\n", + "syncAssets(syncList, fromEnv='prod', toEnv='staging')" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1136,11 +1341,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 83, "metadata": {}, "outputs": [], "source": [ - "#deleteDataFrom()" + "os.remove(f\"dataset_sync_files/RW_prod_staging_match_{responseDataset['data']['id']}.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Are you sure you want to delete ['020723f0-0238-4612-b5b7-245554ae198f'] in prod: Y/nY\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:deleting https://api.resourcewatch.org/v1/dataset/020723f0-0238-4612-b5b7-245554ae198f... \n" + ] + } + ], + "source": [ + "deleteDataFrom('prod', datasetsProd)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Are you sure you want to delete ['b95e88be-3a3f-40df-8648-16000da09a63'] in staging: Y/nY\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:deleting https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63... \n" + ] + } + ], + "source": [ + "deleteDataFrom('staging', ['b95e88be-3a3f-40df-8648-16000da09a63'])\n", + "\n" ] }, { @@ -1252,7 +1506,8 @@ "source": [ "# copy a dataset on production to staging\n", "for datasetId in prod_API_ID:\n", - " syncFile.append(copyAssets([datasetId], False, fromEnv='prod', toEnv = 'staging'))\n", + " syncFile.append(copyAssets([datasetId], sync=False, removeAssets=False, fromEnv='prod', toEnv = 'staging'))\n", + "#repeating the same operation?\n", "for syncfile in syncFile:\n", " with open(syncfile) as json_file:\n", " syncList = json.load(json_file)\n",