From 1009fbc133f8b729720eb1a22ac07489a0a23c56 Mon Sep 17 00:00:00 2001
From: Alicia
Date: Fri, 3 Sep 2021 17:21:38 +0200
Subject: [PATCH 1/2] updated script with improved logic for asset deletion on
 copy/sync, and a new, more compact way of updating the sync file per dataset

---
 ResourceWatch/example_migrate_script.ipynb | 641 +++++++++++----------
 1 file changed, 333 insertions(+), 308 deletions(-)

diff --git a/ResourceWatch/example_migrate_script.ipynb b/ResourceWatch/example_migrate_script.ipynb
index 138c384..4880ff0 100644
--- a/ResourceWatch/example_migrate_script.ipynb
+++ b/ResourceWatch/example_migrate_script.ipynb
@@ -2,20 +2,21 @@
  "cells": [
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
    "# Migration and sync of assets between prod and staging"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "## Summary"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "Currently, the production API is the one that holds the latest data updated by the WRI team. \n",
    "This notebook copies assets from `production` to `staging`, maintaining the match between IDs. Optionally, it would be possible to copy assets back from `staging` to `production`. \n",
    "\n",
    "The expected workflow would be: \n",
@@ -24,31 +25,32 @@
    "1. upload/update assets on `production`\n",
    "2. make a copy of the assets from `production` to `staging` using this script\n",
    "3. synchronise the IDs of the assets.\n"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "## Instructions\n",
    "\n",
    "1. run the `Functions`.\n",
    "2. create a list with the asset URLs to copy.\n",
    "3. `Processing` has the steps to carry out the migration. "
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+   "metadata": {},
   "source": [
    "## Functions\n",
    "These are the functions we need to create and synchronise assets between `production` and `staging`."
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "import getpass\n",
    "import requests as re\n",
@@ -56,25 +58,27 @@
    "from datetime import datetime\n",
    "import logging\n",
    "import time\n",
+    "import os\n",
+    "import dictdiffer\n",
    "logger = logging.getLogger()\n",
    "logger.setLevel(logging.INFO)"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "staging_server = \"https://staging-api.resourcewatch.org\"\n",
    "prod_server = \"https://api.resourcewatch.org\""
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "class bcolors:\n",
    "    HEADER = '\\033[95m'\n",
@@ -86,13 +90,13 @@
    "    ENDC = '\\033[0m'\n",
    "    BOLD = '\\033[1m'\n",
    "    UNDERLINE = '\\033[4m'"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "def auth(env='prod'):\n",
    "    serverUrl = {\n",
@@ -108,42 +112,50 @@
    "    response.raise_for_status()\n",
    "    print(f'{bcolors.OKGREEN}Successfully logged into {env}{bcolors.ENDC}')\n",
    "    return response.json().get('data').get('token')"
-   ],
-   "outputs": [],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
-   "source": [
-    "token = {\n",
-    "    'staging': auth('staging'),\n",
-    "    'prod':auth('prod')\n",
-    "}"
-   ],
+   "execution_count": 9,
+   "metadata": {
+    "scrolled": true
+   },
   "outputs": [
    {
-     "output_type": "stream",
     "name": "stdout",
+     "output_type": "stream",
     "text": [
      "You are login into \u001b[95m\u001b[1mstaging\u001b[0m\n",
+      "Email: alicia.arenzana@vizzuality.com\n",
+      "Password: ········\n",
      "\u001b[92mSuccessfully logged into staging\u001b[0m\n",
      "You are login into \u001b[95m\u001b[1mprod\u001b[0m\n",
+      "Email: alicia.arenzana@vizzuality.com\n",
+      "Password: ········\n",
      "\u001b[92mSuccessfully logged into prod\u001b[0m\n"
     ]
    }
   ],
-   "metadata": {}
+   "source": [
+    "token = {\n",
+    "    'staging': auth('staging'),\n",
+    "    'prod':auth('prod')\n",
+    "}"
+   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "# @TODO \n",
    "# * Migrate one day the body payloads to data model classes and refactor to classes following inheritance and recursive property copies\n",
    "# * Type function with Mypy\n",
    "# * Add proper method descriptions\n",
    "# * Refactor methods to reuse more code\n",
+    "# * https://findwork.dev/blog/advanced-usage-python-requests-timeouts-retries-hooks/ \n",
+    "#   retries and calls should take the retry and backoff factor from Requests into account\n",
    "#from typing import List\n",
    "#from pydantic import BaseModel, parse_obj_as\n",
    "# class DatasetModel(BaseModel):\n",
@@ -154,14 +166,14 @@
    "\n",
    "# class metadataModel(BaseModel):\n",
    "    \n",
-    "# class vocabularyModel(BaseModel):\n"
-   ],
-   "outputs": [],
-   "metadata": {}
+    "# class vocabularyModel(BaseModel):"
+   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
   "source": [
    "def setTokenHeader(env, token=token):\n",
    "    '''\n",
@@ -530,7 +542,7 @@
    "    else:\n",
    "        return None\n",
    "    \n",
-    "def copyAssets(assetList, sync=False, fromEnv='prod', toEnv='staging'):\n",
+    "def copyAssets(assetList, sync=False, removeAssets=False, fromEnv='prod', toEnv='staging'):\n",
    "    '''\n",
    "    Creates a new copy or syncs the assets that we set up in the fromEnv into the destination Env \n",
    "    '''\n",
    "    \n",
    "    if not assetList:\n",
    "        raise IndexError(f'asset list is empty or not defined')\n",
    "    \n",
    "    \n",
-    "    dataAssets = [] \n",
-    "    \n",
+    "    dataAssets = [] \n",
+    "\n",
    "    if sync:\n",
    "        newDatasetList = [asset[f'{fromEnv}Id'] for asset in assetList if asset['type'] == 'dataset']\n",
    "        dataAssets = getAssetList(fromEnv, newDatasetList)\n",
-    "\n",
    "    else: \n",
    "        dataAssets = getAssetList(fromEnv, assetList)\n",
-    "    \n",
-    "    try:\n",
-    "        print(f'{bcolors.OKBLUE}Preparing to {\"sync\" if sync else \"copy\"} from {fromEnv} to {toEnv}...{bcolors.ENDC}')\n",
-    "        resources = []\n",
-    "        \n",
-    "        # @TODO:\n",
-    "        # Improve loop performance with multiprocessing\n",
-    "        # move loops into reusable function based on type\n",
-    "        # For sync only path updated data\n",
-    "        \n",
-    "        for dataset in dataAssets['data']:\n",
+    "\n",
+    "    # @TODO:\n",
+    "    # Improve loop performance with multiprocessing\n",
+    "    # move loops into reusable function based on type\n",
+    "    # For sync only patch updated data\n",
+    "\n",
+    "    for dataset in dataAssets['data']:\n",
+    "        try:\n",
+    "            print(f'{bcolors.OKBLUE}Preparing to {\"sync\" if sync else \"copy\"} from {fromEnv} to {toEnv}...{bcolors.ENDC}')\n",
+    "            \n",
+    "            resources = [] # Move this to dataset level as syncfiles are created per dataset now.\n",
+    "            \n",
    "            toDatasetId = assetIdToBeSync(sync, assetList, dataset, fromEnv, toEnv)\n",
    "            if toDatasetId:\n",
    "                logger.info(f'sync [{fromEnv}]dataset: {dataset.get(\"id\")}')\n",
@@ -608,18 +620,18 @@
    "                        'type': 'metadata',\n",
    "                        f'{fromEnv}Id':layerMetadata.get('id'),\n",
    "                        f'{toEnv}Id': newMetadata['data']\n",
-    "                    }) \n",
-    "             \n",
-    "            # remove toEnv layers that are not on fromEnv \n",
-    "            for layer in getAssetList(toEnv, [toDatasetId])['data'][0]['attributes'].get('layer'):\n",
-    "                if layer.get(\"id\") not in [asset[f'{toEnv}Id'] for asset in resources if asset['type'] == 'layer']:\n",
-    "                    headers = setTokenHeader(toEnv)\n",
-    "                    serverUrl = {\n",
-    "                        'prod': prod_server,\n",
-    "                        'staging': staging_server\n",
-    "                    }\n",
-    "                    url = f'{serverUrl[toEnv]}/v1/dataset/{toDatasetId}/layer/{layer.get(\"id\")}'\n",
-    "                    deleteAssets(url, headers) \n",
+    "                    })\n",
+    "            # remove toEnv layers that are not on fromEnv using a safe net \n",
+    "            if removeAssets: \n",
+    "                for layer in getAssetList(toEnv, [toDatasetId])['data'][0]['attributes'].get('layer'):\n",
+    "                    if layer.get(\"id\") not in [asset[f'{toEnv}Id'] for asset in resources if asset['type'] == 'layer']:\n",
+    "                        headers = setTokenHeader(toEnv)\n",
+    "                        serverUrl = {\n",
+    "                            'prod': prod_server,\n",
+    "                            'staging': staging_server\n",
+    "                        }\n",
+    "                        url = f'{serverUrl[toEnv]}/v1/dataset/{toDatasetId}/layer/{layer.get(\"id\")}'\n",
+    "                        deleteAssets(url, headers) \n",
    "            \n",
    "            # sync widgets\n",
    "            for widget in dataset['attributes'].get('widget'):\n",
@@ -648,17 +660,17 @@
    "                        f'{fromEnv}Id':widgetMetadata.get('id'),\n",
    "                        f'{toEnv}Id': newMetadata['data']\n",
    "                    })\n",
-    "\n",
-    "            # remove toEnv widgets that are not on fromEnv \n",
-    "            for widget in getAssetList(toEnv, [toDatasetId])['data'][0]['attributes'].get('widget'):\n",
-    "                if widget.get(\"id\") not in [asset[f'{toEnv}Id'] for asset in resources if asset['type'] == 'widget']:\n",
-    "                    headers = setTokenHeader(toEnv)\n",
-    "                    serverUrl = {\n",
-    "                        'prod': prod_server,\n",
-    "                        'staging': staging_server\n",
-    "                    }\n",
-    "                    url = f'{serverUrl[toEnv]}/v1/dataset/{toDatasetId}/widget/{widget.get(\"id\")}'\n",
-    "                    deleteAssets(url, headers) \n",
+    "            # remove toEnv widgets 
that are not on fromEnv using a safe net\n", + " if removeAssets: \n", + " for widget in getAssetList(toEnv, [toDatasetId])['data'][0]['attributes'].get('widget'):\n", + " if widget.get(\"id\") not in [asset[f'{toEnv}Id'] for asset in resources if asset['type'] == 'widget']:\n", + " headers = setTokenHeader(toEnv)\n", + " serverUrl = {\n", + " 'prod': prod_server,\n", + " 'staging': staging_server\n", + " }\n", + " url = f'{serverUrl[toEnv]}/v1/dataset/{toDatasetId}/widget/{widget.get(\"id\")}'\n", + " deleteAssets(url, headers) \n", "\n", " for metadata in dataset['attributes'].get('metadata'):\n", " logger.info('creating metadata')\n", @@ -670,105 +682,92 @@ " f'{toEnv}Id': newMetadata['data']\n", " })\n", " \n", - " except NameError or IndexError as e:\n", - " logger.error(e)\n", - " raise e\n", - " except:\n", - " pass\n", + " ## Here we will add the logic to create the sync files.\n", + " except NameError or IndexError as e:\n", + " logger.error(e)\n", + " raise e\n", + " except:\n", + " pass\n", " \n", - " filename = f'dataset_sync_files/RW_prod_staging_match_{resources[0][\"prodId\"]}.json'\n", - " if not sync and len(resources) > 0:\n", - " print(f'creating sync file with name: {filename}')\n", - " with open(filename, 'w') as outfile:\n", - " json.dump(resources, outfile)\n", - " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", - " return filename\n", - "\n", - " elif sync and len(resources) > 0:\n", - " if resources[-1]['type'] == 'metadata':\n", - " print(f'update sync file {filename}')\n", - " with open(filename, 'w') as outfile:\n", - " json.dump(resources, outfile)\n", + " # We are assuming that the first item in the resources is a dataset.\n", + " filename = f'dataset_sync_files/RW_prod_staging_match_{resources[0][\"prodId\"]}.json'\n", + " try:\n", + " ### The logic here is try to see if the file already exists and reads it\n", + " ### if not it will create it.\n", + " fileExists = os.path.exists(filename)\n", + " if len(resources) > 0:\n", + " with open(filename, 'w+') as outfile:\n", + " if fileExists:\n", + " oldfile = json.load(outfile) # we save here the old sync data.\n", + " # Here there are a couple of drivers: \n", + " # Do we consider that the latest version of sync file generated is the right one? \n", + " # What if there is a failure?\n", + " # Do we want to combine them? 
on the old code i'm seeing an assumption \n", + " # related metadata being the latest thing.\n", + " difference = list(dictdiffer.diff(resources, oldfile))\n", + " if difference == []:\n", + " break\n", + " else:\n", + " writeOptions = {\n", + " 'Y': resources,\n", + " 'N': oldfile,\n", + " 'M': dictdiffer.patch(difference, resources) \n", + " }\n", + " for diff in difference: \n", + " print(diff)\n", + " userConfirmation = input(f'{bcolors.WARNING} Do you want to overwrite or merge \\\n", + " {str(oldfile)} with {str(resources)}:{bcolors.ENDC} \\\n", + " Y/M/N') or \"N\"\n", + " if userConfirmation not in ('Y', 'N', 'M'):\n", + " raise NameError(f'User confirmation option not valid: {userConfirmation}')\n", + " \n", + " json.dump(writeOptions[userConfirmation], outfile, sort_keys=True)\n", + " else:\n", + " json.dump(resources, outfile, sort_keys=True)\n", + " \n", " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", " return filename\n", - " else:\n", - " with open(filename,\"r\") as oldfile:\n", - " oldfile = json.load(oldfile)\n", - " if oldfile[-1]['type'] == 'metadata' and resources[-1]['type'] != 'metadata':\n", - " print('update sync file fail, please run sync again')\n", - " if oldfile[-1]['type'] != 'metadata':\n", - " print(f'update sync file {filename}')\n", - " with open(filename, 'w') as outfile:\n", - " json.dump(resources, outfile)\n", - " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", - " return filename\n", + " except Error as e:\n", + " raise e\n", " \n", - "def syncAssets(syncList, fromEnv='prod', toEnv='staging'):\n", + "def syncAssets(syncList, remove = False, fromEnv='prod', toEnv='staging'):\n", " '''\n", " Allows sync of Assets\n", " '''\n", - " \n", - " return copyAssets(syncList, True, fromEnv, toEnv)" - ], - "outputs": [], - "metadata": {} + " return copyAssets(syncList, True, remove, fromEnv, toEnv)" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "# Processing\n", "## Get list of assets that we want to modify or sync" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### List of assets:\n", "\n", "* `datasetsProd` will contain the id of the assets in productioon that need to be migrated to `staging`. We need to make sure that this list is in sync with the document we have shared with the assets." 
- ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### For testing purposes\n", "Dummy assests to create assets in production environment" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 8, - "source": [ - "# Dummy data to test the notebook: creation of a dummy dataset with a layer in production.\n", - "toEnv = 'prod'\n", - "serverUrl = {\n", - " 'prod': prod_server,\n", - " 'staging': staging_server\n", - " }\n", - "headers = setTokenHeader(toEnv)\n", - "urlDataset = f'{serverUrl[toEnv]}/v1/dataset'\n", - "bodyDataset = {'dataset':{\n", - " 'application': ['rw'],\n", - " 'name': 'This is a test',\n", - " 'connectorType': 'rest',\n", - " 'provider': 'cartodb',\n", - " 'published': False,\n", - " 'overwrite': False,\n", - " 'protected':False,\n", - " 'env': 'production',\n", - " 'connectorUrl': \"https://wri-rw.carto.com/api/v2/sql?q=select * from air_temo_anomalies\"\n", - " }\n", - "}\n", - "\n", - "responseDataset = postAssets(urlDataset, bodyDataset, headers)\n", - "responseDataset" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': {'id': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", @@ -818,37 +817,43 @@ " 'layerRelevantProps': []}}}" ] }, + "execution_count": 8, "metadata": {}, - "execution_count": 8 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "# Dummy data to test the notebook: creation of a dummy dataset with a layer in production.\n", + "toEnv = 'prod'\n", + "serverUrl = {\n", + " 'prod': prod_server,\n", + " 'staging': staging_server\n", + " }\n", + "headers = setTokenHeader(toEnv)\n", + "urlDataset = f'{serverUrl[toEnv]}/v1/dataset'\n", + "bodyDataset = {'dataset':{\n", + " 'application': ['rw'],\n", + " 'name': 'This is a test',\n", + " 'connectorType': 'rest',\n", + " 'provider': 'cartodb',\n", + " 'published': False,\n", + " 'overwrite': False,\n", + " 'protected':False,\n", + " 'env': 'production',\n", + " 'connectorUrl': \"https://wri-rw.carto.com/api/v2/sql?q=select * from air_temo_anomalies\"\n", + " }\n", + "}\n", + "\n", + "responseDataset = postAssets(urlDataset, bodyDataset, headers)\n", + "responseDataset" + ] }, { "cell_type": "code", "execution_count": 9, - "source": [ - "urlLayer = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/layer'\n", - "bodyLayer = {\n", - " 'application': ['rw'],\n", - " 'name': 'test-121',\n", - " 'provider': 'cartodb',\n", - " 'default': True,\n", - " 'published': False,\n", - " 'env': 'production',\n", - " 'layerConfig': {\n", - " \"body\": {}\n", - " },\n", - " 'legendConfig': {},\n", - " 'interactionConfig': {},\n", - " 'applicationConfig': {}\n", - " }\n", - "responseLayer = postAssets(urlLayer, bodyLayer, headers)\n", - "responseLayer" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': {'id': 'c21dd7ab-e729-4811-9433-8333b1d7c9e9',\n", @@ -873,33 +878,37 @@ " 'updatedAt': '2021-06-07T09:36:15.327Z'}}}" ] }, + "execution_count": 9, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } ], - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 10, "source": [ - "urlWidget = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/widget'\n", - "bodyWidget = {\n", + "urlLayer = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/layer'\n", + "bodyLayer = {\n", " 'application': ['rw'],\n", " 'name': 'test-121',\n", + " 'provider': 'cartodb',\n", " 'default': True,\n", " 'published': False,\n", " 
'env': 'production',\n", - " 'widgetConfig': {\n", + " 'layerConfig': {\n", " \"body\": {}\n", - " }\n", + " },\n", + " 'legendConfig': {},\n", + " 'interactionConfig': {},\n", + " 'applicationConfig': {}\n", " }\n", - "responseWidget = postAssets(urlWidget, bodyWidget, headers)\n", - "responseWidget" - ], + "responseLayer = postAssets(urlLayer, bodyLayer, headers)\n", + "responseLayer" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': {'id': '5f169df0-a293-4588-bbcd-521ee9484cd6',\n", @@ -922,27 +931,33 @@ " 'updatedAt': '2021-06-07T09:36:17.154Z'}}}" ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "urlWidget = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/widget'\n", + "bodyWidget = {\n", + " 'application': ['rw'],\n", + " 'name': 'test-121',\n", + " 'default': True,\n", + " 'published': False,\n", + " 'env': 'production',\n", + " 'widgetConfig': {\n", + " \"body\": {}\n", + " }\n", + " }\n", + "responseWidget = postAssets(urlWidget, bodyWidget, headers)\n", + "responseWidget" + ] }, { "cell_type": "code", "execution_count": 11, - "source": [ - "urlVocabulary = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/vocabulary/knowledge_graph'\n", - "bodyVocabulary = {\n", - " 'application': 'rw',\n", - " 'tags':[\"geospatial\"]\n", - " }\n", - "responseVocabulary = postAssets(urlVocabulary, bodyVocabulary, headers)\n", - "responseVocabulary" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': [{'id': 'knowledge_graph',\n", @@ -952,29 +967,27 @@ " 'application': 'rw'}}]}" ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "urlVocabulary = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/vocabulary/knowledge_graph'\n", + "bodyVocabulary = {\n", + " 'application': 'rw',\n", + " 'tags':[\"geospatial\"]\n", + " }\n", + "responseVocabulary = postAssets(urlVocabulary, bodyVocabulary, headers)\n", + "responseVocabulary" + ] }, { "cell_type": "code", "execution_count": 12, - "source": [ - "urlMetadataDataset = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/metadata'\n", - "bodyMetadataDataset = {\n", - " 'application': 'rw',\n", - " 'language':'ENG',\n", - " 'name':'this is a dummy dataset',\n", - " 'description':'Lorem Ipsum'\n", - " }\n", - "responseMetadataDataset = postAssets(urlMetadataDataset, bodyMetadataDataset, headers)\n", - "responseMetadataDataset" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': [{'id': '60bde8962852be001ba7e42b',\n", @@ -991,15 +1004,28 @@ " 'status': 'published'}}]}" ] }, + "execution_count": 12, "metadata": {}, - "execution_count": 12 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "urlMetadataDataset = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/metadata'\n", + "bodyMetadataDataset = {\n", + " 'application': 'rw',\n", + " 'language':'ENG',\n", + " 'name':'this is a dummy dataset',\n", + " 'description':'Lorem Ipsum'\n", + " }\n", + "responseMetadataDataset = postAssets(urlMetadataDataset, bodyMetadataDataset, headers)\n", + "responseMetadataDataset" + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "urlMetadataLayer = 
f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/layer/{responseLayer[\"data\"].get(\"id\")}/metadata'\n", "bodyMetadataLayer = {\n", @@ -1010,27 +1036,14 @@ " }\n", "responseMetadataLayer = postAssets(urlMetadataLayer, bodyMetadataLayer, headers)\n", "responseMetadataLayer" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 13, - "source": [ - "urlMetadatawidget = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/widget/{responseWidget[\"data\"].get(\"id\")}/metadata'\n", - "bodyMetadatawidget = {\n", - " 'application': 'rw',\n", - " 'language':'ENG',\n", - " 'name':'this is a dummy widget',\n", - " 'description':'Lorem Ipsum'\n", - " }\n", - "responseMetadatawidget = postAssets(urlMetadatawidget, bodyMetadatawidget, headers)\n", - "responseMetadatawidget" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "{'data': [{'id': '60bde89a3cc064001b3675b9',\n", @@ -1047,80 +1060,92 @@ " 'status': 'published'}}]}" ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 13 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "urlMetadatawidget = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/widget/{responseWidget[\"data\"].get(\"id\")}/metadata'\n", + "bodyMetadatawidget = {\n", + " 'application': 'rw',\n", + " 'language':'ENG',\n", + " 'name':'this is a dummy widget',\n", + " 'description':'Lorem Ipsum'\n", + " }\n", + "responseMetadatawidget = postAssets(urlMetadatawidget, bodyMetadatawidget, headers)\n", + "responseMetadatawidget" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "#### List of assets:\n", "\n", "* we need to make sure that this list is in sync with the document we have shared with the assets" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 14, - "source": [ - "# in the future we can automate this listing based on the doc using the google sheet api both for writing and reading from\n", - "# providing a sample of the list by printing it\n", - "datasetsProd = [responseDataset['data']['id']]\n", - "datasetsProd" - ], + "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "['6a3aa408-b3d3-44c6-89b7-93fbfa545489']" ] }, + "execution_count": 14, "metadata": {}, - "execution_count": 14 + "output_type": "execute_result" } ], - "metadata": {} + "source": [ + "# in the future we can automate this listing based on the doc using the google sheet api both for writing and reading from\n", + "# providing a sample of the list by printing it\n", + "datasetsProd = [responseDataset['data']['id']]\n", + "datasetsProd" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Backup Data in both environments" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "#backupAssets('prod')\n", "#backupAssets('staging')" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Only do this if you want to clean data in staging. \n", "* You will need to be logged in" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "#deleteDataFrom()" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Copy resources from production to staging. \n", "The running time will depend on the size of the asset. 
\n", @@ -1129,44 +1154,35 @@ "- type: this can be a \"layer\", a \"dataset\", a \"widget\", \"vocabulary\", \"metadata\"\n", "- prodId: the id of the item in `production`\n", "- stagingId: the id of the item in `staging`" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 18, + "metadata": {}, + "outputs": [], "source": [ "# enter the API ID of the dataset on production to copy/sync here\n", "prod_API_ID = ['']# ex: '79e06dd8-a2ae-45eb-8e99-e73bc87ec946'\n", "# keep the syncFile list empty\n", "syncFile = []" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 12, - "source": [ - "# copy a dataset on production to staging\n", - "for datasetId in prod_API_ID:\n", - " syncFile.append(copyAssets([datasetId], False, fromEnv='prod', toEnv = 'staging'))\n", - "for syncfile in syncFile:\n", - " with open(syncfile) as json_file:\n", - " syncList = json.load(json_file)\n", - " syncAssets(syncList, fromEnv='prod', toEnv='staging')" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[94mPreparing to copy from prod to staging...\u001b[0m\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "ERROR:root:response: \n", "ERROR:root:\n", @@ -1177,24 +1193,24 @@ ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "creating sync file with name: dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json\n", "\u001b[92mcopy process finished\u001b[0m\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]dataset: 42859b52-31f2-419c-ac14-8b0cbd6bbb6f\n", "INFO:root:with [staging]dataset: e95fe72e-eb7f-486c-ad0e-b0cc52ac3b94\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[94mPreparing to sync from prod to staging...\u001b[0m\n", "update sync file dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json\n", @@ -1202,142 +1218,154 @@ ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]dataset: 42859b52-31f2-419c-ac14-8b0cbd6bbb6f\n", "INFO:root:with [staging]dataset: e95fe72e-eb7f-486c-ad0e-b0cc52ac3b94\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[94mPreparing to sync from prod to staging...\u001b[0m\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]widget: 2cb5af4f-2bfc-49f3-9f99-ac415e98c7db\n", "INFO:root:with [staging]widget: 1804d8e0-0de5-4b9a-8ecd-b55c9ff176fb\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "update sync file dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json\n", "\u001b[92msync process finished\u001b[0m\n" ] } ], - "metadata": {} + "source": [ + "# copy a dataset on production to staging\n", + "for datasetId in prod_API_ID:\n", + " syncFile.append(copyAssets([datasetId], False, fromEnv='prod', toEnv = 'staging'))\n", + "for syncfile in syncFile:\n", + " with open(syncfile) as json_file:\n", + " syncList = json.load(json_file)\n", + " syncAssets(syncList, fromEnv='prod', toEnv='staging')" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Open sync list of assets, match items with list and update them." 
- ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 19, - "source": [ - "# sync dataset production <> staging\n", - "# use the printed json filename in the previous cell\n", - "if len(syncFile)==0:\n", - " syncFile = [f'dataset_sync_files/RW_prod_staging_match_{datasetId}.json' for datasetId in prod_API_ID]\n", - "for syncfile in syncFile:\n", - " with open(syncfile) as json_file:\n", - " syncList = json.load(json_file)\n", - "\n", - " syncAssets(syncList, fromEnv='prod', toEnv='staging')" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]dataset: 42859b52-31f2-419c-ac14-8b0cbd6bbb6f\n", "INFO:root:with [staging]dataset: 05f90e71-fef4-445c-82d9-65e77d732494\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[94mPreparing to sync from prod to staging...\u001b[0m\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:sync [prod]widget: 2cb5af4f-2bfc-49f3-9f99-ac415e98c7db\n", "INFO:root:with [staging]widget: eedaa69b-7d14-4541-9a0c-1033bcddd072\n" ] }, { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "update sync file dataset_sync_files/RW_prod_staging_match_42859b52-31f2-419c-ac14-8b0cbd6bbb6f.json\n", "\u001b[92msync process finished\u001b[0m\n" ] } ], - "metadata": {} + "source": [ + "# sync dataset production <> staging\n", + "# use the printed json filename in the previous cell\n", + "if len(syncFile)==0:\n", + " syncFile = [f'dataset_sync_files/RW_prod_staging_match_{datasetId}.json' for datasetId in prod_API_ID]\n", + "for syncfile in syncFile:\n", + " with open(syncfile) as json_file:\n", + " syncList = json.load(json_file)\n", + "\n", + " syncAssets(syncList, fromEnv='prod', toEnv='staging')" + ] }, { "cell_type": "code", "execution_count": 17, - "source": [ - "# delete testing datasets from both envs after testing:\n", - "deleteDataFrom('prod', [responseDataset['data']['id']])" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ "\u001b[93mAre you sure you want to delete ['6a3aa408-b3d3-44c6-89b7-93fbfa545489'] in prod:\u001b[0m Y/n Y\n" ] }, { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:deleting https://api.resourcewatch.org/v1/dataset/6a3aa408-b3d3-44c6-89b7-93fbfa545489... \n" ] } ], - "metadata": {} + "source": [ + "# delete testing datasets from both envs after testing:\n", + "deleteDataFrom('prod', [responseDataset['data']['id']])" + ] }, { "cell_type": "code", "execution_count": 16, - "source": [ - "deleteDataFrom('staging', [syncList[0]['stagingId']])" - ], + "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "INFO:root:deleting https://staging-api.resourcewatch.org/v1/dataset/e95fe72e-eb7f-486c-ad0e-b0cc52ac3b94... 
\n" ] } ], - "metadata": {} + "source": [ + "deleteDataFrom('staging', [syncList[0]['stagingId']])" + ] } ], "metadata": { + "interpreter": { + "hash": "1936b053440c27f530542f53326030d97194af8aa0e5ac751988556632f9c990" + }, "kernelspec": { - "name": "python3", - "display_name": "Python 3.7.7 64-bit ('base': conda)" + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1349,12 +1377,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" - }, - "interpreter": { - "hash": "1936b053440c27f530542f53326030d97194af8aa0e5ac751988556632f9c990" + "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} From 71cfb2b06b898e2755bc7cb14f43d163a388896d Mon Sep 17 00:00:00 2001 From: Alicia Date: Fri, 3 Sep 2021 18:43:10 +0200 Subject: [PATCH 2/2] make sure that we open the file on read if it has data --- ResourceWatch/example_migrate_script.ipynb | 427 ++++++++++++++++----- 1 file changed, 341 insertions(+), 86 deletions(-) diff --git a/ResourceWatch/example_migrate_script.ipynb b/ResourceWatch/example_migrate_script.ipynb index 4880ff0..0c3e0ef 100644 --- a/ResourceWatch/example_migrate_script.ipynb +++ b/ResourceWatch/example_migrate_script.ipynb @@ -171,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 75, "metadata": {}, "outputs": [], "source": [ @@ -690,51 +690,58 @@ " pass\n", " \n", " # We are assuming that the first item in the resources is a dataset.\n", - " filename = f'dataset_sync_files/RW_prod_staging_match_{resources[0][\"prodId\"]}.json'\n", - " try:\n", - " ### The logic here is try to see if the file already exists and reads it\n", - " ### if not it will create it.\n", - " fileExists = os.path.exists(filename)\n", - " if len(resources) > 0:\n", - " with open(filename, 'w+') as outfile:\n", - " if fileExists:\n", - " oldfile = json.load(outfile) # we save here the old sync data.\n", - " # Here there are a couple of drivers: \n", - " # Do we consider that the latest version of sync file generated is the right one? \n", - " # What if there is a failure?\n", - " # Do we want to combine them? 
on the old code i'm seeing an assumption \n", - " # related metadata being the latest thing.\n", - " difference = list(dictdiffer.diff(resources, oldfile))\n", - " if difference == []:\n", - " break\n", - " else:\n", - " writeOptions = {\n", - " 'Y': resources,\n", - " 'N': oldfile,\n", - " 'M': dictdiffer.patch(difference, resources) \n", - " }\n", - " for diff in difference: \n", - " print(diff)\n", - " userConfirmation = input(f'{bcolors.WARNING} Do you want to overwrite or merge \\\n", - " {str(oldfile)} with {str(resources)}:{bcolors.ENDC} \\\n", - " Y/M/N') or \"N\"\n", - " if userConfirmation not in ('Y', 'N', 'M'):\n", - " raise NameError(f'User confirmation option not valid: {userConfirmation}')\n", - " \n", - " json.dump(writeOptions[userConfirmation], outfile, sort_keys=True)\n", - " else:\n", - " json.dump(resources, outfile, sort_keys=True)\n", - " \n", - " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", - " return filename\n", - " except Error as e:\n", - " raise e\n", + " \n", + " syncFile(resources, sync)\n", + " \n", " \n", "def syncAssets(syncList, remove = False, fromEnv='prod', toEnv='staging'):\n", " '''\n", " Allows sync of Assets\n", " '''\n", - " return copyAssets(syncList, True, remove, fromEnv, toEnv)" + " return copyAssets(syncList, True, remove, fromEnv, toEnv)\n", + "\n", + "def syncFile(resources:list, sync:bool)-> str:\n", + " try:\n", + " ### The logic here is try to see if the file already exists and reads it\n", + " ### if not it will create it.\n", + " filename = f'dataset_sync_files/RW_prod_staging_match_{resources[0][\"prodId\"]}.json'\n", + " fileExists = os.path.exists(filename)\n", + " if len(resources) > 0:\n", + " if fileExists and (os.path.getsize(filename) > 0):\n", + " with open(filename, 'r') as outfile:\n", + " oldfile = json.loads(outfile.read()) # we save here the old sync data.\n", + " # Here there are a couple of drivers: \n", + " # Do we consider that the latest version of sync file generated is the right one? \n", + " # What if there is a failure?\n", + " # Do we want to combine them? 
on the old code i'm seeing an assumption \n", + " # related metadata being the latest thing.\n", + " difference = list(dictdiffer.diff(resources, oldfile))\n", + " if difference == []:\n", + " print('no change in sync file detected')\n", + " pass\n", + " else:\n", + " writeOptions = {\n", + " 'Y': resources,\n", + " 'N': oldfile,\n", + " 'M': dictdiffer.patch(difference, resources) \n", + " }\n", + " for diff in difference: \n", + " print(diff)\n", + " userConfirmation = input(f'{bcolors.WARNING} Do you want to overwrite, merge or leave the file as it is?:{bcolors.ENDC} \\\n", + " Y/M/N') or \"N\"\n", + " if userConfirmation not in ('Y', 'N', 'M'):\n", + " raise NameError(f'User confirmation option not valid: {userConfirmation}')\n", + "\n", + " with open(filename, 'w') as outfile:\n", + " json.dump(writeOptions[userConfirmation], outfile, sort_keys=True)\n", + " else:\n", + " with open(filename, 'w') as outfile:\n", + " json.dump(resources, outfile, sort_keys=True)\n", + "\n", + " print(f'{bcolors.OKGREEN}{\"sync\" if sync else \"copy\"} process finished{bcolors.ENDC}')\n", + " return filename\n", + " except Exception as e:\n", + " raise e" ] }, { @@ -764,16 +771,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': {'id': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + "{'data': {'id': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'type': 'dataset',\n", " 'attributes': {'name': 'This is a test',\n", - " 'slug': 'This-is-a-test_9',\n", + " 'slug': 'This-is-a-test_15',\n", " 'type': None,\n", " 'subtitle': None,\n", " 'application': ['rw'],\n", @@ -810,14 +817,14 @@ " 'clonedHost': {},\n", " 'errorMessage': None,\n", " 'taskId': None,\n", - " 'createdAt': '2021-06-07T09:36:12.332Z',\n", - " 'updatedAt': '2021-06-07T09:36:12.332Z',\n", + " 'createdAt': '2021-09-03T15:47:43.364Z',\n", + " 'updatedAt': '2021-09-03T15:47:43.364Z',\n", " 'dataLastUpdated': None,\n", " 'widgetRelevantProps': [],\n", " 'layerRelevantProps': []}}}" ] }, - "execution_count": 8, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -850,17 +857,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': {'id': 'c21dd7ab-e729-4811-9433-8333b1d7c9e9',\n", + "{'data': {'id': '38051cae-72e0-4662-94e4-a8a3dc19ca85',\n", " 'type': 'layer',\n", " 'attributes': {'name': 'test-121',\n", - " 'slug': 'test-121_2',\n", - " 'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + " 'slug': 'test-121_5',\n", + " 'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'application': ['rw'],\n", " 'iso': [],\n", " 'provider': 'cartodb',\n", @@ -874,11 +881,11 @@ " 'interactionConfig': {},\n", " 'applicationConfig': {},\n", " 'staticImageConfig': {},\n", - " 'createdAt': '2021-06-07T09:36:15.327Z',\n", - " 'updatedAt': '2021-06-07T09:36:15.327Z'}}}" + " 'createdAt': '2021-09-03T15:47:44.433Z',\n", + " 'updatedAt': '2021-09-03T15:47:44.433Z'}}}" ] }, - "execution_count": 9, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -905,17 +912,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': {'id': '5f169df0-a293-4588-bbcd-521ee9484cd6',\n", + "{'data': {'id': '810de298-66b1-47ea-93f9-8593f4c6b43a',\n", " 'type': 'widget',\n", " 'attributes': {'name': 'test-121',\n", - " 'dataset': 
'6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", - " 'slug': 'test-121_2',\n", + " 'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", + " 'slug': 'test-121_5',\n", " 'userId': '57a0aa1071e394dd32ffe137',\n", " 'application': ['rw'],\n", " 'verified': False,\n", @@ -927,11 +934,11 @@ " 'env': 'production',\n", " 'widgetConfig': {'body': {}},\n", " 'template': False,\n", - " 'createdAt': '2021-06-07T09:36:17.153Z',\n", - " 'updatedAt': '2021-06-07T09:36:17.154Z'}}}" + " 'createdAt': '2021-09-03T15:47:45.139Z',\n", + " 'updatedAt': '2021-09-03T15:47:45.139Z'}}}" ] }, - "execution_count": 10, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -954,7 +961,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -967,7 +974,7 @@ " 'application': 'rw'}}]}" ] }, - "execution_count": 11, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -984,27 +991,27 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': [{'id': '60bde8962852be001ba7e42b',\n", + "{'data': [{'id': '613243a3d32fb1001aad821f',\n", " 'type': 'metadata',\n", - " 'attributes': {'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + " 'attributes': {'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'application': 'rw',\n", - " 'resource': {'id': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + " 'resource': {'id': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'type': 'dataset'},\n", " 'language': 'eng',\n", " 'name': 'this is a dummy dataset',\n", " 'description': 'Lorem Ipsum',\n", - " 'createdAt': '2021-06-07T09:36:22.304Z',\n", - " 'updatedAt': '2021-06-07T09:36:22.304Z',\n", + " 'createdAt': '2021-09-03T15:47:47.614Z',\n", + " 'updatedAt': '2021-09-03T15:47:47.614Z',\n", " 'status': 'published'}}]}" ] }, - "execution_count": 12, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1023,9 +1030,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': [{'id': '613243a45390eb001b6a592e',\n", + " 'type': 'metadata',\n", + " 'attributes': {'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", + " 'application': 'rw',\n", + " 'resource': {'id': '38051cae-72e0-4662-94e4-a8a3dc19ca85',\n", + " 'type': 'layer'},\n", + " 'language': 'eng',\n", + " 'name': 'this is a dummy Layer',\n", + " 'description': 'Lorem Ipsum',\n", + " 'createdAt': '2021-09-03T15:47:48.818Z',\n", + " 'updatedAt': '2021-09-03T15:47:48.818Z',\n", + " 'status': 'published'}}]}" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "urlMetadataLayer = f'{urlDataset}/{responseDataset[\"data\"].get(\"id\")}/layer/{responseLayer[\"data\"].get(\"id\")}/metadata'\n", "bodyMetadataLayer = {\n", @@ -1040,27 +1069,27 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'data': [{'id': '60bde89a3cc064001b3675b9',\n", + "{'data': [{'id': '613243a71614ef001a2400b3',\n", " 'type': 'metadata',\n", - " 'attributes': {'dataset': '6a3aa408-b3d3-44c6-89b7-93fbfa545489',\n", + " 'attributes': {'dataset': '020723f0-0238-4612-b5b7-245554ae198f',\n", " 'application': 'rw',\n", - " 'resource': {'id': '5f169df0-a293-4588-bbcd-521ee9484cd6',\n", + " 'resource': {'id': 
'810de298-66b1-47ea-93f9-8593f4c6b43a',\n", " 'type': 'widget'},\n", " 'language': 'eng',\n", " 'name': 'this is a dummy widget',\n", " 'description': 'Lorem Ipsum',\n", - " 'createdAt': '2021-06-07T09:36:26.194Z',\n", - " 'updatedAt': '2021-06-07T09:36:26.194Z',\n", + " 'createdAt': '2021-09-03T15:47:51.047Z',\n", + " 'updatedAt': '2021-09-03T15:47:51.047Z',\n", " 'status': 'published'}}]}" ] }, - "execution_count": 13, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -1088,16 +1117,18 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, + "execution_count": 42, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { "text/plain": [ - "['6a3aa408-b3d3-44c6-89b7-93fbfa545489']" + "['020723f0-0238-4612-b5b7-245554ae198f']" ] }, - "execution_count": 14, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -1109,6 +1140,180 @@ "datasetsProd" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Clean copy" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[94mPreparing to copy from prod to staging...\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:creating metadata for layer...\n", + "INFO:root:creating metadata for widget...\n", + "INFO:root:creating metadata\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('change', [0, 'stagingId'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [2, 'stagingId'], ('faa18045-ea7f-4750-a7a7-a8f14ca7f100', 'cde0e510-5e89-41b3-a0b1-7188581de62c'))\n", + "('change', [3, 'stagingId', 0, 'id'], ('61324e6ccbd8f4001a738054', '61324c08cbd8f4001a73804c'))\n", + "('change', [3, 'stagingId', 0, 'attributes', 'dataset'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [3, 'stagingId', 0, 'attributes', 'resource', 'id'], ('faa18045-ea7f-4750-a7a7-a8f14ca7f100', 'cde0e510-5e89-41b3-a0b1-7188581de62c'))\n", + "('change', [3, 'stagingId', 0, 'attributes', 'createdAt'], ('2021-09-03T16:33:48.361Z', '2021-09-03T16:23:36.614Z'))\n", + "('change', [3, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:33:48.361Z', '2021-09-03T16:23:36.614Z'))\n", + "('change', [4, 'stagingId'], ('126c7399-deb8-47f2-973c-0bb89dda3c1b', '39a74925-e0fe-4223-b636-112a119ad7ec'))\n", + "('change', [5, 'stagingId', 0, 'id'], ('61324e6dcbd8f4001a738055', '61324c0acbd8f4001a73804d'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'dataset'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'resource', 'id'], ('126c7399-deb8-47f2-973c-0bb89dda3c1b', '39a74925-e0fe-4223-b636-112a119ad7ec'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'createdAt'], ('2021-09-03T16:33:49.830Z', '2021-09-03T16:23:38.109Z'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:33:49.830Z', '2021-09-03T16:23:38.109Z'))\n", + "('change', [6, 'stagingId', 0, 'id'], ('61324e6ecbd8f4001a738056', '61324c0acbd8f4001a73804e'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'dataset'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'resource', 'id'], ('0b34d693-e7bd-48d6-b9e8-38a2f0f7c246', 
'b95e88be-3a3f-40df-8648-16000da09a63'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'createdAt'], ('2021-09-03T16:33:50.562Z', '2021-09-03T16:23:38.838Z'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:33:50.562Z', '2021-09-03T16:23:38.838Z'))\n", + " Do you want to overwrite, merge or leave the file as it is?: Y/M/NN\n", + "\u001b[92mcopy process finished\u001b[0m\n" + ] + } + ], + "source": [ + "copyAssets(datasetsProd, sync=False, removeAssets=False, fromEnv='prod', toEnv = 'staging')" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:sync [prod]dataset: 020723f0-0238-4612-b5b7-245554ae198f\n", + "INFO:root:with [staging]dataset: b95e88be-3a3f-40df-8648-16000da09a63\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[94mPreparing to sync from prod to staging...\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR:root:response: \n", + "ERROR:root:\n", + "ERROR:root:url: \n", + "ERROR:root:https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63/vocabulary/knowledge_graph\n", + "ERROR:root:body: \n", + "ERROR:root:{\"application\": \"rw\", \"tags\": [\"geospatial\"]}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93mPost operation was not succesfull, trying to update instead\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:sync [prod]layer: 38051cae-72e0-4662-94e4-a8a3dc19ca85\n", + "INFO:root:with [staging]layer: cde0e510-5e89-41b3-a0b1-7188581de62c\n", + "INFO:root:creating metadata for layer...\n", + "ERROR:root:response: \n", + "ERROR:root:\n", + "ERROR:root:url: \n", + "ERROR:root:https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63/layer/cde0e510-5e89-41b3-a0b1-7188581de62c/metadata\n", + "ERROR:root:body: \n", + "ERROR:root:{\"application\": \"rw\", \"resource\": {\"id\": \"38051cae-72e0-4662-94e4-a8a3dc19ca85\", \"type\": \"layer\"}, \"language\": \"eng\", \"name\": \"this is a dummy Layer\", \"description\": \"Lorem Ipsum\"}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93mPost operation was not succesfull, trying to update instead\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:sync [prod]widget: 810de298-66b1-47ea-93f9-8593f4c6b43a\n", + "INFO:root:with [staging]widget: 39a74925-e0fe-4223-b636-112a119ad7ec\n", + "INFO:root:creating metadata for widget...\n", + "ERROR:root:response: \n", + "ERROR:root:\n", + "ERROR:root:url: \n", + "ERROR:root:https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63/widget/39a74925-e0fe-4223-b636-112a119ad7ec/metadata\n", + "ERROR:root:body: \n", + "ERROR:root:{\"application\": \"rw\", \"resource\": {\"id\": \"810de298-66b1-47ea-93f9-8593f4c6b43a\", \"type\": \"widget\"}, \"language\": \"eng\", \"name\": \"this is a dummy widget\", \"description\": \"Lorem Ipsum\"}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93mPost operation was not succesfull, trying to update instead\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:creating metadata\n", + "ERROR:root:response: \n", + "ERROR:root:\n", + "ERROR:root:url: \n", + 
"ERROR:root:https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63/metadata\n", + "ERROR:root:body: \n", + "ERROR:root:{\"application\": \"rw\", \"resource\": {\"id\": \"020723f0-0238-4612-b5b7-245554ae198f\", \"type\": \"dataset\"}, \"language\": \"eng\", \"name\": \"this is a dummy dataset\", \"description\": \"Lorem Ipsum\"}\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[93mPost operation was not succesfull, trying to update instead\u001b[0m\n", + "('change', [3, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:34:02.697Z', '2021-09-03T16:23:36.614Z'))\n", + "('change', [5, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:34:04.751Z', '2021-09-03T16:23:38.109Z'))\n", + "('change', [6, 'stagingId', 0, 'attributes', 'updatedAt'], ('2021-09-03T16:34:05.814Z', '2021-09-03T16:23:38.838Z'))\n", + " Do you want to overwrite, merge or leave the file as it is?: Y/M/NN\n", + "\u001b[92msync process finished\u001b[0m\n" + ] + } + ], + "source": [ + "with open('dataset_sync_files/RW_prod_staging_match_020723f0-0238-4612-b5b7-245554ae198f.json') as json_file:\n", + " syncList = json.load(json_file)\n", + "syncAssets(syncList, fromEnv='prod', toEnv='staging')" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1136,11 +1341,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 83, "metadata": {}, "outputs": [], "source": [ - "#deleteDataFrom()" + "os.remove(f\"dataset_sync_files/RW_prod_staging_match_{responseDataset['data']['id']}.json\")" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Are you sure you want to delete ['020723f0-0238-4612-b5b7-245554ae198f'] in prod: Y/nY\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:deleting https://api.resourcewatch.org/v1/dataset/020723f0-0238-4612-b5b7-245554ae198f... \n" + ] + } + ], + "source": [ + "deleteDataFrom('prod', datasetsProd)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Are you sure you want to delete ['b95e88be-3a3f-40df-8648-16000da09a63'] in staging: Y/nY\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:deleting https://staging-api.resourcewatch.org/v1/dataset/b95e88be-3a3f-40df-8648-16000da09a63... \n" + ] + } + ], + "source": [ + "deleteDataFrom('staging', ['b95e88be-3a3f-40df-8648-16000da09a63'])\n", + "\n" ] }, { @@ -1252,7 +1506,8 @@ "source": [ "# copy a dataset on production to staging\n", "for datasetId in prod_API_ID:\n", - " syncFile.append(copyAssets([datasetId], False, fromEnv='prod', toEnv = 'staging'))\n", + " syncFile.append(copyAssets([datasetId], sync=False, removeAssets=False, fromEnv='prod', toEnv = 'staging'))\n", + "#repeating the same operation?\n", "for syncfile in syncFile:\n", " with open(syncfile) as json_file:\n", " syncList = json.load(json_file)\n",