diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a239a1d..fc1d04d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -32,6 +32,9 @@ jobs:
           curl -sSL https://install.python-poetry.org | python3 - --version $POETRY_VERSION
           poetry --version
 
+      - name: Add Poetry to PATH  # installer puts poetry in ~/.local/bin
+        run: echo "$HOME/.local/bin" >> "$GITHUB_PATH"
+
       - name: Cache Poetry virtual environment
         uses: actions/cache@v3
         with:
diff --git a/README.md b/README.md
index 81098bc..9f49049 100644
--- a/README.md
+++ b/README.md
@@ -66,18 +66,12 @@ testing, linting, and more.
 This project uses [Poetry](https://python-poetry.org/) for dependency management.
 To set up your development environment:
 
-1. **Create and activate the virtual environment:**
+**Create and activate the virtual environment:**
 
    ```bash
    make .venv
    ```
 
-2. **Install project dependencies:**
-
-   ```bash
-   poetry install
-   ```
-
 ### Running Normalisation Scripts
 
 Each data source module under `src` contains scripts for data normalisation.
diff --git a/notebooks/exploration.ipynb b/notebooks/exploration.ipynb
new file mode 100644
index 0000000..7d8339c
--- /dev/null
+++ b/notebooks/exploration.ipynb
@@ -0,0 +1,221 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Notebook: Exploration of CSV files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"Exploration notebook for data analysis.\n",
+    "\n",
+    "This notebook contains data exploration steps for disaster analysis.\n",
+    "\"\"\"\n",
+    "\n",
+    "import hashlib\n",
+    "import sys\n",
+    "from pathlib import Path\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Make the parent directory importable before importing from `src`.\n",
+    "module_path = Path(\"..\").resolve()\n",
+    "sys.path.append(str(module_path))\n",
+    "from src.data_consolidation.dictionary import STANDARD_COLUMNS  # noqa: E402"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_dat(dat_file: str) -> pd.DataFrame:\n",
+    "    \"\"\"Reads the CSV file named `dat_file` from ../data_prep into a DataFrame.\"\"\"\n",
+    "    dat_dir = Path(\"../data_prep/\").resolve()\n",
+    "    dat_path = dat_dir / dat_file\n",
+    "    return pd.read_csv(dat_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "glide_prep_df = read_dat(\"glide_prep.csv\")\n",
+    "gdacs_prep_df = read_dat(\"gdacs_prep.csv\")\n",
+    "emdat_prep_df = read_dat(\"emdat_prep.csv\")\n",
+    "disaster_charter_df = read_dat(\"disaster_charter_prep.csv\")\n",
+    "cerf_df = read_dat(\"cerf_prep.csv\")\n",
+    "idmc_df = read_dat(\"idmc_prep.csv\")\n",
+    "ifrc_df = read_dat(\"ifrc_prep.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pre_dfs = [\n",
+    "    glide_prep_df,\n",
+    "    gdacs_prep_df,\n",
+    "    emdat_prep_df,\n",
+    "    disaster_charter_df,\n",
+    "    cerf_df,\n",
+    "    idmc_df,\n",
+    "    ifrc_df,\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_18473/1884474460.py:8: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
+      "  all_data = pd.concat(pre_dfs, ignore_index=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "for i, df in enumerate(pre_dfs):\n",
+    "    missing_cols = set(STANDARD_COLUMNS) - set(df.columns)\n",
+    "    for col in missing_cols:\n",
+    "        df[col] = None\n",
+    "    df_standard = df[STANDARD_COLUMNS]\n",
+    "    pre_dfs[i] = df_standard\n",
+    "\n",
+    "all_data = pd.concat(pre_dfs, ignore_index=True)\n",
+    "all_data[\"Date\"] = pd.to_datetime(all_data[\"Date\"], errors=\"coerce\")\n",
+    "group_key = [\"Event_Type\", \"Country\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def consolidate_group(group: pd.DataFrame) -> dict:\n",
+    "    \"\"\"Consolidates a group of data.\"\"\"\n",
+    "    consolidated_row = {}\n",
+    "    event_ids = sorted(set(group[\"Source_Event_IDs\"].dropna().astype(str).tolist()))\n",
+    "    consolidated_row[\"Event_ID\"] = event_ids\n",
+    "    unique_str = \"|\".join(event_ids)\n",
+    "    disaster_impact_id = \"DI_\" + hashlib.sha256(unique_str.encode(\"utf-8\")).hexdigest()\n",
+    "    consolidated_row[\"Disaster_Impact_ID\"] = disaster_impact_id\n",
+    "    for column in group.columns:\n",
+    "        if column in group_key or column in [\"Event_ID\", \"Disaster_Impact_ID\"]:\n",
+    "            if column == \"Disaster_Impact_ID\":\n",
+    "                continue\n",
+    "            consolidated_row[column] = sorted(\n",
+    "                set(group[column].dropna().astype(str).tolist()),\n",
+    "            )\n",
+    "        else:\n",
+    "            values = group[column].dropna().tolist()\n",
+    "            if values:\n",
+    "                if all(isinstance(val, list) for val in values):\n",
+    "                    flat_values = [item for sublist in values for item in sublist]\n",
+    "                    consolidated_row[column] = sorted(set(map(str, flat_values)))\n",
+    "                else:\n",
+    "                    consolidated_row[column] = sorted(set(map(str, values)))\n",
+    "            else:\n",
+    "                consolidated_row[column] = None\n",
+    "    return consolidated_row"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_data[\"Date_Group\"] = all_data[\"Date\"].apply(\n",
+    "    lambda x: (\n",
+    "        x - pd.Timedelta(days=7),\n",
+    "        x + pd.Timedelta(days=7),\n",
+    "    )\n",
+    "    if pd.notna(x)\n",
+    "    else (None, None),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def group_by_date_range(data: pd.DataFrame, date_col: str) -> list:\n",
+    "    \"\"\"Groups rows by Event_Type, Country and date window; each row is used once.\"\"\"\n",
+    "    rows = []\n",
+    "    used_indices = set()\n",
+    "    for idx, row in data.iterrows():\n",
+    "        if idx in used_indices or pd.isna(row[date_col]):\n",
+    "            continue\n",
+    "        matching_rows = data[\n",
+    "            data[date_col].between(*row[\"Date_Group\"])\n",
+    "            & (data[\"Event_Type\"] == row[\"Event_Type\"])\n",
+    "            & (data[\"Country\"] == row[\"Country\"])\n",
+    "            & ~data.index.isin(used_indices)  # skip rows already grouped\n",
+    "        ]\n",
+    "        used_indices.update(matching_rows.index)\n",
+    "        rows.append(consolidate_group(matching_rows))\n",
+    "    return rows\n",
+    "\n",
+    "\n",
+    "unified_rows = group_by_date_range(all_data, \"Date\")\n",
+    "unified_df = pd.DataFrame(unified_rows)\n",
+    "unified_df = unified_df[STANDARD_COLUMNS]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Matrix is empty (all zeros); no Circos plot to display.\n"
+     ]
+    }
+   ],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/poetry.lock b/poetry.lock
index 142c6cd..bbe54ed 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -72,6 +72,21 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi
 tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
 tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
 
+[[package]]
+name = "autopep8"
+version = "2.3.2"
+description = "A tool that automatically formats Python code to conform to the PEP 8 style guide"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "autopep8-2.3.2-py2.py3-none-any.whl", hash = "sha256:ce8ad498672c845a0c3de2629c15b635ec2b05ef8177a6e7c91c74f3e9b51128"},
+    {file = "autopep8-2.3.2.tar.gz", hash = "sha256:89440a4f969197b69a995e4ce0661b031f455a9f776d2c5ba3dbd83466931758"},
+]
+
+[package.dependencies]
+pycodestyle = ">=2.12.0"
+tomli = {version = "*", markers = "python_version < \"3.11\""}
+
 [[package]]
 name = "azure-core"
 version = "1.32.0"
@@ -1948,6 +1963,26 @@ files = [
 msal = ">=1.29,<2"
 portalocker = ">=1.4,<3"
 
+[[package]]
+name = "nbqa"
+version = "1.9.1"
+description = "Run any standard Python code quality tool on a Jupyter Notebook"
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "nbqa-1.9.1-py3-none-any.whl", hash = "sha256:95552d2f6c2c038136252a805aa78d85018aef922586270c3a074332737282e5"},
+    {file = "nbqa-1.9.1.tar.gz", hash = "sha256:a1f4bcf587c597302fed295951001fc4e1be4ce0e77e1ab1b25ac2fbe3db0cdd"},
+]
+
+[package.dependencies]
+autopep8 = ">=1.5"
+ipython = ">=7.8.0"
+tokenize-rt = ">=3.2.0"
+tomli = "*"
+
+[package.extras]
+toolchain = ["black", "blacken-docs", "flake8", "isort", "jupytext", "mypy", "pylint", "pyupgrade", "ruff"]
+
 [[package]]
 name = "nest-asyncio"
 version = "1.6.0"
@@ -3994,6 +4029,17 @@ files = [
     {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"},
 ]
 
+[[package]]
+name = "tokenize-rt"
+version = "6.1.0"
+description = "A wrapper around the stdlib `tokenize` which roundtrips."
+optional = false
+python-versions = ">=3.9"
+files = [
+    {file = "tokenize_rt-6.1.0-py2.py3-none-any.whl", hash = "sha256:d706141cdec4aa5f358945abe36b911b8cbdc844545da99e811250c0cee9b6fc"},
+    {file = "tokenize_rt-6.1.0.tar.gz", hash = "sha256:e8ee836616c0877ab7c7b54776d2fefcc3bde714449a206762425ae114b53c86"},
+]
+
 [[package]]
 name = "tomli"
 version = "2.0.2"
@@ -4373,4 +4419,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "f9a97deb62b63308062f23af2146692b8c76ba4f5e1abdc2dc03292e6aa07c41"
+content-hash = "a4df48d04e23f1ebe6f2e8f0517f4945124c081a90a696503731b5f9b28f60a7"
diff --git a/pyproject.toml b/pyproject.toml
index d3ed39b..16a8d7c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,6 +29,7 @@ pycirclize = "^1.8.0"
 circlify = "^0.15.0"
 azure-storage-blob = "^12.24.1"
 azure-identity = "^1.19.0"
+nbqa = "^1.9.1"
 
 [tool.poetry.group.dev.dependencies]
 pre-commit = "^3.8.0"