Merged
5 changes: 4 additions & 1 deletion .github/workflows/ci.yml
@@ -41,7 +41,10 @@ jobs:
${{ runner.os }}-poetry-${{ matrix.python-version }}-

- name: Install dependencies
run: make .venv
run: |
poetry install --no-root
echo "VIRTUAL_ENV=$(poetry env info --path)" >> $GITHUB_ENV
echo "$VIRTUAL_ENV/bin" >> $GITHUB_PATH

- name: Lint code
run: make lint
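The new install step relies on GitHub Actions' file-based environment mechanism: each `echo "KEY=value" >> $GITHUB_ENV` appends a line to a runner-managed file, and the runner parses that file into the environment of every subsequent step. A minimal pure-Python sketch of that round trip (the helper names and file path are hypothetical, for illustration only):

```python
def export_env(env_file: str, **pairs: str) -> None:
    """Append KEY=value lines, mimicking `echo "KEY=val" >> $GITHUB_ENV`."""
    with open(env_file, "a") as fh:
        for key, value in pairs.items():
            fh.write(f"{key}={value}\n")

def load_env(env_file: str) -> dict[str, str]:
    """Parse the file the way a later workflow step would see it."""
    env: dict[str, str] = {}
    with open(env_file) as fh:
        for line in fh:
            key, _, value = line.strip().partition("=")
            env[key] = value
    return env
```

The same append-a-line pattern applies to `$GITHUB_PATH`, except each line is a directory prepended to `PATH`, which is why `$VIRTUAL_ENV/bin` lands ahead of the system Python in later steps.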
8 changes: 1 addition & 7 deletions README.md
@@ -66,18 +66,12 @@ testing, linting, and more.
This project uses [Poetry](https://python-poetry.org/) for dependency management.
To set up your development environment:

1. **Create and activate the virtual environment:**
**Create and activate the virtual environment:**

```bash
make .venv
```

2. **Install project dependencies:**

```bash
poetry install
```

### Running Normalisation Scripts

Each data source module under `src` contains scripts for data normalisation.
97 changes: 97 additions & 0 deletions notebooks/exploration.ipynb
@@ -0,0 +1,97 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notebook: Exploration of CSV files"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"Exploration notebook for data analysis.\n",
"\n",
"This notebook contains data exploration steps for disaster analysis.\n",
"\"\"\"\n",
"\n",
"import sys\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"\n",
"module_path = Path(\"..\").resolve()\n",
"sys.path.append(str(module_path))"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"def read_dat(dat_file: str) -> pd.DataFrame:\n",
" \"\"\"Reads a CSV file from the data_prep directory.\"\"\"\n",
" dat_dir = Path(\"../data_prep/\").resolve()\n",
" dat_path = dat_dir / dat_file\n",
" return pd.read_csv(dat_path)"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"glide_prep_df = read_dat(\"glide_prep.csv\")\n",
"gdacs_prep_df = read_dat(\"gdacs_prep.csv\")\n",
"emdat_prep_df = read_dat(\"emdat_prep.csv\")\n",
"disaster_charter_df = read_dat(\"disaster_charter_prep.csv\")\n",
"cerf_df = read_dat(\"cerf_prep.csv\")\n",
"idmc_df = read_dat(\"idmc_prep.csv\")\n",
"ifrc_df = read_dat(\"ifrc_prep.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"pre_dfs = [\n",
" glide_prep_df,\n",
" gdacs_prep_df,\n",
" emdat_prep_df,\n",
" disaster_charter_df,\n",
" cerf_df,\n",
" idmc_df,\n",
" ifrc_df,\n",
"]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
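The notebook's `read_dat` helper can be sketched as a standalone function. The default directory and the `glide_prep.csv` file name come from the notebook; the `dat_dir` parameter and the explicit missing-file error are additions for illustration, not part of the PR:

```python
from pathlib import Path

import pandas as pd

def read_dat(dat_file: str, dat_dir: str = "../data_prep/") -> pd.DataFrame:
    """Read a prepared CSV from the data_prep directory."""
    dat_path = Path(dat_dir).resolve() / dat_file
    if not dat_path.exists():
        # Fail with a clear message instead of pandas' lower-level error.
        raise FileNotFoundError(f"No prepared CSV at {dat_path}")
    return pd.read_csv(dat_path)
```

Parameterising the directory also makes the helper testable against a temporary folder rather than the repo's `data_prep/` layout.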
48 changes: 47 additions & 1 deletion poetry.lock
1 change: 1 addition & 0 deletions pyproject.toml
@@ -36,6 +36,7 @@ pymarkdownlnt = "^0.9.23"
pytest = "^8.3.3"
ruff = "^0.6.8"
ipykernel = "^6.29.5"
nbqa = "^1.9.1"

[build-system]
requires = ["poetry-core"]