diff --git a/data/montreal/data_cleaning/README.md b/data/montreal/data_cleaning/README.md
new file mode 100644
index 00000000..80a9bc65
--- /dev/null
+++ b/data/montreal/data_cleaning/README.md
@@ -0,0 +1,14 @@
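+City of Vancouver has ~9,500 employees. (Note: this README sits in the Montreal data folder but appears to have been carried over from the Vancouver one; the employee count and the entities listed below describe Vancouver and still need to be replaced with Montreal's.)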
+
+The financial statement on page 21 states that it reports on the following consolidated entities:
+
+1. Hastings Institute Inc.
+2. Vancouver Civic Development Corporation
+3. Harbour Park Development Ltd.
+4. City of Vancouver Public Housing Corporation
+5. Parking Corporation of Vancouver (EasyPark)
+6. Pacific National Exhibition (PNE)
+7. Vancouver Economic Commission
+8. Vancouver Affordable Housing Agency
+
+It seems to be missing a lot of other entities or organizations which I am sure are covered (e.g. Vancouver Police Department? Vancouver Fire Department?)
diff --git a/data/montreal/data_cleaning/montreal_financial_statement_data_2024.xlsx b/data/montreal/data_cleaning/montreal_financial_statement_data_2024.xlsx
new file mode 100644
index 00000000..e5b8d1f4
Binary files /dev/null and b/data/montreal/data_cleaning/montreal_financial_statement_data_2024.xlsx differ
diff --git a/data/montreal/data_cleaning/raw_data/Annual financial report 2024 - English version.pdf b/data/montreal/data_cleaning/raw_data/Annual financial report 2024 - English version.pdf
new file mode 100644
index 00000000..c8abee3e
Binary files /dev/null and b/data/montreal/data_cleaning/raw_data/Annual financial report 2024 - English version.pdf differ
diff --git a/data/montreal/data_cleaning/sankey_builder.ipynb b/data/montreal/data_cleaning/sankey_builder.ipynb
new file mode 100644
index 00000000..78248300
--- /dev/null
+++ b/data/montreal/data_cleaning/sankey_builder.ipynb
@@ -0,0 +1,407 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Sankey Builder\n",
+ "\n",
+ "The goal is to get Montreal's financial data from the raw pdf files into the same structure used in Canada Spend's sankey structure (see here)\n",
+ "\n",
+ "The excel file was built by scraping the PDF data of the financial statements.\n",
+ "\n",
+ "This script is used to build out the sankey json structure."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import json\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# ========= Config =========\n",
+ "excel_path = Path(\"./montreal_financial_statement_data_2024.xlsx\") # <-- change if needed\n",
+ "sheet_name = \"cleaned_data\"\n",
+ "value_col = \"value\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Label columns that define the Sankey hierarchy, ordered from the root outwards (left -> right).\n",
+ "# A row may leave the deeper levels empty; its path then simply ends earlier.\n",
+ "# NOTE(review): the sheet preview below shows no `sankey_5` column. It is kept here because\n",
+ "# columns that are absent from the sheet are skipped when the tree is built - confirm it is intended.\n",
+ "column_order = [\n",
+ "    \"sankey_2\",\n",
+ "    \"sankey_3\",\n",
+ "    \"sankey_4\",\n",
+ "    \"sankey_5\",\n",
+ "]\n",
+ "\n",
+ "# ========= Load =========\n",
+ "df = pd.read_excel(excel_path, sheet_name=sheet_name)\n",
+ "\n",
+ "# Coerce amounts to numbers. A cell that is present but not numeric would otherwise silently\n",
+ "# become 0 and disappear from the totals, so report such cells before filling them.\n",
+ "_parsed_values = pd.to_numeric(df[value_col], errors=\"coerce\")\n",
+ "_unparsed = _parsed_values.isna() & df[value_col].notna()\n",
+ "if _unparsed.any():\n",
+ "    print(f\"WARNING: {int(_unparsed.sum())} non-numeric '{value_col}' cell(s) replaced with 0:\")\n",
+ "    print(df.loc[_unparsed, value_col].to_string())\n",
+ "df[value_col] = _parsed_values.fillna(0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sankey_4 | \n",
+ " sankey_3 | \n",
+ " sankey_2 | \n",
+ " sankey_1 | \n",
+ " value | \n",
+ " source | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Taxes | \n",
+ " revenue | \n",
+ " 4174298 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Payments in lieu of taxes | \n",
+ " revenue | \n",
+ " 287923 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Quota shares | \n",
+ " revenue | \n",
+ " 544237 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Service rendered | \n",
+ " revenue | \n",
+ " 2155564 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Fee Collection | \n",
+ " revenue | \n",
+ " 416493 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sankey_4 sankey_3 sankey_2 sankey_1 value source\n",
+ "0 NaN NaN Taxes revenue 4174298 NaN\n",
+ "1 NaN NaN Payments in lieu of taxes revenue 287923 NaN\n",
+ "2 NaN NaN Quota shares revenue 544237 NaN\n",
+ "3 NaN NaN Service rendered revenue 2155564 NaN\n",
+ "4 NaN NaN Fee Collection revenue 416493 NaN"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Data Cleaning\n",
+ "\n",
+ "We need to clean the data a little bit. Montreal's values are reported in thousands of dollars (Ontario's were in millions), so we divide by 1,000,000 to express them in billions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sankey_4 | \n",
+ " sankey_3 | \n",
+ " sankey_2 | \n",
+ " sankey_1 | \n",
+ " value | \n",
+ " source | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Taxes | \n",
+ " revenue | \n",
+ " 4.174298 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Payments in lieu of taxes | \n",
+ " revenue | \n",
+ " 0.287923 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Quota shares | \n",
+ " revenue | \n",
+ " 0.544237 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Service rendered | \n",
+ " revenue | \n",
+ " 2.155564 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Fee Collection | \n",
+ " revenue | \n",
+ " 0.416493 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sankey_4 sankey_3 sankey_2 sankey_1 value source\n",
+ "0 NaN NaN Taxes revenue 4.174298 NaN\n",
+ "1 NaN NaN Payments in lieu of taxes revenue 0.287923 NaN\n",
+ "2 NaN NaN Quota shares revenue 0.544237 NaN\n",
+ "3 NaN NaN Service rendered revenue 2.155564 NaN\n",
+ "4 NaN NaN Fee Collection revenue 0.416493 NaN"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Source figures are in thousands of dollars; dividing by 1,000,000 expresses them in billions.\n",
+ "# Uses the configured `value_col` so this cell stays in step with the rest of the notebook.\n",
+ "# NOTE: this rescales `df` in place - re-running the cell without reloading the sheet divides again.\n",
+ "df[value_col] = df[value_col] / 1_000_000\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Partition the rows by their top-level Sankey side; copies keep the two frames independent of `df`.\n",
+ "rev_df = df.loc[df[\"sankey_1\"].eq(\"revenue\")].copy()\n",
+ "exp_df = df.loc[df[\"sankey_1\"].eq(\"spending\")].copy()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def build_tree(sub_df: pd.DataFrame, cols, value_col=\"value\", root_name=\"Root\", round_to=3):\n",
+ " \"\"\"\n",
+ " Build a nested dict {\"name\": root_name, \"children\":[...]} for Sankey.\n",
+ " - Each row contributes its value to a path formed by non-null labels in `cols`.\n",
+ " - Internal nodes get \"children\"; leaves get {\"name\": ..., \"amount\": ...}.\n",
+ " \"\"\"\n",
+ " def make_node():\n",
+ " return {\"__children\": {}, \"__amount\": 0.0}\n",
+ "\n",
+ " root = make_node()\n",
+ "\n",
+ " for _, row in sub_df.iterrows():\n",
+ " amt = float(row[value_col])\n",
+ " if not amt:\n",
+ " continue\n",
+ "\n",
+ " # Path from chosen columns, skipping nulls\n",
+ " path = []\n",
+ " for c in cols:\n",
+ " val = row.get(c)\n",
+ " if pd.notna(val):\n",
+ " path.append(str(val))\n",
+ "\n",
+ " # Accumulate down the trie\n",
+ " node = root\n",
+ " node[\"__amount\"] += amt\n",
+ " for label in path:\n",
+ " if label not in node[\"__children\"]:\n",
+ " node[\"__children\"][label] = make_node()\n",
+ " node = node[\"__children\"][label]\n",
+ " node[\"__amount\"] += amt\n",
+ "\n",
+ " # Collapse trie -> Sankey schema\n",
+ " def collapse(node, name):\n",
+ " if node[\"__children\"]:\n",
+ " return {\n",
+ " \"name\": name,\n",
+ " \"children\": [\n",
+ " collapse(child_node, child_name)\n",
+ " for child_name, child_node in node[\"__children\"].items()\n",
+ " ],\n",
+ " }\n",
+ " else:\n",
+ " return {\"name\": name, \"amount\": round(node[\"__amount\"], round_to)}\n",
+ "\n",
+ " return {\n",
+ " \"name\": root_name,\n",
+ " \"children\": [\n",
+ " collapse(child_node, child_name)\n",
+ " for child_name, child_node in root[\"__children\"].items()\n",
+ " ],\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build the revenue and spending trees and their grand totals (totals rounded to 3 decimals).\n",
+ "revenue_data = build_tree(rev_df, column_order, value_col=value_col, root_name=\"Revenue\")\n",
+ "spending_data = build_tree(exp_df, column_order, value_col=value_col, root_name=\"Spending\")\n",
+ "total_spend = round(float(exp_df[value_col].sum()), 3)\n",
+ "total_revenue = round(float(rev_df[value_col].sum()), 3)\n",
+ "\n",
+ "# Compose output. \"total\" is the balance: revenue minus spending.\n",
+ "out = {\n",
+ "    # Re-round: subtracting two rounded floats leaves binary noise (e.g. 0.9619999999999997).\n",
+ "    \"total\": round(total_revenue - total_spend, 3),\n",
+ "    \"spending\": total_spend,\n",
+ "    \"revenue\": total_revenue,\n",
+ "    \"spending_data\": spending_data,\n",
+ "    \"revenue_data\": revenue_data,\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Wrote: ..\\sankey.json\n",
+ "Totals: {'revenue': 9.676, 'spending': 8.714, 'total': 0.9619999999999997}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# ========= Save & Preview =========\n",
+ "# Serialize the Sankey payload one directory above this notebook, then echo the headline totals.\n",
+ "out_path = Path(\"../sankey.json\")\n",
+ "out_path.write_text(json.dumps(out, ensure_ascii=False, indent=2), encoding=\"utf-8\")\n",
+ "\n",
+ "print(\"Wrote:\", out_path)\n",
+ "print(\"Totals:\", {key: out[key] for key in (\"revenue\", \"spending\", \"total\")})\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.14.0"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/data/montreal/sankey.json b/data/montreal/sankey.json
new file mode 100644
index 00000000..ca69a132
--- /dev/null
+++ b/data/montreal/sankey.json
@@ -0,0 +1,236 @@
+{
+ "total": 0.962,
+ "spending": 8.714,
+ "revenue": 9.676,
+ "spending_data": {
+ "name": "Spending",
+ "children": [
+ {
+ "name": "General Administration",
+ "children": [
+ {
+ "name": "Municipal administration",
+ "amount": 0.82
+ },
+ {
+ "name": "Eliminations",
+ "amount": -0.009
+ }
+ ]
+ },
+ {
+ "name": "Public Security",
+ "children": [
+ {
+ "name": "Municipal administration",
+ "amount": 1.365
+ },
+ {
+ "name": "Eliminations",
+ "amount": -0.001
+ }
+ ]
+ },
+ {
+ "name": "Transportation",
+ "children": [
+ {
+ "name": "Municipal administration",
+ "amount": 1.69
+ },
+ {
+ "name": "Controlled Organizations",
+ "amount": 2.085
+ },
+ {
+ "name": "Eliminations",
+ "amount": -0.162
+ }
+ ]
+ },
+ {
+ "name": "Environmental Hygiene",
+ "children": [
+ {
+ "name": "Municipal administration",
+ "amount": 0.686
+ },
+ {
+ "name": "Eliminations",
+ "amount": -0.001
+ }
+ ]
+ },
+ {
+ "name": "Health and Welfare",
+ "children": [
+ {
+ "name": "Municipal administration",
+ "amount": 0.276
+ },
+ {
+ "name": "Controlled Organizations",
+ "amount": 0.078
+ },
+ {
+ "name": "Eliminations",
+ "amount": -0.021
+ }
+ ]
+ },
+ {
+ "name": "Urban Planning and Development",
+ "children": [
+ {
+ "name": "Municipal administration",
+ "amount": 0.33
+ },
+ {
+ "name": "Controlled Organizations",
+ "amount": 0.003
+ },
+ {
+ "name": "Eliminations",
+ "amount": -0.003
+ }
+ ]
+ },
+ {
+ "name": "Recreation and Culture",
+ "children": [
+ {
+ "name": "Municipal administration",
+ "amount": 0.963
+ },
+ {
+ "name": "Controlled Organizations",
+ "amount": 0.084
+ },
+ {
+ "name": "Eliminations",
+ "amount": -0.04
+ }
+ ]
+ },
+ {
+ "name": "Financing Expenses",
+ "children": [
+ {
+ "name": "Municipal administration",
+ "amount": 0.431
+ },
+ {
+ "name": "Controlled Organizations",
+ "amount": 0.196
+ },
+ {
+ "name": "Eliminations",
+ "amount": -0.056
+ }
+ ]
+ }
+ ]
+ },
+ "revenue_data": {
+ "name": "Revenue",
+ "children": [
+ {
+ "name": "Taxes",
+ "amount": 4.174
+ },
+ {
+ "name": "Payments in lieu of taxes",
+ "amount": 0.288
+ },
+ {
+ "name": "Quota shares",
+ "amount": 0.544
+ },
+ {
+ "name": "Service rendered",
+ "amount": 2.156
+ },
+ {
+ "name": "Fee Collection",
+ "amount": 0.416
+ },
+ {
+ "name": "Fines and penalties",
+ "amount": 0.214
+ },
+ {
+ "name": "Other Revenues",
+ "amount": 0.159
+ },
+ {
+ "name": "Transfers",
+ "children": [
+ {
+ "name": "Transfers for Operating Activities",
+ "children": [
+ {
+ "name": "Government of Canada",
+ "amount": 0.032
+ },
+ {
+ "name": "Government of Quebec",
+ "amount": 0.742
+ },
+ {
+ "name": "Communaute metropolitaine de Montreal",
+ "amount": 0.068
+ }
+ ]
+ },
+ {
+ "name": "Transfers for Investing Activities",
+ "children": [
+ {
+ "name": "Government of Canada",
+ "amount": 0.109
+ },
+ {
+ "name": "Government of Quebec",
+ "amount": 0.557
+ },
+ {
+ "name": "Communaute metropolitaine de Montreal",
+ "amount": 0.001
+ },
+ {
+ "name": "Other",
+ "amount": 0.004
+ }
+ ]
+ }
+ ]
+ },
+ {
+ "name": "Portfolio Investment Income",
+ "children": [
+ {
+ "name": "Restricted to the Sinking Fund",
+ "amount": 0.092
+ },
+ {
+ "name": "Unrestricted",
+ "amount": 0.027
+ }
+ ]
+ },
+ {
+ "name": "Other Interest Income",
+ "children": [
+ {
+ "name": "Tax arrears",
+ "amount": 0.033
+ },
+ {
+ "name": "Interest on cash and other interest",
+ "amount": 0.059
+ }
+ ]
+ }
+ ]
+ }
+}
diff --git a/data/montreal/summary.json b/data/montreal/summary.json
new file mode 100644
index 00000000..ea02bf67
--- /dev/null
+++ b/data/montreal/summary.json
@@ -0,0 +1,76 @@
+{
+ "name": "Montreal",
+ "financialYear": "2024",
+ "source": "https://mtl.ged.montreal.ca/constellio/?collection=mtlca&portal=REPDOCVDM#!displayDocument/00000120414",
+ "totalProvincialSpending": 0,
+ "totalProvincialSpendingFormatted": "",
+ "totalEmployees": 28000,
+ "netDebt": 0,
+ "totalDebt": null,
+ "debtInterest": null,
+ "population": 1762949,
+ "budgetBalance": 0,
+ "budgetBalanceFormatted": "",
+ "perCapitaSpending": 6064.8,
+ "propertyTaxPerCapita": 2367.79,
+ "propertyTaxRevenue": 4.174,
+ "propertyTaxRevenueFormatted": "$4.2B",
+ "ministries": [
+ {
+ "name": "General Administration",
+ "slug": "general-administration",
+ "totalSpending": 0.810408,
+ "totalSpendingFormatted": "$810M",
+ "percentage": 7.58,
+ "percentageFormatted": "7.58%"
+ },
+ {
+ "name": "Public Security",
+ "slug": "public-security",
+ "totalSpending": 1.364228,
+ "totalSpendingFormatted": "$1.36B",
+ "percentage": 12.76,
+ "percentageFormatted": "12.76%"
+ },
+ {
+ "name": "Transportation",
+ "slug": "transportation",
+ "totalSpending": 3.612593,
+ "totalSpendingFormatted": "$3.61B",
+ "percentage": 33.79,
+ "percentageFormatted": "33.79%"
+ },
+ {
+ "name": "Environmental Hygiene",
+ "slug": "environmental-hygiene",
+ "totalSpending": 0.684792,
+ "totalSpendingFormatted": "$685M",
+ "percentage": 6.4,
+ "percentageFormatted": "6.40%"
+ },
+ {
+ "name": "Health and Welfare",
+ "slug": "health-and-welfare",
+ "totalSpending": 0.33347,
+ "totalSpendingFormatted": "$333M",
+ "percentage": 3.12,
+ "percentageFormatted": "3.12%"
+ },
+ {
+ "name": "Urban Planning and Development",
+ "slug": "urban-planning-and-development",
+ "totalSpending": 0.330257,
+ "totalSpendingFormatted": "$330M",
+ "percentage": 3.09,
+ "percentageFormatted": "3.09%"
+ },
+ {
+ "name": "Recreation and Culture",
+ "slug": "recreation-and-culture",
+ "totalSpending": 1.006407,
+ "totalSpendingFormatted": "$1.01B",
+ "percentage": 9.41,
+ "percentageFormatted": "9.41%"
+ }
+ ]
+}
diff --git a/data/vancouver/Vancouver_Financial_Data_Summary.pdf b/data/vancouver/Vancouver_Financial_Data_Summary.pdf
new file mode 100644
index 00000000..c3d339d2
Binary files /dev/null and b/data/vancouver/Vancouver_Financial_Data_Summary.pdf differ
diff --git a/data/vancouver/summary.json b/data/vancouver/summary.json
index 087e9d64..d66cd978 100644
--- a/data/vancouver/summary.json
+++ b/data/vancouver/summary.json
@@ -1,7 +1,7 @@
{
"name": "Vancouver",
"financialYear": "2024",
- "source": "https://vancouver.ca/your-government/financial-reports-and-information.aspx",
+ "source": "https://vancouver.ca/files/cov/2024-annual-financial-report.pdf",
"totalProvincialSpending": 3.269551,
"totalProvincialSpendingFormatted": "$3.3B",
"totalEmployees": 9500,
diff --git a/src/components/MainLayout/index.tsx b/src/components/MainLayout/index.tsx
index 6e90b156..2260f213 100644
--- a/src/components/MainLayout/index.tsx
+++ b/src/components/MainLayout/index.tsx
@@ -20,6 +20,7 @@ const provinces = [
];
const municipalities = [
+ { slug: "montreal", name: "Montreal" },
{ slug: "toronto", name: "Toronto" },
{ slug: "vancouver", name: "Vancouver" },
];