diff --git a/data/montreal/data_cleaning/README.md b/data/montreal/data_cleaning/README.md new file mode 100644 index 00000000..80a9bc65 --- /dev/null +++ b/data/montreal/data_cleaning/README.md @@ -0,0 +1,14 @@ +City of Vancouver has ~9,500 employees. + +The financial statement on page 21 states that it reports on the following consolidated entities: + +1. Hastings Institute Inc. +2. Vancouver Civic Development Corporation +3. Harbour Park Development Ltd. +4. City of Vancouver Public Housing Corporation +5. Parking Corporation of Vancouver (EasyPark) +6. Pacific National Exhibition (PNE) +7. Vancouver Economic Commission +8. Vancouver Affordable Housing Agency + +It seems to be missing a lot of other entities or organizations which I am sure is covered (e.g. Vancouver Police Department? Vancouver Fire Department?) diff --git a/data/montreal/data_cleaning/montreal_financial_statement_data_2024.xlsx b/data/montreal/data_cleaning/montreal_financial_statement_data_2024.xlsx new file mode 100644 index 00000000..e5b8d1f4 Binary files /dev/null and b/data/montreal/data_cleaning/montreal_financial_statement_data_2024.xlsx differ diff --git a/data/montreal/data_cleaning/raw_data/Annual financial report 2024 - English version.pdf b/data/montreal/data_cleaning/raw_data/Annual financial report 2024 - English version.pdf new file mode 100644 index 00000000..c8abee3e Binary files /dev/null and b/data/montreal/data_cleaning/raw_data/Annual financial report 2024 - English version.pdf differ diff --git a/data/montreal/data_cleaning/sankey_builder.ipynb b/data/montreal/data_cleaning/sankey_builder.ipynb new file mode 100644 index 00000000..78248300 --- /dev/null +++ b/data/montreal/data_cleaning/sankey_builder.ipynb @@ -0,0 +1,407 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sankey Builder\n", + "\n", + "The goal is to get Montreal's financial data from the raw pdf files into the same structure used in Canada Spend's sankey structure (see 
here)\n", + "\n", + "The excel file was built by scraping the PDF data of the financial statements.\n", + "\n", + "This script is used to build out the sankey json structure." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import json\n", + "from pathlib import Path\n", + "\n", + "# ========= Config =========\n", + "excel_path = Path(\"./montreal_financial_statement_data_2024.xlsx\") # <-- change if needed\n", + "sheet_name = \"cleaned_data\"\n", + "value_col = \"value\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Hierarchy (left -> right). This is the flipped order you asked for:\n", + "# Category first, then the specific source, then capital/op, then transfer.\n", + "# You can tweak this list if you want a different grouping precedence.\n", + "column_order = [\n", + " \"sankey_2\",\n", + " \"sankey_3\", # (spelled as in the sheet)\n", + " \"sankey_4\",\n", + " \"sankey_5\"\n", + "]\n", + "\n", + "# ========= Load =========\n", + "df = pd.read_excel(excel_path, sheet_name=sheet_name)\n", + "df[value_col] = pd.to_numeric(df[value_col], errors=\"coerce\").fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sankey_4sankey_3sankey_2sankey_1valuesource
0NaNNaNTaxesrevenue4174298NaN
1NaNNaNPayments in lieu of taxesrevenue287923NaN
2NaNNaNQuota sharesrevenue544237NaN
3NaNNaNService renderedrevenue2155564NaN
4NaNNaNFee Collectionrevenue416493NaN
\n", + "
" + ], + "text/plain": [ + " sankey_4 sankey_3 sankey_2 sankey_1 value source\n", + "0 NaN NaN Taxes revenue 4174298 NaN\n", + "1 NaN NaN Payments in lieu of taxes revenue 287923 NaN\n", + "2 NaN NaN Quota shares revenue 544237 NaN\n", + "3 NaN NaN Service rendered revenue 2155564 NaN\n", + "4 NaN NaN Fee Collection revenue 416493 NaN" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Cleaning\n", + "\n", + "We need to clean the data a little bit. The values are reported in thousands, where Ontario's was in millions." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sankey_4sankey_3sankey_2sankey_1valuesource
0NaNNaNTaxesrevenue4.174298NaN
1NaNNaNPayments in lieu of taxesrevenue0.287923NaN
2NaNNaNQuota sharesrevenue0.544237NaN
3NaNNaNService renderedrevenue2.155564NaN
4NaNNaNFee Collectionrevenue0.416493NaN
\n", + "
" + ], + "text/plain": [ + " sankey_4 sankey_3 sankey_2 sankey_1 value source\n", + "0 NaN NaN Taxes revenue 4.174298 NaN\n", + "1 NaN NaN Payments in lieu of taxes revenue 0.287923 NaN\n", + "2 NaN NaN Quota shares revenue 0.544237 NaN\n", + "3 NaN NaN Service rendered revenue 2.155564 NaN\n", + "4 NaN NaN Fee Collection revenue 0.416493 NaN" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['value'] = df['value'] / 1000000\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Split revenue vs spending\n", + "rev_df = df[df[\"sankey_1\"] == \"revenue\"].copy()\n", + "exp_df = df[df[\"sankey_1\"] == \"spending\"].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def build_tree(sub_df: pd.DataFrame, cols, value_col=\"value\", root_name=\"Root\", round_to=3):\n", + " \"\"\"\n", + " Build a nested dict {\"name\": root_name, \"children\":[...]} for Sankey.\n", + " - Each row contributes its value to a path formed by non-null labels in `cols`.\n", + " - Internal nodes get \"children\"; leaves get {\"name\": ..., \"amount\": ...}.\n", + " \"\"\"\n", + " def make_node():\n", + " return {\"__children\": {}, \"__amount\": 0.0}\n", + "\n", + " root = make_node()\n", + "\n", + " for _, row in sub_df.iterrows():\n", + " amt = float(row[value_col])\n", + " if not amt:\n", + " continue\n", + "\n", + " # Path from chosen columns, skipping nulls\n", + " path = []\n", + " for c in cols:\n", + " val = row.get(c)\n", + " if pd.notna(val):\n", + " path.append(str(val))\n", + "\n", + " # Accumulate down the trie\n", + " node = root\n", + " node[\"__amount\"] += amt\n", + " for label in path:\n", + " if label not in node[\"__children\"]:\n", + " node[\"__children\"][label] = make_node()\n", + " node = node[\"__children\"][label]\n", + " node[\"__amount\"] += amt\n", + "\n", + " 
# Collapse trie -> Sankey schema\n", + " def collapse(node, name):\n", + " if node[\"__children\"]:\n", + " return {\n", + " \"name\": name,\n", + " \"children\": [\n", + " collapse(child_node, child_name)\n", + " for child_name, child_node in node[\"__children\"].items()\n", + " ],\n", + " }\n", + " else:\n", + " return {\"name\": name, \"amount\": round(node[\"__amount\"], round_to)}\n", + "\n", + " return {\n", + " \"name\": root_name,\n", + " \"children\": [\n", + " collapse(child_node, child_name)\n", + " for child_name, child_node in root[\"__children\"].items()\n", + " ],\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Build both sides\n", + "revenue_data = build_tree(rev_df, column_order, value_col=value_col, root_name=\"Revenue\")\n", + "spending_data = build_tree(exp_df, column_order, value_col=value_col, root_name=\"Spending\")\n", + "total_spend = round(float(exp_df[value_col].sum()), 3)\n", + "total_revenue = round(float(rev_df[value_col].sum()), 3)\n", + "\n", + "# Compose output\n", + "out = {\n", + " \"total\": total_revenue - total_spend,\n", + " \"spending\": total_spend,\n", + " \"revenue\": total_revenue,\n", + " \"spending_data\": spending_data,\n", + " \"revenue_data\": revenue_data,\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Wrote: ..\\sankey.json\n", + "Totals: {'revenue': 9.676, 'spending': 8.714, 'total': 0.9619999999999997}\n" + ] + } + ], + "source": [ + "# ========= Save & Preview =========\n", + "out_path = Path(\"../sankey.json\") # rename if you like\n", + "with open(out_path, \"w\", encoding=\"utf-8\") as f:\n", + " json.dump(out, f, ensure_ascii=False, indent=2)\n", + "\n", + "print(\"Wrote:\", out_path)\n", + "print(\"Totals:\", {\"revenue\": out[\"revenue\"], \"spending\": out[\"spending\"], \"total\": out[\"total\"]})\n" + ] + } 
+ ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.14.0" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/data/montreal/sankey.json b/data/montreal/sankey.json new file mode 100644 index 00000000..ca69a132 --- /dev/null +++ b/data/montreal/sankey.json @@ -0,0 +1,236 @@ +{ + "total": 0.962, + "spending": 8.714, + "revenue": 9.676, + "spending_data": { + "name": "Spending", + "children": [ + { + "name": "General Administration", + "children": [ + { + "name": "Municipal administration", + "amount": 0.82 + }, + { + "name": "Eliminations", + "amount": -0.009 + } + ] + }, + { + "name": "Public Security", + "children": [ + { + "name": "Municipal administration", + "amount": 1.365 + }, + { + "name": "Eliminations", + "amount": -0.001 + } + ] + }, + { + "name": "Transportation", + "children": [ + { + "name": "Municipal administration", + "amount": 1.69 + }, + { + "name": "Controlled Organizations", + "amount": 2.085 + }, + { + "name": "Eliminations", + "amount": -0.162 + } + ] + }, + { + "name": "Environmental Hygiene", + "children": [ + { + "name": "Municipal administration", + "amount": 0.686 + }, + { + "name": "Eliminations", + "amount": -0.001 + } + ] + }, + { + "name": "Health and Welfare", + "children": [ + { + "name": "Municipal administration", + "amount": 0.276 + }, + { + "name": "Controlled Organizations", + "amount": 0.078 + }, + { + "name": "Eliminations", + "amount": -0.021 + } + ] + }, + { + "name": "Urban Planning and Development", + "children": [ + { + "name": "Municipal administration", + "amount": 0.33 + }, + { + "name": "Controlled Organizations", + "amount": 0.003 + }, + { + "name": "Eliminations", 
+ "amount": -0.003 + } + ] + }, + { + "name": "Recreation and Culture", + "children": [ + { + "name": "Municipal administration", + "amount": 0.963 + }, + { + "name": "Controlled Organizations", + "amount": 0.084 + }, + { + "name": "Eliminations", + "amount": -0.04 + } + ] + }, + { + "name": "Financing Expenses", + "children": [ + { + "name": "Municipal administration", + "amount": 0.431 + }, + { + "name": "Controlled Organizations", + "amount": 0.196 + }, + { + "name": "Eliminations", + "amount": -0.056 + } + ] + } + ] + }, + "revenue_data": { + "name": "Revenue", + "children": [ + { + "name": "Taxes", + "amount": 4.174 + }, + { + "name": "Payments in lieu of taxes", + "amount": 0.288 + }, + { + "name": "Quota shares", + "amount": 0.544 + }, + { + "name": "Service rendered", + "amount": 2.156 + }, + { + "name": "Fee Collection", + "amount": 0.416 + }, + { + "name": "Fines and penalties", + "amount": 0.214 + }, + { + "name": "Other Revenues", + "amount": 0.159 + }, + { + "name": "Transfers", + "children": [ + { + "name": "Transfers for Operating Activities", + "children": [ + { + "name": "Government of Canada", + "amount": 0.032 + }, + { + "name": "Government of Quebec", + "amount": 0.742 + }, + { + "name": "Communaute metropolitaine de Montreal", + "amount": 0.068 + } + ] + }, + { + "name": "Transfers for Investing Activities", + "children": [ + { + "name": "Government of Canada", + "amount": 0.109 + }, + { + "name": "Government of Quebec", + "amount": 0.557 + }, + { + "name": "Communaute metropolitaine de Montreal", + "amount": 0.001 + }, + { + "name": "Other", + "amount": 0.004 + } + ] + } + ] + }, + { + "name": "Portfolio Investment Income", + "children": [ + { + "name": "Restricted to the Sinking Fund", + "amount": 0.092 + }, + { + "name": "Unrestricted", + "amount": 0.027 + } + ] + }, + { + "name": "Other Interest Income", + "children": [ + { + "name": "Tax arrears", + "amount": 0.033 + }, + { + "name": "Interest on cash and other interest", + "amount": 0.059 
+ } + ] + } + ] + } +} diff --git a/data/montreal/summary.json b/data/montreal/summary.json new file mode 100644 index 00000000..ea02bf67 --- /dev/null +++ b/data/montreal/summary.json @@ -0,0 +1,76 @@ +{ + "name": "Montreal", + "financialYear": "2024", + "source": "https://mtl.ged.montreal.ca/constellio/?collection=mtlca&portal=REPDOCVDM#!displayDocument/00000120414", + "totalProvincialSpending": 0, + "totalProvincialSpendingFormatted": "", + "totalEmployees": 28000, + "netDebt": 0, + "totalDebt": null, + "debtInterest": null, + "population": 1762949, + "budgetBalance": 0, + "budgetBalanceFormatted": "", + "perCapitaSpending": 6064.8, + "propertyTaxPerCapita": 2367.79, + "propertyTaxRevenue": 4.174, + "propertyTaxRevenueFormatted": "$4.2B", + "ministries": [ + { + "name": "General Administration", + "slug": "general-administration", + "totalSpending": 0.810408, + "totalSpendingFormatted": "$810M", + "percentage": 7.58, + "percentageFormatted": "7.58%" + }, + { + "name": "Public Security", + "slug": "public-security", + "totalSpending": 1.364228, + "totalSpendingFormatted": "$1.36B", + "percentage": 12.76, + "percentageFormatted": "12.76%" + }, + { + "name": "Transportation", + "slug": "transportation", + "totalSpending": 3.612593, + "totalSpendingFormatted": "$3.61B", + "percentage": 33.79, + "percentageFormatted": "33.79%" + }, + { + "name": "Environmental Hygiene", + "slug": "environmental-hygiene", + "totalSpending": 0.684792, + "totalSpendingFormatted": "$685M", + "percentage": 6.4, + "percentageFormatted": "6.40%" + }, + { + "name": "Health and Welfare", + "slug": "health-and-welfare", + "totalSpending": 0.33347, + "totalSpendingFormatted": "$333M", + "percentage": 3.12, + "percentageFormatted": "3.12%" + }, + { + "name": "Urban Planning and Development", + "slug": "urban-planning-and-development", + "totalSpending": 0.330257, + "totalSpendingFormatted": "$330M", + "percentage": 3.09, + "percentageFormatted": "3.09%" + }, + { + "name": "Recreation and 
Culture", + "slug": "recreation-and-culture", + "totalSpending": 1.006407, + "totalSpendingFormatted": "$1.01B", + "percentage": 9.41, + "percentageFormatted": "9.41%" + } + ] +} diff --git a/data/vancouver/Vancouver_Financial_Data_Summary.pdf b/data/vancouver/Vancouver_Financial_Data_Summary.pdf new file mode 100644 index 00000000..c3d339d2 Binary files /dev/null and b/data/vancouver/Vancouver_Financial_Data_Summary.pdf differ diff --git a/data/vancouver/summary.json b/data/vancouver/summary.json index 087e9d64..d66cd978 100644 --- a/data/vancouver/summary.json +++ b/data/vancouver/summary.json @@ -1,7 +1,7 @@ { "name": "Vancouver", "financialYear": "2024", - "source": "https://vancouver.ca/your-government/financial-reports-and-information.aspx", + "source": "https://vancouver.ca/files/cov/2024-annual-financial-report.pdf", "totalProvincialSpending": 3.269551, "totalProvincialSpendingFormatted": "$3.3B", "totalEmployees": 9500, diff --git a/src/components/MainLayout/index.tsx b/src/components/MainLayout/index.tsx index 6e90b156..2260f213 100644 --- a/src/components/MainLayout/index.tsx +++ b/src/components/MainLayout/index.tsx @@ -20,6 +20,7 @@ const provinces = [ ]; const municipalities = [ + { slug: "montreal", name: "Montreal" }, { slug: "toronto", name: "Toronto" }, { slug: "vancouver", name: "Vancouver" }, ];