google · charlesdtdb · Aug 14, 2025
diff --git a/matched_markets/notebook/design_colab_for_tbrmm.ipynb b/matched_markets/notebook/design_colab_for_tbrmm.ipynb
@@ -23,7 +23,7 @@
       },
       "outputs": [],
       "source": [
-        "#@title Load the libraries needed for the design  \n",
+        "#@title Load the libraries needed for the design\n",
         "\n",
         "BAZEL_VERSION = '3.0.0'\n",
         "!wget https://github.com/bazelbuild/bazel/releases/download/{BAZEL_VERSION}/bazel-{BAZEL_VERSION}-installer-linux-x86_64.sh\n",
@@ -71,17 +71,32 @@
         "InteractiveShell.ast_node_interactivity = \"all\""
       ]
     },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Charlie's notes\n",
+        "It’s always best to have actual spend by geo, but that’s not always feasible. A workaround is to estimate each geo’s spend by dividing its revenue by your overall (average) ROI. If you’re running campaigns across geos, you should normally see relatively homogeneous ROI, since budget allocation models tend to distribute spend across geos to maximize return.\n",
+        "\n",
+        "Achilles’ heel of greedy matching:\n",
+        "It optimizes for the pre-period you feed it, so the treatment–control relationship looks stable in that window, but nothing guarantees it stays stable during the test period.\n",
+        "\n",
+        "\n",
+        "The `geo_eligibility_table` sheet should contain the columns [\"geo\", \"control\", \"treatment\", \"exclude\"]. **TODO:** this is not working yet."
+      ],
+      "metadata": {
+        "id": "kYGxPGP-roZ8"
+      }
+    },
     {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "cellView": "form",
         "id": "_PXNPQe2uPEl"
       },
       "outputs": [],
       "source": [
         "#@markdown ---\n",
-        "#@markdown ### Enter the trix url for the sheet file containing the Client Sales Data: \n",
+        "#@markdown ### Enter the trix url for the sheet file containing the Client Sales Data:\n",
         "#@markdown The spreadsheet should contain the mandatory columns:\n",
         "#@markdown * date: date in the format YYYY-MM-DD\n",
         "#@markdown * geo: the number which identifies the geo\n",
@@ -92,10 +107,10 @@
         "#@markdown Other columns can be present in the spreadsheet.\n",
         "\n",
         "#@markdown Spreadsheet URL containing the geo level response and spend data\n",
-        "client_sales_table = \"add your url here, which should look like https://docs.google.com/spreadsheets/d/???/edit#gid=???\" #@param {type:\"string\"}\n",
+        "client_sales_table = \"\" #@param {type:\"string\"}\n",
         "\n",
         "#@markdown Leave the following field empty if you don't want to add constraint to the geo_eligibility\n",
-        "geo_eligibility_table = \"add your url here, which should look like https://docs.google.com/spreadsheets/d/???/edit#gid=???\" #@param {type:\"string\"}\n",
+        "geo_eligibility_table = \"\" #@param {type:\"string\"}\n",
         "auth.authenticate_user()\n",
         "creds, _ = google_auth.default()\n",
         "gc = gspread.authorize(creds)\n",
@@ -111,22 +126,57 @@
         "num_geos = geo_level_time_series[\"geo\"].nunique()\n",
         "\n",
         "if not geo_eligibility_table:\n",
-        "  geo_eligibility = None\n",
+        "    geo_eligibility = None\n",
         "else:\n",
-        "  wks = gc.open_by_url(geo_eligibility_table).sheet1\n",
-        "  data = wks.get_all_values()\n",
-        "  headers = data.pop(0)\n",
-        "  geo_eligibility = pd.DataFrame(data, columns=headers)\n",
-        "  for colname in [\"geo\", \"control\", \"treatment\", \"exclude\"]:\n",
-        "    geo_eligibility[colname] = pd.to_numeric(geo_eligibility[colname])\n",
-        "  # set missing geos in geo_eligibility as eligible for any assignment\n",
-        "  geo_eligibility = utils.default_geo_assignment(geo_level_time_series,\n",
-        "                                                 geo_eligibility)\n",
-        "  geo_eligibility = geoeligibility.GeoEligibility(geo_eligibility)\n",
-        "  geo_eligibility.data.index = pd.to_numeric(geo_eligibility.data.index,\n",
-        "                                             downcast=\"integer\").astype(str)\n"
+        "    wks = gc.open_by_url(geo_eligibility_table).sheet1\n",
+        "    data = wks.get_all_values()\n",
+        "    headers = data.pop(0)\n",
+        "    geo_eligibility = pd.DataFrame(data, columns=headers)\n",
+        "    for colname in [\"geo\", \"control\", \"treatment\", \"exclude\"]:\n",
+        "        geo_eligibility[colname] = pd.to_numeric(geo_eligibility[colname])\n",
+        "\n",
+        "    ## build defaults for ALL geos (eligible for either if not in the sheet)\n",
+        "    all_geos = (geo_level_time_series[[\"geo\"]]\n",
+        "                .drop_duplicates()\n",
+        "                .assign(control=1, treatment=1, exclude=0))\n",
+        "\n",
+        "    geo_eligibility = all_geos.merge(\n",
+        "        geo_eligibility[[\"geo\",\"control\",\"treatment\",\"exclude\"]],\n",
+        "        on=\"geo\", how=\"left\", suffixes=(\"\", \"_set\")\n",
+        "    )\n",
+        "    for col in [\"control\", \"treatment\", \"exclude\"]:\n",
+        "        geo_eligibility[col] = (\n",
+        "            geo_eligibility[f\"{col}_set\"]\n",
+        "            .fillna(geo_eligibility[col])  # keep defaults (1/1/0) if blank\n",
+        "            .fillna(0)\n",
+        "            .astype(int)\n",
+        "    )\n",
+        "    geo_eligibility.drop(columns=[c for c in geo_eligibility.columns if c.endswith(\"_set\")],\n",
+        "                        inplace=True)\n",
+        "\n",
+        "    geo_eligibility = geoeligibility.GeoEligibility(geo_eligibility)\n",
+        "    geo_eligibility.data.index = (\n",
+        "        pd.to_numeric(geo_eligibility.data.index, downcast=\"integer\").astype(str)\n",
+        ")"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "**minimum_detectable_iROAS:** the smallest ROI you’d consider a win. In the end, that is a business decision.\n",
+        "\n",
+        "\n",
+        "**What “experiment_budget” actually means**\n",
+        "It’s the incremental spend you plan to add in the TREATMENT geos during the test window, above their baseline. That’s the intensity that generates measurable lift in TBR. It is not your company’s net change in total spend.\n",
+        "\n",
+        "If you reallocate (cut somewhere, add somewhere else), the tool still wants the positive uplift going into treatment.\n",
+        "\n",
+        "Don’t subtract the reduction in control; just enter the extra you’re putting into treatment."
+      ],
+      "metadata": {
+        "id": "fzeLUoXi2ifp"
+      }
+    },
     {
       "cell_type": "code",
       "execution_count": null,
@@ -136,13 +186,13 @@
       },
       "outputs": [],
       "source": [
-        "#@title Select the parameters for the design of the experiment \n",
+        "#@title Select the parameters for the design of the experiment\n",
         "\n",
         "## The minimum detectable iROAS is defined as the value of the true iROAS such\n",
         "## that, given a confidence_level (input) % confidence level for a one-sided\n",
         "## test, gives a power_level (input) % power if the true iROAS is equal to the\n",
         "## minimum detectable iROAS.\n",
-        "minimum_detectable_iROAS =  3#@param{type: \"number\"}\n",
+        "minimum_detectable_iROAS =  6#@param{type: \"number\"}\n",
         "#@markdown Use an average order value of 1 if the design is based on\n",
         "#@markdown sales/revenue or an actual average order value (e.g. $80) for a\n",
         "#@markdown design based on transactions/footfall/contracts.\n",
@@ -153,7 +203,7 @@
         "experiment_duration_in_weeks = 4 #@param {type:\"integer\"}\n",
         "\n",
         "#@markdown List the maximum budget for the experiment e.g. 300000\n",
-        "experiment_budget =  300000#@param{type: \"number\"}\n",
+        "experiment_budget =  6000#@param{type: \"number\"}\n",
         "#@markdown List any alternative budget which you would like to test separated\n",
         "#@markdown by a comma, e.g. 125000, 150000\n",
         "alternative_budget = \"\" #@param{type: \"string\"}\n",
@@ -209,7 +259,7 @@
         "## Maximum number of geos to include in the search\n",
         "n_geos_max = num_geos\n",
         "## Maximum number of pretest timepoints to include in the time series for the\n",
-        "## purpose of estimating minimum detectable response \n",
+        "## purpose of estimating minimum detectable response\n",
         "n_pretest_max = n_pretest\n",
         "## Number of design to store during the exhaustive search\n",
         "n_designs = 3\n",
@@ -255,20 +305,26 @@
         "                                              parameters=tbr_parameters)"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "source": [],
+      "metadata": {
+        "id": "E9QB-l1-4DZb"
+      }
+    },
     {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "cellView": "form",
         "id": "WZ9EopaibKRM"
       },
       "outputs": [],
       "source": [
-        "#@title Summary of the possible designs  \n",
+        "#@title Summary of the possible designs\n",
         "\n",
         "max_feasible_number_of_designs = 5 * 10 ** 6\n",
         "\n",
-        "if MMclass.count_max_designs() \u003c max_feasible_number_of_designs:\n",
+        "if MMclass.count_max_designs() < max_feasible_number_of_designs:\n",
         "  matched_designs = MMclass.exhaustive_search()\n",
         "else:\n",
         "  matched_designs = MMclass.greedy_search()\n",
@@ -310,7 +366,7 @@
         "first_day = geo_level_time_series[\"date\"].max() - pd.Timedelta(\n",
         "    str(experiment_duration_in_weeks) + \"W\")\n",
         "most_recent_geo_level_time_series = geo_level_time_series[\n",
-        "    geo_level_time_series['date'] \u003e first_day]\n",
+        "    geo_level_time_series['date'] > first_day]\n",
         "\n",
         "total_response = most_recent_geo_level_time_series[\"response\"].sum()\n",
         "total_spend = most_recent_geo_level_time_series[\"cost\"].sum()\n",
@@ -357,7 +413,7 @@
         "    \"\"\"\n",
         "    if float(row[\"Minimum detectable iROAS\"]) == minimum_detectable_iROAS:\n",
         "          return pd.Series('background-color: lightgreen', row.index)\n",
-        "    elif float(row[\"Minimum detectable iROAS\"]) \u003e minimum_detectable_iROAS:\n",
+        "    elif float(row[\"Minimum detectable iROAS\"]) > minimum_detectable_iROAS:\n",
         "          return pd.Series('background-color: orange', row.index)\n",
         "    else:\n",
         "          return pd.Series('background-color: beige', row.index)\n",
@@ -367,15 +423,15 @@
         "    Color a cell in red if its value is larger than the value\n",
         "    in input\n",
         "    \"\"\"\n",
-        "    color = 'red' if float(val.strip(' %')) \u003e value else 'black'\n",
+        "    color = 'red' if float(val.strip(' %')) > value else 'black'\n",
         "    return 'color: %s' % color\n",
         "\n",
         "def flag_warning_revenue(val, value):\n",
         "    \"\"\"\n",
         "    Color a cell in red if its value is smaller than the value\n",
         "    in input\n",
         "    \"\"\"\n",
-        "    color = 'red' if float(val.strip(' %')) \u003c value else 'black'\n",
+        "    color = 'red' if float(val.strip(' %')) < value else 'black'\n",
         "    return 'color: %s' % color\n",
         "\n",
         "\n",
@@ -399,8 +455,7 @@
         "        subset=[\"Revenue covered by treatment group\"]).apply(\n",
         "            is_optimal_design, axis=1)\n",
         "\n",
-        "designs_table\n",
-        ""
+        "designs_table\n"
       ]
     },
     {
@@ -412,7 +467,7 @@
       },
       "outputs": [],
       "source": [
-        "#@title Select the design to be used in the experiment  \n",
+        "#@title Select the design to be used in the experiment\n",
         "#@markdown Select the design using the number as displayed in the table in\n",
         "#@markdown the cell called \"Summary of the possible designs\".\n",
         "\n",
@@ -579,8 +634,7 @@
         "    ).configure_title(\n",
         "        fontSize=title_font_size\n",
         "    ).display()\n",
-        "\n",
-        ""
+        "\n"
       ]
     },
     {
@@ -592,7 +646,7 @@
       },
       "outputs": [],
       "source": [
-        "#@title Summary and Results  \n",
+        "#@title Summary and Results\n",
         "\n",
         "\n",
         "print(\"Data in input:\\n\")\n",
@@ -619,7 +673,7 @@
         "\n",
         "print(f\"The design has Power {100 * power_level:.3}+% with Type-I error \" +\n",
         "      f\"{100 *(1 - confidence_level):.3}% for testing H0: iROAS=0 vs \" +\n",
-        "      f\"H1: iROAS \u003e= {final_design['Minimum detectable iROAS'].values[0]}\")"
+        "      f\"H1: iROAS >= {final_design['Minimum detectable iROAS'].values[0]}\")"
       ]
     },
     {
@@ -631,7 +685,7 @@
       },
       "outputs": [],
       "source": [
-        "#@title Report stores for treatment and control separately and write to trix \n",
+        "#@title Report stores for treatment and control separately and write to trix\n",
         "\n",
         "#@markdown ###Insert the name google sheets in which we will save the data.\n",
         "#@markdown The trix contains 4 worksheets, named:\n",
@@ -699,30 +753,14 @@
       },
       "outputs": [],
       "source": [
-        "\n",
-        ""
+        "\n"
       ]
     }
   ],
   "metadata": {
     "colab": {
-      "collapsed_sections": [],
-      "last_runtime": {
-        "build_target": "//research/colab/notebook:notebook_backend_py3",
-        "kind": "private"
-      },
-      "name": "Design Colab For TBR using Matched Markets.ipynb",
       "private_outputs": true,
-      "provenance": [
-        {
-          "file_id": "1FZKmwYwJq6Pha6jva_ZlXbzkYR-FsUsZ",
-          "timestamp": 1599811516392
-        },
-        {
-          "file_id": "12HvShKlSGzigZh_J71uxV8_l_l8JacCW",
-          "timestamp": 1595575793650
-        }
-      ]
+      "provenance": []
     },
     "kernelspec": {
       "display_name": "Python 3",
@@ -731,4 +769,4 @@
   },
   "nbformat": 4,
   "nbformat_minor": 0
-}
+}