feat: add a helper for initializing logging

shuangwu5 · mplatzer · web-flow · commit 06884ded06bb · 2025-01-27T16:20:09.000+01:00
Co-authored-by: Michael Platzer <michael.platzer@gmail.com>
diff --git a/README.md b/README.md
@@ -27,6 +27,9 @@ import pandas as pd
 import webbrowser
 from mostlyai import qa
 
+# initialize logging to stdout
+qa.init_logging()
+
 # fetch original + synthetic data
 base_url = "https://github.com/mostly-ai/mostlyai-qa/raw/refs/heads/main/examples/quick-start"
 syn = pd.read_csv(f"{base_url}/census2k-syn_mostly.csv.gz")
@@ -53,6 +56,9 @@ webbrowser.open(f"file://{report_path.absolute()}")
 ```python
 from mostlyai import qa
 
+# initialize logging to stdout
+qa.init_logging()
+
 # analyze single-table data
 report_path, metrics = qa.report(
     syn_tgt_data = synthetic_df,
diff --git a/examples/benchmark.ipynb b/examples/benchmark.ipynb
@@ -14,13 +14,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "initial_id",
    "metadata": {},
+   "outputs": [],
    "source": [
     "import pandas as pd\n",
     "from mostlyai import qa\n",
     "\n",
+    "qa.init_logging()  # initialize logging to stdout\n",
+    "\n",
     "path = \"https://github.com/mostly-ai/paper-fidelity-accuracy/raw/refs/heads/main/data/\"\n",
     "datasets = [\"adult\", \"bank-marketing\", \"credit-default\", \"online-shoppers\"]\n",
     "synthesizers = [\n",
@@ -67,8 +70,7 @@
     "\n",
     "    df = pd.DataFrame(rows)\n",
     "    df.to_csv(\"benchmark-examples.csv\", index=False)"
-   ],
-   "outputs": []
+   ]
   },
   {
    "cell_type": "markdown",
@@ -82,9 +84,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "98420371-8893-4877-b7f0-c083c923fdd9",
    "metadata": {},
+   "outputs": [],
    "source": [
     "import pandas as pd\n",
     "\n",
@@ -97,18 +100,17 @@
     ")\n",
     "df[\"dcr_ratio\"] = df[\"distances_dcr_training\"] / df[\"distances_dcr_holdout\"]\n",
     "df"
-   ],
-   "outputs": []
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "258a91c1-9895-437e-b8b9-71b98711d332",
    "metadata": {},
+   "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
     "\n",
-    "\n",
     "def plot_dataset(df, dataset):\n",
     "    # Define the color mapping for each synthesizer\n",
     "    color_mapping = {\n",
@@ -160,58 +162,49 @@
     "\n",
     "    plt.tight_layout()\n",
     "    # plt.savefig('fig_adult.png')"
-   ],
-   "outputs": []
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "8ee02e02-2370-480f-a76e-1e7c044a725e",
    "metadata": {},
+   "outputs": [],
    "source": [
     "plot_dataset(df.loc[(df.dataset == \"adult\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Adult\")"
-   ],
-   "outputs": []
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "f1fc39df-6e95-43f0-bc76-45d2a4a4f852",
    "metadata": {},
+   "outputs": [],
    "source": [
     "plot_dataset(df.loc[(df.dataset == \"bank-marketing\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Bank Marketing\")"
-   ],
-   "outputs": []
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "2ab32edf-61f0-4550-a942-0e679df8efe1",
    "metadata": {},
+   "outputs": [],
    "source": [
     "plot_dataset(df.loc[(df.dataset == \"credit-default\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Credit Default\")"
-   ],
-   "outputs": []
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "37778260-1535-481b-abf4-3bf493a49127",
    "metadata": {},
+   "outputs": [],
    "source": [
     "plot_dataset(\n",
     "    df.loc[(df.dataset == \"online-shoppers\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Online Shoppers\"\n",
     ")"
-   ],
-   "outputs": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "9c274698-c59d-4a8e-9471-aa607abe8278",
-   "metadata": {},
-   "source": [],
-   "outputs": []
+   ]
   }
  ],
  "metadata": {
@@ -230,7 +223,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.7"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/mostlyai/qa/__init__.py b/mostlyai/qa/__init__.py
@@ -17,10 +17,11 @@
 import pandas as pd
 from packaging.version import Version
 
+from mostlyai.qa.logging import init_logging
 from mostlyai.qa.reporting import report
 from mostlyai.qa.reporting_from_statistics import report_from_statistics
 
-__all__ = ["report", "report_from_statistics"]
+__all__ = ["report", "report_from_statistics", "init_logging"]
 __version__ = "1.5.0"
 
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
diff --git a/mostlyai/qa/logging.py b/mostlyai/qa/logging.py
@@ -0,0 +1,34 @@
+# Copyright 2025 MOSTLY AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import logging
+
+_LOG = logging.getLogger(__name__.rsplit(".", 1)[0])  # package-level logger: drop the last dotted component of this module's name (e.g. mostlyai.qa.logging -> mostlyai.qa)
+
+
+def init_logging() -> None:
+    """
+    Initialize logging of this package to stdout at INFO level; a no-op once the package logger has a handler.
+    """
+
+    # check the logger's own handlers only: `hasHandlers()` also consults ancestor loggers, so an already
+    # configured root logger would turn this call into a silent no-op and leave this logger unconfigured
+    if not _LOG.handlers:
+        handler = logging.StreamHandler(stream=sys.stdout)
+        handler.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)-7s: %(message)s"))
+        handler.setLevel(logging.INFO)
+        _LOG.addHandler(handler)
+        _LOG.setLevel(logging.INFO)
+        _LOG.propagate = False  # keep records away from ancestor handlers to avoid duplicate output