Skip to content

Commit 06884de

Browse files
shuangwu5 and mplatzer authored
feat: add a helper for initializing logging
Co-authored-by: Michael Platzer <michael.platzer@gmail.com>
1 parent faa8c9e commit 06884de

File tree

4 files changed

+66
-32
lines changed

4 files changed

+66
-32
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ import pandas as pd
2727
import webbrowser
2828
from mostlyai import qa
2929

30+
# initialize logging to stdout
31+
qa.init_logging()
32+
3033
# fetch original + synthetic data
3134
base_url = "https://github.com/mostly-ai/mostlyai-qa/raw/refs/heads/main/examples/quick-start"
3235
syn = pd.read_csv(f"{base_url}/census2k-syn_mostly.csv.gz")
@@ -53,6 +56,9 @@ webbrowser.open(f"file://{report_path.absolute()}")
5356
```python
5457
from mostlyai import qa
5558

59+
# initialize logging to stdout
60+
qa.init_logging()
61+
5662
# analyze single-table data
5763
report_path, metrics = qa.report(
5864
syn_tgt_data = synthetic_df,

examples/benchmark.ipynb

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,16 @@
1414
},
1515
{
1616
"cell_type": "code",
17-
"execution_count": 1,
17+
"execution_count": null,
1818
"id": "initial_id",
1919
"metadata": {},
20+
"outputs": [],
2021
"source": [
2122
"import pandas as pd\n",
2223
"from mostlyai import qa\n",
2324
"\n",
25+
"qa.init_logging() # initialize logging to stdout\n",
26+
"\n",
2427
"path = \"https://github.com/mostly-ai/paper-fidelity-accuracy/raw/refs/heads/main/data/\"\n",
2528
"datasets = [\"adult\", \"bank-marketing\", \"credit-default\", \"online-shoppers\"]\n",
2629
"synthesizers = [\n",
@@ -67,8 +70,7 @@
6770
"\n",
6871
" df = pd.DataFrame(rows)\n",
6972
" df.to_csv(\"benchmark-examples.csv\", index=False)"
70-
],
71-
"outputs": []
73+
]
7274
},
7375
{
7476
"cell_type": "markdown",
@@ -82,9 +84,10 @@
8284
},
8385
{
8486
"cell_type": "code",
85-
"execution_count": 2,
87+
"execution_count": null,
8688
"id": "98420371-8893-4877-b7f0-c083c923fdd9",
8789
"metadata": {},
90+
"outputs": [],
8891
"source": [
8992
"import pandas as pd\n",
9093
"\n",
@@ -97,18 +100,17 @@
97100
")\n",
98101
"df[\"dcr_ratio\"] = df[\"distances_dcr_training\"] / df[\"distances_dcr_holdout\"]\n",
99102
"df"
100-
],
101-
"outputs": []
103+
]
102104
},
103105
{
104106
"cell_type": "code",
105-
"execution_count": 3,
107+
"execution_count": null,
106108
"id": "258a91c1-9895-437e-b8b9-71b98711d332",
107109
"metadata": {},
110+
"outputs": [],
108111
"source": [
109112
"import matplotlib.pyplot as plt\n",
110113
"\n",
111-
"\n",
112114
"def plot_dataset(df, dataset):\n",
113115
" # Define the color mapping for each synthesizer\n",
114116
" color_mapping = {\n",
@@ -160,58 +162,49 @@
160162
"\n",
161163
" plt.tight_layout()\n",
162164
" # plt.savefig('fig_adult.png')"
163-
],
164-
"outputs": []
165+
]
165166
},
166167
{
167168
"cell_type": "code",
168-
"execution_count": 4,
169+
"execution_count": null,
169170
"id": "8ee02e02-2370-480f-a76e-1e7c044a725e",
170171
"metadata": {},
172+
"outputs": [],
171173
"source": [
172174
"plot_dataset(df.loc[(df.dataset == \"adult\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Adult\")"
173-
],
174-
"outputs": []
175+
]
175176
},
176177
{
177178
"cell_type": "code",
178-
"execution_count": 5,
179+
"execution_count": null,
179180
"id": "f1fc39df-6e95-43f0-bc76-45d2a4a4f852",
180181
"metadata": {},
182+
"outputs": [],
181183
"source": [
182184
"plot_dataset(df.loc[(df.dataset == \"bank-marketing\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Bank Marketing\")"
183-
],
184-
"outputs": []
185+
]
185186
},
186187
{
187188
"cell_type": "code",
188-
"execution_count": 6,
189+
"execution_count": null,
189190
"id": "2ab32edf-61f0-4550-a942-0e679df8efe1",
190191
"metadata": {},
192+
"outputs": [],
191193
"source": [
192194
"plot_dataset(df.loc[(df.dataset == \"credit-default\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Credit Default\")"
193-
],
194-
"outputs": []
195+
]
195196
},
196197
{
197198
"cell_type": "code",
198-
"execution_count": 7,
199+
"execution_count": null,
199200
"id": "37778260-1535-481b-abf4-3bf493a49127",
200201
"metadata": {},
202+
"outputs": [],
201203
"source": [
202204
"plot_dataset(\n",
203205
" df.loc[(df.dataset == \"online-shoppers\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Online Shoppers\"\n",
204206
")"
205-
],
206-
"outputs": []
207-
},
208-
{
209-
"cell_type": "code",
210-
"execution_count": null,
211-
"id": "9c274698-c59d-4a8e-9471-aa607abe8278",
212-
"metadata": {},
213-
"source": [],
214-
"outputs": []
207+
]
215208
}
216209
],
217210
"metadata": {
@@ -230,7 +223,7 @@
230223
"name": "python",
231224
"nbconvert_exporter": "python",
232225
"pygments_lexer": "ipython3",
233-
"version": "3.11.7"
226+
"version": "3.12.3"
234227
}
235228
},
236229
"nbformat": 4,

mostlyai/qa/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@
1717
import pandas as pd
1818
from packaging.version import Version
1919

20+
from mostlyai.qa.logging import init_logging
2021
from mostlyai.qa.reporting import report
2122
from mostlyai.qa.reporting_from_statistics import report_from_statistics
2223

23-
__all__ = ["report", "report_from_statistics"]
24+
__all__ = ["report", "report_from_statistics", "init_logging"]
2425
__version__ = "1.5.0"
2526

2627
os.environ["TOKENIZERS_PARALLELISM"] = "false"

mostlyai/qa/logging.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright 2025 MOSTLY AI
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import sys
16+
import logging
17+
18+
# Package-level logger: stripping the last dotted component from this module's
# name gives the name of the enclosing package (mostlyai.qa).
_LOG = logging.getLogger(__name__.rsplit(".", 1)[0])


def init_logging() -> None:
    """
    Send the package logger's INFO-and-above records to stdout.

    The call is a no-op when ``hasHandlers()`` already reports a handler for
    the package logger. Per the standard library, that check also considers
    ancestor loggers while propagation is enabled, so an existing logging
    configuration is left untouched. Otherwise a single stdout handler is
    attached, the logger level is set to INFO, and propagation to ancestor
    loggers is switched off.
    """
    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setLevel(logging.INFO)
    stdout_handler.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)-7s: %(message)s"))

    # a handler is already in place -> keep the current configuration as it is
    if _LOG.hasHandlers():
        return

    _LOG.setLevel(logging.INFO)
    _LOG.propagate = False
    _LOG.addHandler(stdout_handler)

0 commit comments

Comments
 (0)