|
14 | 14 | }, |
15 | 15 | { |
16 | 16 | "cell_type": "code", |
17 | | - "execution_count": 1, |
| 17 | + "execution_count": null, |
18 | 18 | "id": "initial_id", |
19 | 19 | "metadata": {}, |
| 20 | + "outputs": [], |
20 | 21 | "source": [ |
21 | 22 | "import pandas as pd\n", |
22 | 23 | "from mostlyai import qa\n", |
23 | 24 | "\n", |
| 25 | + "qa.init_logging() # initialize logging to stdout\n", |
| 26 | + "\n", |
24 | 27 | "path = \"https://github.com/mostly-ai/paper-fidelity-accuracy/raw/refs/heads/main/data/\"\n", |
25 | 28 | "datasets = [\"adult\", \"bank-marketing\", \"credit-default\", \"online-shoppers\"]\n", |
26 | 29 | "synthesizers = [\n", |
|
67 | 70 | "\n", |
68 | 71 | " df = pd.DataFrame(rows)\n", |
69 | 72 | " df.to_csv(\"benchmark-examples.csv\", index=False)" |
70 | | - ], |
71 | | - "outputs": [] |
| 73 | + ] |
72 | 74 | }, |
73 | 75 | { |
74 | 76 | "cell_type": "markdown", |
|
82 | 84 | }, |
83 | 85 | { |
84 | 86 | "cell_type": "code", |
85 | | - "execution_count": 2, |
| 87 | + "execution_count": null, |
86 | 88 | "id": "98420371-8893-4877-b7f0-c083c923fdd9", |
87 | 89 | "metadata": {}, |
| 90 | + "outputs": [], |
88 | 91 | "source": [ |
89 | 92 | "import pandas as pd\n", |
90 | 93 | "\n", |
|
97 | 100 | ")\n", |
98 | 101 | "df[\"dcr_ratio\"] = df[\"distances_dcr_training\"] / df[\"distances_dcr_holdout\"]\n", |
99 | 102 | "df" |
100 | | - ], |
101 | | - "outputs": [] |
| 103 | + ] |
102 | 104 | }, |
103 | 105 | { |
104 | 106 | "cell_type": "code", |
105 | | - "execution_count": 3, |
| 107 | + "execution_count": null, |
106 | 108 | "id": "258a91c1-9895-437e-b8b9-71b98711d332", |
107 | 109 | "metadata": {}, |
| 110 | + "outputs": [], |
108 | 111 | "source": [ |
109 | 112 | "import matplotlib.pyplot as plt\n", |
110 | 113 | "\n", |
111 | | - "\n", |
112 | 114 | "def plot_dataset(df, dataset):\n", |
113 | 115 | " # Define the color mapping for each synthesizer\n", |
114 | 116 | " color_mapping = {\n", |
|
160 | 162 | "\n", |
161 | 163 | " plt.tight_layout()\n", |
162 | 164 | " # plt.savefig('fig_adult.png')" |
163 | | - ], |
164 | | - "outputs": [] |
| 165 | + ] |
165 | 166 | }, |
166 | 167 | { |
167 | 168 | "cell_type": "code", |
168 | | - "execution_count": 4, |
| 169 | + "execution_count": null, |
169 | 170 | "id": "8ee02e02-2370-480f-a76e-1e7c044a725e", |
170 | 171 | "metadata": {}, |
| 172 | + "outputs": [], |
171 | 173 | "source": [ |
172 | 174 | "plot_dataset(df.loc[(df.dataset == \"adult\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Adult\")" |
173 | | - ], |
174 | | - "outputs": [] |
| 175 | + ] |
175 | 176 | }, |
176 | 177 | { |
177 | 178 | "cell_type": "code", |
178 | | - "execution_count": 5, |
| 179 | + "execution_count": null, |
179 | 180 | "id": "f1fc39df-6e95-43f0-bc76-45d2a4a4f852", |
180 | 181 | "metadata": {}, |
| 182 | + "outputs": [], |
181 | 183 | "source": [ |
182 | 184 | "plot_dataset(df.loc[(df.dataset == \"bank-marketing\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Bank Marketing\")" |
183 | | - ], |
184 | | - "outputs": [] |
| 185 | + ] |
185 | 186 | }, |
186 | 187 | { |
187 | 188 | "cell_type": "code", |
188 | | - "execution_count": 6, |
| 189 | + "execution_count": null, |
189 | 190 | "id": "2ab32edf-61f0-4550-a942-0e679df8efe1", |
190 | 191 | "metadata": {}, |
| 192 | + "outputs": [], |
191 | 193 | "source": [ |
192 | 194 | "plot_dataset(df.loc[(df.dataset == \"credit-default\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Credit Default\")" |
193 | | - ], |
194 | | - "outputs": [] |
| 195 | + ] |
195 | 196 | }, |
196 | 197 | { |
197 | 198 | "cell_type": "code", |
198 | | - "execution_count": 7, |
| 199 | + "execution_count": null, |
199 | 200 | "id": "37778260-1535-481b-abf4-3bf493a49127", |
200 | 201 | "metadata": {}, |
| 202 | + "outputs": [], |
201 | 203 | "source": [ |
202 | 204 | "plot_dataset(\n", |
203 | 205 | " df.loc[(df.dataset == \"online-shoppers\") & ~df.synthesizer.isin([\"ctgan\", \"mostly_e1\"])], \"Online Shoppers\"\n", |
204 | 206 | ")" |
205 | | - ], |
206 | | - "outputs": [] |
207 | | - }, |
208 | | - { |
209 | | - "cell_type": "code", |
210 | | - "execution_count": null, |
211 | | - "id": "9c274698-c59d-4a8e-9471-aa607abe8278", |
212 | | - "metadata": {}, |
213 | | - "source": [], |
214 | | - "outputs": [] |
| 207 | + ] |
215 | 208 | } |
216 | 209 | ], |
217 | 210 | "metadata": { |
|
230 | 223 | "name": "python", |
231 | 224 | "nbconvert_exporter": "python", |
232 | 225 | "pygments_lexer": "ipython3", |
233 | | - "version": "3.11.7" |
| 226 | + "version": "3.12.3" |
234 | 227 | } |
235 | 228 | }, |
236 | 229 | "nbformat": 4, |
|
0 commit comments