Skip to content

Commit d18276b

Browse files
authored
refactor: prefix internal methods and modules with underscore
1 parent 4949f64 commit d18276b

17 files changed

82 additions and 89 deletions (+82 / -89 lines changed)

mostlyai/qa/accuracy.py renamed to mostlyai/qa/_accuracy.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
import scipy.stats
2727
from joblib import Parallel, delayed, parallel_config, cpu_count
2828

29-
from mostlyai.qa.common import (
29+
from mostlyai.qa._common import (
3030
CHARTS_COLORS,
3131
CHARTS_FONTS,
3232
EMPTY_BIN,
@@ -46,7 +46,7 @@
4646
)
4747
from plotly import graph_objs as go
4848

49-
from mostlyai.qa.filesystem import TemporaryWorkspace, Statistics
49+
from mostlyai.qa._filesystem import TemporaryWorkspace, Statistics
5050

5151
_LOG = logging.getLogger(__name__)
5252

mostlyai/qa/common.py renamed to mostlyai/qa/_common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import pandas as pd
2020
from rich.progress import Progress
2121

22-
from mostlyai.qa.filesystem import Statistics
22+
from mostlyai.qa._filesystem import Statistics
2323

2424
_LOG = logging.getLogger(__name__)
2525

mostlyai/qa/distances.py renamed to mostlyai/qa/_distances.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@
1717
import numpy as np
1818
from joblib import cpu_count
1919

20-
from mostlyai.qa.common import (
20+
from mostlyai.qa._common import (
2121
CHARTS_COLORS,
2222
CHARTS_FONTS,
2323
)
24-
from mostlyai.qa.filesystem import TemporaryWorkspace
24+
from mostlyai.qa._filesystem import TemporaryWorkspace
2525
from plotly import graph_objs as go
2626
from sklearn.neighbors import NearestNeighbors
2727

mostlyai/qa/filesystem.py renamed to mostlyai/qa/_filesystem.py

File renamed without changes.

mostlyai/qa/html_report.py renamed to mostlyai/qa/_html_report.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919

2020
import pandas as pd
2121
from jinja2 import Environment, FileSystemLoader
22-
from mostlyai.qa.accuracy import trim_label, filter_uni_acc_for_plotting, filter_biv_acc_for_plotting
23-
from mostlyai.qa.filesystem import TemporaryWorkspace
22+
from mostlyai.qa._accuracy import trim_label, filter_uni_acc_for_plotting, filter_biv_acc_for_plotting
23+
from mostlyai.qa._filesystem import TemporaryWorkspace
2424
from mostlyai.qa.assets import (
2525
HTML_ASSETS_PATH,
2626
read_html_assets,

mostlyai/qa/sampling.py renamed to mostlyai/qa/_sampling.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
import pyarrow as pa
3535
import torch
3636

37-
from mostlyai.qa.common import (
37+
from mostlyai.qa._common import (
3838
CTX_COLUMN_PREFIX,
3939
TGT_COLUMN_PREFIX,
4040
NXT_COLUMN_PREFIX,

mostlyai/qa/similarity.py renamed to mostlyai/qa/_similarity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
from sklearn.decomposition import PCA
2020
from sklearn.metrics.pairwise import cosine_similarity
2121

22-
from mostlyai.qa.common import (
22+
from mostlyai.qa._common import (
2323
CHARTS_FONTS,
2424
CHARTS_COLORS,
2525
)
26-
from mostlyai.qa.filesystem import TemporaryWorkspace
26+
from mostlyai.qa._filesystem import TemporaryWorkspace
2727
import scipy.stats
2828
from sklearn.model_selection import StratifiedKFold
2929
from sklearn.linear_model import LogisticRegression

mostlyai/qa/report.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
import pandas as pd
2121
from pandas.core.dtypes.common import is_numeric_dtype, is_datetime64_dtype
2222

23-
from mostlyai.qa import distances, similarity, html_report
24-
from mostlyai.qa.accuracy import (
23+
from mostlyai.qa import _distances, _similarity, _html_report
24+
from mostlyai.qa._accuracy import (
2525
binning_data,
2626
calculate_correlations,
2727
plot_store_correlation_matrices,
@@ -37,8 +37,8 @@
3737
plot_store_bivariates,
3838
)
3939
from mostlyai.qa.metrics import Metrics, Accuracy, Similarity, Distances
40-
from mostlyai.qa.sampling import calculate_embeddings, pull_data_for_accuracy, pull_data_for_embeddings
41-
from mostlyai.qa.common import (
40+
from mostlyai.qa._sampling import calculate_embeddings, pull_data_for_accuracy, pull_data_for_embeddings
41+
from mostlyai.qa._common import (
4242
determine_data_size,
4343
ProgressCallback,
4444
PrerequisiteNotMetError,
@@ -49,7 +49,7 @@
4949
REPORT_CREDITS,
5050
ProgressCallbackWrapper,
5151
)
52-
from mostlyai.qa.filesystem import Statistics, TemporaryWorkspace
52+
from mostlyai.qa._filesystem import Statistics, TemporaryWorkspace
5353

5454
_LOG = logging.getLogger(__name__)
5555

@@ -166,7 +166,7 @@ def report(
166166
except PrerequisiteNotMetError as err:
167167
_LOG.info(err)
168168
statistics.mark_early_exit()
169-
html_report.store_early_exit_report(report_path)
169+
_html_report.store_early_exit_report(report_path)
170170
return report_path, None
171171

172172
# prepare datasets for accuracy
@@ -217,7 +217,7 @@ def report(
217217
syn[col] = syn[col].astype(trn[col].dtype)
218218

219219
_LOG.info("report accuracy and correlations")
220-
acc_uni, acc_biv, corr_trn = report_accuracy_and_correlations(
220+
acc_uni, acc_biv, corr_trn = _report_accuracy_and_correlations(
221221
trn=trn,
222222
syn=syn,
223223
statistics=statistics,
@@ -286,7 +286,7 @@ def report(
286286
progress.update(completed=80, total=100)
287287

288288
_LOG.info("report similarity")
289-
sim_cosine_trn_hol, sim_cosine_trn_syn, sim_auc_trn_hol, sim_auc_trn_syn = report_similarity(
289+
sim_cosine_trn_hol, sim_cosine_trn_syn, sim_auc_trn_hol, sim_auc_trn_syn = _report_similarity(
290290
syn_embeds=syn_embeds,
291291
trn_embeds=trn_embeds,
292292
hol_embeds=hol_embeds,
@@ -296,15 +296,15 @@ def report(
296296
progress.update(completed=90, total=100)
297297

298298
_LOG.info("report distances")
299-
dcr_trn, dcr_hol = report_distances(
299+
dcr_trn, dcr_hol = _report_distances(
300300
syn_embeds=syn_embeds,
301301
trn_embeds=trn_embeds,
302302
hol_embeds=hol_embeds,
303303
workspace=workspace,
304304
)
305305
progress.update(completed=99, total=100)
306306

307-
metrics = calculate_metrics(
307+
metrics = _calculate_metrics(
308308
acc_uni=acc_uni,
309309
acc_biv=acc_biv,
310310
dcr_trn=dcr_trn,
@@ -329,7 +329,7 @@ def report(
329329
"report_extra_info": report_extra_info,
330330
}
331331
statistics.store_meta(meta=meta)
332-
html_report.store_report(
332+
_html_report.store_report(
333333
report_path=report_path,
334334
report_type="model_report",
335335
workspace=workspace,
@@ -343,7 +343,7 @@ def report(
343343
return report_path, metrics
344344

345345

346-
def calculate_metrics(
346+
def _calculate_metrics(
347347
*,
348348
acc_uni: pd.DataFrame | None = None,
349349
acc_biv: pd.DataFrame | None = None,
@@ -419,7 +419,7 @@ def calculate_metrics(
419419
)
420420

421421

422-
def report_accuracy_and_correlations(
422+
def _report_accuracy_and_correlations(
423423
*,
424424
trn: pd.DataFrame,
425425
syn: pd.DataFrame,
@@ -529,7 +529,7 @@ def report_accuracy_and_correlations(
529529
return acc_uni, acc_biv, trn_corr
530530

531531

532-
def report_similarity(
532+
def _report_similarity(
533533
*,
534534
syn_embeds: np.ndarray,
535535
trn_embeds: np.ndarray,
@@ -538,17 +538,17 @@ def report_similarity(
538538
statistics: Statistics,
539539
) -> tuple[np.float64 | None, np.float64, np.float64 | None, np.float64]:
540540
_LOG.info("calculate centroid similarities")
541-
sim_cosine_trn_hol, sim_cosine_trn_syn = similarity.calculate_cosine_similarities(
541+
sim_cosine_trn_hol, sim_cosine_trn_syn = _similarity.calculate_cosine_similarities(
542542
syn_embeds=syn_embeds, trn_embeds=trn_embeds, hol_embeds=hol_embeds
543543
)
544544

545545
_LOG.info("calculate discriminator AUC")
546-
sim_auc_trn_hol, sim_auc_trn_syn = similarity.calculate_discriminator_auc(
546+
sim_auc_trn_hol, sim_auc_trn_syn = _similarity.calculate_discriminator_auc(
547547
syn_embeds=syn_embeds, trn_embeds=trn_embeds, hol_embeds=hol_embeds
548548
)
549549

550550
_LOG.info("plot and store PCA similarity contours")
551-
pca_model, _, trn_pca, hol_pca = similarity.plot_store_similarity_contours(
551+
pca_model, _, trn_pca, hol_pca = _similarity.plot_store_similarity_contours(
552552
syn_embeds=syn_embeds, trn_embeds=trn_embeds, hol_embeds=hol_embeds, workspace=workspace
553553
)
554554

@@ -566,15 +566,15 @@ def report_similarity(
566566
)
567567

568568

569-
def report_distances(
569+
def _report_distances(
570570
*,
571571
syn_embeds: np.ndarray,
572572
trn_embeds: np.ndarray,
573573
hol_embeds: np.ndarray | None,
574574
workspace: TemporaryWorkspace,
575575
) -> tuple[np.ndarray, np.ndarray | None]:
576-
dcr_trn, dcr_hol = distances.calculate_distances(
576+
dcr_trn, dcr_hol = _distances.calculate_distances(
577577
syn_embeds=syn_embeds, trn_embeds=trn_embeds, hol_embeds=hol_embeds
578578
)
579-
distances.plot_store_distances(dcr_trn, dcr_hol, workspace)
579+
_distances.plot_store_distances(dcr_trn, dcr_hol, workspace)
580580
return dcr_trn, dcr_hol

mostlyai/qa/report_from_statistics.py

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,9 @@
1818
import numpy as np
1919
import pandas as pd
2020

21-
import mostlyai.qa.accuracy
22-
import mostlyai.qa.sampling
23-
from mostlyai.qa import accuracy, similarity, html_report
24-
from mostlyai.qa.sampling import pull_data_for_embeddings, calculate_embeddings
25-
from mostlyai.qa.common import (
21+
from mostlyai.qa import _accuracy, _sampling, _similarity, _html_report
22+
from mostlyai.qa._sampling import pull_data_for_embeddings, calculate_embeddings
23+
from mostlyai.qa._common import (
2624
ProgressCallback,
2725
PrerequisiteNotMetError,
2826
check_min_sample_size,
@@ -31,7 +29,7 @@
3129
REPORT_CREDITS,
3230
ProgressCallbackWrapper,
3331
)
34-
from mostlyai.qa.filesystem import Statistics, TemporaryWorkspace
32+
from mostlyai.qa._filesystem import Statistics, TemporaryWorkspace
3533

3634
_LOG = logging.getLogger(__name__)
3735

@@ -72,7 +70,7 @@ def report_from_statistics(
7270
check_statistics_prerequisite(statistics)
7371
check_min_sample_size(syn_sample_size, 100, "synthetic")
7472
except PrerequisiteNotMetError:
75-
html_report.store_early_exit_report(report_path)
73+
_html_report.store_early_exit_report(report_path)
7674
return report_path
7775

7876
meta = statistics.load_meta()
@@ -87,7 +85,7 @@ def report_from_statistics(
8785

8886
# prepare data
8987
_LOG.info("sample synthetic data started")
90-
syn = mostlyai.qa.sampling.pull_data_for_accuracy(
88+
syn = _sampling.pull_data_for_accuracy(
9189
df_tgt=syn_tgt_data,
9290
df_ctx=syn_ctx_data,
9391
ctx_primary_key=ctx_primary_key,
@@ -98,7 +96,7 @@ def report_from_statistics(
9896
progress.update(completed=20, total=100)
9997

10098
# calculate and plot accuracy and correlations
101-
acc_uni, acc_biv, corr_trn = report_accuracy_and_correlations_from_statistics(
99+
acc_uni, acc_biv, corr_trn = _report_accuracy_and_correlations_from_statistics(
102100
syn=syn,
103101
statistics=statistics,
104102
workspace=workspace,
@@ -120,7 +118,7 @@ def report_from_statistics(
120118
)
121119

122120
_LOG.info("report similarity")
123-
report_similarity_from_statistics(
121+
_report_similarity_from_statistics(
124122
syn_embeds=syn_embeds,
125123
workspace=workspace,
126124
statistics=statistics,
@@ -136,7 +134,7 @@ def report_from_statistics(
136134
}
137135

138136
# HTML report
139-
html_report.store_report(
137+
_html_report.store_report(
140138
report_path=report_path,
141139
report_type="data_report",
142140
workspace=workspace,
@@ -150,7 +148,7 @@ def report_from_statistics(
150148
return report_path
151149

152150

153-
def report_accuracy_and_correlations_from_statistics(
151+
def _report_accuracy_and_correlations_from_statistics(
154152
*,
155153
syn: pd.DataFrame,
156154
statistics: Statistics,
@@ -161,7 +159,7 @@ def report_accuracy_and_correlations_from_statistics(
161159
syn = syn[bins.keys()].copy()
162160

163161
_LOG.info("calculate synthetic bins")
164-
syn_bin, _ = mostlyai.qa.accuracy.bin_data(syn, bins)
162+
syn_bin, _ = _accuracy.bin_data(syn, bins)
165163

166164
_LOG.info("load univariates and bivariates")
167165
acc_uni = statistics.load_univariate_accuracies()
@@ -180,31 +178,31 @@ def report_accuracy_and_correlations_from_statistics(
180178
corr_trn = statistics.load_correlations()
181179

182180
_LOG.info("calculate synthetic correlations")
183-
corr_syn = mostlyai.qa.accuracy.calculate_correlations(binned=syn_bin, corr_cols=corr_trn.columns)
181+
corr_syn = _accuracy.calculate_correlations(binned=syn_bin, corr_cols=corr_trn.columns)
184182

185183
_LOG.info("plot correlations")
186-
mostlyai.qa.accuracy.plot_store_correlation_matrices(corr_trn=corr_trn, corr_syn=corr_syn, workspace=workspace)
184+
_accuracy.plot_store_correlation_matrices(corr_trn=corr_trn, corr_syn=corr_syn, workspace=workspace)
187185

188186
_LOG.info("filter columns for plotting")
189187
syn = syn[acc_uni["column"]]
190188
acc_cols = list(set(acc_uni["column"]) | set(acc_biv["col1"]) | set(acc_biv["col2"]))
191189
syn_bin = syn_bin[acc_cols]
192190

193191
_LOG.info("calculate numeric KDEs for synthetic")
194-
syn_num_kdes = accuracy.calculate_numeric_uni_kdes(df=syn, trn_kdes=trn_num_kdes)
192+
syn_num_kdes = _accuracy.calculate_numeric_uni_kdes(df=syn, trn_kdes=trn_num_kdes)
195193

196194
_LOG.info("calculate categorical counts for synthetic")
197-
syn_cat_uni_cnts = accuracy.calculate_categorical_uni_counts(
195+
syn_cat_uni_cnts = _accuracy.calculate_categorical_uni_counts(
198196
df=syn,
199197
trn_col_counts=trn_cat_uni_cnts,
200198
hash_rare_values=False,
201199
)
202200

203201
_LOG.info("calculate bin counts for synthetic")
204-
syn_bin_cnts_uni, syn_bin_cnts_biv = accuracy.calculate_bin_counts(syn_bin)
202+
syn_bin_cnts_uni, syn_bin_cnts_biv = _accuracy.calculate_bin_counts(syn_bin)
205203

206204
_LOG.info("plot univariates")
207-
accuracy.plot_store_univariates(
205+
_accuracy.plot_store_univariates(
208206
trn_num_kdes=trn_num_kdes,
209207
syn_num_kdes=syn_num_kdes,
210208
trn_cat_cnts=trn_cat_uni_cnts,
@@ -217,7 +215,7 @@ def report_accuracy_and_correlations_from_statistics(
217215
)
218216

219217
_LOG.info("plot bivariates")
220-
accuracy.plot_store_bivariates(
218+
_accuracy.plot_store_bivariates(
221219
trn_cnts_uni=trn_bin_cnts_uni,
222220
syn_cnts_uni=syn_bin_cnts_uni,
223221
trn_cnts_biv=trn_bin_cnts_biv,
@@ -230,7 +228,7 @@ def report_accuracy_and_correlations_from_statistics(
230228
return acc_uni, acc_biv, corr_trn
231229

232230

233-
def report_similarity_from_statistics(
231+
def _report_similarity_from_statistics(
234232
*,
235233
syn_embeds: np.ndarray,
236234
statistics: Statistics,
@@ -246,6 +244,6 @@ def report_similarity_from_statistics(
246244
trn_pca, hol_pca = statistics.load_trn_hol_pcas()
247245

248246
_LOG.info("plot and store PCA similarity contours")
249-
similarity.plot_store_similarity_contours(
247+
_similarity.plot_store_similarity_contours(
250248
pca_model=pca_model, trn_pca=trn_pca, hol_pca=hol_pca, syn_embeds=syn_embeds, workspace=workspace
251249
)

tests/unit/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import pandas as pd
1919
import pytest
2020

21-
from mostlyai.qa.filesystem import TemporaryWorkspace
21+
from mostlyai.qa._filesystem import TemporaryWorkspace
2222

2323

2424
@pytest.fixture()

0 commit comments

Comments
 (0)