Skip to content

Commit 82cb0cf

Browse files
feat: follow-up to reproducibility PR (#199)
1 parent 5abc527 commit 82cb0cf

File tree

3 files changed

+2
-10
lines changed

3 files changed

+2
-10
lines changed

mostlyai/qa/_distances.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
import time
1717
import numpy as np
1818
import networkx as nx
19-
import xxhash
2019
from sklearn.neighbors import NearestNeighbors
2120
from joblib import cpu_count
2221

@@ -159,13 +158,9 @@ def split_columns_into_random_groups(X, k):
159158
"""
160159
n_cols = X.shape[1]
161160

162-
# create a deterministic seed based on the input matrix
163-
seed = xxhash.xxh32(X.sum()).intdigest()
164-
rng = np.random.default_rng(seed)
165-
166161
# shuffle all column indices
167162
all_indices = np.arange(n_cols)
168-
rng.shuffle(all_indices)
163+
np.random.shuffle(all_indices)
169164

170165
# evenly divide shuffled indices into k groups
171166
base_size = n_cols // k
@@ -228,7 +223,6 @@ def correlation_graph(X):
228223
n_clusters=k,
229224
affinity="precomputed", # uses adj_matrix directly as similarity
230225
assign_labels="kmeans", # clustering on the embedding
231-
random_state=42,
232226
)
233227
try:
234228
labels = sc.fit_predict(adj_matrix)

mostlyai/qa/_embeddings.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,6 @@ def encode_numerics(
5252
# normalize numeric features based on trn
5353
qt_scaler = QuantileTransformer(
5454
output_distribution="uniform",
55-
random_state=42,
5655
n_quantiles=min(100, len(trn) + len(hol)),
5756
)
5857
ori_num = pd.concat([trn_num[col], hol_num[col]]) if len(hol) > 0 else pd.DataFrame(trn_num[col])

mostlyai/qa/_similarity.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@ def calculate_mean_auc(embeds1, embeds2):
8282
y = np.hstack((labels1, labels2))
8383

8484
# initialize the cross-validator
85-
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
85+
kf = StratifiedKFold(n_splits=10, shuffle=True)
8686

8787
# initialize a list to store AUC scores
8888
auc_scores = []
@@ -99,7 +99,6 @@ def calculate_mean_auc(embeds1, embeds2):
9999
max_depth=10,
100100
min_samples_leaf=5,
101101
max_features=0.5,
102-
random_state=42,
103102
)
104103
clf.fit(X_train, y_train)
105104

0 commit comments

Comments
 (0)