-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfinal_testing.py
More file actions
41 lines (31 loc) · 1.29 KB
/
final_testing.py
File metadata and controls
41 lines (31 loc) · 1.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import helper_funcs as hf
import pandas as pd
import joblib
import gzip
def generate_predictions(tf_id, test_chroms=(3, 10, 17), markov_order=1) -> None:
    """Score each test chromosome with a saved model and write the results.

    Loads ``{tf_id}_rf_model.joblib`` (path relative to the current working
    directory). For every entry of ``test_chroms`` it builds a feature
    matrix via ``hf.build_feature_matrix``, calls the model's
    ``predict_proba`` and keeps column index 1 of the result, then writes a
    tab-separated, gzip-compressed table named
    ``{tf_id}_chr{N}_predictions.tsv.gz`` with the columns ``chrom``,
    ``start``, ``end`` and ``probability``.

    Args:
        tf_id: Identifier interpolated into the model file name, two of the
            feature column names and the output file names.
        test_chroms: Iterable of chromosome numbers; one output file is
            written per entry. The default is an immutable tuple so the
            default object cannot be shared and mutated between calls.
        markov_order: Passed through unchanged to
            ``hf.build_feature_matrix``.

    Returns:
        None. Output goes to disk; progress messages are printed.

    Raises:
        Nothing is caught here: any exception from loading the model,
        building features, predicting or writing propagates to the caller.
    """
    model_path = f"{tf_id}_rf_model.joblib"
    print(f"Loading model: {model_path}...")
    # NOTE: joblib.load unpickles the file and can therefore execute
    # arbitrary code; only load model files from a trusted source.
    model = joblib.load(model_path)

    # The column selection does not depend on the chromosome, so build it
    # once instead of on every loop iteration.
    # NOTE(review): 'log_odds{tf_id}' has no underscore before the id while
    # 'FIMO_{tf_id}' does -- confirm this matches the column names that
    # hf.build_feature_matrix actually produces.
    features = ['ATAC', f'log_odds{tf_id}', f'FIMO_{tf_id}', 'PhastCons']

    for chrom_num in test_chroms:
        chr_id = f"chr{chrom_num}"
        print(f"Processing {chr_id}...")

        test_df = hf.build_feature_matrix([chrom_num], tf_id, markov_order)

        # Work on a copy so the ATAC re-encoding below does not touch test_df.
        X_test = test_df[features].copy()
        # Encode the ATAC labels numerically. Series.map with a dict turns
        # any label other than 'B' or 'U' into NaN.
        X_test['ATAC'] = X_test['ATAC'].map({'B': 1, 'U': 0})

        # Keep only column index 1 of the per-class probability matrix.
        probs = model.predict_proba(X_test)[:, 1]

        submission_df = pd.DataFrame({
            'chrom': chr_id,  # scalar, broadcast to every row
            'start': test_df['start'],
            'end': test_df['end'],
            'probability': probs,
        })

        output_name = f"{tf_id}_{chr_id}_predictions.tsv.gz"
        submission_df.to_csv(output_name, sep='\t', index=False, compression='gzip')
        print(f"Saved: {output_name}")
if __name__ == "__main__":
    # Identifiers are handled independently: when one fails, the error is
    # reported on stdout and the run continues with the next identifier.
    tf_ids = ('CTCF', 'REST', 'EP300')
    for tf in tf_ids:
        try:
            generate_predictions(tf)
        except Exception as e:
            print(f"Could not generate for {tf}: {e}")