Skip to content

Commit 0434321

Browse files
committed
Fix preprocessing logic
1 parent 9ecfc19 commit 0434321

File tree

4 files changed

+100
-8
lines changed

4 files changed

+100
-8
lines changed

brats/preprocessing/mlcube/mlcube.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,4 @@ tasks:
2828
statistics:
2929
parameters:
3030
inputs: {data_path: data/, parameters_file: {type: file, default: parameters.yaml}}
31-
outputs: {output_path: {type: file, default: statistics.yaml}}
31+
outputs: {output_path: {type: file, default: statistics.yaml}}

brats/preprocessing/project/mlcube.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010

1111
class PreprocessTask:
12-
"""Runs preprocessing given the input data path"""
12+
"""Run preprocessing given the input data path"""
1313

1414
@staticmethod
1515
def run(
@@ -26,6 +26,35 @@ def run(
2626
process = subprocess.Popen("./run.sh", cwd=".", env=env)
2727
process.wait()
2828

29+
class SanityCheckTask:
    """Run the sanity check script on the prepared data."""

    @staticmethod
    def run(
        data_path: str, parameters_file: str
    ) -> None:
        """Execute ``sanity_check.py`` in a child process.

        Args:
            data_path: Directory holding the prepared data; forwarded to the
                script as ``--data_path``.
            parameters_file: Accepted for a uniform task signature; it is
                not forwarded to the script.

        Raises:
            subprocess.CalledProcessError: If the script exits with a
                non-zero status (for example when one of its checks fails).
        """
        # Build argv explicitly: formatting a string and splitting it on
        # whitespace would break any path that contains spaces.
        cmd = ["python3", "sanity_check.py", f"--data_path={data_path}"]

        # check=True propagates a failing check instead of silently
        # discarding the child's exit status.
        subprocess.run(cmd, cwd=".", check=True)
42+
43+
44+
class StatisticsTask:
    """Run the statistics script on the prepared data."""

    @staticmethod
    def run(
        data_path: str, parameters_file: str, output_path: str
    ) -> None:
        """Execute ``statistics.py`` in a child process.

        Args:
            data_path: Directory holding the prepared data; forwarded to the
                script as ``--data_path``.
            parameters_file: Accepted for a uniform task signature; it is
                not forwarded to the script.
            output_path: File the script should write its statistics to;
                forwarded as ``--out_file``.

        Raises:
            subprocess.CalledProcessError: If the script exits with a
                non-zero status.
        """
        # Build argv explicitly: formatting a string and splitting it on
        # whitespace would break any path that contains spaces.
        cmd = [
            "python3",
            "statistics.py",
            f"--data_path={data_path}",
            f"--out_file={output_path}",
        ]

        # check=True surfaces a failed run instead of returning as if the
        # statistics file had been produced.
        subprocess.run(cmd, cwd=".", check=True)
57+
2958

3059
@app.command("prepare")
3160
def prepare(
@@ -42,19 +71,15 @@ def sanity_check(
4271
data_path: str = typer.Option(..., "--data_path"),
4372
parameters_file: str = typer.Option(..., "--parameters_file"),
4473
):
45-
pass
74+
SanityCheckTask.run(data_path, parameters_file)
4675

4776
@app.command("statistics")
4877
def statistics(
4978
data_path: str = typer.Option(..., "--data_path"),
5079
parameters_file: str = typer.Option(..., "--parameters_file"),
5180
output_path: str = typer.Option(..., "--output_path")
5281
):
53-
stats = {
54-
"stat": 1
55-
}
56-
with open(output_path, "w") as f:
57-
yaml.dump(stats, f)
82+
StatisticsTask.run(data_path, parameters_file, output_path)
5883

5984

6085
if __name__ == "__main__":
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""Sanity check logic"""
2+
import os
3+
import argparse
4+
5+
6+
def sanity_check(data):
    """Run a few checks to ensure data quality and integrity.

    Args:
        data: Sized collection of prepared data entries.

    Raises:
        AssertionError: If ``data`` is empty.
    """
    # Here you must add all the checks you consider important regarding the
    # state of the data.
    # Raise explicitly instead of using ``assert`` so the check still runs
    # under ``python -O``, which strips assert statements.
    if len(data) == 0:
        raise AssertionError("sanity check failed: data is empty")
14+
15+
16+
if __name__ == "__main__":
    # Command-line entry point: list the prepared data directory and run the
    # sanity checks on the listing.
    parser = argparse.ArgumentParser("Medperf Model Sanity Check Example")
    parser.add_argument(
        "--data_path",
        dest="data",
        type=str,
        # Without this, a missing option leaves args.data as None and
        # os.listdir(None) silently lists the current working directory.
        required=True,
        help="directory containing the prepared data",
    )

    args = parser.parse_args()

    data = os.listdir(args.data)
    sanity_check(data)
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import os
2+
import yaml
3+
import argparse
4+
5+
6+
def get_statistics(data_path):
    """Compute summary statistics about the prepared data.

    These statistics are uploaded to the Medperf platform under the data
    owner's approval. Include every statistic you consider useful for
    determining the nature of the data, but keep in mind that we want to
    keep the data as private as possible.

    Args:
        data_path: Directory containing the prepared data.

    Returns:
        dict: Maps ``"data length"`` to the number of entries found
        directly inside ``data_path``.
    """
    entries = os.listdir(data_path)
    return {"data length": len(entries)}
21+
22+
23+
if __name__ == "__main__":
    # Command-line entry point: compute the statistics for the data
    # directory and dump them as YAML to the requested file.
    parser = argparse.ArgumentParser("MedPerf Statistics Example")
    parser.add_argument(
        "--data_path",
        type=str,
        # Without this, a missing option leaves data_path as None and
        # os.listdir(None) silently lists the current working directory.
        required=True,
        help="directory containing the prepared data",
    )
    parser.add_argument(
        "--out_file",
        dest="out_file",
        type=str,
        # Fail with a usage error rather than a TypeError from open(None).
        required=True,
        help="file to store statistics",
    )

    args = parser.parse_args()

    stats = get_statistics(args.data_path)

    with open(args.out_file, "w") as f:
        yaml.dump(stats, f)

0 commit comments

Comments
 (0)