-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path03_verify.py
More file actions
26 lines (20 loc) · 927 Bytes
/
03_verify.py
File metadata and controls
26 lines (20 loc) · 927 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import numpy as np
import pandas as pd
# Path of the CSV file that is loaded and checked when this script is run.
filename = 'out/data_withgroups_final.csv'
# Minimum number of rows every group must contain for the check to pass.
threshold = 22
def verify(df, threshold, col_list):
    """Check that every value combination in ``col_list`` occurs often enough.

    The rows of ``df`` are grouped by all columns in ``col_list``. The size of
    the smallest group is compared against ``threshold`` and the outcome is
    printed.

    Rows with missing values (NaN) in a grouped column are counted as groups
    of their own. Dropping them, which is what ``groupby`` does by default,
    would let a unique row slip through the check unnoticed.

    Args:
        df: DataFrame holding the rows to check.
        threshold: Minimum number of rows each group must contain.
        col_list: Labels of the columns to group by.

    Returns:
        True if the smallest group has at least ``threshold`` rows, False
        otherwise. A frame without any rows yields a minimum of 0.
    """
    # size() counts every row of a group regardless of NaN values, and
    # dropna=False keeps groups whose key contains NaN.
    group_lengths = df.groupby(list(col_list), dropna=False).size()

    # Determine the minimum group size among all groups. A frame without rows
    # has no groups at all; report 0 rather than failing inside min().
    v = int(group_lengths.min()) if len(group_lengths) else 0

    # The data is compliant when the smallest group is at least as large as
    # the threshold.
    passed = v >= threshold
    s = 'PASSED' if passed else 'FAILED'
    print(f'Minimum number of matching rows found: {v} ({s})')
    return passed
if __name__ == "__main__":
    # The first CSV column is used as the row index, so it is not among the
    # columns that get grouped.
    data = pd.read_csv(filename, index_col=0)

    # Every remaining column takes part in the grouping.
    columns = data.columns.tolist()
    print('Columns:', columns)

    verify(data, threshold, columns)