-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_quick.py
More file actions
111 lines (89 loc) · 3.83 KB
/
test_quick.py
File metadata and controls
111 lines (89 loc) · 3.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python3
"""
Simple test script to verify the privacy-aware transformation framework.
"""
import sys
from pathlib import Path
import pandas as pd
# Add src to path
sys.path.insert(0, str(Path(__file__).parent / 'src'))
from privacy_aware_transform.metadata import SyntheticMetadataGenerator, MetadataLoader
from privacy_aware_transform.classifier import SensitivityClassifier
from privacy_aware_transform.policy import PolicyEngine
from privacy_aware_transform.transforms import TransformationEngine
from privacy_aware_transform.utils import print_classification_report
def main():
    """Run the end-to-end smoke test of the privacy-aware transformation framework.

    The steps are executed in order:

    1. Generate customer metadata and store it with ``save_metadata_yaml``
       under ``table_structure/metadata`` (a path relative to the current
       working directory).
    2. Load ``customers.yaml`` from that directory and classify its columns
       with ``use_ml=False``.
    3. Build a three-row sample DataFrame and apply the ``internal_analyst``
       transformation rule to each classified column.
    4. Check that the CLI module can be imported.

    Returns:
        int: ``0`` when every step succeeded, ``1`` when the CLI module could
        not be imported. Exceptions raised by steps 1-3 are not caught here
        and propagate to the caller.
    """
    banner = "=" * 80

    print("\n" + banner)
    print("Privacy-Aware Data Transformation - Quick Test")
    print(banner + "\n")

    # Step 1: Generate sample metadata
    print("[Step 1] Generating sample metadata...")
    metadata_gen = SyntheticMetadataGenerator()
    metadata_dir = Path('table_structure/metadata')
    customers_meta = metadata_gen.generate_customer_metadata()
    filepath = metadata_gen.save_metadata_yaml(customers_meta, str(metadata_dir))
    print(f" ✓ Generated: {filepath}")

    # Step 2: Load and classify
    print("\n[Step 2] Classifying sensitive columns...")
    loader = MetadataLoader(str(metadata_dir))
    table_meta = loader.load_table_metadata('customers.yaml')
    classifier = SensitivityClassifier(use_ml=False)
    classifications = classifier.classify_table(table_meta.columns)
    summary = classifier.get_classification_summary(classifications)
    print(" Classification Summary:")
    for cls, count in summary.items():
        print(f" {cls}: {count}")

    # Step 3: Apply transformations
    print("\n[Step 3] Applying privacy transformations...")
    # Create sample data
    sample_data = {
        'customer_id': [1, 2, 3],
        'first_name': ['John', 'Jane', 'Bob'],
        'email': ['john@example.com', 'jane@example.com', 'bob@example.com'],
        'registration_date': ['2020-01-01', '2021-06-15', '2020-12-01'],
        'status': ['active', 'active', 'inactive'],
    }
    df = pd.DataFrame(sample_data)
    policy_engine = PolicyEngine()
    transformation_engine = TransformationEngine()

    # Transform for internal analyst
    print(" Transforming for: internal_analyst")
    transformed_data = {}
    for col_name in df.columns:
        if col_name not in classifications:
            # NOTE(review): columns missing from `classifications` are left
            # out of the transformed frame - confirm this is intended.
            continue
        classification = classifications[col_name]
        rule = policy_engine.get_transformation_rule(
            'internal_analyst', classification.sensitivity_class
        )
        if rule:
            transformed_data[col_name] = transformation_engine.apply_transformation(
                df[col_name].tolist(),
                rule.transformation_type,
                rule.parameters,
            )
        else:
            # No rule for this role/class pair: keep the values unchanged.
            transformed_data[col_name] = df[col_name].tolist()
    transformed_df = pd.DataFrame(transformed_data)

    print(" Original data (first row):")
    for col in df.columns:
        print(f" {col}: {df[col].iloc[0]}")
    print(" Transformed data (first row):")
    for col in transformed_df.columns:
        print(f" {col}: {transformed_df[col].iloc[0]}")

    # Step 4: Test CLI
    print("\n[Step 4] Testing CLI interface...")
    cli_loaded = True
    try:
        # Imported only to prove the module (and its `cli` entry point) loads.
        from privacy_aware_transform.cli import cli  # noqa: F401
    except Exception as e:
        # Broad on purpose: any failure while importing the CLI module is
        # reported, but it must also be reflected in the final summary.
        cli_loaded = False
        print(f" ✗ Error loading CLI: {e}")
    else:
        print(" ✓ CLI module loaded successfully")

    print("\n" + banner)
    if cli_loaded:
        print("All tests completed successfully!")
    else:
        # Do not claim success when a step above reported an error.
        print("Tests completed with errors - see messages above.")
    print(banner + "\n")

    print("Next steps:")
    print(" 1. Run full example: python examples/example.py")
    print(" 2. Use CLI: python -m privacy_aware_transform.cli --help")
    print(" 3. Check generated metadata: table_structure/metadata/")
    print()

    return 0 if cli_loaded else 1


if __name__ == '__main__':
    # Propagate the result so shells and CI see a non-zero status on failure.
    sys.exit(main())