Skip to content

Commit e562f3b

Browse files
Sid Mohan authored and committed
cli tests passed
1 parent 2bf5379 commit e562f3b

File tree

2 files changed

+233
-0
lines changed

2 files changed

+233
-0
lines changed

datafog/client.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from .config import get_config
1919
from .main import DataFog
20+
from .models.anonymizer import Anonymizer, AnonymizerType, HashType
2021
from .models.spacy_nlp import SpacyAnnotator
2122

2223
app = typer.Typer()
@@ -161,5 +162,60 @@ def list_entities():
161162
typer.echo(annotator.list_entities())
162163

163164

165+
@app.command()
def redact_text(text: str = typer.Argument(..., help="Text to redact")):
    """
    Redact PII in text.

    Args:
        text: Text to redact.

    Prints the redacted text.
    """
    # NOTE: Typer surfaces the docstring above as this command's --help text,
    # so its wording is user-facing output.
    detector = SpacyAnnotator()
    redactor = Anonymizer(anonymizer_type=AnonymizerType.REDACT)
    # Annotate first, then pass both the raw text and the annotations on.
    outcome = redactor.anonymize(text, detector.annotate_text(text))
    typer.echo(outcome.anonymized_text)
180+
181+
182+
@app.command()
def replace_text(text: str = typer.Argument(..., help="Text to replace PII")):
    """
    Replace PII in text with anonymized values.

    Args:
        text: Text to replace PII.

    Prints the text with PII replaced.
    """
    # NOTE: Typer surfaces the docstring above as this command's --help text,
    # so its wording is user-facing output.
    detector = SpacyAnnotator()
    replacer = Anonymizer(anonymizer_type=AnonymizerType.REPLACE)
    # Annotate first, then pass both the raw text and the annotations on.
    outcome = replacer.anonymize(text, detector.annotate_text(text))
    typer.echo(outcome.anonymized_text)
197+
198+
199+
@app.command()
def hash_text(
    text: str = typer.Argument(..., help="Text to hash PII"),
    hash_type: HashType = typer.Option(HashType.SHA256, help="Hash algorithm to use"),
):
    """
    Choose from SHA256, MD5, or SHA3-256 algorithms to hash detected PII in text.

    Args:
        text: Text to hash PII.
        hash_type: Hash algorithm to use.

    Prints the text with PII hashed.
    """
    # NOTE: Typer surfaces the docstring above as this command's --help text,
    # so its wording is user-facing output.
    detector = SpacyAnnotator()
    hasher = Anonymizer(anonymizer_type=AnonymizerType.HASH, hash_type=hash_type)
    # Annotate first, then pass both the raw text and the annotations on.
    outcome = hasher.anonymize(text, detector.annotate_text(text))
    typer.echo(outcome.anonymized_text)
218+
219+
164220
if __name__ == "__main__":
165221
app()

tests/test_client.py

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@
44
from typer.testing import CliRunner
55

66
from datafog.client import app
7+
from datafog.models.annotator import AnnotationResult, AnnotatorMetadata
8+
from datafog.models.anonymizer import (
9+
AnonymizationResult,
10+
Anonymizer,
11+
AnonymizerType,
12+
HashType,
13+
)
14+
from datafog.models.common import EntityTypes
715

816
runner = CliRunner()
917

@@ -14,6 +22,38 @@ def mock_datafog():
1422
yield mock
1523

1624

25+
@pytest.fixture
def sample_text():
    """Return a fixed sample sentence for tests."""
    sentence = "Jeff Smith works at DigiCorp Incorporated in Paris."
    return sentence
28+
29+
30+
@pytest.fixture
def sample_annotations():
    """Return annotations for the three entities in the ``sample_text`` sentence.

    Offsets are character positions into
    ``"Jeff Smith works at DigiCorp Incorporated in Paris."`` with an
    exclusive ``end`` (``text[start:end]`` is the entity). That is the
    convention the hand-written annotations in this file's tests follow
    ("John Doe" is ``start=0, end=8``).
    NOTE(review): confirm the annotator really reports exclusive end offsets.
    """
    return [
        AnnotationResult(
            start=0,
            end=10,  # text[0:10] == "Jeff Smith"
            score=1.0,
            entity_type=EntityTypes.PERSON,
            recognition_metadata=AnnotatorMetadata(),
        ),
        AnnotationResult(
            start=20,
            end=41,  # text[20:41] == "DigiCorp Incorporated"
            score=1.0,
            entity_type=EntityTypes.ORGANIZATION,
            recognition_metadata=AnnotatorMetadata(),
        ),
        AnnotationResult(
            start=45,
            end=50,  # text[45:50] == "Paris"
            score=1.0,
            entity_type=EntityTypes.LOCATION,
            recognition_metadata=AnnotatorMetadata(),
        ),
    ]
55+
56+
1757
def test_scan_image_no_urls():
1858
result = runner.invoke(app, ["scan-image"])
1959
assert result.exit_code == 1
@@ -101,3 +141,140 @@ def test_list_entities(mock_spacy_annotator):
101141
result = runner.invoke(app, ["list-entities"])
102142
assert result.exit_code == 0
103143
assert "['PERSON', 'ORG']" in result.stdout
144+
145+
146+
@patch("datafog.client.SpacyAnnotator")
@patch("datafog.client.Anonymizer")
def test_redact_text(mock_anonymizer, mock_spacy_annotator):
    """Check that ``redact-text`` wires the annotator and anonymizer together.

    Both collaborators are patched, so this verifies only the CLI plumbing:
    the annotator receives the input text, the anonymizer is built in REDACT
    mode and receives the text plus annotations, and the anonymized text is
    echoed to stdout.
    """
    # Decorators apply bottom-up, so the Anonymizer mock is the first argument.
    mock_annotator = mock_spacy_annotator.return_value
    mock_anonymizer_instance = mock_anonymizer.return_value

    sample_text = "John Doe works at Acme Corp"
    annotations = [
        AnnotationResult(
            start=0,
            end=8,
            score=1.0,
            entity_type=EntityTypes.PERSON,
            recognition_metadata=AnnotatorMetadata(),
        ),
        AnnotationResult(
            start=18,
            end=27,
            score=1.0,
            entity_type=EntityTypes.ORGANIZATION,
            recognition_metadata=AnnotatorMetadata(),
        ),
    ]
    mock_annotator.annotate_text.return_value = annotations

    mock_anonymizer_instance.anonymize.return_value = AnonymizationResult(
        anonymized_text="[REDACTED] works at [REDACTED]", anonymized_entities=[]
    )

    result = runner.invoke(app, ["redact-text", sample_text])

    assert result.exit_code == 0
    assert "[REDACTED] works at [REDACTED]" in result.stdout
    mock_spacy_annotator.assert_called_once()
    mock_anonymizer.assert_called_once_with(anonymizer_type=AnonymizerType.REDACT)
    mock_annotator.annotate_text.assert_called_once_with(sample_text)
    mock_anonymizer_instance.anonymize.assert_called_once_with(
        sample_text, annotations
    )
185+
186+
187+
@patch("datafog.client.SpacyAnnotator")
@patch("datafog.client.Anonymizer")
def test_replace_text(mock_anonymizer, mock_spacy_annotator):
    """Check that ``replace-text`` wires the annotator and anonymizer together.

    Both collaborators are patched, so only the CLI plumbing is exercised.
    """
    # Decorators apply bottom-up, so the Anonymizer mock is the first argument.
    annotator_instance = mock_spacy_annotator.return_value
    anonymizer_instance = mock_anonymizer.return_value

    cli_input = "John Doe works at Acme Corp"
    expected_output = "Jane Smith works at TechCo Inc"

    # (start, end, entity type) for each entity in ``cli_input``.
    spans = (
        (0, 8, EntityTypes.PERSON),
        (18, 27, EntityTypes.ORGANIZATION),
    )
    annotations = [
        AnnotationResult(
            start=begin,
            end=finish,
            score=1.0,
            entity_type=kind,
            recognition_metadata=AnnotatorMetadata(),
        )
        for begin, finish, kind in spans
    ]
    annotator_instance.annotate_text.return_value = annotations
    anonymizer_instance.anonymize.return_value = AnonymizationResult(
        anonymized_text=expected_output, anonymized_entities=[]
    )

    result = runner.invoke(app, ["replace-text", cli_input])

    assert result.exit_code == 0
    assert expected_output in result.stdout
    mock_spacy_annotator.assert_called_once()
    mock_anonymizer.assert_called_once_with(anonymizer_type=AnonymizerType.REPLACE)
    annotator_instance.annotate_text.assert_called_once_with(cli_input)
    anonymizer_instance.anonymize.assert_called_once_with(cli_input, annotations)
226+
227+
228+
@patch("datafog.client.SpacyAnnotator")
@patch("datafog.client.Anonymizer")
def test_hash_text(mock_anonymizer, mock_spacy_annotator):
    """Check ``hash-text`` with the default and an explicit hash type.

    Both collaborators are patched, so only the CLI plumbing is exercised:
    first that the default invocation builds the anonymizer with SHA256, then
    that ``--hash-type md5`` is parsed into ``HashType.MD5``.
    """
    # Decorators apply bottom-up, so the Anonymizer mock is the first argument.
    mock_annotator = mock_spacy_annotator.return_value
    mock_anonymizer_instance = mock_anonymizer.return_value

    sample_text = "John Doe works at Acme Corp"
    sample_annotations = [
        AnnotationResult(
            start=0,
            end=8,
            score=1.0,
            entity_type=EntityTypes.PERSON,
            recognition_metadata=AnnotatorMetadata(),
        ),
        AnnotationResult(
            start=18,
            end=27,
            score=1.0,
            entity_type=EntityTypes.ORGANIZATION,
            recognition_metadata=AnnotatorMetadata(),
        ),
    ]
    mock_annotator.annotate_text.return_value = sample_annotations

    mock_anonymizer_instance.anonymize.return_value = AnonymizationResult(
        anonymized_text="5ab5c95f works at 7b23f032", anonymized_entities=[]
    )

    result = runner.invoke(app, ["hash-text", sample_text])

    assert result.exit_code == 0
    assert "5ab5c95f works at 7b23f032" in result.stdout
    mock_spacy_annotator.assert_called_once()
    mock_anonymizer.assert_called_once_with(
        anonymizer_type=AnonymizerType.HASH, hash_type=HashType.SHA256
    )
    mock_annotator.annotate_text.assert_called_once_with(sample_text)
    mock_anonymizer_instance.anonymize.assert_called_once_with(
        sample_text, sample_annotations
    )

    # Test with custom hash type
    result = runner.invoke(app, ["hash-text", sample_text, "--hash-type", "md5"])

    # Carry the diagnostics in the assertion message so they appear only when
    # the invocation actually fails.
    assert result.exit_code == 0, (
        f"Exit code: {result.exit_code}\n"
        f"Output: {result.stdout}\n"
        f"Exception: {result.exception}"
    )
    # The mocks were already called once above, so check the most recent call.
    mock_anonymizer.assert_called_with(
        anonymizer_type=AnonymizerType.HASH, hash_type=HashType.MD5
    )

0 commit comments

Comments
 (0)