Skip to content

Commit 01b8ef9

Browse files
author
Tom Searle
committed
CU-8698jzjj3: Fix tests
1 parent 092729d commit 01b8ef9

File tree

1 file changed

+24
-46
lines changed

1 file changed

+24
-46
lines changed

medcat-v1/tests/utils/ner/test_deid.py

Lines changed: 24 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -207,17 +207,14 @@ class MatchRulesTests(unittest.TestCase):
207207
def test_match_rules(self):
208208
# Test data from the docstring example
209209
rules = [
210-
('(123) 456-7890', '134'),
211-
('1234567890', '134'),
212-
('123.456.7890', '134'),
213-
('1234567890', '134'),
214-
('1234567890', '134'),
210+
(r'\(\d{3}\)\s*\d{3}-\d{4}', '134'), # (123) 456-7890
211+
(r'\d{3}\.\d{3}\.\d{4}', '134'), # 123.456.7890
212+
(r'\d{10}', '134'), # 1234567890
215213
]
216214
texts = [
217215
'My phone number is (123) 456-7890',
218216
'My phone number is 1234567890',
219217
'My phone number is 123.456.7890',
220-
'My phone number is 1234567890',
221218
]
222219
cui2preferred_name = {'134': 'Phone Number'}
223220

@@ -233,35 +230,26 @@ def test_match_rules(self):
233230
self.assertEqual(matches[0][0]['pretty_name'], 'Phone Number')
234231
self.assertEqual(matches[0][0]['cui'], '134')
235232
self.assertEqual(matches[0][0]['acc'], 1.0)
236-
self.assertEqual(matches[0][0]['start'], 17) # Position of phone number in text
237-
self.assertEqual(matches[0][0]['end'], 31) # End position of phone number
233+
self.assertEqual(matches[0][0]['start'], 19) # Position of phone number in text
234+
self.assertEqual(matches[0][0]['end'], 33) # End position of phone number
238235

239236
# Check second text matches
240237
self.assertEqual(len(matches[1]), 1) # One match in second text
241238
self.assertEqual(matches[1][0]['source_value'], '1234567890')
242239
self.assertEqual(matches[1][0]['pretty_name'], 'Phone Number')
243240
self.assertEqual(matches[1][0]['cui'], '134')
244241
self.assertEqual(matches[1][0]['acc'], 1.0)
245-
self.assertEqual(matches[1][0]['start'], 17) # Position of phone number in text
246-
self.assertEqual(matches[1][0]['end'], 27) # End position of phone number
242+
self.assertEqual(matches[1][0]['start'], 19) # Position of phone number in text
243+
self.assertEqual(matches[1][0]['end'], 29) # End position of phone number
247244

248245
# Check third text matches
249246
self.assertEqual(len(matches[2]), 1) # One match in third text
250247
self.assertEqual(matches[2][0]['source_value'], '123.456.7890')
251248
self.assertEqual(matches[2][0]['pretty_name'], 'Phone Number')
252249
self.assertEqual(matches[2][0]['cui'], '134')
253250
self.assertEqual(matches[2][0]['acc'], 1.0)
254-
self.assertEqual(matches[2][0]['start'], 17) # Position of phone number in text
255-
self.assertEqual(matches[2][0]['end'], 30) # End position of phone number
256-
257-
# Check fourth text matches
258-
self.assertEqual(len(matches[3]), 1) # One match in fourth text
259-
self.assertEqual(matches[3][0]['source_value'], '1234567890')
260-
self.assertEqual(matches[3][0]['pretty_name'], 'Phone Number')
261-
self.assertEqual(matches[3][0]['cui'], '134')
262-
self.assertEqual(matches[3][0]['acc'], 1.0)
263-
self.assertEqual(matches[3][0]['start'], 17) # Position of phone number in text
264-
self.assertEqual(matches[3][0]['end'], 27) # End position of phone number
251+
self.assertEqual(matches[2][0]['start'], 19) # Position of phone number in text
252+
self.assertEqual(matches[2][0]['end'], 31) # End position of phone number
265253

266254
def test_merge_preds(self):
267255
# Test data with overlapping predictions
@@ -284,19 +272,19 @@ def test_merge_preds(self):
284272

285273
# Test with accept_preds=True (default)
286274
merged_preds = deid.merge_preds(model_preds, rule_matches)
287-
self.assertEqual(len(merged_preds), 1) # Should return a list with one element
288-
self.assertEqual(len(merged_preds[0]), 3) # Should keep model predictions and non-overlapping rule match
289-
self.assertEqual(merged_preds[0][0]['start'], 10) # First model pred
290-
self.assertEqual(merged_preds[0][1]['start'], 25) # Second model pred
291-
self.assertEqual(merged_preds[0][2]['start'], 50) # Third model pred
275+
self.assertEqual(len(merged_preds), 4) # Should return a list with 4 elements
276+
self.assertEqual(merged_preds[0]['start'], 10) # First model pred
277+
self.assertEqual(merged_preds[1]['start'], 25) # Second model pred
278+
self.assertEqual(merged_preds[2]['start'], 50) # Third model pred
279+
self.assertEqual(merged_preds[3]['start'], 70) # Fourth element: third (non-overlapping) rule match
292280

293281
# Test with accept_preds=False
294282
merged_preds = deid.merge_preds(model_preds, rule_matches, accept_preds=False)
295-
self.assertEqual(len(merged_preds), 1) # Should return a list with one element
296-
self.assertEqual(len(merged_preds[0]), 3) # Should keep rule matches and non-overlapping model pred
297-
self.assertEqual(merged_preds[0][0]['start'], 15) # First rule match
298-
self.assertEqual(merged_preds[0][1]['start'], 30) # Second rule match
299-
self.assertEqual(merged_preds[0][2]['start'], 70) # Third rule match
283+
self.assertEqual(len(merged_preds), 4) # Should return a list with 4 elements
284+
self.assertEqual(merged_preds[0]['start'], 15) # First rule match
285+
self.assertEqual(merged_preds[1]['start'], 30) # Second rule match
286+
self.assertEqual(merged_preds[2]['start'], 50) # Third model pred
287+
self.assertEqual(merged_preds[3]['start'], 70) # Fourth element: third rule match
300288

301289
# Test with non-overlapping predictions
302290
model_preds = [
@@ -314,21 +302,11 @@ def test_merge_preds(self):
314302

315303
# Test with accept_preds=True (default)
316304
merged_preds = deid.merge_preds(model_preds, rule_matches)
317-
self.assertEqual(len(merged_preds), 1)
318-
self.assertEqual(len(merged_preds[0]), 4) # Should keep all predictions
319-
self.assertEqual(merged_preds[0][0]['start'], 10) # First model pred
320-
self.assertEqual(merged_preds[0][1]['start'], 25) # First rule match
321-
self.assertEqual(merged_preds[0][2]['start'], 50) # Second model pred
322-
self.assertEqual(merged_preds[0][3]['start'], 70) # Second rule match
323-
324-
# Test with accept_preds=False
325-
merged_preds = deid.merge_preds(model_preds, rule_matches, accept_preds=False)
326-
self.assertEqual(len(merged_preds), 1)
327-
self.assertEqual(len(merged_preds[0]), 4) # Should keep all predictions
328-
self.assertEqual(merged_preds[0][0]['start'], 10) # First model pred
329-
self.assertEqual(merged_preds[0][1]['start'], 25) # First rule match
330-
self.assertEqual(merged_preds[0][2]['start'], 50) # Second model pred
331-
self.assertEqual(merged_preds[0][3]['start'], 70) # Second rule match
305+
self.assertEqual(len(merged_preds), 4) # Should keep all predictions
306+
self.assertEqual(merged_preds[0]['start'], 10) # First model pred
307+
self.assertEqual(merged_preds[1]['start'], 25) # First rule match
308+
self.assertEqual(merged_preds[2]['start'], 50) # Second model pred
309+
self.assertEqual(merged_preds[3]['start'], 70) # Second rule match
332310

333311
def test_merge_all_preds(self):
334312
# Test with lists of different lengths

0 commit comments

Comments
 (0)