@@ -207,17 +207,14 @@ class MatchRulesTests(unittest.TestCase):
207207 def test_match_rules (self ):
208208 # Test data from the docstring example
209209 rules = [
210- ('(123) 456-7890' , '134' ),
211- ('1234567890' , '134' ),
212- ('123.456.7890' , '134' ),
213- ('1234567890' , '134' ),
214- ('1234567890' , '134' ),
210+ (r'\(\d{3}\)\s*\d{3}-\d{4}' , '134' ), # (123) 456-7890
211+ (r'\d{3}\.\d{3}\.\d{4}' , '134' ), # 123.456.7890
212+ (r'\d{10}' , '134' ), # 1234567890
215213 ]
216214 texts = [
217215 'My phone number is (123) 456-7890' ,
218216 'My phone number is 1234567890' ,
219217 'My phone number is 123.456.7890' ,
220- 'My phone number is 1234567890' ,
221218 ]
222219 cui2preferred_name = {'134' : 'Phone Number' }
223220
@@ -233,35 +230,26 @@ def test_match_rules(self):
233230 self .assertEqual (matches [0 ][0 ]['pretty_name' ], 'Phone Number' )
234231 self .assertEqual (matches [0 ][0 ]['cui' ], '134' )
235232 self .assertEqual (matches [0 ][0 ]['acc' ], 1.0 )
236- self .assertEqual (matches [0 ][0 ]['start' ], 17 ) # Position of phone number in text
237- self .assertEqual (matches [0 ][0 ]['end' ], 31 ) # End position of phone number
233+ self .assertEqual (matches [0 ][0 ]['start' ], 19 ) # Position of phone number in text
234+ self .assertEqual (matches [0 ][0 ]['end' ], 33 ) # End position of phone number
238235
239236 # Check second text matches
240237 self .assertEqual (len (matches [1 ]), 1 ) # One match in second text
241238 self .assertEqual (matches [1 ][0 ]['source_value' ], '1234567890' )
242239 self .assertEqual (matches [1 ][0 ]['pretty_name' ], 'Phone Number' )
243240 self .assertEqual (matches [1 ][0 ]['cui' ], '134' )
244241 self .assertEqual (matches [1 ][0 ]['acc' ], 1.0 )
245- self .assertEqual (matches [1 ][0 ]['start' ], 17 ) # Position of phone number in text
246- self .assertEqual (matches [1 ][0 ]['end' ], 27 ) # End position of phone number
242+ self .assertEqual (matches [1 ][0 ]['start' ], 19 ) # Position of phone number in text
243+ self .assertEqual (matches [1 ][0 ]['end' ], 29 ) # End position of phone number
247244
248245 # Check third text matches
249246 self .assertEqual (len (matches [2 ]), 1 ) # One match in third text
250247 self .assertEqual (matches [2 ][0 ]['source_value' ], '123.456.7890' )
251248 self .assertEqual (matches [2 ][0 ]['pretty_name' ], 'Phone Number' )
252249 self .assertEqual (matches [2 ][0 ]['cui' ], '134' )
253250 self .assertEqual (matches [2 ][0 ]['acc' ], 1.0 )
254- self .assertEqual (matches [2 ][0 ]['start' ], 17 ) # Position of phone number in text
255- self .assertEqual (matches [2 ][0 ]['end' ], 30 ) # End position of phone number
256-
257- # Check fourth text matches
258- self .assertEqual (len (matches [3 ]), 1 ) # One match in fourth text
259- self .assertEqual (matches [3 ][0 ]['source_value' ], '1234567890' )
260- self .assertEqual (matches [3 ][0 ]['pretty_name' ], 'Phone Number' )
261- self .assertEqual (matches [3 ][0 ]['cui' ], '134' )
262- self .assertEqual (matches [3 ][0 ]['acc' ], 1.0 )
263- self .assertEqual (matches [3 ][0 ]['start' ], 17 ) # Position of phone number in text
264- self .assertEqual (matches [3 ][0 ]['end' ], 27 ) # End position of phone number
251+ self .assertEqual (matches [2 ][0 ]['start' ], 19 ) # Position of phone number in text
252+ self .assertEqual (matches [2 ][0 ]['end' ], 31 ) # End position of phone number
265253
266254 def test_merge_preds (self ):
267255 # Test data with overlapping predictions
@@ -284,19 +272,19 @@ def test_merge_preds(self):
284272
285273 # Test with accept_preds=True (default)
286274 merged_preds = deid .merge_preds (model_preds , rule_matches )
287- self .assertEqual (len (merged_preds ), 1 ) # Should return a list with one element
288- self .assertEqual (len ( merged_preds [0 ]), 3 ) # Should keep model predictions and non-overlapping rule match
289- self .assertEqual (merged_preds [0 ][ 0 ][ 'start' ], 10 ) # First model pred
290- self .assertEqual (merged_preds [0 ][ 1 ][ 'start' ], 25 ) # Second model pred
291- self .assertEqual (merged_preds [0 ][ 2 ][ 'start' ], 50 ) # Third model pred
275+ self .assertEqual (len (merged_preds ), 4 ) # Should return a list with 4 elements
276+ self .assertEqual (merged_preds [0 ][ 'start' ], 10 ) # First model pred
277+ self .assertEqual (merged_preds [1 ][ 'start' ], 25 ) # Second model pred
278+ self .assertEqual (merged_preds [2 ][ 'start' ], 50 ) # Third model pred
279+ self .assertEqual (merged_preds [3 ][ 'start' ], 70 ) # Fourth element: non-overlapping rule match
292280
293281 # Test with accept_preds=False
294282 merged_preds = deid .merge_preds (model_preds , rule_matches , accept_preds = False )
295- self .assertEqual (len (merged_preds ), 1 ) # Should return a list with one element
296- self .assertEqual (len ( merged_preds [0 ]), 3 ) # Should keep rule matches and non-overlapping model pred
297- self .assertEqual (merged_preds [0 ][ 0 ][ 'start' ], 15 ) # First rule match
298- self .assertEqual (merged_preds [0 ][ 1 ][ 'start' ], 30 ) # Second rule match
299- self .assertEqual (merged_preds [0 ][ 2 ][ 'start' ], 70 ) # Third rule match
283+ self .assertEqual (len (merged_preds ), 4 ) # Should return a list with 4 elements
284+ self .assertEqual (merged_preds [0 ][ 'start' ], 15 ) # First rule match
285+ self .assertEqual (merged_preds [1 ][ 'start' ], 30 ) # Second rule match
286+ self .assertEqual (merged_preds [2 ][ 'start' ], 50 ) # Third model pred
287+ self .assertEqual (merged_preds [3 ][ 'start' ], 70 ) # Fourth element: rule match
300288
301289 # Test with non-overlapping predictions
302290 model_preds = [
@@ -314,21 +302,11 @@ def test_merge_preds(self):
314302
315303 # Test with accept_preds=True (default)
316304 merged_preds = deid .merge_preds (model_preds , rule_matches )
317- self .assertEqual (len (merged_preds ), 1 )
318- self .assertEqual (len (merged_preds [0 ]), 4 ) # Should keep all predictions
319- self .assertEqual (merged_preds [0 ][0 ]['start' ], 10 ) # First model pred
320- self .assertEqual (merged_preds [0 ][1 ]['start' ], 25 ) # First rule match
321- self .assertEqual (merged_preds [0 ][2 ]['start' ], 50 ) # Second model pred
322- self .assertEqual (merged_preds [0 ][3 ]['start' ], 70 ) # Second rule match
323-
324- # Test with accept_preds=False
325- merged_preds = deid .merge_preds (model_preds , rule_matches , accept_preds = False )
326- self .assertEqual (len (merged_preds ), 1 )
327- self .assertEqual (len (merged_preds [0 ]), 4 ) # Should keep all predictions
328- self .assertEqual (merged_preds [0 ][0 ]['start' ], 10 ) # First model pred
329- self .assertEqual (merged_preds [0 ][1 ]['start' ], 25 ) # First rule match
330- self .assertEqual (merged_preds [0 ][2 ]['start' ], 50 ) # Second model pred
331- self .assertEqual (merged_preds [0 ][3 ]['start' ], 70 ) # Second rule match
305+ self .assertEqual (len (merged_preds ), 4 ) # Should keep all predictions
306+ self .assertEqual (merged_preds [0 ]['start' ], 10 ) # First model pred
307+ self .assertEqual (merged_preds [1 ]['start' ], 25 ) # First rule match
308+ self .assertEqual (merged_preds [2 ]['start' ], 50 ) # Second model pred
309+ self .assertEqual (merged_preds [3 ]['start' ], 70 ) # Second rule match
332310
333311 def test_merge_all_preds (self ):
334312 # Test with lists of different lengths
0 commit comments