From 5e190e3a7860fc068cce5fa4e6ccf80c5fe18bce Mon Sep 17 00:00:00 2001 From: debelatesfaye Date: Tue, 4 Mar 2025 12:21:27 +0000 Subject: [PATCH 1/2] modified the regex --- src/segmenter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/segmenter.py b/src/segmenter.py index 454006c..c05ca7b 100644 --- a/src/segmenter.py +++ b/src/segmenter.py @@ -20,8 +20,9 @@ def __init__(self,file_obj): file = open(self.f_name,'r') def get_segments(self, input_text): - """Simple segmenter splitting texts based on regex while handling decimal points and abbreviations.""" - return re.split(r'(? Date: Tue, 4 Mar 2025 12:21:57 +0000 Subject: [PATCH 2/2] modified the regex --- src/segmenter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segmenter.py b/src/segmenter.py index c05ca7b..c7655ca 100644 --- a/src/segmenter.py +++ b/src/segmenter.py @@ -19,7 +19,7 @@ def __init__(self,file_obj): self.file_obj.save(self.f_name) file = open(self.f_name,'r') - def get_segments(self, input_text): + def get_segments(self, input_text): # regex """Segmenter that avoids splitting within decimal numbers and abbreviations.""" pattern = r'(?