From 75de58d41659fe502bd542db7d91bf105eb55ab5 Mon Sep 17 00:00:00 2001 From: debelatesfaye Date: Tue, 4 Mar 2025 11:48:32 +0000 Subject: [PATCH] modified the regex --- src/segmenter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segmenter.py b/src/segmenter.py index f1cee99..454006c 100644 --- a/src/segmenter.py +++ b/src/segmenter.py @@ -20,8 +20,8 @@ def __init__(self,file_obj): file = open(self.f_name,'r') def get_segments(self, input_text): - "Simple segementer spliting texts based on regex." - return re.split("[.!?]",input_text) + """Simple segmenter splitting texts based on regex while handling decimal points and abbreviations.""" + return re.split(r'(?