-
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathutils.py
More file actions
21 lines (18 loc) · 721 Bytes
/
utils.py
File metadata and controls
21 lines (18 loc) · 721 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# utils.py
import re
# Function to replace date patterns with placeholders
def protect_dates(text):
    """Swap en-dash-separated dates in *text* for ``{DATEn}`` placeholders.

    A date here is four digits, an en dash (U+2013), two digits, an en dash
    and two more digits, delimited by word boundaries (e.g. ``1930–31–32``).
    Only the en dash is recognised as a separator; dates written with ASCII
    hyphens or other dash characters are left untouched.

    Args:
        text: The string to scan.

    Returns:
        A ``(text, dates)`` tuple. ``text`` has each matched date replaced by
        ``{DATE0}``, ``{DATE1}``, ... in order of appearance. ``dates`` holds
        the matched dates with en dashes converted to ASCII hyphens, so that
        ``dates[n]`` is the value for placeholder ``{DATEn}``.
    """
    # The literal backslash-u escape in this raw string is interpreted by the
    # re module itself, so the pattern matches the en dash character.
    date_pattern = r'\b\d{4}\u2013\d{2}\u2013\d{2}\b'
    dates = []

    def _stash(match):
        # Number the placeholder by position in `dates` so index and
        # placeholder can never drift apart, even for repeated dates.
        placeholder = f'{{DATE{len(dates)}}}'
        # Store the date with standard hyphens instead of en dashes.
        dates.append(match.group(0).replace('\u2013', '-'))
        return placeholder

    # A single re.sub pass replaces exactly the spans the pattern matched.
    # A plain str.replace would also rewrite look-alike substrings that fail
    # the word-boundary test (e.g. a date glued to a preceding letter).
    protected = re.sub(date_pattern, _stash, text)
    return protected, dates
# Function to restore date patterns
def restore_dates(text, dates):
    """Put stored dates back in place of their ``{DATEn}`` placeholders.

    Args:
        text: A string that may contain ``{DATE0}``, ``{DATE1}``, ...
        dates: Replacement strings; the item at position ``n`` is substituted
            for every occurrence of ``{DATEn}``.

    Returns:
        The text with each placeholder that has a corresponding entry in
        ``dates`` replaced. Placeholders without an entry are left as-is.
    """
    restored = text
    for index, value in enumerate(dates):
        # The closing brace keeps "{DATE1}" from matching inside "{DATE10}".
        placeholder = '{DATE' + str(index) + '}'
        restored = restored.replace(placeholder, value)
    return restored