Skip to content

Commit 6c83fd5

Browse files
authored
Merge pull request #441 from PyThaiNLP/fix-thai_strftime
Fix %O modifier for thai_strftime()
2 parents 7f24af2 + bc382ab commit 6c83fd5

File tree

4 files changed

+343
-332
lines changed

4 files changed

+343
-332
lines changed

pythainlp/util/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
from pythainlp.util.date import (
4545
now_reign_year,
4646
reign_year_to_ad,
47-
thai_strftime,
4847
thaiword_to_date,
4948
)
5049
from pythainlp.util.digitconv import (
@@ -67,6 +66,7 @@
6766
reorder_vowels,
6867
)
6968
from pythainlp.util.numtoword import bahttext, num_to_thaiword
69+
from pythainlp.util.strftime import thai_strftime
7070
from pythainlp.util.thai import countthai, isthai, isthaichar
7171
from pythainlp.util.thaiwordcheck import is_native_thai
7272
from pythainlp.util.time import thai_time, thaiword_to_time, time_to_thaiword

pythainlp/util/date.py

Lines changed: 1 addition & 329 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22
"""
3-
Thai date/time conversion and formatting.
3+
Thai date/time conversion.
44
55
Note: Does not take into account the change of new year's day in Thailand
66
"""
@@ -15,11 +15,9 @@
1515
"thai_abbr_weekdays",
1616
"thai_full_months",
1717
"thai_full_weekdays",
18-
"thai_strftime",
1918
"thaiword_to_date",
2019
]
2120

22-
import warnings
2321
from datetime import datetime, timedelta
2422
from typing import Union
2523

@@ -63,16 +61,6 @@
6361
"ธันวาคม",
6462
]
6563

66-
_HA_DIGITS = "0123456789"
67-
_TH_DIGITS = "๐๑๒๓๔๕๖๗๘๙"
68-
_HA_TH_DIGITS = str.maketrans(_HA_DIGITS, _TH_DIGITS)
69-
70-
71-
_NEED_L10N = "AaBbCcDFGgvXxYy+" # flags that need localization
72-
_EXTENSIONS = "EO-_0^#" # extension flags
73-
74-
_BE_AD_DIFFERENCE = 543
75-
7664
_DAY = {
7765
"วันนี้": 0,
7866
"คืนนี้": 0,
@@ -96,322 +84,6 @@
9684
}
9785

9886

99-
""" def _padding(n: int, length: int = 2, pad_char: str = "0") -> str:
100-
str_ = str(n)
101-
102-
pad_len = abs(length - len(str_))
103-
104-
return (pad_char * pad_len) + str_
105-
"""
106-
107-
108-
def _std_strftime(dt_obj: datetime, fmt_char: str) -> str:
    """
    Standard datetime.strftime() with normalization and exception handling.

    Formats a single directive and hides platform differences in how
    unsupported directives are reported.

    :param datetime dt_obj: date/time object to be formatted
    :param str fmt_char: one directive character, without the leading "%"
                         (for example "H" for "%H")
    :return: output of ``dt_obj.strftime("%" + fmt_char)``; if the platform
             echoes the directive back unchanged or raises ValueError,
             `fmt_char` itself is returned
    :rtype: str
    """
    directive = f"%{fmt_char}"

    try:
        str_ = dt_obj.strftime(directive)
    except ValueError as err:
        # Unsupported directives may raise ValueError on Windows,
        # in that case just use the fmt_char
        warnings.warn(
            (
                "String format directive unknown/not supported: "
                f"{directive}. "
                f"The system raises this ValueError: {err}"
            ),
            UserWarning,
        )
        return fmt_char

    if str_ == directive:
        # normalize outputs for unsupported directives
        # in different platforms
        # unsupported "%Q" in platform A may return "Q"
        # unsupported "%Q" in platform B may return "%Q"
        return fmt_char

    return str_
133-
134-
135-
def _thai_strftime(dt_obj: datetime, fmt_char: str) -> str:
    """
    Render one directive with Thai names and Thai Buddhist Era years.

    This is the conversion helper for thai_strftime(). Callers are expected
    to pass a `fmt_char` that is listed in _NEED_L10N; any other character
    is handed over to _std_strftime().

    :param datetime dt_obj: date/time object to be formatted
    :param str fmt_char: one directive character, without the leading "%"
    :return: localized text for the directive
    :rtype: str
    """
    # --- names of weekdays and months ---
    if fmt_char == "A":
        # full weekday name
        return thai_full_weekdays[dt_obj.weekday()]
    if fmt_char == "a":
        # abbreviated weekday name
        return thai_abbr_weekdays[dt_obj.weekday()]
    if fmt_char == "B":
        # full month name
        return thai_full_months[dt_obj.month - 1]
    if fmt_char == "b":
        # abbreviated month name
        return thai_abbr_months[dt_obj.month - 1]

    # --- year-only directives (Buddhist Era = AD + 543) ---
    if fmt_char == "C":
        # Thai Buddhist century: (AD+543)/100 + 1, at least two digits
        century = int((dt_obj.year + _BE_AD_DIFFERENCE) / 100) + 1
        return str(century).zfill(2)
    if fmt_char == "Y":
        # year with century
        return str(dt_obj.year + _BE_AD_DIFFERENCE).zfill(4)
    if fmt_char == "y":
        # year without century
        return str(dt_obj.year + _BE_AD_DIFFERENCE)[-2:].zfill(2)
    if fmt_char == "G":
        # ISO 8601 week-based year (the year holding the greater part
        # of the ISO week, Monday first), with century
        iso_be_year = int(dt_obj.strftime("%G")) + _BE_AD_DIFFERENCE
        return str(iso_be_year).zfill(4)
    if fmt_char == "g":
        # same year as "%G", but without century (00-99)
        iso_be_year = int(dt_obj.strftime("%G")) + _BE_AD_DIFFERENCE
        return str(iso_be_year)[-2:].zfill(2)

    # --- composite date/time representations ---
    if fmt_char == "c":
        # date and time representation
        # Wed  6 Oct 01:40:00 1976
        # left-aligned weekday, right-aligned day
        weekday = thai_abbr_weekdays[dt_obj.weekday()]
        day = dt_obj.day
        month = thai_abbr_months[dt_obj.month - 1]
        clock = dt_obj.strftime("%H:%M:%S")
        year = str(dt_obj.year + _BE_AD_DIFFERENCE).zfill(4)
        return f"{weekday:<2} {day:>2} {month} {clock} {year}"
    if fmt_char == "D":
        # equivalent to "%m/%d/%y"
        month_day = dt_obj.strftime("%m/%d")
        short_year = str(dt_obj.year + _BE_AD_DIFFERENCE)[-2:].zfill(2)
        return f"{month_day}/{short_year}"
    if fmt_char == "F":
        # equivalent to "%Y-%m-%d"
        year = str(dt_obj.year + _BE_AD_DIFFERENCE).zfill(4)
        month_day = dt_obj.strftime("%m-%d")
        return f"{year}-{month_day}"
    if fmt_char == "v":
        # BSD extension, ' 6-Oct-1976'
        day = dt_obj.day
        month = thai_abbr_months[dt_obj.month - 1]
        year = str(dt_obj.year + _BE_AD_DIFFERENCE).zfill(4)
        return f"{day:>2}-{month}-{year}"
    if fmt_char == "X":
        # time representation
        return dt_obj.strftime("%H:%M:%S")
    if fmt_char == "x":
        # date representation: day/month/year
        day = str(dt_obj.day).zfill(2)
        month = str(dt_obj.month).zfill(2)
        year = str(dt_obj.year + _BE_AD_DIFFERENCE).zfill(4)
        return f"{day}/{month}/{year}"
    if fmt_char == "+":
        # date and time in a format similar to the output of date(1)
        # Wed  6 Oct 1976 01:40:00
        weekday = thai_abbr_weekdays[dt_obj.weekday()]
        day = dt_obj.day
        month = thai_abbr_months[dt_obj.month - 1]
        year = dt_obj.year + _BE_AD_DIFFERENCE
        clock = dt_obj.strftime("%H:%M:%S")
        return f"{weekday:<2} {day:>2} {month} {year} {clock}"

    # No known localization available, use Python's default
    return _std_strftime(dt_obj, fmt_char)
228-
229-
230-
def thai_strftime(
    dt_obj: datetime, fmt: str = "%-d %b %y", thaidigit: bool = False,
) -> str:
    """
    Convert :class:`datetime.datetime` into Thai date and time format.

    The formatting directives are similar to :func:`datetime.strftime`.

    This function uses Thai names and Thai Buddhist Era for these directives:
        * **%a** - abbreviated weekday name
          (i.e. "จ", "อ", "พ", "พฤ", "ศ", "ส", "อา")
        * **%A** - full weekday name
          (i.e. "วันจันทร์", "วันอังคาร", "วันเสาร์", "วันอาทิตย์")
        * **%b** - abbreviated month name
          (i.e. "ม.ค.","ก.พ.","มี.ค.","เม.ย.","พ.ค.","มิ.ย.", "ธ.ค.")
        * **%B** - full month name
          (i.e. "มกราคม", "กุมภาพันธ์", "พฤศจิกายน", "ธันวาคม",)
        * **%y** - year without century (i.e. "56", "10")
        * **%Y** - year with century (i.e. "2556", "2410")
        * **%c** - date and time representation
          (i.e. "พ   6 ต.ค. 01:40:00 2519")
        * **%v** - short date representation
          (i.e. " 6-ม.ค.-2562", "27-ก.พ.-2555")

    Other directives will be passed to datetime.strftime()

    A single modifier may be placed between "%" and the directive:
        * **-** - no padding (strip one leading space or zero)
        * **_** - pad with a space instead of a zero
        * **0** - pad with a zero instead of a space
        * **^** - convert to upper case
        * **#** - swap case
        * **E** - locale's alternative representation (not implemented,
          the output is left unchanged)
        * **O** - use Thai digits for this directive

    :Note:
        * The Thai Buddhist Era (BE) year is simply converted from AD
          by adding 543. This is certainly not accurate for years
          before 1941 AD, due to the change in Thai New Year's Day.
        * This is meant to be an interim solution, since
          Python standard's locale module (which relies on C's strftime())
          does not support "th" or "th_TH" locale yet. If supported,
          we can just locale.setlocale(locale.LC_TIME, "th_TH")
          and then use native datetime.strftime().

    We are trying to make this platform-independent and support as many
    extensions as possible. See these links for strftime() extensions
    in POSIX, BSD, and GNU libc:

        * Python
          https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
        * C http://www.cplusplus.com/reference/ctime/strftime/
        * GNU https://metacpan.org/pod/POSIX::strftime::GNU
        * Linux https://linux.die.net/man/3/strftime
        * OpenBSD https://man.openbsd.org/strftime.3
        * FreeBSD https://www.unix.com/man-page/FreeBSD/3/strftime/
        * macOS
          https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/strftime.3.html
        * PHP https://secure.php.net/manual/en/function.strftime.php
        * JavaScript's implementation https://github.com/samsonjs/strftime
        * strftime() quick reference http://www.strftime.net/

    :param datetime dt_obj: an instantiated object of
                            :mod:`datetime.datetime`
    :param str fmt: string containing date and time directives
    :param bool thaidigit: If `thaidigit` is set to **False** (default),
                           number will be represented in Arabic digit.
                           If it is set to **True**, it will be represented
                           in Thai digit.

    :return: Date and time text, with month in Thai name and year in
             Thai Buddhist era. The year is simply converted from AD
             by adding 543 (will not be accurate for years before 1941 AD,
             due to change in Thai New Year's Day).
    :rtype: str

    :Example:
    ::

        from datetime import datetime
        from pythainlp.util import thai_strftime

        datetime_obj = datetime(year=2019, month=6, day=9, \\
            hour=5, minute=59, second=0, microsecond=0)

        print(datetime_obj)
        # output: 2019-06-09 05:59:00

        thai_strftime(datetime_obj, "%A %d %B %Y")
        # output: 'วันอาทิตย์ 09 มิถุนายน 2562'

        thai_strftime(datetime_obj, "%a %-d %b %y")  # no padding
        # output: 'อา 9 มิ.ย. 62'

        thai_strftime(datetime_obj, "%a %_d %b %y")  # space padding
        # output: 'อา  9 มิ.ย. 62'

        thai_strftime(datetime_obj, "%a %0d %b %y")  # zero padding
        # output: 'อา 09 มิ.ย. 62'

        thai_strftime(datetime_obj, "%-H นาฬิกา %-M นาที", thaidigit=True)
        # output: '๕ นาฬิกา ๕๙ นาที'

        thai_strftime(datetime_obj, "%D (%v)")
        # output: '06/09/62 ( 9-มิ.ย.-2562)'

        thai_strftime(datetime_obj, "%c")
        # output: 'อา  9 มิ.ย. 05:59:00 2562'

        thai_strftime(datetime_obj, "%H:%M %p")
        # output: '05:59 AM'

        thai_strftime(datetime_obj, "%H:%M %#p")
        # output: '05:59 am'
    """
    thaidate_parts = []

    i = 0
    fmt_len = len(fmt)
    while i < fmt_len:
        str_ = ""
        if fmt[i] == "%":
            j = i + 1
            if j < fmt_len:
                fmt_char = fmt[j]
                if fmt_char in _NEED_L10N:  # requires localization?
                    str_ = _thai_strftime(dt_obj, fmt_char)
                elif fmt_char in _EXTENSIONS:
                    fmt_char_ext = fmt_char
                    k = j + 1
                    if k < fmt_len:
                        fmt_char = fmt[k]
                        if fmt_char in _NEED_L10N:
                            str_ = _thai_strftime(dt_obj, fmt_char)
                        else:
                            str_ = _std_strftime(dt_obj, fmt_char)

                        # A directive may legitimately expand to an empty
                        # string (e.g. "%Z" on a naive datetime), so the
                        # padding checks use startswith() rather than
                        # indexing str_[0], which would raise IndexError.
                        if fmt_char_ext == "-":
                            # GNU libc extension,
                            # no padding
                            if str_.startswith((" ", "0")):
                                str_ = str_[1:]
                        elif fmt_char_ext == "_":
                            # GNU libc extension,
                            # explicitly specify space (" ") for padding
                            if str_.startswith("0"):
                                str_ = " " + str_[1:]
                        elif fmt_char_ext == "0":
                            # GNU libc extension,
                            # explicitly specify zero ("0") for padding
                            if str_.startswith(" "):
                                str_ = "0" + str_[1:]
                        elif fmt_char_ext == "^":
                            # GNU libc extension,
                            # convert to upper case
                            str_ = str_.upper()
                        elif fmt_char_ext == "#":
                            # GNU libc extension,
                            # swap case - useful for %Z
                            str_ = str_.swapcase()
                        elif fmt_char_ext == "E":
                            # POSIX extension,
                            # uses the locale's alternative representation
                            # Not implemented yet
                            pass
                        elif fmt_char_ext == "O":
                            # POSIX extension,
                            # uses the locale's alternative numeric symbols
                            str_ = str_.translate(_HA_TH_DIGITS)
                        i = i + 1  # consume char after format char
                    else:
                        # format char at string's end has no meaning
                        str_ = fmt_char_ext
                else:  # not in _NEED_L10N nor _EXTENSIONS
                    # no known localization available, use Python's default
                    str_ = _std_strftime(dt_obj, fmt_char)

                i = i + 1  # consume char after "%"
            else:
                # % char at string's end has no meaning
                str_ = "%"
        else:
            str_ = fmt[i]

        thaidate_parts.append(str_)
        i = i + 1

    thaidate_text = "".join(thaidate_parts)

    if thaidigit:
        thaidate_text = thaidate_text.translate(_HA_TH_DIGITS)

    return thaidate_text
413-
414-
41587
def now_reign_year() -> int:
41688
"""
41789
Return the reign year of the 10th King of Chakri dynasty.

0 commit comments

Comments
 (0)