|
1 | 1 | # -*- coding: utf-8 -*- |
2 | 2 | """ |
3 | | -Thai date/time conversion and formatting. |
| 3 | +Thai date/time conversion. |
4 | 4 |
|
5 | 5 | Note: Does not take into account the change of new year's day in Thailand |
6 | 6 | """ |
|
15 | 15 | "thai_abbr_weekdays", |
16 | 16 | "thai_full_months", |
17 | 17 | "thai_full_weekdays", |
18 | | - "thai_strftime", |
19 | 18 | "thaiword_to_date", |
20 | 19 | ] |
21 | 20 |
|
22 | | -import warnings |
23 | 21 | from datetime import datetime, timedelta |
24 | 22 | from typing import Union |
25 | 23 |
|
|
63 | 61 | "ธันวาคม", |
64 | 62 | ] |
65 | 63 |
|
66 | | -_HA_DIGITS = "0123456789" |
67 | | -_TH_DIGITS = "๐๑๒๓๔๕๖๗๘๙" |
68 | | -_HA_TH_DIGITS = str.maketrans(_HA_DIGITS, _TH_DIGITS) |
69 | | - |
70 | | - |
71 | | -_NEED_L10N = "AaBbCcDFGgvXxYy+" # flags that need localization |
72 | | -_EXTENSIONS = "EO-_0^#" # extension flags |
73 | | - |
74 | | -_BE_AD_DIFFERENCE = 543 |
75 | | - |
76 | 64 | _DAY = { |
77 | 65 | "วันนี้": 0, |
78 | 66 | "คืนนี้": 0, |
|
96 | 84 | } |
97 | 85 |
|
98 | 86 |
|
99 | | -""" def _padding(n: int, length: int = 2, pad_char: str = "0") -> str: |
100 | | - str_ = str(n) |
101 | | -
|
102 | | - pad_len = abs(length - len(str_)) |
103 | | -
|
104 | | - return (pad_char * pad_len) + str_ |
105 | | - """ |
106 | | - |
107 | | - |
108 | | -def _std_strftime(dt_obj: datetime, fmt_char: str) -> str: |
109 | | - """ |
110 | | - Standard datetime.strftime() with normalization and exception handling. |
111 | | - """ |
112 | | - str_ = "" |
113 | | - try: |
114 | | - str_ = dt_obj.strftime(f"%{fmt_char}") |
115 | | - if str_ == f"%{fmt_char}": |
116 | | - # normalize outputs for unsupported directives |
117 | | - # in different platforms |
118 | | - # unsupported "%Q" in platform A may return "Q" |
119 | | - # unsupported "%Q" in platform B may return "%Q" |
120 | | - str_ = fmt_char |
121 | | - except ValueError as err: |
122 | | - # Unsupported directives may raise ValueError on Windows, |
123 | | - # in that case just use the fmt_char |
124 | | - warnings.warn( |
125 | | - ( |
126 | | - f"String format directive unknown/not support: %{fmt_char}" |
127 | | - f"The system raises this ValueError: {err}" |
128 | | - ), |
129 | | - UserWarning, |
130 | | - ) |
131 | | - str_ = fmt_char |
132 | | - return str_ |
133 | | - |
134 | | - |
135 | | -def _thai_strftime(dt_obj: datetime, fmt_char: str) -> str: |
136 | | - """ |
137 | | - Conversion support for thai_strftime(). |
138 | | -
|
139 | | - The fmt_char should be in _NEED_L10N when call this function. |
140 | | - """ |
141 | | - str_ = "" |
142 | | - if fmt_char == "A": |
143 | | - # National representation of the full weekday name |
144 | | - str_ = thai_full_weekdays[dt_obj.weekday()] |
145 | | - elif fmt_char == "a": |
146 | | - # National representation of the abbreviated weekday |
147 | | - str_ = thai_abbr_weekdays[dt_obj.weekday()] |
148 | | - elif fmt_char == "B": |
149 | | - # National representation of the full month name |
150 | | - str_ = thai_full_months[dt_obj.month - 1] |
151 | | - elif fmt_char == "b": |
152 | | - # National representation of the abbreviated month name |
153 | | - str_ = thai_abbr_months[dt_obj.month - 1] |
154 | | - elif fmt_char == "C": |
155 | | - # Thai Buddhist century (AD+543)/100 + 1 as decimal number; |
156 | | - str_ = str(int((dt_obj.year + _BE_AD_DIFFERENCE) / 100) + 1).zfill(2) |
157 | | - elif fmt_char == "c": |
158 | | - # Locale’s appropriate date and time representation |
159 | | - # Wed 6 Oct 01:40:00 1976 |
160 | | - # พ 6 ต.ค. 01:40:00 2519 <-- left-aligned weekday, right-aligned day |
161 | | - str_ = "{:<2} {:>2} {} {} {}".format( |
162 | | - thai_abbr_weekdays[dt_obj.weekday()], |
163 | | - dt_obj.day, |
164 | | - thai_abbr_months[dt_obj.month - 1], |
165 | | - dt_obj.strftime("%H:%M:%S"), |
166 | | - str(dt_obj.year + _BE_AD_DIFFERENCE).zfill(4), |
167 | | - ) |
168 | | - elif fmt_char == "D": |
169 | | - # Equivalent to ``%m/%d/%y'' |
170 | | - str_ = "{}/{}".format( |
171 | | - dt_obj.strftime("%m/%d"), |
172 | | - (str(dt_obj.year + _BE_AD_DIFFERENCE)[-2:]).zfill(2), |
173 | | - ) |
174 | | - elif fmt_char == "F": |
175 | | - # Equivalent to ``%Y-%m-%d'' |
176 | | - str_ = "{}-{}".format( |
177 | | - str(dt_obj.year + _BE_AD_DIFFERENCE).zfill(4), |
178 | | - dt_obj.strftime("%m-%d"), |
179 | | - ) |
180 | | - elif fmt_char == "G": |
181 | | - # ISO 8601 year with century representing the year that contains the |
182 | | - # greater part of the ISO week (%V). Monday as the first day of the week. |
183 | | - str_ = str(int(dt_obj.strftime("%G")) + _BE_AD_DIFFERENCE).zfill(4) |
184 | | - elif fmt_char == "g": |
185 | | - # Same year as in ``%G'', but as a decimal number without century (00-99). |
186 | | - str_ = ( |
187 | | - str(int(dt_obj.strftime("%G")) + _BE_AD_DIFFERENCE)[-2:] |
188 | | - ).zfill(2) |
189 | | - elif fmt_char == "v": |
190 | | - # BSD extension, ' 6-Oct-1976' |
191 | | - str_ = "{:>2}-{}-{}".format( |
192 | | - dt_obj.day, |
193 | | - thai_abbr_months[dt_obj.month - 1], |
194 | | - str(dt_obj.year + _BE_AD_DIFFERENCE).zfill(4), |
195 | | - ) |
196 | | - elif fmt_char == "X": |
197 | | - # Locale’s appropriate time representation. |
198 | | - str_ = dt_obj.strftime("%H:%M:%S") |
199 | | - elif fmt_char == "x": |
200 | | - # Locale’s appropriate date representation. |
201 | | - str_ = "{}/{}/{}".format( |
202 | | - str(dt_obj.day).zfill(2), |
203 | | - str(dt_obj.month).zfill(2), |
204 | | - str(dt_obj.year + _BE_AD_DIFFERENCE).zfill(4), |
205 | | - ) |
206 | | - elif fmt_char == "Y": |
207 | | - # Year with century |
208 | | - str_ = (str(dt_obj.year + _BE_AD_DIFFERENCE)).zfill(4) |
209 | | - elif fmt_char == "y": |
210 | | - # Year without century |
211 | | - str_ = (str(dt_obj.year + _BE_AD_DIFFERENCE)[-2:]).zfill(2) |
212 | | - elif fmt_char == "+": |
213 | | - # National representation of the date and time |
214 | | - # (the format is similar to that produced by date(1)) |
215 | | - # Wed 6 Oct 1976 01:40:00 |
216 | | - str_ = "{:<2} {:>2} {} {} {}".format( |
217 | | - thai_abbr_weekdays[dt_obj.weekday()], |
218 | | - dt_obj.day, |
219 | | - thai_abbr_months[dt_obj.month - 1], |
220 | | - dt_obj.year + _BE_AD_DIFFERENCE, |
221 | | - dt_obj.strftime("%H:%M:%S"), |
222 | | - ) |
223 | | - else: |
224 | | - # No known localization available, use Python's default |
225 | | - str_ = _std_strftime(dt_obj, fmt_char) |
226 | | - |
227 | | - return str_ |
228 | | - |
229 | | - |
230 | | -def thai_strftime( |
231 | | - dt_obj: datetime, fmt: str = "%-d %b %y", thaidigit: bool = False, |
232 | | -) -> str: |
233 | | - """ |
234 | | - Convert :class:`datetime.datetime` into Thai date and time format. |
235 | | -
|
236 | | - The formatting directives are similar to :func:`datetime.strftime`. |
237 | | -
|
238 | | - This function uses Thai names and Thai Buddhist Era for these directives: |
239 | | - * **%a** - abbreviated weekday name |
240 | | - (i.e. "จ", "อ", "พ", "พฤ", "ศ", "ส", "อา") |
241 | | - * **%A** - full weekday name |
242 | | - (i.e. "วันจันทร์", "วันอังคาร", "วันเสาร์", "วันอาทิตย์") |
243 | | - * **%b** - abbreviated month name |
244 | | - (i.e. "ม.ค.","ก.พ.","มี.ค.","เม.ย.","พ.ค.","มิ.ย.", "ธ.ค.") |
245 | | - * **%B** - full month name |
246 | | - (i.e. "มกราคม", "กุมภาพันธ์", "พฤศจิกายน", "ธันวาคม",) |
247 | | - * **%y** - year without century (i.e. "56", "10") |
248 | | - * **%Y** - year with century (i.e. "2556", "2410") |
249 | | - * **%c** - date and time representation |
250 | | - (i.e. "พ 6 ต.ค. 01:40:00 2519") |
251 | | - * **%v** - short date representation |
252 | | - (i.e. " 6-ม.ค.-2562", "27-ก.พ.-2555") |
253 | | -
|
254 | | - Other directives will be passed to datetime.strftime() |
255 | | -
|
256 | | - :Note: |
257 | | - * The Thai Buddhist Era (BE) year is simply converted from AD |
258 | | - by adding 543. This is certainly not accurate for years |
259 | | - before 1941 AD, due to the change in Thai New Year's Day. |
260 | | - * This is meant to be an interim solution, since |
261 | | - Python standard's locale module (which relied on C's strftime()) |
262 | | - does not support "th" or "th_TH" locale yet. If supported, |
263 | | - we can just locale.setlocale(locale.LC_TIME, "th_TH") |
264 | | - and then use native datetime.strftime(). |
265 | | -
|
266 | | - We are trying to make this platform-independent and to support as |
267 | | - many extensions as possible. See these links for strftime() extensions |
268 | | - in POSIX, BSD, and GNU libc: |
269 | | -
|
270 | | - * Python |
271 | | - https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior |
272 | | - * C http://www.cplusplus.com/reference/ctime/strftime/ |
273 | | - * GNU https://metacpan.org/pod/POSIX::strftime::GNU |
274 | | - * Linux https://linux.die.net/man/3/strftime |
275 | | - * OpenBSD https://man.openbsd.org/strftime.3 |
276 | | - * FreeBSD https://www.unix.com/man-page/FreeBSD/3/strftime/ |
277 | | - * macOS |
278 | | - https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/strftime.3.html |
279 | | - * PHP https://secure.php.net/manual/en/function.strftime.php |
280 | | - * JavaScript's implementation https://github.com/samsonjs/strftime |
281 | | - * strftime() quick reference http://www.strftime.net/ |
282 | | -
|
283 | | - :param datetime dt_obj: an instantiated object of |
284 | | - :mod:`datetime.datetime` |
285 | | - :param str fmt: string containing date and time directives |
286 | | - :param bool thaidigit: If `thaidigit` is set to **False** (default), |
287 | | - number will be represented in Arabic digit. |
288 | | - If it is set to **True**, it will be represented |
289 | | - in Thai digit. |
290 | | -
|
291 | | - :return: Date and time text, with month in Thai name and year in |
292 | | - Thai Buddhist era. The year is simply converted from AD |
293 | | - by adding 543 (will not be accurate for years before 1941 AD, |
294 | | - due to change in Thai New Year's Day). |
295 | | - :rtype: str |
296 | | -
|
297 | | - :Example: |
298 | | - :: |
299 | | -
|
300 | | - from datetime import datetime |
301 | | - from pythainlp.util import thai_strftime |
302 | | -
|
303 | | - datetime_obj = datetime(year=2019, month=6, day=9, \\ |
304 | | - hour=5, minute=59, second=0, microsecond=0) |
305 | | -
|
306 | | - print(datetime_obj) |
307 | | - # output: 2019-06-09 05:59:00 |
308 | | -
|
309 | | - thai_strftime(datetime_obj, "%A %d %B %Y") |
310 | | - # output: 'วันอาทิตย์ 09 มิถุนายน 2562' |
311 | | -
|
312 | | - thai_strftime(datetime_obj, "%a %-d %b %y") # no padding |
313 | | - # output: 'อา 9 มิ.ย. 62' |
314 | | -
|
315 | | - thai_strftime(datetime_obj, "%a %_d %b %y") # space padding |
316 | | - # output: 'อา 9 มิ.ย. 62' |
317 | | -
|
318 | | - thai_strftime(datetime_obj, "%a %0d %b %y") # zero padding |
319 | | - # output: 'อา 09 มิ.ย. 62' |
320 | | -
|
321 | | - thai_strftime(datetime_obj, "%-H นาฬิกา %-M นาที", thaidigit=True) |
322 | | - # output: '๕ นาฬิกา ๕๙ นาที' |
323 | | -
|
324 | | - thai_strftime(datetime_obj, "%D (%v)") |
325 | | - # output: '06/09/62 ( 9-มิ.ย.-2562)' |
326 | | -
|
327 | | - thai_strftime(datetime_obj, "%c") |
328 | | - # output: 'อา 9 มิ.ย. 05:59:00 2562' |
329 | | -
|
330 | | - thai_strftime(datetime_obj, "%H:%M %p") |
331 | | - # output: '05:59 AM' |
332 | | -
|
333 | | - thai_strftime(datetime_obj, "%H:%M %#p") |
334 | | - # output: '05:59 am' |
335 | | - """ |
336 | | - thaidate_parts = [] |
337 | | - |
338 | | - i = 0 |
339 | | - fmt_len = len(fmt) |
340 | | - while i < fmt_len: |
341 | | - str_ = "" |
342 | | - if fmt[i] == "%": |
343 | | - j = i + 1 |
344 | | - if j < fmt_len: |
345 | | - fmt_char = fmt[j] |
346 | | - if fmt_char in _NEED_L10N: # requires localization? |
347 | | - str_ = _thai_strftime(dt_obj, fmt_char) |
348 | | - elif fmt_char in _EXTENSIONS: |
349 | | - fmt_char_ext = fmt_char |
350 | | - k = j + 1 |
351 | | - if k < fmt_len: |
352 | | - fmt_char = fmt[k] |
353 | | - if fmt_char in _NEED_L10N: |
354 | | - str_ = _thai_strftime(dt_obj, fmt_char) |
355 | | - else: |
356 | | - str_ = _std_strftime(dt_obj, fmt_char) |
357 | | - if fmt_char_ext == "-": |
358 | | - # GNU libc extension, |
359 | | - # no padding |
360 | | - if str_[0] and str_[0] in " 0": |
361 | | - str_ = str_[1:] |
362 | | - elif fmt_char_ext == "_": |
363 | | - # GNU libc extension, |
364 | | - # explicitly specify space (" ") for padding |
365 | | - if str_[0] and str_[0] == "0": |
366 | | - str_ = " " + str_[1:] |
367 | | - elif fmt_char_ext == "0": |
368 | | - # GNU libc extension, |
369 | | - # explicitly specify zero ("0") for padding |
370 | | - if str_[0] and str_[0] == " ": |
371 | | - str_ = "0" + str_[1:] |
372 | | - elif fmt_char_ext == "^": |
373 | | - # GNU libc extension, |
374 | | - # convert to upper case |
375 | | - str_ = str_.upper() |
376 | | - elif fmt_char_ext == "#": |
377 | | - # GNU libc extension, |
378 | | - # swap case - useful for %Z |
379 | | - str_ = str_.swapcase() |
380 | | - elif fmt_char_ext == "E": |
381 | | - # POSIX extension, |
382 | | - # uses the locale's alternative representation |
383 | | - # Not implemented yet |
384 | | - pass |
385 | | - elif fmt_char_ext == "O": |
386 | | - # POSIX extension, |
387 | | - # uses the locale's alternative numeric symbols |
388 | | - str_ = str_.translate(_HA_TH_DIGITS) |
389 | | - i = i + 1 # consume char after format char |
390 | | - else: |
391 | | - # format char at string's end has no meaning |
392 | | - str_ = fmt_char_ext |
393 | | - else: # not in _NEED_L10N nor _EXTENSIONS |
394 | | - # no known localization available, use Python's default |
395 | | - str_ = _std_strftime(dt_obj, fmt_char) |
396 | | - |
397 | | - i = i + 1 # consume char after "%" |
398 | | - else: |
399 | | - # % char at string's end has no meaning |
400 | | - str_ = "%" |
401 | | - else: |
402 | | - str_ = fmt[i] |
403 | | - |
404 | | - thaidate_parts.append(str_) |
405 | | - i = i + 1 |
406 | | - |
407 | | - thaidate_text = "".join(thaidate_parts) |
408 | | - |
409 | | - if thaidigit: |
410 | | - thaidate_text = thaidate_text.translate(_HA_TH_DIGITS) |
411 | | - |
412 | | - return thaidate_text |
413 | | - |
414 | | - |
415 | 87 | def now_reign_year() -> int: |
416 | 88 | """ |
417 | 89 | Return the reign year of the 10th King of Chakri dynasty. |
|
0 commit comments