@@ -90,7 +90,7 @@ def __init__(self):
9090 )
9191
9292 def get_ner (
93- self , text : str , pos : bool = True , tag :bool = False
93+ self , text : str , pos : bool = True , tag : bool = False
9494 ) -> Union [List [Tuple [str , str ]], List [Tuple [str , str , str ]]]:
9595 """
9696 This function tags named entities from text in IOB format.
@@ -99,9 +99,9 @@ def get_ner(
9999 :param boolean pos: To include POS tags in the results (`True`) or
100100 exclude (`False`). The default value is `True`
101101 :param boolean tag: output like html tag.
102- :return: a list of tuple associated with tokenized word, NER tag,
102+ :return: a list of tuple associated with tokenized word, NER tag,
103103 POS tag (if the parameter `pos` is specified as `True`),
104- and output like html tag (if the parameter `tag` is
104+ and output like html tag (if the parameter `tag` is
105105 specified as `True`).
106106 Otherwise, return a list of tuple associated with tokenized
107107 word and NER tag
@@ -128,8 +128,8 @@ def get_ner(
128128 ('49', 'NUM', 'I-TIME'), (' ', 'PUNCT', 'I-TIME'),
129129 ('น.', 'NOUN', 'I-TIME')]
130130 >>>
131- >>> ner.get_ner("วันที่ 15 ก.ย. 61 ทดสอบระบบเวลา 14:49 น.", \\
132- pos=False)
131+ >>> ner.get_ner("วันที่ 15 ก.ย. 61 ทดสอบระบบเวลา 14:49 น.",
132+ pos=False)
133133 [('วันที่', 'O'), (' ', 'O'),
134134 ('15', 'B-DATE'), (' ', 'I-DATE'),
135135 ('ก.ย.', 'I-DATE'), (' ', 'I-DATE'),
@@ -139,7 +139,8 @@ def get_ner(
139139 ('14', 'B-TIME'), (':', 'I-TIME'),
140140 ('49', 'I-TIME'), (' ', 'I-TIME'),
141141 ('น.', 'I-TIME')]
142- >>> ner.get_ner("วันที่ 15 ก.ย. 61 ทดสอบระบบเวลา 14:49 น.",tag=True)
142+ >>> ner.get_ner("วันที่ 15 ก.ย. 61 ทดสอบระบบเวลา 14:49 น.",
143+ tag=True)
143144 'วันที่ <DATE>15 ก.ย. 61</DATE> ทดสอบระบบเวลา <TIME>14:49 น.</TIME>'
144145 """
145146 self .__tokens = word_tokenize (text , engine = _WORD_TOKENIZER )
@@ -149,19 +150,20 @@ def get_ner(
149150 self .__x_test = self .__extract_features (self .__pos_tags )
150151 self .__y = self .crf .predict_single (self .__x_test )
151152
152- self .sent_ner = [(self .__pos_tags [i ][0 ], data ) for i , data in enumerate (self .__y )]
153+ self .sent_ner = [(self .__pos_tags [i ][0 ], data )
154+ for i , data in enumerate (self .__y )]
153155 if tag :
154- self .temp = ""
155- self .sent = ""
156- for idx ,(word ,ner ) in enumerate (self .sent_ner ):
156+ self .temp = ""
157+ self .sent = ""
158+ for idx , (word , ner ) in enumerate (self .sent_ner ):
157159 if "B-" in ner :
158- self .temp = ner .replace ("B-" ,"" )
160+ self .temp = ner .replace ("B-" , "" )
159161 self .sent += "<" + self .temp + ">"
160- elif "O" == ner and self .temp != "" :
161- self .sent += "</" + self .temp + ">"
162- self .temp = ""
162+ elif "O" == ner and self .temp != "" :
163+ self .sent += "</" + self .temp + ">"
164+ self .temp = ""
163165 self .sent += word
164- if idx == len (self .sent_ner )- 1 and self .temp != "" :
166+ if idx == len (self .sent_ner )- 1 and self .temp != "" :
165167 self .sent += "</" + self .temp + ">"
166168 return self .sent
167169 elif pos :
@@ -172,7 +174,6 @@ def get_ner(
172174 else :
173175 return self .sent_ner
174176
175-
176177 @staticmethod
177178 def __extract_features (doc ):
178179 return [_doc2features (doc , i ) for i in range (len (doc ))]
0 commit comments