@@ -9,31 +9,31 @@ def _doc2features(doc, i):
99 word = doc [i ][0 ]
1010 postag = doc [i ][1 ]
1111 # Features from current word
12- features = {
12+ features = {
1313 'word.word' : word ,
14- 'word.isspace' :word .isspace (),
15- 'postag' :postag ,
14+ 'word.isspace' : word .isspace (),
15+ 'postag' : postag ,
1616 'word.isdigit()' : word .isdigit ()
1717 }
1818 if i > 0 :
1919 prevword = doc [i - 1 ][0 ]
2020 postag1 = doc [i - 1 ][1 ]
2121 features ['word.prevword' ] = prevword
22- features ['word.previsspace' ]= prevword .isspace ()
22+ features ['word.previsspace' ] = prevword .isspace ()
2323 features ['word.prepostag' ] = postag1
2424 features ['word.prevwordisdigit' ] = prevword .isdigit ()
2525 else :
26- features ['BOS' ] = True # Special "Beginning of Sequence" tag
26+ features ['BOS' ] = True # Special "Beginning of Sequence" tag
2727 # Features from next word
2828 if i < len (doc )- 1 :
2929 nextword = doc [i + 1 ][0 ]
3030 postag1 = doc [i + 1 ][1 ]
3131 features ['word.nextword' ] = nextword
32- features ['word.nextisspace' ]= nextword .isspace ()
32+ features ['word.nextisspace' ] = nextword .isspace ()
3333 features ['word.nextpostag' ] = postag1
3434 features ['word.nextwordisdigit' ] = nextword .isdigit ()
3535 else :
36- features ['EOS' ] = True # Special "End of Sequence" tag
36+ features ['EOS' ] = True # Special "End of Sequence" tag
3737 return features
3838
3939
@@ -53,12 +53,12 @@ def clause_tokenize(doc: List[str]):
5353 _list_cls = []
5454 _temp = []
5555 _len_doc = len (doc ) - 1
56- for i ,item in enumerate (_tag ):
57- w ,t = item
56+ for i , item in enumerate (_tag ):
57+ w , t = item
5858 if t == "E_CLS" or i == _len_doc :
5959 _temp .append (w )
6060 _list_cls .append (_temp )
6161 _temp = []
6262 else :
6363 _temp .append (w )
64- return _list_cls
64+ return _list_cls
0 commit comments