@@ -36,21 +36,25 @@ def annotate(self, text, properties=None):
3636 pass
3737 return output
3838
def tokensregex(self, text, pattern, filter, properties=None):
    """Run a query against the server's ``/tokensregex`` endpoint.

    This is a thin wrapper: every argument is forwarded unchanged to
    ``self.regex`` together with the fixed endpoint path.

    Args:
        text: Text to send to the server.
        pattern: Value forwarded as the ``pattern`` request parameter.
        filter: Value forwarded as the ``filter`` request parameter.
        properties: Optional properties forwarded to ``self.regex``.

    Returns:
        Whatever ``self.regex`` returns for this request.
    """
    endpoint = '/tokensregex'
    return self.regex(endpoint, text, pattern, filter, properties)
4141
def semgrex(self, text, pattern, filter, properties=None):
    """Run a query against the server's ``/semgrex`` endpoint.

    This is a thin wrapper: every argument is forwarded unchanged to
    ``self.regex`` together with the fixed endpoint path.

    Args:
        text: Text to send to the server.
        pattern: Value forwarded as the ``pattern`` request parameter.
        filter: Value forwarded as the ``filter`` request parameter.
        properties: Optional properties forwarded to ``self.regex``.

    Returns:
        Whatever ``self.regex`` returns for this request.
    """
    endpoint = '/semgrex'
    return self.regex(endpoint, text, pattern, filter, properties)
4444
def regex(self, endpoint, text, pattern, filter, properties=None):
    """Send ``text`` to a pattern-matching endpoint and return the reply.

    Issues a GET request to ``self.server_url + endpoint`` with ``pattern``,
    ``filter`` and the stringified ``properties`` as query parameters and
    the UTF-8 encoded ``text`` as the request body.

    Args:
        endpoint: Path appended to ``self.server_url``.
        text: Text to send; must be a ``str``.
        pattern: Value sent as the ``pattern`` query parameter.
        filter: Value sent as the ``filter`` query parameter.
        properties: Optional mapping; sent as ``str(properties or {})``.

    Returns:
        The decoded JSON document when the response body is valid JSON,
        otherwise the raw response text.

    Raises:
        AssertionError: If ``text`` is not a ``str``.
    """
    # Kept as an assert so the exception type seen by callers is unchanged.
    assert isinstance(text, str), 'text must be a str'
    payload = text.encode('utf-8')
    r = requests.get(
        self.server_url + endpoint,
        params={
            'pattern': pattern,
            'properties': str(properties or {}),
            'filter': filter,
        },
        data=payload,
    )
    # Force UTF-8 so r.text does not depend on requests' charset guess.
    r.encoding = 'utf-8'
    output = r.text
    try:
        # No ``encoding=`` keyword: json.loads() rejects it on Python 3.9+,
        # and the resulting TypeError used to be swallowed, so the JSON was
        # never parsed.
        output = json.loads(output, strict=True)
    except ValueError:
        # Best effort: a non-JSON body is returned as plain text.
        pass
    return output
0 commit comments