From 8999f6f1a291ed008d219c9e2c05232c7389e3e1 Mon Sep 17 00:00:00 2001 From: eneq123 Date: Wed, 8 May 2013 17:56:46 +0400 Subject: [PATCH] moving loadData() and pre-compile regexp on init stage is more efficient --- uasparser/__init__.py | 61 +++++++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/uasparser/__init__.py b/uasparser/__init__.py index 2d247e1..b5b0bda 100644 --- a/uasparser/__init__.py +++ b/uasparser/__init__.py @@ -64,6 +64,9 @@ class UASparser(object): cache_dir = '' cache_data = None update_interval = 3600 * 24 * 10 # 10 days + data = '' + browser_reg = [] + os_reg = [] def __init__(self, cache_dir=None): """ @@ -76,6 +79,25 @@ def __init__(self, cache_dir=None): self.cache_file_name = os.path.join(self.cache_dir, self.cache_file_name) + #Load cache data + self.data = self.loadData() + + for index in self.data['browser_reg']['order']: + bdict = {} + test = self.data['browser_reg'][index] + test_rg = self.toPythonReg(test[0]) + bdict['r'] = test_rg + bdict['id'] = test[1] + self.browser_reg.append(bdict) + + for index in self.data['os_reg']: + bdict = {} + test = self.data['os_reg'][index] + test_rg = self.toPythonReg(test[0]) + bdict['r'] = test_rg + bdict['id'] = test[1] + self.os_reg.append(bdict) + def parse(self, useragent, entire_url=''): """ Get the information of an useragent string @@ -108,22 +130,11 @@ def parse(self, useragent, entire_url=''): if 'os_icon' in entire_url: ret['os_icon'] = self.os_img_url % ret['os_icon'] - def toPythonReg(reg): - reg_l = reg[1:reg.rfind('/')] # modify the re into python format - reg_r = reg[reg.rfind('/') + 1:] - flag = 0 - if 's' in reg_r: - flag = flag | re.S - if 'i' in reg_r: - flag = flag | re.I - return re.compile(reg_l, flag) - #Check argument if not useragent: raise UASException("Excepted argument useragent is not given.") - #Load cache data - data = self.loadData() + data = self.data #Is it a spider? for index in data['robots']['order']: @@ -146,11 +157,10 @@ def toPythonReg(reg): #A browser id_browser = None - for index in data['browser_reg']['order']: - test = data['browser_reg'][index] - test_rg = toPythonReg(test[0]).findall(useragent) # All regular expression should be in python format + for reg in self.browser_reg: + test_rg = reg['r'].findall(useragent) if test_rg: - id_browser = int(test[1]) # Bingo + id_browser = int(reg['id']) # Bingo info = test_rg[0] break @@ -190,11 +200,10 @@ def toPythonReg(reg): # Try to match an OS os_id = None - for index in data['os_reg']: - test = data['os_reg'][index] - test_rg = toPythonReg(test[0]).findall(useragent) + for reg in self.os_reg: + test_rg = reg['r'].findall(useragent) if test_rg: - os_id = int(test[1]) + os_id = int(reg['id']) break # Get OS detail @@ -299,3 +308,15 @@ def loadData(self): self.cache_data = pickle.load(open(self.cache_file_name, 'rb')) return self.cache_data + + def toPythonReg(self,reg): + reg = str(reg) + reg_l = reg[1:reg.rfind('/')] # modify the re into python format + reg_r = reg[reg.rfind('/') + 1:] + flag = 0 + if 's' in reg_r: + flag = flag | re.S + if 'i' in reg_r: + flag = flag | re.I + return re.compile(reg_l, flag) +