From 8e2322b1ed221c04c201b4291307f2291ca8dd26 Mon Sep 17 00:00:00 2001 From: Yoyo Date: Sat, 24 Jan 2015 16:06:05 +0000 Subject: [PATCH 1/5] Fix up parsing to match up with small site redesign and update a couple of providers. --- Contents/Code/Parsing.py | 5 +- Contents/Code/__init__.py | 25 ++- Contents/Services/ServiceInfo.plist | 2 +- .../Services/URL/HugeFiles/ServiceCode.pys | 178 +++++++++++++++++- .../Services/URL/IceFilms/ServiceCode.pys | 29 +-- Contents/Services/URL/MovReel/ServiceCode.pys | 137 +++++++++++++- 6 files changed, 357 insertions(+), 19 deletions(-) diff --git a/Contents/Code/Parsing.py b/Contents/Code/Parsing.py index 2accbba..2caef36 100644 --- a/Contents/Code/Parsing.py +++ b/Contents/Code/Parsing.py @@ -177,7 +177,8 @@ def GetSources(url): # Extract out provider name from source. if (providerElem.span): - provider = providerElem.span["title"][len("Hosted By "):] + provider = "".join(map(lambda x: str(x).title() if str(x).isupper() else str(x), providerElem.span.findAll(text=True))) + #Log(provider) else: provider = providerElem.img["title"][len("Hosted By "):] @@ -332,7 +333,7 @@ def GetMediaInfo(url, mediainfo, query_external=False): imdb_id = mediainfo.id else: soup = BeautifulSoup(HTTP.Request(ICEFILMS_URL + url).content) - imdb_link = soup.find('a','iframe')['href'] + imdb_link = soup.find('a','NOiframe')['href'] imdb_id = re.search("(tt\d+)", str(imdb_link)).group() if (query_external): diff --git a/Contents/Code/__init__.py b/Contents/Code/__init__.py index 5948b14..97c6b10 100644 --- a/Contents/Code/__init__.py +++ b/Contents/Code/__init__.py @@ -6,6 +6,7 @@ import copy import sys import base64 +import md5 from datetime import date, datetime, timedelta from dateutil import tz @@ -1246,7 +1247,7 @@ def CaptchaRequiredMenu(mediainfo, source_item, url, parent_name=None, replace_p tagline="This provider requires that you solve this Captcha.", summary="This provider requires that you solve this Captcha.", thumb=PLUGIN_URL + "/proxy?"
+ urllib.urlencode({'url':captcha_img_URL}), - art=mediainfo.background, + art=PLUGIN_URL + "/proxy?" + urllib.urlencode({'url':captcha_img_URL}), ) ) @@ -1301,7 +1302,7 @@ def CaptchaProcessMenu(query, mediainfo, source_item, url, solve_captcha_url, pa return oc # Utility methods for captchas. All requests in the Captcha cycle must come from the same User-Agent -# If just let the clients load the Captcha image, we get different User-Agents. Some us libcurl and +# If just let the clients load the Captcha image, we get different User-Agents. Some use libcurl and # it'd be possible to force a specific user agent using the "url|extraparams" notation, however some # clients use the transcoder which does it's own separate thing and doesn't understand libcurl params. # So, instead, we rewrite the Captcha's image URL to pass through this, so we can forcibly set @@ -1312,8 +1313,26 @@ def CaptchaProcessMenu(query, mediainfo, source_item, url, solve_captcha_url, pa def Proxy(url): #Log(url) - return HTTP.Request(url,headers={'User-Agent':USER_AGENT}).content + key = "CAPTCHA-" + md5.new(url).hexdigest() + + #Log("WAITING " + key); + Thread.AcquireLock(key) + try: + if (not Data.Exists(key)): + #Log("REQUESTING CAPTCHA") + captcha = HTTP.Request(url,headers={'User-Agent':USER_AGENT}, cacheTime=10).content + #Log("SAVING CAPTCHA") + Data.Save(key, captcha) + #Log("SLEEPING") + time.sleep(10) + except Exception, ex: + pass + + #Log("UNBLOCKING " + key); + Thread.ReleaseLock(key) + + return Data.Load(key) #################################################################################################### def SearchResultsMenu(query, type, parent_name=None): diff --git a/Contents/Services/ServiceInfo.plist b/Contents/Services/ServiceInfo.plist index 37d2c48..e534de1 100644 --- a/Contents/Services/ServiceInfo.plist +++ b/Contents/Services/ServiceInfo.plist @@ -1 +1 @@ - URL IceFilms URLPatterns (external|captcha)://icefilms\.info/\d+/\d+ play://icefilms\.info/ 
providerinfo://icefilms/.* http://providerinfo.icefilms/.* ShareBees URLPatterns http://(www\.)?sharebees.com providerinfo://sharebees/.*(icefilms) RapidShare URLPatterns https?://(www\.)?rapidshare.com providerinfo://rapidshare/.*(icefilms) BillionUploads URLPatterns http://(www\.)?billionuploads.com providerinfo://billionuploads/.*(icefilms) 2Shared URLPatterns http://(www\.)?2shared.com providerinfo://2shared/.*(icefilms) 180Upload URLPatterns (captcha|solve|play)://(www\.)?(180Upload.com|epicshare.net)/ providerinfo://(www\.)?(180upload(\.com)?|epicshare(\.net)?)/.*(icefilms) MegaRelease URLPatterns (captcha|solve|play)://(www\.)?(megarelease\.org|lemuploads\.com)/ providerinfo://(www\.)?(megarelease(\.org)?|lemuploads(\.com)?)/.*(icefilms) MovReel URLPatterns http://(www\.)?movreel.com providerinfo://movreel/.*(icefilms) VidHog URLPatterns http://(www\.)?(vidhog)\.(com|net)/[\d\w]{8,} providerinfo://vidhog/.*(icefilms) HugeFiles URLPatterns http://(www\.)?hugefiles\.net providerinfo://hugefiles/.*(icefilms) \ No newline at end of file + URL IceFilms URLPatterns (external|captcha)://icefilms\.info/\d+/\d+ play://icefilms\.info/ providerinfo://icefilms/.* http://providerinfo.icefilms/.* ShareBees URLPatterns http://(www\.)?sharebees.com providerinfo://sharebees/.*(icefilms) RapidShare URLPatterns https?://(www\.)?rapidshare.com providerinfo://rapidshare/.*(icefilms) BillionUploads URLPatterns http://(www\.)?billionuploads.com providerinfo://billionuploads/.*(icefilms) 2Shared URLPatterns http://(www\.)?2shared.com providerinfo://2shared/.*(icefilms) 180Upload URLPatterns (captcha|solve|play)://(www\.)?(180Upload.com|epicshare.net)/ providerinfo://(www\.)?(180upload(\.com)?|epicshare(\.net)?)/.*(icefilms) MegaRelease URLPatterns (captcha|solve|play)://(www\.)?(megarelease\.org|lemuploads\.com)/ providerinfo://(www\.)?(megarelease(\.org)?|lemuploads(\.com)?)/.*(icefilms) MovReel URLPatterns http://(www\.)?movreel.com providerinfo://movreel/.*(icefilms) VidHog 
URLPatterns http://(www\.)?(vidhog)\.(com|net)/[\d\w]{8,} providerinfo://vidhog/.*(icefilms) HugeFiles URLPatterns (captcha|solve|play)://(www\.)?hugefiles\.net providerinfo://hugefiles/.*(icefilms) \ No newline at end of file diff --git a/Contents/Services/URL/HugeFiles/ServiceCode.pys b/Contents/Services/URL/HugeFiles/ServiceCode.pys index e17c09b..a610f9d 100644 --- a/Contents/Services/URL/HugeFiles/ServiceCode.pys +++ b/Contents/Services/URL/HugeFiles/ServiceCode.pys @@ -1 +1,177 @@ -import re, string from BeautifulSoup import BeautifulSoup USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22' def NormalizeURL(url): #Log("*********** In HugeFiles NormalizeURL") # Deal with special providerInfo URL built up by IceFilms plugin to return # info about this provider. For all other normal URLs, do nothing. if ("providerinfo" in url): try: show = Prefs["show_hugefiles"] except Exception, ex: show = True if (show): return url + "&visible=true" else: return url else: return url def MetadataObjectForURL(url): #Log('In MetadataObjectForURL for HugeFiles (' + url + ')') video = VideoClipObject( title = 'HugeFiles Redirect Page', summary = 'HugeFiles Redirect Page', thumb = None, ) return video def MediaObjectsForURL(url): Log("------------------------------------------------------------------------------") Log("In MediaObjectsForURL for HugeFiles. (%s)" % url) ret = [] ret.append( MediaObject( parts = [PartObject(key=Callback(PlayVideo, url=url))], ) ) return ret @indirect def PlayVideo(url): # Request initial page... #Log("------------------------------------------------------------------------------") #Log("Requesting: " + url) soup = BeautifulSoup(HTTP.Request(url).content) #Log(str(soup)) # Deal with initial page. # Extract out these form elements... 
formElems = ['op', 'id', 'fname', 'method_free', 'referer', 'usr_login'] params = {} for formElem in formElems: formElemVal = soup.find('input', {'name' : formElem })['value'] params[formElem] = formElemVal # Submit form with extracted elements. #Log("****************************************************************************") #Log("Requesting: " + url) #Log("Params: " + str(params)) soup = BeautifulSoup(HTTP.Request(url, values=params).content) #Log(str(soup)) # Unpack player code and turn it into soup. player_code = Unpack(str(soup.find('div',{ 'id': 'player_code' }))) if ('new SWFObject' in player_code): final_url = re.search("\\\\'file\\\\',\\\\'([^\\\\]*)",player_code).group(1) else: soup = BeautifulSoup(re.search('(<.*>)', player_code).group(1)) final_url = soup.find('param',{'name': 'src'})['value'] Log("final url: " + final_url) return ObjectContainer( objects = [ VideoClipObject( items = [ MediaObject( parts = [PartObject(key=final_url)], ) ] ) ] ) return ret def LogProviderError(msg="", ex=None): Log("************************** PROVIDER ERROR: " + msg) raise Exception(msg) return [] def Unpack(script): if script is None: return #Log(script) # Look for string to unpack. val_to_unpack = re.search("return p}\('(.*)',\d*,\d*,'", script) if (val_to_unpack is None or val_to_unpack.group(1) is None): return None # Look for substitution values. sub_vals = re.search("\d{2},'([^']*)'.split", script) if (sub_vals is None): return None val_to_unpack = val_to_unpack.group(1) sub_vals = sub_vals.group(1).split('|') #Log(val_to_unpack) #Log(sub_vals) # Create dict to map url sub keys to sub values. 
alphadict = dict() for index_cnt in range(0, 2): index = index_cnt * len(string.digits + string.ascii_lowercase) strindex = str(index_cnt) if index_cnt > 0 else "" for cnt in range(0, len(string.digits + string.ascii_lowercase)): alphadict[strindex + (string.digits + string.ascii_lowercase)[cnt]] = cnt + index def SubElem(matchObj): val = sub_vals[alphadict[matchObj.group(0)]] if (val == ""): val = matchObj.group(0) return val # Sub values into string to unpack return re.sub("[0-9a-z]{1,2}", SubElem, val_to_unpack) \ No newline at end of file +import re, string +import urllib, urlparse, cgi + +from BeautifulSoup import BeautifulSoup + +USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22' + +def NormalizeURL(url): + + #Log("*********** In HugeFiles NormalizeURL") + + # Deal with special providerInfo URL built up by IceFilms plugin to return + # info about this provider. For all other normal URLs, do nothing. + if ("providerinfo" in url): + + try: + show = Prefs["show_hugefiles"] + except Exception, ex: + show = True + + if (show): + return url + "&visible=true&captcha=true" + else: + return url + + else: + return url + +def MetadataObjectForURL(url): + + #Log('In MetadataObjectForURL for HugeFiles (' + url + ')') + + video = VideoClipObject( + title = 'HugeFiles Redirect Page', + summary = 'HugeFiles Redirect Page', + thumb = None, + ) + + return video + +def MediaObjectsForURL(url): + + #Log(url) + + # Somebody knows we're a complicated beast! They've asked us to return the url of the Captcha + # image as well as a URL to submit to get past the captcha. 
+ if ("captcha://" in url): + + url = url.replace("captcha://", "http://") + + html = HTTP.Request(url, cacheTime=0, headers={'User-Agent':USER_AGENT}).content + soup = BeautifulSoup(html); + + data = {} + r = soup.findAll('input', { 'type': "hidden", 'name': re.compile(".+"), 'value': re.compile(".*") }) + r = r + soup.findAll('input', { 'type': "submit" }) + + if r: + for node in r: + #Log(node) + data[node['name']] = node['value'] + else: + raise Exception('Unable to resolve Link') + + #Check for SolveMedia Captcha image + solvemedia = re.search('