diff --git a/Contents/Code/Parsing.py b/Contents/Code/Parsing.py
index 2accbba..2caef36 100644
--- a/Contents/Code/Parsing.py
+++ b/Contents/Code/Parsing.py
@@ -177,7 +177,8 @@ def GetSources(url):
 
             # Extract out provider name from source.
             if (providerElem.span):
-                provider = providerElem.span["title"][len("Hosted By "):]
+                provider = "".join(map(lambda x: str(x).title() if str(x).isupper() else str(x), providerElem.span.findAll(text=True)))
+                #Log(provider)
             else:
                 provider = providerElem.img["title"][len("Hosted By "):]
@@ -332,7 +333,7 @@ def GetMediaInfo(url, mediainfo, query_external=False):
         imdb_id = mediainfo.id
     else:
         soup = BeautifulSoup(HTTP.Request(ICEFILMS_URL + url).content)
-        imdb_link = soup.find('a','iframe')['href']
+        imdb_link = soup.find('a','NOiframe')['href']
         imdb_id = re.search("(tt\d+)", str(imdb_link)).group()
 
     if (query_external):
diff --git a/Contents/Code/RecentItems.py b/Contents/Code/RecentItems.py
index 5fbb47b..7a05901 100644
--- a/Contents/Code/RecentItems.py
+++ b/Contents/Code/RecentItems.py
@@ -11,10 +11,10 @@ def __init__(self):
 
     def add(self, mediaInfo, providerURLs, path, caller=None):
 
-        self.items.append([mediaInfo, providerURLs, path, caller])
+        self.items.insert(0, [mediaInfo, providerURLs, path, caller])
 
         while (len(self.items) > 50):
-            self.items.pop(0)
+            self.items.pop()
 
     def getCaller(self, url):
@@ -34,7 +34,7 @@ def getByURL(self, url):
         result = [elem for elem in self.items if url in elem[1]]
 
         if (len(result) > 0):
-            return [result[-1][0], result[-1][2]]
+            return [result[0][0], result[0][2]]
         else:
             return None
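The RecentItems change flips the cache from append-at-tail to insert-at-head, so the first match in getByURL is now the most recently browsed entry and the size cap evicts from the tail. As a standalone illustration of that bounded most-recent-first structure (the class and method names below are invented for the sketch, not the plugin's):

    class RecentList(object):
        """Bounded list that keeps its newest entry at index 0."""

        def __init__(self, limit=50):
            self.limit = limit
            self.items = []

        def add(self, item):
            self.items.insert(0, item)        # newest first
            while len(self.items) > self.limit:
                self.items.pop()              # evict the oldest, at the tail

        def first_match(self, predicate):
            # Scanning front-to-back now returns the most recent hit,
            # mirroring the switch from result[-1] to result[0] in getByURL.
            matches = [item for item in self.items if predicate(item)]
            return matches[0] if matches else None

Keeping newest entries at index 0 lets lookups stay a simple first-match scan while the while-loop caps the list at 50 items.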
diff --git a/Contents/Code/__init__.py b/Contents/Code/__init__.py
index 5948b14..2783d93 100644
--- a/Contents/Code/__init__.py
+++ b/Contents/Code/__init__.py
@@ -6,6 +6,7 @@
 import copy
 import sys
 import base64
+import md5
 
 from datetime import date, datetime, timedelta
 from dateutil import tz
@@ -1246,7 +1247,7 @@ def CaptchaRequiredMenu(mediainfo, source_item, url, parent_name=None, replace_p
             tagline="This provider requires that you solve this Captcha.",
             summary="This provider requires that you solve this Captcha.",
             thumb=PLUGIN_URL + "/proxy?" + urllib.urlencode({'url':captcha_img_URL}),
-            art=mediainfo.background,
+            art=PLUGIN_URL + "/proxy?" + urllib.urlencode({'url':captcha_img_URL}),
         )
     )
@@ -1301,7 +1302,7 @@ def CaptchaProcessMenu(query, mediainfo, source_item, url, solve_captcha_url, pa
     return oc
 
 # Utility methods for captchas. All requests in the Captcha cycle must come from the same User-Agent
-# If just let the clients load the Captcha image, we get different User-Agents. Some us libcurl and
+# If just let the clients load the Captcha image, we get different User-Agents. Some use libcurl and
 # it'd be possible to force a specific user agent using the "url|extraparams" notation, however some
 # clients use the transcoder which does it's own separate thing and doesn't understand libcurl params.
 # So, instead, we rewrite the Captcha's image URL to pass through this, so we can forcibly set
@@ -1312,8 +1313,26 @@ def CaptchaProcessMenu(query, mediainfo, source_item, url, solve_captcha_url, pa
 def Proxy(url):
 
     #Log(url)
-    return HTTP.Request(url,headers={'User-Agent':USER_AGENT}).content
+    key = "CAPTCHA-" + md5.new(url).hexdigest()
+
+    #Log("WAITING " + key);
+    Thread.AcquireLock(key)
+
+    try:
+        if (not Data.Exists(key)):
+            #Log("REQUESTING CAPTCHA")
+            captcha = HTTP.Request(url,headers={'User-Agent':USER_AGENT}, cacheTime=10).content
+            #Log("SAVING CAPTCHA")
+            Data.Save(key, captcha)
+            #Log("SLEEPING")
+            time.sleep(10)
+    except Exception, ex:
+        pass
+
+    #Log("UNBLOCKING " + key);
+    Thread.ReleaseLock(key)
+
+    return Data.Load(key)
 
 ####################################################################################################
 def SearchResultsMenu(query, type, parent_name=None):
@@ -2338,7 +2357,7 @@ def GetAdditionalSources(imdb_id, title, year=None, season_num=None, ep_num=None
 
     # to let the original plugin know when the user decides to play one of our sources.
     if ('Referer' in Request.Headers):
-        match = re.search("/video/([^/]+)/", Request.Headers['Referer'])
+        match = re.search("/video/([^/]+)", Request.Headers['Referer'])
         caller = match.group(1) if match else None
 
     # Work out what type of search to carry out.
@@ -2477,9 +2496,9 @@ def PlaybackStarted(url):
 
     # Use the information from the mediainfo to call the PlaybackStarted method of
     # whatever plugin requested this.
-    url = PLEX_URL + '/video/%s/playback/external/%s' % (caller, mediainfo['id'])
-    if (mediainfo['ep_num']):
-        url += "/%s/%s" % (str(mediainfo['season']), str(mediainfo['ep_num']))
+    url = PLEX_URL + '/video/%s/playback/external/%s' % (caller, mediainfo.id)
+    if (hasattr(mediainfo, 'ep_num') and mediainfo.ep_num is not None):
+        url += "/%s/%s" % (str(mediainfo.season), str(mediainfo.ep_num))
 
     request = urllib2.Request(url)
     response = urllib2.urlopen(request)
@@ -2512,9 +2531,9 @@ def PlaybackStartedExternal(id, season_num=None, ep_num=None):
 
     browsedItems = cerealizer.loads(Data.Load(BROWSED_ITEMS_KEY))
 
     # See if the URL being played is on our recently browsed list.
-    info = browsedItems.getByID(id, season_num, ep_num)
+    item = browsedItems.getByID(id, season_num, ep_num)
 
-    if (info is None):
+    if (item is None):
         Log("****** ERROR: Watching Item which hasn't been browsed to")
         return ""
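The rewritten Proxy is the core of the captcha fix: the first request for a given image URL takes a per-key lock, fetches the image with the forced User-Agent, and stores the bytes via the framework's Data store; every later client loads those identical bytes instead of re-requesting. Thread.AcquireLock, Data.Save and HTTP.Request are Plex framework APIs, so a minimal sketch of the same single-fetch-per-key pattern outside that sandbox (all names here invented) looks roughly like:

    import hashlib, threading

    _guard = threading.Lock()
    _locks = {}   # one lock per captcha key
    _cache = {}   # fetched image bytes, keyed by URL hash

    def proxy(url, fetch):
        # Stable per-URL key, as the diff builds with md5.new(url).hexdigest().
        key = "CAPTCHA-" + hashlib.md5(url.encode("utf8")).hexdigest()
        with _guard:
            lock = _locks.setdefault(key, threading.Lock())
        with lock:
            if key not in _cache:
                # Only the first caller hits the network; the rest block on
                # the lock and then read the same cached bytes, so every
                # client is shown the same captcha image.
                _cache[key] = fetch(url)
        return _cache[key]

The committed version also sleeps for ten seconds before releasing the lock, presumably to hold the cached image stable while several clients (including the transcoder) request it in quick succession.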
diff --git a/Contents/Libraries/Shared/xgoogle/search.py b/Contents/Libraries/Shared/xgoogle/search.py
index 9dd2d14..0ba817b 100755
--- a/Contents/Libraries/Shared/xgoogle/search.py
+++ b/Contents/Libraries/Shared/xgoogle/search.py
@@ -50,10 +50,16 @@ def __str__(self):
         return 'Google Search Result: "%s"' % self.title
 
 class GoogleSearch(object):
-    SEARCH_URL_0 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&btnG=Google+Search&complete=0"
-    NEXT_PAGE_0 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&start=%(start)d&complete=0"
-    SEARCH_URL_1 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&btnG=Google+Search&complete=0"
-    NEXT_PAGE_1 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&start=%(start)d&complete=0"
+    SEARCH_URL_0 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&btnG=Google+Search"
+    NEXT_PAGE_0 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&start=%(start)d"
+    SEARCH_URL_1 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&btnG=Google+Search"
+    NEXT_PAGE_1 = "http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&start=%(start)d"
+
+    # Use IceFilms' CSE. Prevents problems with query limiting. Other methods have also been adjusted accordingly.
+    SEARCH_URL_0 = "http://www.google.%(tld)s/cse?hl=%(lang)s&cx=010591583107216882486:bafpv02vxuq&cof=FORID:9&nojs=1&q=%(query)s"
+    NEXT_PAGE_0 = "http://www.google.%(tld)s/cse?hl=%(lang)s&cx=010591583107216882486:bafpv02vxuq&cof=FORID:9&nojs=1&q=%(query)s&start=%(start)d"
+    SEARCH_URL_1 = "http://www.google.%(tld)s/cse?hl=%(lang)s&cx=010591583107216882486:bafpv02vxuq&cof=FORID:9&nojs=1&q=%(query)s&num=%(num)d"
+    NEXT_PAGE_1 = "http://www.google.%(tld)s/cse?hl=%(lang)s&cx=010591583107216882486:bafpv02vxuq&cof=FORID:9&nojs=1&q=%(query)s&num=%(num)d&start=%(start)d"
 
     def __init__(self, query, random_agent=False, debug=False, lang="en", tld="com", re_search_strings=None):
         self.query = query
@@ -226,7 +232,7 @@ def _extract_info(self, soup):
         return {'from': int(matches.group(1)), 'to': int(matches.group(2)), 'total': int(matches.group(3))}
 
     def _extract_results(self, soup):
-        results = soup.findAll('li', {'class': 'g'})
+        results = soup.findAll('div', {'class': 'g'})
         ret_res = []
         for result in results:
             eres = self._extract_result(result)
@@ -250,13 +256,13 @@ def _extract_title_url(self, result):
         title = ''.join(title_a.findAll(text=True))
         title = self._html_unescape(title)
         url = title_a['href']
-        match = re.match(r'/url\?(url|q)=(http[^&]+)&', url)
+        match = re.match(r'/url\?q=(http[^&]+)&', url)
         if match:
-            url = urllib.unquote(match.group(2))
+            url = urllib.unquote(match.group(1))
         return title, url
 
     def _extract_description(self, result):
-        desc_div = result.find('div', {'class': re.compile(r'\bs\b')})
+        desc_div = result.find('span', {'class': re.compile(r'\bs\b')})
         if not desc_div:
             self._maybe_raise(ParseError, "Description tag in Google search result was not found", result)
             return None
diff --git a/Contents/Libraries/Shared/xgoogle/test.py b/Contents/Libraries/Shared/xgoogle/test.py
new file mode 100644
index 0000000..a86a185
--- /dev/null
+++ b/Contents/Libraries/Shared/xgoogle/test.py
@@ -0,0 +1,48 @@
+from search import GoogleSearch
+import re
+
+def GetSearchResults(query=None,type=None,
+                     exact=False):
+
+    if (type=="movies"):
+        # This is a google search. The -tv will omit all TV shows.
+        search = 'intitle:%s -"Episode List" -"Series Rating"' % (query)
+    else:
+        search = 'allintitle:%s "Episode List"' % (query)
+
+    gs = GoogleSearch(search)
+    gs.results_per_page = 25
+    gs.page = 0
+    results = gs.get_results()
+    gs.get_results()
+    items = []
+
+    for res in results:
+
+        name = re.sub(
+            '(||||DivX|-|icefilms(\.info)?|\.\.\.|Episode List|links)',
+            '',
+            res.title.encode('utf8')
+        ).strip()
+
+        url=res.url
+        video_url = re.search("icefilms\.info(/.*)", url).group(1)
+
+        res = {}
+
+        res['type'] = type
+        res['title'] = name
+
+        match = re.search("(.*)\((\d*)\)", res['title'])
+
+        if (match):
+            res['title'] = match.group(1).strip()
+            res['year'] = int(match.group(2).strip())
+
+        res['id'] = video_url
+
+        items.append(res)
+
+    return items
+
+items = GetSearchResults("the", "tv")
+print items
+print len(items)
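The xgoogle changes repoint the scraper at IceFilms' Google Custom Search Engine and adapt the parsers to its markup: results live in div.g rather than li.g, descriptions sit in a span, and result links are /url?q= redirects. The reworked href handling in _extract_title_url reduces to the following (the sample href is made up; urllib.unquote is the Python 2 call this library already uses):

    import re, urllib

    def extract_target(href):
        # CSE hrefs look like /url?q=<escaped destination>&sa=...;
        # pull out the real URL and unescape it.
        match = re.match(r'/url\?q=(http[^&]+)&', href)
        return urllib.unquote(match.group(1)) if match else href

    print extract_target('/url?q=http%3A//icefilms.info/tv/series/1/12345&sa=U')
    # -> http://icefilms.info/tv/series/1/12345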
diff --git a/Contents/Services/ServiceInfo.plist b/Contents/Services/ServiceInfo.plist
index 37d2c48..e534de1 100644
--- a/Contents/Services/ServiceInfo.plist
+++ b/Contents/Services/ServiceInfo.plist
@@ -1 +1 @@
- URL IceFilms URLPatterns (external|captcha)://icefilms\.info/\d+/\d+ play://icefilms\.info/ providerinfo://icefilms/.* http://providerinfo.icefilms/.* ShareBees URLPatterns http://(www\.)?sharebees.com providerinfo://sharebees/.*(icefilms) RapidShare URLPatterns https?://(www\.)?rapidshare.com providerinfo://rapidshare/.*(icefilms) BillionUploads URLPatterns http://(www\.)?billionuploads.com providerinfo://billionuploads/.*(icefilms) 2Shared URLPatterns http://(www\.)?2shared.com providerinfo://2shared/.*(icefilms) 180Upload URLPatterns (captcha|solve|play)://(www\.)?(180Upload.com|epicshare.net)/ providerinfo://(www\.)?(180upload(\.com)?|epicshare(\.net)?)/.*(icefilms) MegaRelease URLPatterns (captcha|solve|play)://(www\.)?(megarelease\.org|lemuploads\.com)/ providerinfo://(www\.)?(megarelease(\.org)?|lemuploads(\.com)?)/.*(icefilms) MovReel URLPatterns http://(www\.)?movreel.com providerinfo://movreel/.*(icefilms) VidHog URLPatterns http://(www\.)?(vidhog)\.(com|net)/[\d\w]{8,} providerinfo://vidhog/.*(icefilms) HugeFiles URLPatterns http://(www\.)?hugefiles\.net providerinfo://hugefiles/.*(icefilms)
\ No newline at end of file
+ URL IceFilms URLPatterns (external|captcha)://icefilms\.info/\d+/\d+ play://icefilms\.info/ providerinfo://icefilms/.* http://providerinfo.icefilms/.* ShareBees URLPatterns http://(www\.)?sharebees.com providerinfo://sharebees/.*(icefilms) RapidShare URLPatterns https?://(www\.)?rapidshare.com providerinfo://rapidshare/.*(icefilms) BillionUploads URLPatterns http://(www\.)?billionuploads.com providerinfo://billionuploads/.*(icefilms) 2Shared URLPatterns http://(www\.)?2shared.com providerinfo://2shared/.*(icefilms) 180Upload URLPatterns (captcha|solve|play)://(www\.)?(180Upload.com|epicshare.net)/ providerinfo://(www\.)?(180upload(\.com)?|epicshare(\.net)?)/.*(icefilms) MegaRelease URLPatterns (captcha|solve|play)://(www\.)?(megarelease\.org|lemuploads\.com)/ providerinfo://(www\.)?(megarelease(\.org)?|lemuploads(\.com)?)/.*(icefilms) MovReel URLPatterns http://(www\.)?movreel.com providerinfo://movreel/.*(icefilms) VidHog URLPatterns http://(www\.)?(vidhog)\.(com|net)/[\d\w]{8,} providerinfo://vidhog/.*(icefilms) HugeFiles URLPatterns (captcha|solve|play)://(www\.)?hugefiles\.net providerinfo://hugefiles/.*(icefilms)
\ No newline at end of file
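The only plist change is the HugeFiles URLPatterns entry, which now claims the plugin's internal captcha://, solve:// and play:// schemes instead of plain http:// links, matching the captcha flow added to the HugeFiles service below. A quick demonstration of what the new pattern routes (candidate URLs invented for the demo):

    import re

    PATTERN = r'(captcha|solve|play)://(www\.)?hugefiles\.net'

    for candidate in ('captcha://hugefiles.net/abc12345',
                      'play://www.hugefiles.net/abc12345',
                      'http://hugefiles.net/abc12345'):
        print candidate, '->', bool(re.match(PATTERN, candidate))
    # the first two match; the plain http:// link no longer does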
diff --git a/Contents/Services/URL/180Upload/ServiceCode.pys b/Contents/Services/URL/180Upload/ServiceCode.pys
index 2cec4de..e6e8426 100644
--- a/Contents/Services/URL/180Upload/ServiceCode.pys
+++ b/Contents/Services/URL/180Upload/ServiceCode.pys
@@ -124,58 +124,22 @@ def MediaObjectsForURL(url):
 
         if ('180upload' in url):
             link = re.search('', html)
-            final_url = "play://180upload.com/?" + urllib.urlencode({'url':link.group(1)}).replace(" ", "+")
+            final_url = link.group(1).replace(" ", "+")
         elif ('epicshare' in url):
             link = re.search('Regular', html)
-            final_url = "play://180upload.com/?" + urllib.urlencode({'url':link.group(1)}).replace(" ", "+")
+            final_url = link.group(1).replace(" ", "+")
 
-        return [
-            MediaObject(
-                parts = [
-                    PartObject(
-                        key=final_url
-                    )
-                ],
-            )
-        ]
+        Log("final url: " + final_url)
 
-    elif ('play://' in url):
-
         return [
             MediaObject(
                 parts = [
                     PartObject(
-                        key=Callback(PlayVideo, url=url.replace("play://", "http://"))
+                        key=final_url
                     )
                 ],
             )
         ]
-
-
-
-@indirect
-def PlayVideo(url):
-
-    # Extract out video URL.
-    url_parts = urlparse.urlparse(url)
-
-    # Break down query string.
-    data = dict(cgi.parse_qsl(url_parts.query))
-    final_url = data['url'].replace(" ","+")
-
-    Log(final_url)
-
-    return ObjectContainer(
-        objects = [
-            VideoClipObject(
-                items = [
-                    MediaObject(
-                        parts = [PartObject(key=final_url)],
-                    )
-                ]
-            )
-        ]
-    )
 
 def LogProviderError(msg="", ex=None):
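This hunk removes 180Upload's play:// indirection: MediaObjectsForURL now hands the resolved file URL straight to PartObject instead of wrapping it for a second hop through PlayVideo. For reference, the deleted round-trip behaved roughly like this (the file URL is invented; urlparse and cgi.parse_qsl are the Python 2 calls the old code used):

    import urllib, urlparse, cgi

    real_url = 'http://180upload.com/files/some video.mp4'

    # Old encode step in MediaObjectsForURL...
    wrapped = 'play://180upload.com/?' + urllib.urlencode({'url': real_url})

    # ...and the matching decode step in the deleted PlayVideo.
    parts = urlparse.urlparse(wrapped.replace('play://', 'http://'))
    recovered = dict(cgi.parse_qsl(parts.query))['url'].replace(' ', '+')

    print recovered   # http://180upload.com/files/some+video.mp4

Dropping the wrapper removes an ObjectContainer redirect per play, which is why the hunk can also delete the whole PlayVideo callback.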
diff --git a/Contents/Services/URL/HugeFiles/ServiceCode.pys b/Contents/Services/URL/HugeFiles/ServiceCode.pys
index e17c09b..a610f9d 100644
--- a/Contents/Services/URL/HugeFiles/ServiceCode.pys
+++ b/Contents/Services/URL/HugeFiles/ServiceCode.pys
@@ -1 +1,177 @@
-import re, string from BeautifulSoup import BeautifulSoup USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22' def NormalizeURL(url): #Log("*********** In HugeFiles NormalizeURL") # Deal with special providerInfo URL built up by IceFilms plugin to return # info about this provider. For all other normal URLs, do nothing. if ("providerinfo" in url): try: show = Prefs["show_hugefiles"] except Exception, ex: show = True if (show): return url + "&visible=true" else: return url else: return url def MetadataObjectForURL(url): #Log('In MetadataObjectForURL for HugeFiles (' + url + ')') video = VideoClipObject( title = 'HugeFiles Redirect Page', summary = 'HugeFiles Redirect Page', thumb = None, ) return video def MediaObjectsForURL(url): Log("------------------------------------------------------------------------------") Log("In MediaObjectsForURL for HugeFiles. (%s)" % url) ret = [] ret.append( MediaObject( parts = [PartObject(key=Callback(PlayVideo, url=url))], ) ) return ret @indirect def PlayVideo(url): # Request initial page... #Log("------------------------------------------------------------------------------") #Log("Requesting: " + url) soup = BeautifulSoup(HTTP.Request(url).content) #Log(str(soup)) # Deal with initial page. # Extract out these form elements... formElems = ['op', 'id', 'fname', 'method_free', 'referer', 'usr_login'] params = {} for formElem in formElems: formElemVal = soup.find('input', {'name' : formElem })['value'] params[formElem] = formElemVal # Submit form with extracted elements. #Log("****************************************************************************") #Log("Requesting: " + url) #Log("Params: " + str(params)) soup = BeautifulSoup(HTTP.Request(url, values=params).content) #Log(str(soup)) # Unpack player code and turn it into soup. player_code = Unpack(str(soup.find('div',{ 'id': 'player_code' }))) if ('new SWFObject' in player_code): final_url = re.search("\\\\'file\\\\',\\\\'([^\\\\]*)",player_code).group(1) else: soup = BeautifulSoup(re.search('(<.*>)', player_code).group(1)) final_url = soup.find('param',{'name': 'src'})['value'] Log("final url: " + final_url) return ObjectContainer( objects = [ VideoClipObject( items = [ MediaObject( parts = [PartObject(key=final_url)], ) ] ) ] ) return ret def LogProviderError(msg="", ex=None): Log("************************** PROVIDER ERROR: " + msg) raise Exception(msg) return [] def Unpack(script): if script is None: return #Log(script) # Look for string to unpack. val_to_unpack = re.search("return p}\('(.*)',\d*,\d*,'", script) if (val_to_unpack is None or val_to_unpack.group(1) is None): return None # Look for substitution values. sub_vals = re.search("\d{2},'([^']*)'.split", script) if (sub_vals is None): return None val_to_unpack = val_to_unpack.group(1) sub_vals = sub_vals.group(1).split('|') #Log(val_to_unpack) #Log(sub_vals) # Create dict to map url sub keys to sub values. alphadict = dict() for index_cnt in range(0, 2): index = index_cnt * len(string.digits + string.ascii_lowercase) strindex = str(index_cnt) if index_cnt > 0 else "" for cnt in range(0, len(string.digits + string.ascii_lowercase)): alphadict[strindex + (string.digits + string.ascii_lowercase)[cnt]] = cnt + index def SubElem(matchObj): val = sub_vals[alphadict[matchObj.group(0)]] if (val == ""): val = matchObj.group(0) return val # Sub values into string to unpack return re.sub("[0-9a-z]{1,2}", SubElem, val_to_unpack)
\ No newline at end of file
+import re, string
+import urllib, urlparse, cgi
+
+from BeautifulSoup import BeautifulSoup
+
+USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22'
+
+def NormalizeURL(url):
+
+    #Log("*********** In HugeFiles NormalizeURL")
+
+    # Deal with special providerInfo URL built up by IceFilms plugin to return
+    # info about this provider. For all other normal URLs, do nothing.
+    if ("providerinfo" in url):
+
+        try:
+            show = Prefs["show_hugefiles"]
+        except Exception, ex:
+            show = True
+
+        if (show):
+            return url + "&visible=true&captcha=true"
+        else:
+            return url
+
+    else:
+        return url
+
+def MetadataObjectForURL(url):
+
+    #Log('In MetadataObjectForURL for HugeFiles (' + url + ')')
+
+    video = VideoClipObject(
+        title = 'HugeFiles Redirect Page',
+        summary = 'HugeFiles Redirect Page',
+        thumb = None,
+    )
+
+    return video
+
+def MediaObjectsForURL(url):
+
+    #Log(url)
+
+    # Somebody knows we're a complicated beast! They've asked us to return the url of the Captcha
+    # image as well as a URL to submit to get past the captcha.
+    if ("captcha://" in url):
+
+        url = url.replace("captcha://", "http://")
+
+        html = HTTP.Request(url, cacheTime=0, headers={'User-Agent':USER_AGENT}).content
+        soup = BeautifulSoup(html);
+
+        data = {}
+        r = soup.findAll('input', { 'type': "hidden", 'name': re.compile(".+"), 'value': re.compile(".*") })
+        r = r + soup.findAll('input', { 'type': "submit" })
+
+        if r:
+            for node in r:
+                #Log(node)
+                data[node['name']] = node['value']
+        else:
+            raise Exception('Unable to resolve Link')
+
+        #Check for SolveMedia Captcha image
+        solvemedia = re.search('