diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6f64c2a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+STOP
diff --git a/pipeline.py b/pipeline.py
index 63fd0ce..dfe677b 100755
--- a/pipeline.py
+++ b/pipeline.py
@@ -33,7 +33,7 @@
 project = Project(
     title = "URLTeam",
     project_html = """
-        URLTeam Website · Leaderboard
-        The URLTeam is a project to preserve shorturls from various URL shorteners.
+        URLTeam Website · Leaderboard
+        The URLTeam is a project to preserve shorturls from various URL shorteners. This tracker is grabbing links from Pixorial.
     """
 )
diff --git a/single_task.py b/single_task.py
index b3ee853..7bba3c7 100755
--- a/single_task.py
+++ b/single_task.py
@@ -24,7 +24,8 @@
 import tinyback.tracker
 
 username = tmp_dir = None
-tracker = "http://urlteam.terrywri.st/"
+tracker = "http://argonath.db48x.net/"
+max_submission_retries = 6
 
 for i, value in enumerate(sys.argv):
     if i == 1:
@@ -78,5 +79,24 @@ def emit(self, record):
 
 reaper = tinyback.Reaper(task, progress=True)
 fileobj = reaper.run(tmp_dir)
-tracker.put(task, fileobj, username)
+
+# Upload with exponential backoff (2, 4, 8, ... seconds between attempts).
+# NOTE(review): if tracker.put() can fail after reading part of fileobj, a
+# retry may need fileobj.seek(0) first -- confirm against tinyback.tracker.
+tries = 0
+while True:
+    try:
+        tracker.put(task, fileobj, username)
+        break
+    except Exception, e:
+        tries += 1
+        logger.warn(e)
+        if tries >= max_submission_retries:
+            # Out of attempts: fail loudly rather than silently dropping
+            # the finished task.
+            fileobj.close()
+            raise
+        wait = 2 ** tries
+        logger.warn("Sleeping for %d seconds..." % wait)
+        time.sleep(wait)
 fileobj.close()
diff --git a/tinyback/__init__.py b/tinyback/__init__.py
index d764099..69817f7 100644
--- a/tinyback/__init__.py
+++ b/tinyback/__init__.py
@@ -25,7 +25,7 @@
 
 from tinyback import exceptions, generators, services
 
-__version__ = "2.12"
+__version__ = "2.13"
 
 
 class ServiceTester:
diff --git a/tinyback/services.py b/tinyback/services.py
index 0eb05b9..1f3c423 100644
--- a/tinyback/services.py
+++ b/tinyback/services.py
@@ -104,11 +104,37 @@ def __init__(self):
         else:
             raise ValueError("Unknown scheme %s" % parsed_url.scheme)
 
+        # Resolve the service host once and connect to the resulting address.
+        # The port is kept as an int (scheme default when the URL has none) so
+        # it can be passed on to the connection.
+        self._hostname = parsed_url.hostname
+        if not self._hostname:
+            raise ValueError("Unknown host %s" % parsed_url.netloc)
+        self._port = parsed_url.port
+        if self._port is None:
+            if parsed_url.scheme == "https":
+                self._port = 443
+            else:
+                self._port = 80
+            self._host_header = self._hostname
+        else:
+            # An explicit port is part of the Host header value.
+            self._host_header = "%s:%d" % (self._hostname, self._port)
+        addr = [addrinfo for addrinfo
+                in socket.getaddrinfo(self._hostname, self._port)
+                if addrinfo[0] in (socket.AF_INET, socket.AF_INET6) and
+                isinstance(addrinfo[4][0], basestring)]
+        if not addr:
+            raise ValueError("Unknown host %s" % parsed_url.netloc)
+        self._host = addr[0][4][0]
+
         version = platform.python_version_tuple()
         if int(version[0]) == 2 and int(version[1]) <= 5:
-            self._conn = klass(parsed_url.netloc)
+            # Pass the port explicitly: connecting by address alone would
+            # silently fall back to the default port.
+            self._conn = klass(self._host, self._port)
         else:
-            self._conn = klass(parsed_url.netloc, timeout=30)
+            self._conn = klass(self._host, self._port, timeout=30)
 
     def _http_head(self, code):
         return self._http_fetch(code, "HEAD")[0]
@@ -122,6 +148,9 @@ def _http_fetch(self, code, method):
             headers["Connection"] = "Keep-Alive"
         else:
             headers["Connection"] = "close"
+        # The connection is opened by IP address, so the virtual host has to
+        # be named explicitly.
+        headers["Host"] = self._host_header
 
         try:
             self._conn.request(method, self._path + code, headers=headers)
@@ -807,6 +836,130 @@ def yourls_api_url(self):
     def yourls_url_convert(self):
         return 36
 
+class Pixorial(SimpleService):
+    """
+    Pixorial short links, served from http://myhub.pixorial.com/s/
+    """
+
+    @property
+    def charset(self):
+        return "0123456789abcdefghijklmnopqrstuvwxyz"
+
+    @property
+    def url(self):
+        return "http://myhub.pixorial.com/s/"
+
+    @property
+    def rate_limit(self):
+        """
+        Returns a tuple specifying the rate-limit, or None.
+
+        Returns a two-element tuple, with the first element being the number of
+        requests that are allowed in the timespan denoted by the second element
+        (in seconds). When there is no rate-limit, simply returns None.
+        """
+        return (20, 1)
+
+    @property
+    def http_keepalive(self):
+        """
+        Whether to use HTTP persistent connections or not. If set to false, the
+        connection will be forcibly closed after each request
+        """
+        return False
+
+    def fetch(self, code):
+        """
+        Resolves a shortcode with a HEAD request and returns its target URL.
+
+        A redirect to the Pixorial home page is treated as a missing
+        shortcode and raised as NoRedirectException.
+        """
+        resp = self._http_head(code)
+
+        if resp.status in self.http_status_redirect:
+            location = resp.getheader("Location")
+            if location == "http://myhub.pixorial.com/":
+                raise exceptions.NoRedirectException("Redirected to home page")
+            if not location:
+                raise exceptions.ServiceException("No Location header after HTTP status 301")
+            return location
+        elif resp.status in self.http_status_no_redirect:
+            raise exceptions.NoRedirectException()
+        elif resp.status in self.http_status_code_blocked:
+            raise exceptions.CodeBlockedException()
+        elif resp.status in self.http_status_blocked:
+            raise exceptions.BlockedException()
+        else:
+            return self.unexpected_http_status(code, resp)
+
+class Twitter(SimpleService):
+    """
+    Twitter changes all urls in their short messages to use this shortener.
+    """
+
+    @property
+    def charset(self):
+        return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+
+    @property
+    def url(self):
+        return "http://t.co/"
+
+    @property
+    def rate_limit(self):
+        return (20, 1)
+
+    @property
+    def http_keepalive(self):
+        return False
+
+class Trim(SimpleService):
+    """
+    tr.im URL shortener, served from http://tr.im/
+    """
+
+    @property
+    def charset(self):
+        return "0123456789abcdefghijklmnopqrstuvwxyz"
+
+    @property
+    def url(self):
+        return "http://tr.im/"
+
+    @property
+    def rate_limit(self):
+        return (20, 1)
+
+    @property
+    def http_keepalive(self):
+        return True
+
+    def fetch(self, code):
+        """
+        Resolves a shortcode with a HEAD request and returns its target URL.
+
+        A redirect to http://tr.im/404 is treated as a missing shortcode and
+        raised as NoRedirectException, unless the shortcode itself is "404".
+        """
+        resp = self._http_head(code)
+
+        if resp.status in self.http_status_redirect:
+            location = resp.getheader("Location")
+            if code != "404" and location == "http://tr.im/404":
+                raise exceptions.NoRedirectException("Redirected to 404 page")
+            if not location:
+                raise exceptions.ServiceException("No Location header after HTTP status 301")
+            return location
+        elif resp.status in self.http_status_no_redirect:
+            raise exceptions.NoRedirectException()
+        elif resp.status in self.http_status_code_blocked:
+            raise exceptions.CodeBlockedException()
+        elif resp.status in self.http_status_blocked:
+            raise exceptions.BlockedException()
+        else:
+            return self.unexpected_http_status(code, resp)
+
 
 _factory_map = {
     "bitly": Bitly,
@@ -822,7 +975,10 @@ def yourls_url_convert(self):
     "visiblihex": VisibliHex,
     "visibli": Visibli,
     "vbly": Vbly,
-    "arsehat": Arsehat
+    "arsehat": Arsehat,
+    "pixorial": Pixorial,
+    "twitter": Twitter,
+    "trim": Trim,
 }
 
 