From dcf1b50d8c208d043d33a9c1f6d14ce3f2455db7 Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Sat, 21 Jun 2014 10:34:47 -0700 Subject: [PATCH 1/7] add Pixorial as a service, set up my own tracker --- single_task.py | 2 +- tinyback/services.py | 53 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/single_task.py b/single_task.py index 197f411..9c4e190 100755 --- a/single_task.py +++ b/single_task.py @@ -24,7 +24,7 @@ import tinyback.tracker username = tmp_dir = None -tracker = "http://tracker.tinyarchive.org/v1/" +tracker = "http://argonath.db48x.net/" for i, value in enumerate(sys.argv): if i == 1: diff --git a/tinyback/services.py b/tinyback/services.py index 0eb05b9..10d151c 100644 --- a/tinyback/services.py +++ b/tinyback/services.py @@ -807,6 +807,56 @@ def yourls_api_url(self): def yourls_url_convert(self): return 36 +class Pixorial(SimpleService): + """ """ + + @property + def charset(self): + return "0123456789abcdefghijklmnopqrstuvwxyz" + + @property + def url(self): + return "http://myhub.pixorial.com/s/" + + @property + def rate_limit(self): + """ + Returns a tuple specifying the rate-limit, or None. + + Returns a two-element tuple, with the first element being the number of + requests that are allowed in the timespan denoted by the second element + (in seconds). When there is no rate-limit, simply returns None. + """ + return (20, 1) + + @property + def http_keepalive(self): + """ + Whether to use HTTP persistent connections or not. 
If set to false, the + connection will be forcibly closed after each request + """ + return False + + def fetch(self, code): + resp = self._http_head(code) + + if resp.status in self.http_status_redirect: + location = resp.getheader("Location") + if location == "http://myhub.pixorial.com/": + raise exceptions.NoRedirectException("Redirected to home page") + if not location: + raise exceptions.ServiceException("No Location header after HTTP status 301") + return location + elif resp.status in self.http_status_no_redirect: + raise exceptions.NoRedirectException() + elif resp.status in self.http_status_code_blocked: + raise exceptions.CodeBlockedException() + elif resp.status in self.http_status_blocked: + raise exceptions.BlockedException() + else: + return self.unexpected_http_status(code, resp) + + _factory_map = { "bitly": Bitly, @@ -822,7 +872,8 @@ def yourls_url_convert(self): "visiblihex": VisibliHex, "visibli": Visibli, "vbly": Vbly, - "arsehat": Arsehat + "arsehat": Arsehat, + "pixorial": Pixorial, } From 9908032972ca9f8b8cef00eb70d77a5eec596877 Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Sat, 21 Jun 2014 10:40:03 -0700 Subject: [PATCH 2/7] update html message for warriors --- pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline.py b/pipeline.py index 8860be7..dfe677b 100755 --- a/pipeline.py +++ b/pipeline.py @@ -33,7 +33,7 @@ project = Project( title = "URLTeam", project_html = """ -

URLTeam Website · Leaderboard

-

The URLTeam is a project to preserve shorturls from various URL shorteners.

+

URLTeam Website · Leaderboard

+

The URLTeam is a project to preserve shorturls from various URL shorteners. This tracker is grabbing links from Pixorial.

""" ) From 6c18abb3f3aff5d05a1dde235612f6765994f180 Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Sat, 5 Jul 2014 11:04:17 -0700 Subject: [PATCH 3/7] add some retries in case submitting to the tracker fails with exponential backoff --- single_task.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/single_task.py b/single_task.py index 9c4e190..1626ff2 100755 --- a/single_task.py +++ b/single_task.py @@ -25,6 +25,7 @@ username = tmp_dir = None tracker = "http://argonath.db48x.net/" +max_submission_retries = 6 for i, value in enumerate(sys.argv): if i == 1: @@ -78,5 +79,16 @@ def emit(self, record): reaper = tinyback.Reaper(task, progress=True) fileobj = reaper.run(tmp_dir) -tracker.put(task, fileobj, username) + +tries = 0 +while tries < max_submission_retries: + try: + tracker.put(task, fileobj, username) + except Exception, e: + wait = 2 ** (tries+1) + logger.warn(e) + if tries < max_submission_retries: + logger.warn("Sleeping for %d seconds..." % wait) + time.sleep(wait) + tries += 1 fileobj.close() From 988613d190ef9dde226799be56939e41c4b6a08d Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Sat, 5 Jul 2014 11:05:30 -0700 Subject: [PATCH 4/7] add a .gitignore file --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6f64c2a --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +STOP From 0ceec46748a7a27b51fc2b7ab9f704be7e940464 Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Sat, 5 Jul 2014 11:15:11 -0700 Subject: [PATCH 5/7] don't retry after success :P --- single_task.py | 1 + 1 file changed, 1 insertion(+) diff --git a/single_task.py b/single_task.py index 1626ff2..7bba3c7 100755 --- a/single_task.py +++ b/single_task.py @@ -84,6 +84,7 @@ def emit(self, record): while tries < max_submission_retries: try: tracker.put(task, fileobj, username) + break except Exception, e: wait = 2 ** (tries+1) logger.warn(e) From 
af46e36c82af938630a981890b1f14999d48e59a Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Sat, 5 Jul 2014 16:53:34 -0700 Subject: [PATCH 6/7] cache the dns lookup in the service --- tinyback/services.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tinyback/services.py b/tinyback/services.py index 10d151c..f06deca 100644 --- a/tinyback/services.py +++ b/tinyback/services.py @@ -24,6 +24,7 @@ import socket import urllib import urlparse +import socket import tinyback from tinyback import exceptions @@ -104,11 +105,25 @@ def __init__(self): else: raise ValueError("Unknown scheme %s" % parsed_url.scheme) + pos = parsed_url.netloc.find(':'); + if pos != -1: + self._hostname = parsed_url.netloc[0:pos] + self._port = parsed_url.netloc[pos+1:] + else: + self._hostname = parsed_url.netloc + self._port = None + addr = [addrinfo for addrinfo in socket.getaddrinfo(self._hostname, self._port or 80) + if (addrinfo[0] == socket.AF_INET or addrinfo[0] == socket.AF_INET6) and + isinstance(addrinfo[4][0], basestring)] + if not len(addr): + raise ValueError("Unknown host %s" % parsed_url.netloc) + self._host = addr[0][4][0] + version = platform.python_version_tuple() if int(version[0]) == 2 and int(version[1]) <= 5: - self._conn = klass(parsed_url.netloc) + self._conn = klass(self._host) else: - self._conn = klass(parsed_url.netloc, timeout=30) + self._conn = klass(self._host, timeout=30) def _http_head(self, code): return self._http_fetch(code, "HEAD")[0] @@ -122,6 +137,7 @@ def _http_fetch(self, code, method): headers["Connection"] = "Keep-Alive" else: headers["Connection"] = "close" + headers["Host"] = self._hostname try: self._conn.request(method, self._path + code, headers=headers) From 3d079610e05f2b56299b6ed1730a77dd2c9e586e Mon Sep 17 00:00:00 2001 From: Daniel Brooks Date: Sat, 5 Jul 2014 18:07:15 -0700 Subject: [PATCH 7/7] add t.co and tr.im to the services --- tinyback/__init__.py | 2 +- tinyback/services.py | 49 
++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/tinyback/__init__.py b/tinyback/__init__.py index d764099..69817f7 100644 --- a/tinyback/__init__.py +++ b/tinyback/__init__.py @@ -25,7 +25,7 @@ from tinyback import exceptions, generators, services -__version__ = "2.12" +__version__ = "2.13" class ServiceTester: diff --git a/tinyback/services.py b/tinyback/services.py index f06deca..1f3c423 100644 --- a/tinyback/services.py +++ b/tinyback/services.py @@ -872,6 +872,53 @@ def fetch(self, code): else: return self.unexpected_http_status(code, resp) +class Twitter(SimpleService): + """ Twitter changes all urls in their short messages to use this shortener. """ + @property + def charset(self): + return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + @property + def url(self): + return "http://t.co/" + @property + def rate_limit(self): + return (20, 1) + @property + def http_keepalive(self): + return False + +class Trim(SimpleService): + """ Short links served by the tr.im URL shortener (http://tr.im/). 
""" + @property + def charset(self): + return "0123456789abcdefghijklmnopqrstuvwxyz" + @property + def url(self): + return "http://tr.im/" + @property + def rate_limit(self): + return (20, 1) + @property + def http_keepalive(self): + return True + def fetch(self, code): + resp = self._http_head(code) + + if resp.status in self.http_status_redirect: + location = resp.getheader("Location") + if code != "404" and location == "http://tr.im/404": + raise exceptions.NoRedirectException("Redirected to 404 page") + if not location: + raise exceptions.ServiceException("No Location header after HTTP status 301") + return location + elif resp.status in self.http_status_no_redirect: + raise exceptions.NoRedirectException() + elif resp.status in self.http_status_code_blocked: + raise exceptions.CodeBlockedException() + elif resp.status in self.http_status_blocked: + raise exceptions.BlockedException() + else: + return self.unexpected_http_status(code, resp) _factory_map = { @@ -890,6 +937,8 @@ def fetch(self, code): "vbly": Vbly, "arsehat": Arsehat, "pixorial": Pixorial, + "twitter": Twitter, + "trim": Trim, }