Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.pyc
STOP
4 changes: 2 additions & 2 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
project = Project(
title = "URLTeam",
project_html = """
<h2>URLTeam <span class="links"><a href="http://urlte.am/">Website</a> &middot; <a href="http://urlteam.terrywri.st/">Leaderboard</a></span></h2>
<p>The URLTeam is a project to preserve shorturls from various URL shorteners.</p>
<h2>URLTeam <span class="links"><a href="http://urlte.am/">Website</a> &middot; <a href="http://argonath.db48x.net/">Leaderboard</a></span></h2>
<p>The URLTeam is a project to preserve shorturls from various URL shorteners. This tracker is grabbing links from Pixorial.</p>
"""
)
17 changes: 15 additions & 2 deletions single_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@
import tinyback.tracker

username = tmp_dir = None
tracker = "http://urlteam.terrywri.st/"
tracker = "http://argonath.db48x.net/"
max_submission_retries = 6

for i, value in enumerate(sys.argv):
if i == 1:
Expand Down Expand Up @@ -78,5 +79,17 @@ def emit(self, record):

reaper = tinyback.Reaper(task, progress=True)
fileobj = reaper.run(tmp_dir)
tracker.put(task, fileobj, username)

# Submit the finished task to the tracker. A failed upload is retried up to
# max_submission_retries times with exponential backoff (2, 4, 8, ... seconds).
# NOTE(review): if tracker.put() consumes part of fileobj before failing, a
# retry may need fileobj.seek(0) first -- confirm against tracker.put().
try:
    for attempt in range(1, max_submission_retries + 1):
        try:
            tracker.put(task, fileobj, username)
            break
        except Exception:
            # sys.exc_info() is used instead of "except Exception, e" so the
            # statement parses on every Python version.
            logger.warn(sys.exc_info()[1])
            if attempt == max_submission_retries:
                # No further attempt follows, so do not sleep; say that the
                # submission was abandoned instead of ending silently.
                logger.warn("Giving up after %d failed submission attempts" % attempt)
                break
            wait = 2 ** attempt
            logger.warn("Sleeping for %d seconds..." % wait)
            time.sleep(wait)
finally:
    # Always release the result file, even if something unexpected
    # (e.g. KeyboardInterrupt during the sleep) propagates.
    fileobj.close()
2 changes: 1 addition & 1 deletion tinyback/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from tinyback import exceptions, generators, services

__version__ = "2.12"
__version__ = "2.13"

class ServiceTester:

Expand Down
122 changes: 119 additions & 3 deletions tinyback/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import socket
import urllib
import urlparse
import socket

import tinyback
from tinyback import exceptions
Expand Down Expand Up @@ -104,11 +105,25 @@ def __init__(self):
else:
raise ValueError("Unknown scheme %s" % parsed_url.scheme)

pos = parsed_url.netloc.find(':');
if pos != -1:
self._hostname = parsed_url.netloc[0:pos]
self._port = parsed_url.netloc[pos+1:]
else:
self._hostname = parsed_url.netloc
self._port = None
addr = [addrinfo for addrinfo in socket.getaddrinfo(self._hostname, self._port or 80)
if (addrinfo[0] == socket.AF_INET or addrinfo[0] == socket.AF_INET6) and
isinstance(addrinfo[4][0], basestring)]
if not len(addr):
raise ValueError("Unknown host %s" % parsed_url.netloc)
self._host = addr[0][4][0]

version = platform.python_version_tuple()
if int(version[0]) == 2 and int(version[1]) <= 5:
self._conn = klass(parsed_url.netloc)
self._conn = klass(self._host)
else:
self._conn = klass(parsed_url.netloc, timeout=30)
self._conn = klass(self._host, timeout=30)

def _http_head(self, code):
return self._http_fetch(code, "HEAD")[0]
Expand All @@ -122,6 +137,7 @@ def _http_fetch(self, code, method):
headers["Connection"] = "Keep-Alive"
else:
headers["Connection"] = "close"
headers["Host"] = self._hostname

try:
self._conn.request(method, self._path + code, headers=headers)
Expand Down Expand Up @@ -807,6 +823,103 @@ def yourls_api_url(self):
def yourls_url_convert(self):
return 36

class Pixorial(SimpleService):
    """
    Pixorial short links (http://myhub.pixorial.com/s/<code>).

    Codes are built from digits and lowercase ASCII letters. A redirect to
    the Pixorial home page is treated as "no such code".
    """

    @property
    def charset(self):
        """Characters a Pixorial short code may consist of."""
        return "0123456789abcdefghijklmnopqrstuvwxyz"

    @property
    def url(self):
        """Base URL to which the short code is appended."""
        return "http://myhub.pixorial.com/s/"

    @property
    def rate_limit(self):
        """
        Returns a tuple specifying the rate-limit, or None.

        Returns a two-element tuple, with the first element being the number of
        requests that are allowed in the timespan denoted by the second element
        (in seconds). When there is no rate-limit, simply returns None.

        For Pixorial this is 20 requests per 1 second.
        """
        return (20, 1)

    @property
    def http_keepalive(self):
        """
        Whether to use HTTP persistent connections or not. If set to false, the
        connection will be forcibly closed after each request.

        Disabled for Pixorial.
        """
        return False

    def fetch(self, code):
        """
        Resolve a short code with a HEAD request and return the target URL.

        Raises NoRedirectException when the code does not redirect anywhere
        (including a redirect to the Pixorial home page), ServiceException
        when a redirect response carries no Location header,
        CodeBlockedException / BlockedException for the corresponding status
        codes; any other status is delegated to unexpected_http_status().
        """
        resp = self._http_head(code)

        if resp.status in self.http_status_redirect:
            location = resp.getheader("Location")
            if not location:
                # Report the status actually received; the redirect set is
                # not necessarily limited to 301.
                raise exceptions.ServiceException(
                    "No Location header after HTTP status %i" % resp.status)
            if location == "http://myhub.pixorial.com/":
                raise exceptions.NoRedirectException("Redirected to home page")
            return location
        elif resp.status in self.http_status_no_redirect:
            raise exceptions.NoRedirectException()
        elif resp.status in self.http_status_code_blocked:
            raise exceptions.CodeBlockedException()
        elif resp.status in self.http_status_blocked:
            raise exceptions.BlockedException()
        else:
            return self.unexpected_http_status(code, resp)

class Twitter(SimpleService):
    """ Twitter changes all urls in their short messages to use this shortener. """

    @property
    def charset(self):
        """Digits, then lowercase, then uppercase ASCII letters."""
        return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    @property
    def url(self):
        """Base URL to which the short code is appended."""
        return "http://t.co/"

    @property
    def rate_limit(self):
        """(requests, seconds) tuple: 20 requests per 1 second."""
        return (20, 1)

    @property
    def http_keepalive(self):
        """Persistent HTTP connections are not used for this service."""
        return False

class Trim(SimpleService):
    """
    tr.im short links (http://tr.im/<code>).

    Codes are built from digits and lowercase ASCII letters. A redirect to
    http://tr.im/404 is treated as "no such code", except when the code being
    fetched is itself "404".
    """

    @property
    def charset(self):
        """Characters a tr.im short code may consist of."""
        return "0123456789abcdefghijklmnopqrstuvwxyz"

    @property
    def url(self):
        """Base URL to which the short code is appended."""
        return "http://tr.im/"

    @property
    def rate_limit(self):
        """(requests, seconds) tuple: 20 requests per 1 second."""
        return (20, 1)

    @property
    def http_keepalive(self):
        """Persistent HTTP connections are used for this service."""
        return True

    def fetch(self, code):
        """
        Resolve a short code with a HEAD request and return the target URL.

        Raises NoRedirectException when the code does not redirect anywhere
        (including a redirect to the tr.im 404 page), ServiceException when a
        redirect response carries no Location header, CodeBlockedException /
        BlockedException for the corresponding status codes; any other status
        is delegated to unexpected_http_status().
        """
        resp = self._http_head(code)

        if resp.status in self.http_status_redirect:
            location = resp.getheader("Location")
            if not location:
                # Report the status actually received; the redirect set is
                # not necessarily limited to 301.
                raise exceptions.ServiceException(
                    "No Location header after HTTP status %i" % resp.status)
            # For the code "404" the target http://tr.im/404 is returned as-is
            # rather than being classified as the not-found page.
            if code != "404" and location == "http://tr.im/404":
                raise exceptions.NoRedirectException("Redirected to 404 page")
            return location
        elif resp.status in self.http_status_no_redirect:
            raise exceptions.NoRedirectException()
        elif resp.status in self.http_status_code_blocked:
            raise exceptions.CodeBlockedException()
        elif resp.status in self.http_status_blocked:
            raise exceptions.BlockedException()
        else:
            return self.unexpected_http_status(code, resp)


_factory_map = {
"bitly": Bitly,
Expand All @@ -822,7 +935,10 @@ def yourls_url_convert(self):
"visiblihex": VisibliHex,
"visibli": Visibli,
"vbly": Vbly,
"arsehat": Arsehat
"arsehat": Arsehat,
"pixorial": Pixorial,
"twitter": Twitter,
"trim": Trim,
}


Expand Down