From 8fbe99236e06eafe6e330c0627de2fdb549e1e3a Mon Sep 17 00:00:00 2001 From: David Ryskalczyk Date: Thu, 21 Dec 2017 09:54:27 -0500 Subject: [PATCH] Add global socks proxy support --- tests/single-threaded-proxy.py | 2 ++ warcprox/main.py | 5 +++++ warcprox/mitmproxy.py | 11 ++++++++++- warcprox/warcproxy.py | 9 +++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/single-threaded-proxy.py b/tests/single-threaded-proxy.py index 1c176a27..ba886854 100755 --- a/tests/single-threaded-proxy.py +++ b/tests/single-threaded-proxy.py @@ -65,6 +65,8 @@ def parse_args(): help='where to store and load generated certificates') arg_parser.add_argument('--onion-tor-socks-proxy', dest='onion_tor_socks_proxy', default=None, help='host:port of tor socks proxy, used only to connect to .onion sites') + arg_parser.add_argument('--global-socks-proxy', dest='global_socks_proxy', + default=None, help='host:port of socks proxy, used to connect to all sites (overrides --onion-tor-socks-proxy)') arg_parser.add_argument('--version', action='version', version="warcprox {}".format(warcprox.__version__)) arg_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true') diff --git a/warcprox/main.py b/warcprox/main.py index 6140f728..fd62fdd8 100644 --- a/warcprox/main.py +++ b/warcprox/main.py @@ -154,6 +154,11 @@ def _build_arg_parser(prog): default=None, help=( 'host:port of tor socks proxy, used only to connect to ' '.onion sites')) + arg_parser.add_argument( + '--global-socks-proxy', dest='global_socks_proxy', + default=None, help=( + 'host:port of socks proxy, used to connect to ' + 'all sites (overrides --onion-tor-socks-proxy)')) arg_parser.add_argument( '--crawl-log-dir', dest='crawl_log_dir', default=None, help=( 'if specified, write crawl log files in the specified ' diff --git a/warcprox/mitmproxy.py b/warcprox/mitmproxy.py index 8d950fac..33bf3340 100644 --- a/warcprox/mitmproxy.py +++ b/warcprox/mitmproxy.py @@ -232,7 +232,16 @@ def 
_determine_host_port(self): def _connect_to_remote_server(self): # Connect to destination - if self.onion_tor_socks_proxy_host and self.hostname.endswith('.onion'): + if self.global_socks_proxy_host: + self.logger.info( + "using socks proxy at %s:%s to connect to %s", + self.global_socks_proxy_host, + self.global_socks_proxy_port or 1080, self.hostname) + self._remote_server_sock = socks.socksocket() + self._remote_server_sock.set_proxy( + socks.SOCKS5, addr=self.global_socks_proxy_host, + port=self.global_socks_proxy_port, rdns=True) + elif self.onion_tor_socks_proxy_host and self.hostname.endswith('.onion'): self.logger.info( "using tor socks proxy at %s:%s to connect to %s", self.onion_tor_socks_proxy_host, diff --git a/warcprox/warcproxy.py b/warcprox/warcproxy.py index 12aac745..ca5037ec 100644 --- a/warcprox/warcproxy.py +++ b/warcprox/warcproxy.py @@ -395,6 +395,15 @@ def __init__( WarcProxyHandler.onion_tor_socks_proxy_host = options.onion_tor_socks_proxy WarcProxyHandler.onion_tor_socks_proxy_port = None + if options.global_socks_proxy: + try: + host, port = options.global_socks_proxy.split(':') + WarcProxyHandler.global_socks_proxy_host = host + WarcProxyHandler.global_socks_proxy_port = int(port) + except ValueError: + WarcProxyHandler.global_socks_proxy_host = options.global_socks_proxy + WarcProxyHandler.global_socks_proxy_port = None + http_server.HTTPServer.__init__( self, server_address, WarcProxyHandler, bind_and_activate=True)