# Standard library imports
import argparse
import concurrent.futures
import logging

# Third-party imports
import requests
from bs4 import BeautifulSoup

# Local modules from this repository
from proxy import Proxy
from utils import ProxyTypeMapper, AnonymityTypeMapper, args_to_params
# Argument parser setup
parser = argparse.ArgumentParser(description='Parse proxies from https://hidemy.name/')
parser.add_argument('file', help='File path for saving proxies')
parser.add_argument('-t', '--types', nargs='+', dest='type', choices=['http', 'https', 'socks4', 'socks5'],
                    help='List of proxy types (http, https, socks4 and/or socks5)', default='hs',
                    metavar='http, https, socks4, socks5', action=ProxyTypeMapper)
parser.add_argument('-a', '--anon', nargs='+', dest='anon', choices=['high', 'avg', 'low'],
                    help='Proxy anonymity level (high, avg, low)', metavar='high, avg, low',
                    action=AnonymityTypeMapper)
parser.add_argument('-p', '--ports', nargs='+', dest='ports', help='Proxy ports')
parser.add_argument('-v', '--validate', help='Skip proxies that fail validation', action='store_true')
parser.add_argument('-T', '--timeout', '--time', type=int, default=2,
                    help='Max time in seconds for validating a proxy; only used with the -v (validate) flag.')
parser.add_argument('-l', '--logging', type=str,
                    help='Path of the logging file. If not specified, no logging file is created.')
args = vars(parser.parse_args())
logging_file = args.pop('logging')
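# A hypothetical example invocation, based on the arguments defined above
# (the output and log file names are placeholders):
#   python main.py proxies.txt --types http socks5 --anon high --validate --timeout 5 --logging parser.log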
# Logger setup
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.DEBUG)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

if logging_file:
    file_handler = logging.FileHandler(logging_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
filename = args.pop('file')
to_validate = args.pop('validate')
timeout = args.pop('timeout')

# The remaining arguments become URL query parameters for the proxy list
params = args_to_params(args)
base_url = 'https://hidemy.name'
base_uri = base_url + '/en/proxy-list/' + params
next_page = base_uri
def get_proxies(html):
    """Extract proxies from a parsed proxy-list page."""
    proxies_html = html.find('tbody').find_all('tr')
    proxies_list = []
    for proxy_html in proxies_html:
        ip = proxy_html.find_next('td')
        port = ip.find_next_sibling()
        # The type column is the third <td> after the port column
        types = port.find_all_next('td')[2]
        proxy = Proxy(ip.text, port.text, types.text)
        proxies_list.append(proxy)
        if not to_validate:
            logger.info(f'{proxy.type.name} proxy {proxy} has been successfully parsed')
    if to_validate:
        proxies_list = validate_proxies(proxies_list)
    return proxies_list
def get_next_page(html):
    """Return the URL of the next page, or None when on the last page."""
    last_button = html.find('div', {'class': 'pagination'}).find('ul').find_all('li')[-1]
    # If the last pagination button is the page we are on, there is no next page
    if last_button['class'][0] == 'active':
        return None
    return base_url + last_button.find('a')['href']
def validate_proxies(proxies_list):
    """Keep only the proxies that complete a test request within the timeout."""
    valid_proxies = []

    def is_valid(proxy):
        req_proxies = {
            'http': f'{proxy.type}://{proxy}',
            'https': f'{proxy.type}://{proxy}'
        }
        try:
            requests.get('https://icanhazip.com/', proxies=req_proxies, timeout=timeout)
            logger.info(f'{proxy.type.name} proxy {proxy} is valid!')
            # ThreadPoolExecutor workers share memory, and list.append is
            # thread-safe, so a plain list is sufficient here
            valid_proxies.append(proxy)
        except Exception:
            logger.info(f'{proxy.type.name} proxy {proxy} is invalid!')

    # Validate concurrently; each request blocks for up to `timeout` seconds
    with concurrent.futures.ThreadPoolExecutor() as executor:
        executor.map(is_valid, proxies_list)
    return valid_proxies
def proxies_to_file(filepath, proxies_list):
    """Append the proxies, one per line, to the output file."""
    with open(filepath, 'a') as f:
        if proxies_list:
            f.write('\n'.join(str(p) for p in proxies_list) + '\n')
if __name__ == '__main__':
    # Walk the paginated proxy list, saving each page's proxies as we go
    while next_page:
        response = requests.get(next_page, headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                          '(KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'})
        soup = BeautifulSoup(response.text, 'html.parser')
        proxies = get_proxies(soup)
        next_page = get_next_page(soup)
        proxies_to_file(filename, proxies)