forked from iw4p/proxy-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
proxyChecker.py
136 lines (116 loc) · 4.52 KB
/
proxyChecker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import argparse
import random
import re
import socket
import threading
import urllib.request
from time import time
import socks
# Pool of User-Agent strings, one per line in user_agents.txt.
# Whitespace-only lines are skipped so random.choice() can never
# hand an empty User-Agent to a proxy check.
user_agents = []
with open("user_agents.txt", "r") as f:
    for line in f:
        agent = line.strip()
        if agent:
            user_agents.append(agent)
class Proxy:
    """A single proxy endpoint ("ip:port") reachable via a given method.

    Supported methods: http, https, socks4, socks5 (case-insensitive).
    """

    def __init__(self, method, proxy):
        """Store the lowercased method and the proxy address string.

        Raises:
            NotImplementedError: for any unsupported method.
        """
        if method.lower() not in ["http", "https", "socks4", "socks5"]:
            raise NotImplementedError("Only HTTP, HTTPS, SOCKS4, and SOCKS5 are supported")
        self.method = method.lower()
        self.proxy = proxy

    def is_valid(self):
        """Return a truthy match object if the proxy looks like ip[:port], else None.

        Note: only the *shape* is validated; octet/port ranges are not.
        """
        return re.match(r"\d{1,3}(?:\.\d{1,3}){3}(?::\d{1,5})?$", self.proxy)

    def check(self, site, timeout, user_agent, verbose):
        """Try to fetch *site* through this proxy.

        Args:
            site: target URL or bare hostname.
            timeout: seconds before the request is abandoned.
            user_agent: User-Agent header value to send.
            verbose: when truthy, log the outcome via verbose_print.

        Returns:
            (True, seconds_taken, None) on success,
            (False, 0, exception) on failure.
        """
        # Only prepend a scheme when the caller passed a bare host.  The
        # previous code unconditionally prepended, so the script's own
        # default site "https://google.com/" became the malformed URL
        # "http://https://google.com/" and every HTTP(S) check failed.
        url = site if re.match(r"^https?://", site) else self.method + "://" + site
        if self.method in ["socks4", "socks5"]:
            # NOTE(review): PySocks monkey-patches socket.socket globally
            # here; this is process-wide and never restored — kept for
            # compatibility with the original behavior.
            host, _, port = self.proxy.partition(":")
            socks.set_default_proxy(
                socks.SOCKS4 if self.method == "socks4" else socks.SOCKS5,
                host,
                int(port),
            )
            socket.socket = socks.socksocket
            opener = urllib.request.build_opener()
        else:
            proxy_support = urllib.request.ProxyHandler({self.method: self.method + "://" + self.proxy})
            # Use a local opener instead of install_opener(): installing a
            # process-global opener from many checker threads races.
            opener = urllib.request.build_opener(proxy_support)
        req = urllib.request.Request(url)
        req.add_header("User-Agent", user_agent)
        try:
            start_time = time()
            opener.open(req, timeout=timeout)
            time_taken = time() - start_time
            verbose_print(verbose, f"Proxy {self.proxy} is valid, time taken: {time_taken}")
            return True, time_taken, None
        except Exception as e:
            verbose_print(verbose, f"Proxy {self.proxy} is not valid, error: {str(e)}")
            return False, 0, e

    def __str__(self):
        """The raw proxy address string ("ip:port")."""
        return self.proxy
def verbose_print(verbose, message):
    """Print *message* to stdout only when *verbose* is truthy."""
    if not verbose:
        return
    print(message)
def check(file, timeout, method, site, verbose, random_user_agent):
    """Concurrently test every proxy listed in *file* against *site*,
    then rewrite *file* so it contains only the proxies that worked.
    """
    with open(file, "r") as f:
        proxies = [Proxy(method, line.replace("\n", "")) for line in f]
    print(f"Checking {len(proxies)} proxies")
    candidates = [p for p in proxies if p.is_valid()]
    valid_proxies = []
    base_agent = random.choice(user_agents)

    def worker(proxy, agent):
        # Optionally rotate the User-Agent per proxy.
        if random_user_agent:
            agent = random.choice(user_agents)
        ok, _, _ = proxy.check(site, timeout, agent, verbose)
        if ok:
            valid_proxies.append(proxy)

    # One thread per candidate proxy; start them all, then wait.
    threads = [
        threading.Thread(target=worker, args=(p, base_agent)) for p in candidates
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    with open(file, "w") as f:
        f.writelines(str(p) + "\n" for p in valid_proxies)
    print(f"Found {len(valid_proxies)} valid proxies")
if __name__ == "__main__":
    # Command-line entry point: parse options and run the checker.
    parser = argparse.ArgumentParser()
    parser.add_argument("-t", "--timeout", type=int, default=20,
                        help="Dismiss the proxy after -t seconds")
    parser.add_argument("-p", "--proxy", default="http",
                        help="Check HTTPS, HTTP, SOCKS4, or SOCKS5 proxies")
    parser.add_argument("-l", "--list", default="output.txt",
                        help="Path to your proxy list file")
    parser.add_argument("-s", "--site", default="https://google.com/",
                        help="Check with specific website like google.com")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Increase output verbosity")
    parser.add_argument("-r", "--random_agent", action="store_true",
                        help="Use a random user agent per proxy")
    args = parser.parse_args()
    check(
        file=args.list,
        timeout=args.timeout,
        method=args.proxy,
        site=args.site,
        verbose=args.verbose,
        random_user_agent=args.random_agent,
    )