From 6a7821940f1d02aa40c4e9c28d88fc017dfaeed5 Mon Sep 17 00:00:00 2001 From: yj <> Date: Sun, 17 Mar 2019 16:28:44 +0800 Subject: [PATCH] Add support for HTTP and HTTPS proxies. Example usage: g = Goose({'https_proxy': '127.0.0.1:8080'}) --- goose/configuration.py | 4 ++++ goose/network.py | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/goose/configuration.py b/goose/configuration.py index fcfa5b9a..dae28a56 100644 --- a/goose/configuration.py +++ b/goose/configuration.py @@ -99,6 +99,10 @@ def __init__(self): # http timeout self.http_timeout = HTTP_DEFAULT_TIMEOUT + # proxy settings + self.http_proxy = None + self.https_proxy = None + def get_parser(self): return AVAILABLE_PARSERS[self.parser_class] diff --git a/goose/network.py b/goose/network.py index 666a7d61..051f1edb 100644 --- a/goose/network.py +++ b/goose/network.py @@ -30,6 +30,16 @@ def __init__(self, config): # set header self.headers = {'User-agent': self.config.browser_user_agent} + proxies = {} + if self.config.http_proxy is not None: + proxies["http"] = self.config.http_proxy; + if self.config.https_proxy is not None: + proxies["https"] = self.config.https_proxy; + if len(proxies) > 0: + proxy = urllib2.ProxyHandler(proxies) + opener = urllib2.build_opener(proxy) + urllib2.install_opener(opener) + def get_url(self): # if we have a result # get the final_url