12
12
13
13
# Custom Python libraries.
14
14
15
- __version__ = "1.1 .0"
15
+ __version__ = "1.2 .0"
16
16
17
17
# Logging
18
18
ROOT_LOGGER = logging .getLogger ("yagooglesearch" )
@@ -82,6 +82,7 @@ def __init__(
82
82
http_429_cool_off_time_in_minutes = 60 ,
83
83
http_429_cool_off_factor = 1.1 ,
84
84
proxy = "" ,
85
+ verify_ssl = True ,
85
86
verbosity = 5 ,
86
87
):
87
88
@@ -108,6 +109,8 @@ def __init__(
108
109
:param float http_429_cool_off_factor: Factor to multiply by http_429_cool_off_time_in_minutes for each HTTP 429
109
110
detected.
110
111
:param str proxy: HTTP(S) or SOCKS5 proxy to use.
112
+ :param bool verify_ssl: Verify the SSL certificate to prevent traffic interception attacks. Defaults to True.
113
+ This may need to be set to False when routing traffic through some HTTPS proxies.
111
114
:param int verbosity: Logging and console output verbosity.
112
115
113
116
:rtype: List of str
@@ -129,6 +132,7 @@ def __init__(
129
132
self .http_429_cool_off_time_in_minutes = http_429_cool_off_time_in_minutes
130
133
self .http_429_cool_off_factor = http_429_cool_off_factor
131
134
self .proxy = proxy
135
+ self .verify_ssl = verify_ssl
132
136
self .verbosity = verbosity
133
137
134
138
# Assign log level.
@@ -176,6 +180,10 @@ def __init__(
176
180
"https" : self .proxy ,
177
181
}
178
182
183
+ # Suppress urllib3's InsecureRequestWarning messages if verify_ssl is disabled.
184
+ if not self .verify_ssl :
185
+ requests .packages .urllib3 .disable_warnings (requests .packages .urllib3 .exceptions .InsecureRequestWarning )
186
+
179
187
def update_urls (self ):
180
188
"""Update search URLs being used."""
181
189
@@ -234,8 +242,9 @@ def filter_search_result_urls(self, link):
234
242
ROOT_LOGGER .debug (f"pre filter_search_result_urls() link: { link } " )
235
243
236
244
try :
237
- # Extract URL from parameter.
238
- if link .startswith ("/url?" ):
245
+ # Extract URL from parameter. Once in a while the full "http://www.google.com/url?" exists instead of just
246
+ # "/url?". After a re-run, the full form disappears and only "/url?" is present; this might be a caching artifact.
247
+ if link .startswith ("/url?" ) or link .startswith ("http://www.google.com/url?" ):
239
248
urlparse_object = urllib .parse .urlparse (link , scheme = "http" )
240
249
241
250
# The "q" key exists most of the time.
@@ -294,7 +303,9 @@ def get_page(self, url):
294
303
}
295
304
296
305
ROOT_LOGGER .info (f"Requesting URL: { url } " )
297
- response = requests .get (url , proxies = self .proxy_dict , headers = headers , cookies = self .cookies , timeout = 15 )
306
+ response = requests .get (
307
+ url , proxies = self .proxy_dict , headers = headers , cookies = self .cookies , timeout = 15 , verify = self .verify_ssl
308
+ )
298
309
299
310
# Update the cookies.
300
311
self .cookies = response .cookies
@@ -303,10 +314,11 @@ def get_page(self, url):
303
314
http_response_code = response .status_code
304
315
305
316
# debug_requests_response(response)
306
- ROOT_LOGGER .debug (f" status_code: { http_response_code } " )
307
- ROOT_LOGGER .debug (f" proxy: { self .proxy } " )
308
- ROOT_LOGGER .debug (f" headers: { headers } " )
309
- ROOT_LOGGER .debug (f" cookies: { self .cookies } " )
317
+ ROOT_LOGGER .debug (f" status_code: { http_response_code } " )
318
+ ROOT_LOGGER .debug (f" headers: { headers } " )
319
+ ROOT_LOGGER .debug (f" cookies: { self .cookies } " )
320
+ ROOT_LOGGER .debug (f" proxy: { self .proxy } " )
321
+ ROOT_LOGGER .debug (f" verify_ssl: { self .verify_ssl } " )
310
322
311
323
html = ""
312
324
0 commit comments