@@ -320,6 +320,44 @@ def get_page(self, url):
320
320
ROOT_LOGGER .debug (f" proxy: { self .proxy } " )
321
321
ROOT_LOGGER .debug (f" verify_ssl: { self .verify_ssl } " )
322
322
323
+ # Google throws up a consent page for searches sourcing from a European Union country IP location.
324
+ # See https://github.com/benbusby/whoogle-search/issues/311
325
+ try :
326
+ if response .cookies ["CONSENT" ].startswith ("PENDING+" ):
327
+
328
+ ROOT_LOGGER .warning (
329
+ "Looks like your IP address is sourcing from a European Union location...your search results may "
330
+ "vary, but I'll try and work around this by updating the cookie."
331
+ )
332
+
333
+ # Convert the cookiejar data structure to a Python dict.
334
+ cookie_dict = requests .utils .dict_from_cookiejar (self .cookies )
335
+
336
+ # Pull out the random number assigned to the response cookie.
337
+ number = cookie_dict ["CONSENT" ].split ("+" )[1 ]
338
+
339
+ # See https://github.com/benbusby/whoogle-search/pull/320/files
340
+ """
341
+ Attempting to dissect/break down the new cookie response values.
342
+
343
+ YES - Accept consent
344
+ shp - ?
345
+ gws - "server:" header value returned from original request. Maybe Google Workspace plus a build?
346
+ fr - Original tests sourced from France. Assuming this is the country code. Country code was changed
347
+ to .de and it still worked.
348
+ F - FX agrees to tracking. Modifying it to just F seems to consent with "no" to personalized stuff.
349
+ Not tested, solely based off of
350
+ https://github.com/benbusby/whoogle-search/issues/311#issuecomment-841065630
351
+ XYZ - Random 3-digit number assigned to the first response cookie.
352
+ """
353
+ self .cookies = {"CONSENT" : f"YES+shp.gws-20211108-0-RC1.fr+F+{ number } " }
354
+
355
+ ROOT_LOGGER .info (f"Updating cookie to: { self .cookies } " )
356
+
357
+ # "CONSENT" cookie does not exist.
358
+ except KeyError :
359
+ pass
360
+
323
361
html = ""
324
362
325
363
if http_response_code == 200 :
0 commit comments