Skip to content

Commit 30cd185

Browse files
authored
Merge pull request #6 from opsdisk/issue-5-eu-countries-require-cookie-modification
Added logic to detect EU country sourced IPs to modify cookie
2 parents 113a088 + 3a4ca7b commit 30cd185

File tree

2 files changed

+39
-1
lines changed

2 files changed

+39
-1
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="yagooglesearch",
8-
version="1.2.0",
8+
version="1.3.0",
99
author="Brennon Thomas",
1010
author_email="info@opsdisk.com",
1111
description="A Python library for executing intelligent, realistic-looking, and tunable Google searches.",

yagooglesearch/__init__.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,44 @@ def get_page(self, url):
320320
ROOT_LOGGER.debug(f" proxy: {self.proxy}")
321321
ROOT_LOGGER.debug(f" verify_ssl: {self.verify_ssl}")
322322

323+
# Google throws up a consent page for searches sourcing from a European Union country IP location.
324+
# See https://github.com/benbusby/whoogle-search/issues/311
325+
try:
326+
if response.cookies["CONSENT"].startswith("PENDING+"):
327+
328+
ROOT_LOGGER.warning(
329+
"Looks like your IP address is sourcing from a European Union location...your search results may "
330+
"vary, but I'll try and work around this by updating the cookie."
331+
)
332+
333+
# Convert the cookiejar data structure to a Python dict.
334+
cookie_dict = requests.utils.dict_from_cookiejar(self.cookies)
335+
336+
# Pull out the random number assigned to the response cookie.
337+
number = cookie_dict["CONSENT"].split("+")[1]
338+
339+
# See https://github.com/benbusby/whoogle-search/pull/320/files
340+
"""
341+
Attempting to dissect/break down the new cookie response values.
342+
343+
YES - Accept consent
344+
shp - ?
345+
gws - "server:" header value returned from original request. Maybe Google Workspace plus a build?
346+
fr - Original tests sourced from France. Assuming this is the country code. Country code was changed
347+
to .de and it still worked.
348+
F - FX agrees to tracking. Modifying it to just F seems to consent with "no" to personalized stuff.
349+
Not tested, solely based off of
350+
https://github.com/benbusby/whoogle-search/issues/311#issuecomment-841065630
351+
XYZ - Random 3-digit number assigned to the first response cookie.
352+
"""
353+
self.cookies = {"CONSENT": f"YES+shp.gws-20211108-0-RC1.fr+F+{number}"}
354+
355+
ROOT_LOGGER.info(f"Updating cookie to: {self.cookies}")
356+
357+
# "CONSENT" cookie does not exist.
358+
except KeyError:
359+
pass
360+
323361
html = ""
324362

325363
if http_response_code == 200:

0 commit comments

Comments
 (0)