|
12 | 12 |
|
13 | 13 | # Custom Python libraries.
|
14 | 14 |
|
15 |
| -__version__ = "1.6.0" |
| 15 | +__version__ = "1.6.1" |
16 | 16 |
|
17 | 17 | # Logging
|
18 | 18 | ROOT_LOGGER = logging.getLogger("yagooglesearch")
|
|
32 | 32 | USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"
|
33 | 33 |
|
34 | 34 | # Load the list of valid user agents from the install folder. The search order is:
|
35 |
| -# 1) user_agents.txt |
36 |
| -# 2) default USER_AGENT |
| 35 | +# 1) user_agents.txt |
| 36 | +# 2) default USER_AGENT |
37 | 37 | install_folder = os.path.abspath(os.path.split(__file__)[0])
|
38 | 38 |
|
39 | 39 | try:
|
@@ -155,7 +155,16 @@ def __init__(
|
155 | 155 | self.cookies = None
|
156 | 156 |
|
157 | 157 | # Used later to ensure there are not any URL parameter collisions.
|
158 |
| - self.url_parameters = ("btnG", "cr", "hl", "num", "q", "safe", "start", "tbs") |
| 158 | + self.url_parameters = ( |
| 159 | + "btnG", |
| 160 | + "cr", |
| 161 | + "hl", |
| 162 | + "num", |
| 163 | + "q", |
| 164 | + "safe", |
| 165 | + "start", |
| 166 | + "tbs", |
| 167 | + ) |
159 | 168 |
|
160 | 169 | # Default user agent, unless instructed by the user to change it.
|
161 | 170 | if not user_agent:
|
@@ -272,7 +281,7 @@ def filter_search_result_urls(self, link):
|
272 | 281 | )
|
273 | 282 | link = None
|
274 | 283 |
|
275 |
| - # TODO: Generates false positives if specifing an actual Google site, e.g. "site:google.com fiber". |
| 284 | + # TODO: Generates false positives if specifying an actual Google site, e.g. "site:google.com fiber". |
276 | 285 | if urlparse_object.netloc and ("google" in urlparse_object.netloc.lower()):
|
277 | 286 | ROOT_LOGGER.debug(f'Excluding URL because it contains "google": {link}')
|
278 | 287 | link = None
|
@@ -338,15 +347,15 @@ def get_page(self, url):
|
338 | 347 | "vary, but I'll try and work around this by updating the cookie."
|
339 | 348 | )
|
340 | 349 |
|
341 |
| - # Convert the cookiejar data struture to a Python dict. |
| 350 | + # Convert the cookiejar data structure to a Python dict. |
342 | 351 | cookie_dict = requests.utils.dict_from_cookiejar(self.cookies)
|
343 | 352 |
|
344 | 353 | # Pull out the random number assigned to the response cookie.
|
345 | 354 | number = cookie_dict["CONSENT"].split("+")[1]
|
346 | 355 |
|
347 | 356 | # See https://github.com/benbusby/whoogle-search/pull/320/files
|
348 | 357 | """
|
349 |
| - Attempting to disect/breakdown the new cookie response values. |
| 358 | + Attempting to dissect/breakdown the new cookie response values. |
350 | 359 |
|
351 | 360 | YES - Accept consent
|
352 | 361 | shp - ?
|
@@ -416,7 +425,7 @@ def search(self):
|
416 | 425 | if builtin_param in self.extra_params.keys():
|
417 | 426 | raise ValueError(f'GET parameter "{builtin_param}" is overlapping with the built-in GET parameter')
|
418 | 427 |
|
419 |
| - # Simulates browsing to the google.com home page and retrieving the initial cookie. |
| 428 | + # Simulates browsing to the https://www.google.com home page and retrieving the initial cookie. |
420 | 429 | html = self.get_page(self.url_home)
|
421 | 430 |
|
422 | 431 | # Loop until we reach the maximum number of search results found or there are no more search results found to reach
|
@@ -533,7 +542,7 @@ def search(self):
|
533 | 542 | else:
|
534 | 543 | ROOT_LOGGER.info(f"Duplicate URL found: {link}")
|
535 | 544 |
|
536 |
| - # If we reached the limit of requested URLS, return with the results. |
| 545 | + # If we reached the limit of requested URLs, return with the results. |
537 | 546 | if self.max_search_result_urls_to_return <= len(self.search_result_list):
|
538 | 547 | return self.search_result_list
|
539 | 548 |
|
|
0 commit comments