|
12 | 12 |
|
13 | 13 | # Custom Python libraries.
|
14 | 14 |
|
15 |
| -__version__ = "1.4.0" |
| 15 | +__version__ = "1.5.0" |
16 | 16 |
|
17 | 17 | # Logging
|
18 | 18 | ROOT_LOGGER = logging.getLogger("yagooglesearch")
|
@@ -468,9 +468,7 @@ def search(self):
|
468 | 468 | gbar.clear()
|
469 | 469 | anchors = soup.find_all("a")
|
470 | 470 |
|
471 |
| - # Used to determine if another page of search results needs to be requested. If 100 search results are |
472 |
| - # requested per page, but the current page of results is less than that, no need to search the next page for |
473 |
| - # results because there won't be any. Prevents fruitless queries and costing a pointless search request. |
| 471 | + # Tracks number of valid URLs found on a search page. |
474 | 472 | valid_links_found_in_this_search = 0
|
475 | 473 |
|
476 | 474 | # Process every anchored URL.
|
@@ -498,21 +496,19 @@ def search(self):
|
498 | 496 | ROOT_LOGGER.info(f"Found unique URL #{total_valid_links_found}: {link}")
|
499 | 497 | unique_urls_set.add(link)
|
500 | 498 |
|
| 499 | + else: |
| 500 | + ROOT_LOGGER.info(f"Duplicate URL found: {link}") |
| 501 | + |
501 | 502 | # If we reached the limit of requested URLs, return with the results.
|
502 | 503 | if self.max_search_result_urls_to_return <= len(unique_urls_set):
|
503 | 504 | # Convert to a list.
|
504 | 505 | self.unique_urls_list = list(unique_urls_set)
|
505 | 506 | return self.unique_urls_list
|
506 | 507 |
|
507 |
| - # See comment for the "valid_links_found_in_this_search" variable. This is because determining if a "Next" |
508 |
| - # URL page of results is not straightforward. For example, this can happen if |
509 |
| - # max_search_result_urls_to_return=100, but there are only 93 total possible results. |
510 |
| - if valid_links_found_in_this_search != self.num: |
511 |
| - ROOT_LOGGER.info( |
512 |
| - f"The number of valid search results ({valid_links_found_in_this_search}) was not the requested " |
513 |
| - f"max results to pull back at once num=({self.num}) for this page. That implies there won't be " |
514 |
| - "any search results on the next page either. Moving on..." |
515 |
| - ) |
| 508 | + # Determining if a "Next" URL page of results exists is not straightforward. If no valid links are found, the |
| 509 | + # search results have been exhausted. |
| 510 | + if valid_links_found_in_this_search == 0: |
| 511 | + ROOT_LOGGER.info("No valid search results found on this page. Moving on...") |
516 | 512 | # Convert to a list.
|
517 | 513 | self.unique_urls_list = list(unique_urls_set)
|
518 | 514 | return self.unique_urls_list
|
|
0 commit comments