diff --git a/fraudcrawler/scraping/search.py b/fraudcrawler/scraping/search.py index 2e1ec35..feefb26 100644 --- a/fraudcrawler/scraping/search.py +++ b/fraudcrawler/scraping/search.py @@ -275,7 +275,6 @@ async def _search( q: The search string (with potentially added site: parameters). google_domain: The Google domain to use for the search (e.g. google.[com]). location_[requested|used]: The location to use for the search. - tbs: The to-be-searched parameters (e.g. 'ctr:CH'). cr: The country code to limit the search to (e.g. 'countryCH'). gl: The country code to use for the search. hl: The language code to use for the search. @@ -308,7 +307,6 @@ async def _search( "google_domain": google_domain, "location_requested": location.name, "location_used": location.name, - "tbs": f"ctr:{country_code.upper()}", "cr": f"country{country_code.upper()}", "gl": country_code, "hl": language.code, @@ -384,15 +382,25 @@ def _engine(self) -> str: @staticmethod def _extract_search_results_urls(data: dict) -> List[str]: - """Extracts search results urls from the response data. + """Extracts all search result URLs by composing `organic_results` and `inline_images`. Args: data: The json data from the SerpApi search response. """ + + # Extract URLs from `organic_results` + organic_urls = [] results = data.get("organic_results") if results is not None: - return [url for res in results if (url := res.get("link"))] - return [] + organic_urls = [url for res in results if (url := res.get("link"))] + + # Extract URLs from `inline_images` + inline_urls = [] + images = data.get("inline_images") + if images is not None: + inline_urls = [url for img in images if (url := img.get("source"))] + + return organic_urls + inline_urls async def search( self, diff --git a/pyproject.toml b/pyproject.toml index c453c17..387feb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "fraudcrawler" -version = "0.8.9" +version = "0.8.10" description = "Intelligent Market Monitoring" authors = [ "Domingo Bertus ",