Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ SERPAPI_KEY=<your-serpapi-key>
OPENAIAPI_KEY=<your-openai-api-key>
DATAFORSEO_USER=<your-dataforseo-username>
DATAFORSEO_PWD=<your-dataforseo-password>
REDIS_USE_CACHE=<true/false>
REDIS_USE_CACHE=<true/false>
REDIS_URL=<redis-url>
11 changes: 9 additions & 2 deletions fraudcrawler/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from fraudcrawler.cache.cacher import RedisCacher
from fraudcrawler.cache.cacher import RedisCacher, RedisConfig
from fraudcrawler.scraping.search import Searcher, SearchEngineName, WebsiteSearch
from fraudcrawler.scraping.enrich import Enricher
from fraudcrawler.scraping.url import URLCollector
from fraudcrawler.scraping.url import (
URLCollector,
LocalURLCollector,
DistributedURLCollector,
)
from fraudcrawler.scraping.zyte import ZyteAPI
from fraudcrawler.scraping.saved_search_models import (
WebsiteSource,
Expand Down Expand Up @@ -35,11 +39,14 @@

__all__ = [
"RedisCacher",
"RedisConfig",
"Searcher",
"SearchEngineName",
"WebsiteSearch",
"Enricher",
"URLCollector",
"LocalURLCollector",
"DistributedURLCollector",
"ZyteAPI",
"WebsiteSource",
"WebsiteSourceFilterConfig",
Expand Down
9 changes: 7 additions & 2 deletions fraudcrawler/base/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
DEFAULT_HTTPX_TIMEOUT,
DEFAULT_HTTPX_LIMITS,
DEFAULT_HTTPX_REDIRECTS,
REDIS_USE_CACHE,
REDIS_DEFAULT_HOSTNAME,
REDIS_DEFAULT_PORT,
)

logger = logging.getLogger(__name__)
Expand All @@ -46,10 +49,12 @@ class Setup(BaseSettings):
dataforseo_pwd: str
zyteapi_key: str
openaiapi_key: str
pypy_token: str

# Redis cache
redis_use_cache: bool
redis_use_cache: bool = REDIS_USE_CACHE
redis_hostname: str = REDIS_DEFAULT_HOSTNAME
redis_port: int = REDIS_DEFAULT_PORT
redis_password: str | None = None

class Config:
env_file = ".env"
Expand Down
2 changes: 1 addition & 1 deletion fraudcrawler/base/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ async def run(

# Handle previously collected URLs
if pcurls := previously_collected_urls:
self._url_collector.add_previously_collected_urls(urls=pcurls)
await self._url_collector.add_previously_collected_urls(urls=pcurls)

# Setup the async framework
n_saved_sources = len(website_source_sources or [])
Expand Down
Loading
Loading