diff --git a/hcf_backend/manager.py b/hcf_backend/manager.py
index 3b7ac8f..b40b67b 100644
--- a/hcf_backend/manager.py
+++ b/hcf_backend/manager.py
@@ -40,7 +40,7 @@ def flush(self, slot=None):
         slot_obj = self._frontier.get(slot)
         slot_obj.flush()
         self._links_to_flush_count[slot] = 0
-        LOG.info('Flushed %d link(s) to slot %s', slot)
+        LOG.info('Flushed %d link(s) to slot %s', n_links_to_flush, slot)
         return n_links_to_flush
diff --git a/hcf_backend/utils/crawlmanager.py b/hcf_backend/utils/crawlmanager.py
index 5a119bc..3f7c4d0 100644
--- a/hcf_backend/utils/crawlmanager.py
+++ b/hcf_backend/utils/crawlmanager.py
@@ -8,6 +8,8 @@
 import random
 import logging
 
+from scrapinghub import ScrapinghubClient
+
 from shub_workflow.crawl import CrawlManager
 
 from hcf_backend.utils.hcfpal import HCFPal
@@ -23,6 +25,7 @@ class HCFCrawlManager(CrawlManager):
 
     def __init__(self):
         super().__init__()
+        self.client = ScrapinghubClient(max_retries=10, max_retry_time=3600)
         self.hcfpal = HCFPal(self.client._hsclient.get_project(self.project_id))
 
     def add_argparser_options(self):