|
3 | 3 | from functools import cache |
4 | 4 | from tempfile import mkdtemp |
5 | 5 | from timeit import default_timer |
6 | | -from typing import Any, Generator |
| 6 | +from typing import Any |
7 | 7 |
|
8 | 8 | import magic |
9 | 9 | from banal import ensure_list |
10 | 10 | from followthemoney import model |
11 | 11 | from followthemoney.helpers import entity_filename |
12 | 12 | from followthemoney.namespace import Namespace |
13 | | -from followthemoney.proxy import EntityProxy |
14 | 13 | from ftmq.store.fragments import get_fragments |
15 | 14 | from ftmq.store.fragments.utils import safe_fragment |
16 | 15 | from normality import stringify |
17 | 16 | from openaleph_procrastinate import defer |
18 | | -from procrastinate import App |
| 17 | +from openaleph_procrastinate.app import App |
| 18 | +from openaleph_procrastinate.util import make_checksum_entity |
19 | 19 | from prometheus_client import Counter, Histogram |
20 | 20 | from rigour.mime import normalize_mimetype |
21 | 21 | from servicelayer.archive import init_archive |
@@ -106,7 +106,7 @@ def __init__(self, app: App, dataset: str, context: dict[str, Any]): |
106 | 106 | self.context = context |
107 | 107 | self.ns = Namespace(self.context["namespace"]) |
108 | 108 | self.work_path = ensure_path(mkdtemp(prefix="ingestor-")) |
109 | | - self.emitted = set() |
| 109 | + self.emitted = [] |
110 | 110 | self.archive = get_archive() |
111 | 111 |
|
112 | 112 | def make_entity(self, schema, parent=None): |
@@ -138,7 +138,7 @@ def apply_context(self, entity, source): |
138 | 138 | def emit_entity(self, entity, fragment=None): |
139 | 139 | entity = self.ns.apply(entity) |
140 | 140 | self.writer.put(entity.to_dict(), fragment) |
141 | | - self.emitted.add(entity.id) |
| 141 | + self.emitted.append(make_checksum_entity(entity, quiet=True)) |
142 | 142 |
|
143 | 143 | def emit_text_fragment(self, entity, texts, fragment): |
144 | 144 | texts = [t for t in ensure_list(texts) if filter_text(t)] |
@@ -259,6 +259,3 @@ def delegate(self, ingestor_class, file_path, entity): |
259 | 259 | def close(self): |
260 | 260 | self.writer.flush() |
261 | 261 | remove_directory(self.work_path) |
262 | | - |
263 | | - def iterate_emitted(self) -> Generator[EntityProxy, None, None]: |
264 | | - yield from self.db.iterate(self.emitted) |
0 commit comments