-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocument_processor.js
More file actions
41 lines (35 loc) · 1.21 KB
/
document_processor.js
File metadata and controls
41 lines (35 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/**
 * DocumentProcessor
 * Unified interface for file processing (OCR + rule-based parsing).
 */
const DocumentProcessor = {
  /**
   * Runs a file through OCR and the configured parser, then decorates the
   * parsed result with a content hash, file name, URL and category.
   *
   * Every pipeline stage (parser selection, OCR, parsing) follows the same
   * failure contract: the problem is logged through LoggerService and the
   * method returns null instead of throwing.
   *
   * @param {Object} file - File handle passed to OCRService.extractText and
   *   parser.parseFromText; must expose getName() and getUrl().
   *   NOTE(review): presumably a Google Drive File - confirm against callers.
   * @returns {Object|null} The parser's result object with `hash`, `fileName`,
   *   `url` and `category` set on it, or null when parser selection, OCR or
   *   parsing fails or yields nothing.
   */
  process(file) {
    const props = PropertiesService.getScriptProperties();
    // The resolved modes are only logged here; the parser itself is chosen
    // by ParserFactory.
    const options = {
      mode: props.getProperty("PROCESSOR_MODE") || APP_DEFAULTS.PROCESSOR_MODE,
      mlMode: props.getProperty("ML_MODE") || APP_DEFAULTS.ML_MODE,
    };
    LoggerService.info(`Processor Mode: ${options.mode}, ML Mode: ${options.mlMode}`);

    let parser;
    try {
      parser = ParserFactory.getParser();
    } catch (e) {
      LoggerService.error(`Parser selection failed: ${e}`);
      return null;
    }

    let text;
    try {
      text = OCRService.extractText(file);
    } catch (e) {
      LoggerService.error(`OCR failed: ${e}`);
      return null;
    }
    if (!text) {
      LoggerService.warn("No text extracted from file");
      return null;
    }

    // Parsing gets the same log-and-return-null treatment as the stages
    // above, so a parser exception cannot escape this method.
    let parsed;
    try {
      parsed = parser.parseFromText(text, file);
    } catch (e) {
      LoggerService.error(`Parsing failed: ${e}`);
      return null;
    }
    // Without this guard, a null/undefined parser result would raise a
    // TypeError on the first property assignment below.
    if (!parsed) {
      LoggerService.warn("Parser returned no result");
      return null;
    }

    // Post-processing: hash, category, filename, url
    parsed.hash = Utils.generateHash(parsed.rawText);
    parsed.fileName = file.getName();
    parsed.url = file.getUrl();
    parsed.category = CategoryClassifier.classify(parsed.rawText, parsed.vendor);
    return parsed;
  },
};