From 4a8b89ee05b25f2b2dd04ef9afcd7fc181b569bb Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Mon, 14 Sep 2020 09:04:50 +0300 Subject: [PATCH] Please don't forget to delete tempTxtFiles --- .../tesseract4/AbstractTesseract4OcrEngine.cs | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs index b9d9e25..f7f4405 100644 --- a/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs +++ b/itext/itext.pdfocr.tesseract4/itext/pdfocr/tesseract4/AbstractTesseract4OcrEngine.cs @@ -426,15 +426,24 @@ private AbstractTesseract4OcrEngine.ITesseractOcrResult ProcessInputFiles(FileIn DoTesseractOcr(input, tempFiles, outputFormat, page); if (outputFormat.Equals(OutputFormat.HOCR)) { IList tempTxtFiles = null; - if (GetTesseract4OcrEngineProperties().IsUseTxtToImproveHocrParsing()) { - tempTxtFiles = new List(); - for (int i = 0; i < numOfFiles; i++) { - tempTxtFiles.Add(CreateTempFile(".txt")); + IDictionary> pageData = null; + try { + if (GetTesseract4OcrEngineProperties().IsUseTxtToImproveHocrParsing()) { + tempTxtFiles = new List(); + for (int i = 0; i < numOfFiles; i++) { + tempTxtFiles.Add(CreateTempFile(".txt")); + } + DoTesseractOcr(input, tempTxtFiles, OutputFormat.TXT, page, false); + } + pageData = TesseractHelper.ParseHocrFile(tempFiles, tempTxtFiles, GetTesseract4OcrEngineProperties()); + } + finally { + if (tempTxtFiles != null) { + foreach (FileInfo file in tempTxtFiles) { + TesseractHelper.DeleteFile(file.FullName); + } } - DoTesseractOcr(input, tempTxtFiles, OutputFormat.TXT, page, false); } - IDictionary> pageData = TesseractHelper.ParseHocrFile(tempFiles, tempTxtFiles, GetTesseract4OcrEngineProperties - ()); if (GetTesseract4OcrEngineProperties().IsPreprocessingImages()) { imageData.Put(page, pageData.Get(1)); }