From 0bd7792bf10383dda28eed0f646168890c381684 Mon Sep 17 00:00:00 2001 From: Georg Heiler Date: Thu, 7 Aug 2025 16:22:42 +0200 Subject: [PATCH 1/2] fix: path generation in case of relative file paths without artifact directory Signed-off-by: Georg Heiler --- docling_core/types/doc/document.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index e5a5708d..2f9e8365 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -4525,23 +4525,33 @@ def save_as_html( with open(filename, "w", encoding="utf-8") as fw: fw.write(html_out) - + def _get_output_paths( self, filename: Union[str, Path], artifacts_dir: Optional[Path] = None ) -> Tuple[Path, Optional[Path]]: + """ + Determines the output directory for artifacts and the reference path for URIs. + + This function correctly handles absolute and relative paths for `filename` + and `artifacts_dir` without path duplication. + """ if isinstance(filename, str): filename = Path(filename) if artifacts_dir is None: - # Remove the extension and add '_pictures' - artifacts_dir = filename.with_suffix("") - artifacts_dir = artifacts_dir.with_name(artifacts_dir.name + "_artifacts") - if artifacts_dir.is_absolute(): + # Default case: create an '_artifacts' directory alongside the file. + final_artifacts_dir = filename.with_name(filename.stem + "_artifacts") + else: + if isinstance(artifacts_dir, str): + artifacts_dir = Path(artifacts_dir) + if artifacts_dir.is_absolute(): + final_artifacts_dir = artifacts_dir + else: + final_artifacts_dir = filename.parent / artifacts_dir + if final_artifacts_dir.is_absolute(): reference_path = None else: reference_path = filename.parent - artifacts_dir = reference_path / artifacts_dir - - return artifacts_dir, reference_path + return final_artifacts_dir, reference_path def _make_copy_with_refmode( self, From c41e9d5a95b3bce316ca989913c35e372c8ddb09 Mon Sep 17 00:00:00 2001 From: Georg Heiler Date: Thu, 7 Aug 2025 16:33:39 +0200 Subject: [PATCH 2/2] fix: clean text further Signed-off-by: Georg Heiler --- docling_core/types/doc/document.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 2f9e8365..9ab26168 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -4531,9 +4531,6 @@ def _get_output_paths( ) -> Tuple[Path, Optional[Path]]: """ Determines the output directory for artifacts and the reference path for URIs. - - This function correctly handles absolute and relative paths for `filename` - and `artifacts_dir` without path duplication. """ if isinstance(filename, str): filename = Path(filename)