From 60c9df5fd7f9b0072f3ea1ac76ac8396938f1faa Mon Sep 17 00:00:00 2001 From: cau-bot Date: Tue, 24 Jun 2025 13:08:37 +0200 Subject: [PATCH 1/9] Decode /Cnnn glyph names --- src/v2/pdf_resources/page_font.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/v2/pdf_resources/page_font.h b/src/v2/pdf_resources/page_font.h index 06ac6150..86580deb 100644 --- a/src/v2/pdf_resources/page_font.h +++ b/src/v2/pdf_resources/page_font.h @@ -1829,7 +1829,31 @@ namespace pdflib } else { - diff_numb_to_char[numb] = name; + // Try to decode names of the form '/C123' into + // their ASCII representation. If the conversion + // fails or falls outside of the ASCII table, keep + // the original glyph name. + std::smatch ascii_match; + std::regex re_ascii(R"(^\/C(\d+)$)"); + + if(std::regex_match(name, ascii_match, re_ascii)) + { + int code = std::stoi(ascii_match[1].str()); + if(code >= 0 && code < 128) + { + diff_numb_to_char[numb] = + std::string(1, static_cast(code)); + } + else + { + diff_numb_to_char[numb] = name; + } + } + else + { + diff_numb_to_char[numb] = name; + } + LOG_S(WARNING) << "differences["< " << name; } From b5ceabc5878d296693d90ddb1006310455f78cba Mon Sep 17 00:00:00 2001 From: cau-bot Date: Tue, 24 Jun 2025 13:36:42 +0200 Subject: [PATCH 2/9] feat: decode /Cxxx using font encoding --- src/v2/pdf_resources/page_font.h | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/v2/pdf_resources/page_font.h b/src/v2/pdf_resources/page_font.h index 86580deb..dd6c8be9 100644 --- a/src/v2/pdf_resources/page_font.h +++ b/src/v2/pdf_resources/page_font.h @@ -1829,21 +1829,39 @@ namespace pdflib } else { - // Try to decode names of the form '/C123' into - // their ASCII representation. If the conversion - // fails or falls outside of the ASCII table, keep - // the original glyph name. + // Try to decode names of the form '/C123'. For + // codes in the ASCII range we simply convert the + // value. For higher values, we attempt to decode + // using the font's base encoding and finally fall + // back to ISO-8859-1. + std::smatch ascii_match; std::regex re_ascii(R"(^\/C(\d+)$)"); if(std::regex_match(name, ascii_match, re_ascii)) { int code = std::stoi(ascii_match[1].str()); + if(code >= 0 && code < 128) { diff_numb_to_char[numb] = std::string(1, static_cast(code)); } + else if(code >= 0 && code < 256) + { + std::string decoded = + get_character_from_encoding(code); + + if(decoded.rfind("GLYPH<", 0) == 0) + { + std::string tmp(4, ' '); + auto itr = utf8::append(code, tmp.begin()); + tmp.erase(itr, tmp.end()); + decoded = tmp; + } + + diff_numb_to_char[numb] = decoded; + } else { diff_numb_to_char[numb] = name; From 2a2b20dc24d24c9f006d7dfafea8f870d516088e Mon Sep 17 00:00:00 2001 From: cau-bot Date: Tue, 24 Jun 2025 13:49:06 +0200 Subject: [PATCH 3/9] Refactor C-name decoding --- src/v2/pdf_resources/page_font.h | 70 ++++++++++++-------------------- 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/src/v2/pdf_resources/page_font.h b/src/v2/pdf_resources/page_font.h index dd6c8be9..47923e72 100644 --- a/src/v2/pdf_resources/page_font.h +++ b/src/v2/pdf_resources/page_font.h @@ -1694,6 +1694,32 @@ namespace pdflib std::regex re_01(R"(\/(.+)\.(.+))"); std::regex re_02(R"((\/)?(uni|UNI)([0-9A-Ea-e]{4}))"); std::regex re_03(R"((\/)(g|G)\d+)"); + std::regex re_ascii(R"(^\/C(\d+)$)"); + + auto decode_cname = [&](const std::string& n) -> std::string + { + std::smatch m; + if(std::regex_match(n, m, re_ascii)) + { + int code = std::stoi(m[1].str()); + if(code >= 0 && code < 128) + { + return std::string(1, static_cast(code)); + } + else if(code >= 0 && code < 256) + { + std::string decoded = get_character_from_encoding(code); + if(decoded.rfind("GLYPH<", 0) == 0) + { + std::string tmp; + utf8::append(code, std::back_inserter(tmp)); + return tmp; + } + return decoded; + } + } + return n; + }; if(utils::json::has(keys, json_font)) { @@ -1829,49 +1855,7 @@ namespace pdflib } else { - // Try to decode names of the form '/C123'. For - // codes in the ASCII range we simply convert the - // value. For higher values, we attempt to decode - // using the font's base encoding and finally fall - // back to ISO-8859-1. - - std::smatch ascii_match; - std::regex re_ascii(R"(^\/C(\d+)$)"); - - if(std::regex_match(name, ascii_match, re_ascii)) - { - int code = std::stoi(ascii_match[1].str()); - - if(code >= 0 && code < 128) - { - diff_numb_to_char[numb] = - std::string(1, static_cast(code)); - } - else if(code >= 0 && code < 256) - { - std::string decoded = - get_character_from_encoding(code); - - if(decoded.rfind("GLYPH<", 0) == 0) - { - std::string tmp(4, ' '); - auto itr = utf8::append(code, tmp.begin()); - tmp.erase(itr, tmp.end()); - decoded = tmp; - } - - diff_numb_to_char[numb] = decoded; - } - else - { - diff_numb_to_char[numb] = name; - } - } - else - { - diff_numb_to_char[numb] = name; - } - + diff_numb_to_char[numb] = decode_cname(name); LOG_S(WARNING) << "differences["< " << name; } From 14a2204fcb44996d03275edc028deca5ab886bc0 Mon Sep 17 00:00:00 2001 From: cau-bot Date: Tue, 24 Jun 2025 13:49:10 +0200 Subject: [PATCH 4/9] refactor: decode cname via method --- src/v2/pdf_resources/page_font.h | 62 ++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/src/v2/pdf_resources/page_font.h b/src/v2/pdf_resources/page_font.h index 47923e72..d02a4cf7 100644 --- a/src/v2/pdf_resources/page_font.h +++ b/src/v2/pdf_resources/page_font.h @@ -54,6 +54,7 @@ namespace pdflib std::string get_correct_character(uint32_t c); std::string get_character_from_encoding(uint32_t c); + std::string decode_cname(const std::string& name); void init_encoding(); void init_subtype(); @@ -569,9 +570,42 @@ namespace pdflib << "; Encoding: " << to_string(encoding) << "; font-name: " << font_name; - return notdef; + return notdef; + } + } + + // Decode glyph names of the form '/Cnnn'. + // - For 0<=nnn<128 we interpret it as standard ASCII. + // - For 128<=nnn<256 we use the font's encoding and + // fall back to ISO-8859-1 if undefined. + // - Otherwise return the name unchanged. + std::string pdf_resource::decode_cname(const std::string& name) + { + static const std::regex re_ascii(R"(^\/C(\d+)$)"); + + std::smatch m; + if(std::regex_match(name, m, re_ascii)) + { + int code = std::stoi(m[1].str()); + if(code >= 0 && code < 128) + { + return std::string(1, static_cast(code)); + } + else if(code >= 0 && code < 256) + { + std::string decoded = get_character_from_encoding(code); + if(decoded.rfind("GLYPH<", 0) == 0) + { + std::string tmp; + utf8::append(code, std::back_inserter(tmp)); + return tmp; + } + return decoded; } } + + return name; + } } void pdf_resource::set(std::string font_key_, @@ -1694,32 +1728,6 @@ namespace pdflib std::regex re_01(R"(\/(.+)\.(.+))"); std::regex re_02(R"((\/)?(uni|UNI)([0-9A-Ea-e]{4}))"); std::regex re_03(R"((\/)(g|G)\d+)"); - std::regex re_ascii(R"(^\/C(\d+)$)"); - - auto decode_cname = [&](const std::string& n) -> std::string - { - std::smatch m; - if(std::regex_match(n, m, re_ascii)) - { - int code = std::stoi(m[1].str()); - if(code >= 0 && code < 128) - { - return std::string(1, static_cast(code)); - } - else if(code >= 0 && code < 256) - { - std::string decoded = get_character_from_encoding(code); - if(decoded.rfind("GLYPH<", 0) == 0) - { - std::string tmp; - utf8::append(code, std::back_inserter(tmp)); - return tmp; - } - return decoded; - } - } - return n; - }; if(utils::json::has(keys, json_font)) { From 7555ed30e096752701ac8db4becb57475dde08e6 Mon Sep 17 00:00:00 2001 From: cau-bot Date: Tue, 24 Jun 2025 14:04:40 +0200 Subject: [PATCH 5/9] Fix brace mismatch --- src/v2/pdf_resources/page_font.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/v2/pdf_resources/page_font.h b/src/v2/pdf_resources/page_font.h index d02a4cf7..b4626907 100644 --- a/src/v2/pdf_resources/page_font.h +++ b/src/v2/pdf_resources/page_font.h @@ -572,6 +572,7 @@ namespace pdflib return notdef; } + } } // Decode glyph names of the form '/Cnnn'. @@ -606,7 +607,6 @@ namespace pdflib return name; } - } void pdf_resource::set(std::string font_key_, nlohmann::json& json_font_, From b27d74c95746670049733bfb7206d2a78183c513 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Tue, 24 Jun 2025 14:42:00 +0200 Subject: [PATCH 6/9] DCO Remediation Commit for cau-bot I, cau-bot , hereby add my Signed-off-by to this commit: 60c9df5fd7f9b0072f3ea1ac76ac8396938f1faa I, cau-bot , hereby add my Signed-off-by to this commit: b5ceabc5878d296693d90ddb1006310455f78cba I, cau-bot , hereby add my Signed-off-by to this commit: 2a2b20dc24d24c9f006d7dfafea8f870d516088e I, cau-bot , hereby add my Signed-off-by to this commit: 14a2204fcb44996d03275edc028deca5ab886bc0 I, cau-bot , hereby add my Signed-off-by to this commit: 7555ed30e096752701ac8db4becb57475dde08e6 Signed-off-by: Christoph Auer From 148c264ce7ee4c97cdbaf5fb0faf92931cce9c18 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Thu, 16 Oct 2025 15:13:38 +0200 Subject: [PATCH 7/9] DCO Remediation Commit for cau-bot I, cau-bot , hereby add my Signed-off-by to this commit: 60c9df5fd7f9b0072f3ea1ac76ac8396938f1faa I, cau-bot , hereby add my Signed-off-by to this commit: b5ceabc5878d296693d90ddb1006310455f78cba I, cau-bot , hereby add my Signed-off-by to this commit: 2a2b20dc24d24c9f006d7dfafea8f870d516088e I, cau-bot , hereby add my Signed-off-by to this commit: 14a2204fcb44996d03275edc028deca5ab886bc0 I, cau-bot , hereby add my Signed-off-by to this commit: 7555ed30e096752701ac8db4becb57475dde08e6 Signed-off-by: Christoph Auer From 25a75f5e42c24c01df401ddcf51c9b083904c473 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Thu, 16 Oct 2025 15:14:44 +0200 Subject: [PATCH 8/9] DCO Remediation Commit for cau-bot I, cau-bot , hereby add my Signed-off-by to this commit: 60c9df5fd7f9b0072f3ea1ac76ac8396938f1faa I, cau-bot , hereby add my Signed-off-by to this commit: b5ceabc5878d296693d90ddb1006310455f78cba I, cau-bot , hereby add my Signed-off-by to this commit: 2a2b20dc24d24c9f006d7dfafea8f870d516088e I, cau-bot , hereby add my Signed-off-by to this commit: 14a2204fcb44996d03275edc028deca5ab886bc0 I, cau-bot , hereby add my Signed-off-by to this commit: 7555ed30e096752701ac8db4becb57475dde08e6 I, Christoph Auer , hereby add my Signed-off-by to this commit: 4b2347a9ce016ddebae52adc08f4fb13a4e6ec9e Signed-off-by: Christoph Auer From 93a0deae1c212b800097a830c1045760217d962e Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Thu, 16 Oct 2025 15:24:39 +0200 Subject: [PATCH 9/9] DCO Remediation Commit for cau-bot I, cau-bot , hereby add my Signed-off-by to this commit: 60c9df5fd7f9b0072f3ea1ac76ac8396938f1faa I, cau-bot , hereby add my Signed-off-by to this commit: b5ceabc5878d296693d90ddb1006310455f78cba I, cau-bot , hereby add my Signed-off-by to this commit: 2a2b20dc24d24c9f006d7dfafea8f870d516088e I, cau-bot , hereby add my Signed-off-by to this commit: 14a2204fcb44996d03275edc028deca5ab886bc0 I, cau-bot , hereby add my Signed-off-by to this commit: 7555ed30e096752701ac8db4becb57475dde08e6 Signed-off-by: cau-bot Signed-off-by: Christoph Auer