From 00889dff5e4d2584cb87a8028bf69a7f6a5afb19 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 16 Mar 2026 22:52:12 +0000 Subject: [PATCH 1/3] Fix ValueError when parsing TXT records with escaped semicolons (\;) The remove_comments function was treating \; as a comment boundary, stripping the semicolon and leaving a trailing backslash. This caused shlex.split to raise ValueError: No escaped character. Now escaped semicolons are skipped so they are preserved as part of the record value, which shlex correctly converts to a literal semicolon. Fixes #49 https://claude.ai/code/session_012JHkX4dZkEHyQ6nNtD1a1B --- zonefile_parser/helper.py | 2 ++ zonefile_parser/helper_test.py | 10 ++++++++++ zonefile_parser/main_test.py | 15 +++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/zonefile_parser/helper.py b/zonefile_parser/helper.py index 27a9bbf..67b6737 100644 --- a/zonefile_parser/helper.py +++ b/zonefile_parser/helper.py @@ -3,6 +3,8 @@ def remove_comments(line:str): for index,character in enumerate(line): if character == ";" and not is_in_quote(line,index): + if index > 0 and line[index - 1] == '\\': + continue line = line[:index] break return line diff --git a/zonefile_parser/helper_test.py b/zonefile_parser/helper_test.py index db553c3..b5ebece 100644 --- a/zonefile_parser/helper_test.py +++ b/zonefile_parser/helper_test.py @@ -16,6 +16,16 @@ def test_doesnt_change_string_without_comment(self): result = helper.remove_comments(input) assert result == input + def test_doesnt_remove_escaped_semicolon(self): + input = r"v=DMARC1\;" + result = helper.remove_comments(input) + assert result == input + + def test_removes_comment_after_escaped_semicolon(self): + input = r"v=DMARC1\; comment" + result = helper.remove_comments(input) + assert result == input + class TestIsInQuote: def test_returns_whether_index_in_quote(self): input = '"A"' diff --git a/zonefile_parser/main_test.py b/zonefile_parser/main_test.py index ccdc3a2..aaa889e 100644 --- a/zonefile_parser/main_test.py +++ b/zonefile_parser/main_test.py @@ -328,6 +328,21 @@ def test_origin_not_appended_to_txt_rdata_when_name_matches(self): assert record.rtype == "TXT" assert record.rdata == {"value": "v=spf1 include:mail.otherdomain.com ~all"} + def test_issue_49_escaped_semicolon_in_txt(self): + # TXT record with a trailing escaped semicolon (\;) should not raise ValueError + text = """ +$TTL 3600 +$ORIGIN example.com. +_dmarc 3600 IN TXT v=DMARC1\\; +""" + result = zonefile_parser.main.parse(text) + + record = result[0] + + assert record.name == "_dmarc.example.com." + assert record.rtype == "TXT" + assert record.rdata == {"value": "v=DMARC1;"} + def test_multiple_cnames_with_name_in_target(self): # multiple records in the same zone, each with name appearing in their CNAME target text = """ From b8e5c62863a2ff8f43a85874115acb92caa8aa3d Mon Sep 17 00:00:00 2001 From: Alex Redwood Date: Wed, 18 Mar 2026 05:06:55 +1100 Subject: [PATCH 2/3] updated tests --- zonefile_parser/helper_test.py | 2 +- zonefile_parser/main_test.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/zonefile_parser/helper_test.py b/zonefile_parser/helper_test.py index b5ebece..8b9493f 100644 --- a/zonefile_parser/helper_test.py +++ b/zonefile_parser/helper_test.py @@ -21,7 +21,7 @@ def test_doesnt_remove_escaped_semicolon(self): result = helper.remove_comments(input) assert result == input - def test_removes_comment_after_escaped_semicolon(self): + def test_doesnt_treat_text_after_escaped_semicolon_as_comment(self): input = r"v=DMARC1\; comment" result = helper.remove_comments(input) assert result == input diff --git a/zonefile_parser/main_test.py b/zonefile_parser/main_test.py index aaa889e..901e21d 100644 --- a/zonefile_parser/main_test.py +++ b/zonefile_parser/main_test.py @@ -343,6 +343,22 @@ def test_issue_49_escaped_semicolon_in_txt(self): assert record.rtype == "TXT" assert record.rdata == {"value": "v=DMARC1;"} + def test_issue_49_dmarc_record_with_comment(self): + # DMARC TXT record with escaped semicolons and a trailing comment + # the comment (after unescaped ;) should be stripped, \; should survive as ; + text = """ +$TTL 3600 +$ORIGIN example.com. +_dmarc 3600 IN TXT v=DMARC1\\; p=none\\; rua=mailto:dmarc@example.com ; this is a comment +""" + result = zonefile_parser.main.parse(text) + + record = result[0] + + assert record.name == "_dmarc.example.com." + assert record.rtype == "TXT" + assert record.rdata == {"value": "v=DMARC1; p=none; rua=mailto:dmarc@example.com"} + def test_multiple_cnames_with_name_in_target(self): # multiple records in the same zone, each with name appearing in their CNAME target text = """ From c8c93e7374493f18ad17e8aab2f73c7d20e69c9f Mon Sep 17 00:00:00 2001 From: Alex Redwood Date: Wed, 18 Mar 2026 05:10:39 +1100 Subject: [PATCH 3/3] Update main_test.py --- zonefile_parser/main_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zonefile_parser/main_test.py b/zonefile_parser/main_test.py index 901e21d..ba3386c 100644 --- a/zonefile_parser/main_test.py +++ b/zonefile_parser/main_test.py @@ -344,12 +344,12 @@ def test_issue_49_escaped_semicolon_in_txt(self): assert record.rdata == {"value": "v=DMARC1;"} def test_issue_49_dmarc_record_with_comment(self): - # DMARC TXT record with escaped semicolons and a trailing comment - # the comment (after unescaped ;) should be stripped, \; should survive as ; + # DMARC TXT record with an escaped semicolon followed by a real comment + # the real comment (unescaped ;) should be stripped, \; should survive as ; text = """ $TTL 3600 $ORIGIN example.com. -_dmarc 3600 IN TXT v=DMARC1\\; p=none\\; rua=mailto:dmarc@example.com ; this is a comment +_dmarc 3600 IN TXT v=DMARC1\\; ; this is a comment """ result = zonefile_parser.main.parse(text) @@ -357,7 +357,7 @@ def test_issue_49_dmarc_record_with_comment(self): assert record.name == "_dmarc.example.com." assert record.rtype == "TXT" - assert record.rdata == {"value": "v=DMARC1; p=none; rua=mailto:dmarc@example.com"} + assert record.rdata == {"value": "v=DMARC1;"} def test_multiple_cnames_with_name_in_target(self): # multiple records in the same zone, each with name appearing in their CNAME target