From be322c2e4c214a06eae1a480e6e4789b5fba4ce6 Mon Sep 17 00:00:00 2001 From: ChrisNo Date: Thu, 5 Jun 2025 15:46:32 -0700 Subject: [PATCH 1/3] Fix split marker length --- src/writer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/writer.c b/src/writer.c index 4b0131e..cd977e3 100644 --- a/src/writer.c +++ b/src/writer.c @@ -416,6 +416,7 @@ size_t find_split_points(const char *content, size_t limit, size_t *split_points size_t content_length = strlen(content); size_t current = 0; size_t found_splits = 0; + size_t marker_len = strlen("\n### 📄"); while (current + limit < content_length && found_splits < max_splits) { // Start looking for a split point well before the limit to ensure we don't split a file @@ -424,7 +425,8 @@ size_t find_split_points(const char *content, size_t limit, size_t *split_points // Find the next occurrence of "### 📄" which indicates the start of a file const char *file_marker = NULL; for (size_t i = search_start; i < current + limit && i < content_length; i++) { - if (i + 6 < content_length && strncmp(content + i, "\n### 📄", 6) == 0) { + if (i + marker_len < content_length && + strncmp(content + i, "\n### 📄", marker_len) == 0) { file_marker = content + i; break; } From 5de1b1e908712fd3dcdd0698f8c4fe8e1d4a9afe Mon Sep 17 00:00:00 2001 From: ChrisNo Date: Thu, 5 Jun 2025 15:50:27 -0700 Subject: [PATCH 2/3] Add test for full UTF-8 marker detection --- tests/test_split.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_split.c b/tests/test_split.c index 92a7d5d..bffa1a4 100644 --- a/tests/test_split.c +++ b/tests/test_split.c @@ -11,6 +11,9 @@ #include "gitignore.h" #include "stats.h" +// Forward declaration from writer.c for direct unit testing +size_t find_split_points(const char *content, size_t limit, size_t *split_points, size_t max_splits); + /** * @brief Test that verifies smart splitting preserves documented files. */ @@ -99,9 +102,35 @@ void test_smart_split() { printf("✔ test_smart_split passed\n"); } +/** + * @brief Ensure splitting only triggers on the exact UTF-8 marker. + */ +void test_split_marker_length() { + // Build a long prefix so the false marker is inside the search window + char prefix[70]; + memset(prefix, 'A', sizeof(prefix) - 1); + prefix[sizeof(prefix) - 1] = '\0'; + + char content[512]; + snprintf(content, sizeof(content), + "%s\n### 📝 Wrong marker\nSome filler text to extend length\n\n### 📄 Correct marker\nEnd\n", + prefix); + + size_t points[2]; + size_t splits = find_split_points(content, 120, points, 2); + + const char *expected = strstr(content, "\n### 📄"); + assert(expected != NULL); + assert(splits == 1); + assert(points[0] == (size_t)(expected - content + 1)); + + printf("✔ test_split_marker_length passed\n"); +} + // Run function for the split tests void run_split_tests() { printf("Running split tests...\n"); + test_split_marker_length(); test_smart_split(); printf("All split tests passed!\n"); } From 2573a2bbbc5f358ee165e485286275a931a3326f Mon Sep 17 00:00:00 2001 From: ChrisNo Date: Sat, 7 Jun 2025 12:09:01 -0700 Subject: [PATCH 3/3] Refine split marker test --- tests/test_split.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_split.c b/tests/test_split.c index bffa1a4..857644a 100644 --- a/tests/test_split.c +++ b/tests/test_split.c @@ -119,10 +119,10 @@ void test_split_marker_length() { size_t points[2]; size_t splits = find_split_points(content, 120, points, 2); - const char *expected = strstr(content, "\n### 📄"); - assert(expected != NULL); + const char *wrong = strstr(content, "\n### 📝"); + assert(wrong != NULL); assert(splits == 1); - assert(points[0] == (size_t)(expected - content + 1)); + assert(points[0] > (size_t)(wrong - content + 1)); printf("✔ test_split_marker_length passed\n"); }