From 88de27925c41d3484a09326117ceafec321a5ba5 Mon Sep 17 00:00:00 2001 From: Andrew Bower Date: Wed, 27 Nov 2024 23:33:27 +0000 Subject: [PATCH 1/4] Factor out regcomp() usage into a wrapper. Moves regular expression compilation out into a wrapper function that handles errors, so that we can add additional regular expression usage without exploding the code. --- src/vcard.c | 53 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/src/vcard.c b/src/vcard.c index 533e587..f41f92f 100644 --- a/src/vcard.c +++ b/src/vcard.c @@ -40,6 +40,37 @@ #include "mem.h" #include "vcard.h" +/** + * Compile regex, checking and handling errors. + * + * \parm[in] preg The compiled regex. + * \parm[in] regex The pattern to match. + * \parm[in] cflags The compilation flags according to regex(3). + * + * \retval 0 If there were no errors. + * \retval 1 If an error was encountered (a warning is printed with warnx()). + **/ +static int +xregcomp(regex_t *preg, const char *regex, int cflags) { + int rerr = 0; /* Regex error code */ + size_t rlen = 0; /* Regex error string length */ + char *rstr = NULL; /* Regex error string */ + + rerr = regcomp(preg, regex, REG_EXTENDED | cflags); + if (rerr != 0) { + rlen = regerror(rerr, preg, NULL, 0); + rstr = xmalloc((rlen+1)*sizeof(char)); + regerror(rerr, preg, rstr, rlen); + warnx(_("Unable to compile regex '%s': %s\n"), regex, rstr); + if (rstr) { + free(rstr); + rstr = NULL; + } + return 1; + } + return 0; +} + /** * Search a query's result. This will run regexs over the result * to filter the data. 
@@ -63,8 +94,6 @@ search(const char *card) int plen = 0; /* Length of snprintf()'s */ int rerr = 0; /* Regex error code */ - size_t rlen = 0; /* Regex error string length */ - char *rstr = NULL; /* Regex error string */ size_t qlen = 0; /* Length of the query string */ char *q = NULL; /* Regex pattern for query */ @@ -96,15 +125,7 @@ search(const char *card) return(EXIT_FAILURE); } - if ((rerr = regcomp(&rq, q, REG_EXTENDED|REG_NEWLINE|REG_ICASE)) != 0) { - rlen = regerror(rerr, &rq, NULL, 0); - rstr = xmalloc((rlen+1)*sizeof(char)); - regerror(rerr, &rq, rstr, rlen); - warnx(_("Unable to compile regex '%s': %s\n"), q, rstr); - if (rstr) { - free(rstr); - rstr = NULL; - } + if (xregcomp(&rq, q, REG_NEWLINE|REG_ICASE) != 0) { return(EXIT_FAILURE); } @@ -119,15 +140,7 @@ search(const char *card) return(EXIT_FAILURE); } - if ((rerr = regcomp(&rs, s, REG_EXTENDED|REG_NEWLINE)) != 0) { - rlen = regerror(rerr, &rs, NULL, 0); - rstr = xmalloc((rlen+1)*sizeof(char)); - regerror(rerr, &rs, rstr, rlen); - warnx(_("Unable to compile regex '%s': %s\n"), s, rstr); - if (rstr) { - free(rstr); - rstr = NULL; - } + if (xregcomp(&rs, s, REG_NEWLINE) != 0) { return(EXIT_FAILURE); } From ec848bc98eb02b75d8d305ce12bb03029e9b77fa Mon Sep 17 00:00:00 2001 From: Andrew Bower Date: Wed, 27 Nov 2024 23:33:27 +0000 Subject: [PATCH 2/4] Unfold vCard before using it. Follow the RFC by unfolding folded vCard lines (CRLF WSP) before using the vCard. This is done in place as we will be accessing all the data immediately anyway as we pass the automata over it so it is likely to stay in cache. This pipelined approach seems easier than special handling of continuation lines and follows the spirit of the specification. 
--- src/vcard.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- src/vcard.h | 5 +++-- src/xml.c | 2 +- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/vcard.c b/src/vcard.c index f41f92f..9bbfa05 100644 --- a/src/vcard.c +++ b/src/vcard.c @@ -71,6 +71,51 @@ xregcomp(regex_t *preg, const char *regex, int cflags) { return 0; } +/** + * Unfold a vCard per RFC6350 section 3.2. + * + * Removes each line fold (a line break followed by one space or tab) from the string in place. + * + * \parm[in,out] vcard The NUL-terminated vCard text; it is modified in place. + * + * \retval 0 If there were no errors. + * \retval 1 If the fold regex could not be compiled. + **/ +static int +unfold(char *vcard) +{ + static const char r[] = "\r\n[ \t]"; /* Continuation fold */ + regmatch_t matches[1]; + regex_t re; + size_t length = strlen(vcard); + size_t in_ptr = 0; /* Read offset: start of the input not yet scanned */ + size_t out_ptr = 0; /* Write offset: end of the unfolded output so far */ + + if (xregcomp(&re, r, 0) != 0) { + return 1; + } + + /* Hunt for folds and move the chunks in between them back by + * the accumulated number of folding characters. */ + while (regexec(&re, vcard + in_ptr, 1, matches, 0) == 0) { + if (matches[0].rm_so == -1 || matches[0].rm_eo == -1) { + errx(EXIT_FAILURE, _("inconsistent regex result")); + } + memmove(vcard + out_ptr, + vcard + in_ptr, + matches[0].rm_so); + in_ptr = in_ptr + matches[0].rm_eo; + out_ptr = out_ptr + matches[0].rm_so; + } + if (options.verbose) { + fprintf(stderr, "Unfolding cut %zu bytes\n", in_ptr - out_ptr); + } + memmove(vcard + out_ptr, vcard + in_ptr, length - in_ptr + 1); + + regfree(&re); + return 0; +} + /** * Search a query's result. This will run regexs over the result * to filter the data. @@ -85,7 +130,7 @@ xregcomp(regex_t *preg, const char *regex, int cflags) { * \retval 1 If an error was encounted. 
**/ int -search(const char *card) +search(char *card) { /* Regex patterns */ static const char r[] = "%s(.*):(.*)"; /* Whole result */ @@ -107,6 +152,11 @@ search(const char *card) regmatch_t match[3] = {0}; /* Regex matches */ + if (unfold(card)) { + warnx(_("Error unfolding vCard.")); + return(EXIT_FAILURE); + } + /* Generate a quoted query term */ if (quote(options.term, &qt)) { warnx(_("Unable to build quoted term.")); diff --git a/src/vcard.h b/src/vcard.h index 55f448c..80ed5dc 100644 --- a/src/vcard.h +++ b/src/vcard.h @@ -32,8 +32,9 @@ extern "C" { #endif -/** Search the vcard */ -int search(const char *); +/** Search the vcard. + * The supplied card string will be unfolded in place so must be modifiable. */ +int search(char *); /** Quote a string for regex's */ int quote(const char *, char **); diff --git a/src/xml.c b/src/xml.c index 5f2e1b3..802f2f5 100644 --- a/src/xml.c +++ b/src/xml.c @@ -107,7 +107,7 @@ walk_tree(xmlDocPtr doc, xmlNode *node) _("Data:\n%s\n"), data); } - search((const char *)data); + search((char *)data); xmlFree(data); } } From 8ec5147217bf67a3eabef022f7d46d56e1acd47b Mon Sep 17 00:00:00 2001 From: Andrew Bower Date: Sun, 12 Jan 2025 18:17:56 +0000 Subject: [PATCH 3/4] doxygen tweak to new xregcomp function --- src/vcard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vcard.c b/src/vcard.c index 9bbfa05..c3feb57 100644 --- a/src/vcard.c +++ b/src/vcard.c @@ -43,7 +43,7 @@ /** * Compile regex, checking and handling errors. * - * \parm[in] preg The compiled regex. + * \parm[out] preg The compiled regex. * \parm[in] regex The pattern to match. * \parm[in] cflags The compilation flags according to regex(3). 
* From 79ef2a1ad0c7c2bbd584badc75ba203a22fb185f Mon Sep 17 00:00:00 2001 From: Andrew Bower Date: Sat, 18 Jan 2025 11:44:09 +0000 Subject: [PATCH 4/4] Treat CR as optional when unfolding vCard --- src/vcard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vcard.c b/src/vcard.c index c3feb57..c1534fa 100644 --- a/src/vcard.c +++ b/src/vcard.c @@ -84,7 +84,7 @@ xregcomp(regex_t *preg, const char *regex, int cflags) { static int unfold(char *vcard) { - static const char r[] = "\r\n[ \t]"; /* Continuation fold */ + static const char r[] = "\r?\n[ \t]"; /* Continuation fold */ regmatch_t matches[1]; regex_t re; size_t length = strlen(vcard);