From 7f2ed38b524081a345b6e7bacc7b9a8b09d2051f Mon Sep 17 00:00:00 2001 From: Paul Salanitri Date: Mon, 22 Nov 2021 19:51:40 +1000 Subject: [PATCH 1/3] Simplest change to change from pcre to pcre2 --- Makefile.am | 4 ++-- bti.c | 33 +++++++++++++++++++++++---------- config.c | 3 ++- configure.ac | 6 ++++-- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/Makefile.am b/Makefile.am index abd875d..59925c1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -13,14 +13,14 @@ bti_CFLAGS = \ $(LIBCURL_CFLAGS) \ $(XML_CFLAGS) \ $(JSON_CFLAGS) \ - $(LIBPCRE_CFLAGS) \ + $(LIBPCRE2_CFLAGS) \ $(LIBOAUTH_CFLAGS) bti_LDADD = \ $(LIBCURL_LIBS) \ $(XML_LIBS) \ $(JSON_LIBS) \ - $(LIBPCRE_LIBS) \ + $(LIBPCRE2_LIBS) \ $(LIBOAUTH_LIBS) dist_man_MANS = \ diff --git a/bti.c b/bti.c index 7f485a8..c7edda8 100644 --- a/bti.c +++ b/bti.c @@ -15,6 +15,9 @@ #define _GNU_SOURCE +#define is_error(ptr) (ptr == NULL) + +#define PCRE2_CODE_UNIT_WIDTH 8 #include #include #include @@ -33,7 +36,7 @@ #include #include #include -#include +#include #include #include #include @@ -1260,10 +1263,12 @@ static int find_urls(const char *tweet, int **pranges) "(([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)/{1,3}" "[0-9a-zA-Z;/~?:@&=+$\\.\\-_'()%]+)" "(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?"; - pcre *re; + pcre2_code *re; const char *errptr; + int errorcode; int erroffset; - int ovector[10] = {0,}; + //int ovector[10] = {0,}; + PCRE2_SIZE *ovector; const size_t ovsize = sizeof(ovector)/sizeof(*ovector); int startoffset, tweetlen; int i, rc; @@ -1271,9 +1276,9 @@ static int find_urls(const char *tweet, int **pranges) int rcount = 0; int *ranges = malloc(sizeof(int) * rbound); - re = pcre_compile(re_magic, - PCRE_NO_AUTO_CAPTURE, - &errptr, &erroffset, NULL); + re = pcre2_compile((PCRE2_SPTR)re_magic, PCRE2_ZERO_TERMINATED, + PCRE2_NO_AUTO_CAPTURE, + &errorcode, (PCRE2_SIZE*) &erroffset, NULL); if (!re) { fprintf(stderr, "pcre_compile @%u: %s\n", erroffset, errptr); exit(1); @@ -1282,9 +1287,11 @@ static int find_urls(const char *tweet, int **pranges) tweetlen = strlen(tweet); for (startoffset = 0; startoffset < tweetlen; ) { - rc = pcre_exec(re, NULL, tweet, strlen(tweet), startoffset, 0, - ovector, ovsize); - if (rc == PCRE_ERROR_NOMATCH) + pcre2_match_data *match_data; + match_data = pcre2_match_data_create_from_pattern(re, NULL); + + rc = pcre2_match(re, (PCRE2_SPTR)tweet, strlen(tweet), startoffset, 0, match_data, NULL); + if (rc == PCRE2_ERROR_NOMATCH) break; if (rc < 0) { @@ -1293,6 +1300,9 @@ static int find_urls(const char *tweet, int **pranges) exit(1); } + ovector = pcre2_get_ovector_pointer(match_data); + fprintf(stderr,"Match succeeded at offset %d to %d\n", (int)ovector[0], (int)ovector[1]); + for (i = 0; i < rc; i += 2) { if ((rcount+2) == rbound) { rbound *= 2; @@ -1306,7 +1316,10 @@ static int find_urls(const char *tweet, int **pranges) startoffset = ovector[1]; } - pcre_free(re); + //pcre2_match_data_free(re); + pcre2_code_free(re); + + for (int i=0; i #include #include @@ -32,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/configure.ac b/configure.ac index afcd64b..caa53a1 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,8 @@ AC_INIT([bti], [034], [greg@kroah.com]) AC_PREREQ(2.60) -AM_INIT_AUTOMAKE(bti, 034) +#AM_INIT_AUTOMAKE(bti, 034) +AM_INIT_AUTOMAKE m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) @@ -15,7 +16,8 @@ AC_PATH_PROG([XSLTPROC], [xsltproc]) PKG_PROG_PKG_CONFIG() PKG_CHECK_MODULES(LIBOAUTH, oauth) -PKG_CHECK_MODULES(LIBPCRE, libpcre) +#PKG_CHECK_MODULES(LIBPCRE2, libpcre2) +PKG_CHECK_MODULES(PCRE2, [libpcre2-8 libpcre2-32]) PKG_CHECK_MODULES([LIBCURL], [libcurl]) PKG_CHECK_MODULES([XML], [libxml-2.0]) PKG_CHECK_MODULES([JSON], [json-c]) From e71e72498bcb582f7905e15a286475d595fc8361 Mon Sep 17 00:00:00 2001 From: Paul Salanitri Date: Tue, 23 Nov 2021 06:56:46 +1000 Subject: [PATCH 2/3] Fix library reference for PCRE2 --- Makefile.am | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index 59925c1..c870b3f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -13,14 +13,14 @@ bti_CFLAGS = \ $(LIBCURL_CFLAGS) \ $(XML_CFLAGS) \ $(JSON_CFLAGS) \ - $(LIBPCRE2_CFLAGS) \ + $(PCRE2_CFLAGS) \ $(LIBOAUTH_CFLAGS) bti_LDADD = \ $(LIBCURL_LIBS) \ $(XML_LIBS) \ $(JSON_LIBS) \ - $(LIBPCRE2_LIBS) \ + $(PCRE2_LIBS) \ $(LIBOAUTH_LIBS) dist_man_MANS = \ From 6c2110c893b145b7afceb9d4a7543da77a8def5a Mon Sep 17 00:00:00 2001 From: Paul Salanitri Date: Tue, 23 Nov 2021 07:08:35 +1000 Subject: [PATCH 3/3] remove pcre2 ..-32, remove debug fprintf --- bti.c | 3 --- configure.ac | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/bti.c b/bti.c index c7edda8..62abd54 100644 --- a/bti.c +++ b/bti.c @@ -1301,7 +1301,6 @@ static int find_urls(const char *tweet, int **pranges) } ovector = pcre2_get_ovector_pointer(match_data); - fprintf(stderr,"Match succeeded at offset %d to %d\n", (int)ovector[0], (int)ovector[1]); for (i = 0; i < rc; i += 2) { if ((rcount+2) == rbound) { @@ -1319,8 +1318,6 @@ static int find_urls(const char *tweet, int **pranges) //pcre2_match_data_free(re); pcre2_code_free(re); - for (int i=0; i