From 5f674b2cfbeb229b2c8d15abceb0950d40c40ad7 Mon Sep 17 00:00:00 2001
From: benjessup-samy <ben.jessup@samy.com>
Date: Mon, 2 Feb 2026 11:47:49 +0000
Subject: [PATCH 1/6] phone tagging function

creates a flag column for whether a post contains a phone number, can also replace phone numbers with text.

Aggressive mode (default) tags consecutive digits (7-15) too.
---
 DESCRIPTION                          |   2 +-
 NAMESPACE                            |   1 +
 R/limpiar_phones.R                   | 155 +++++++++++++++++++++++++++
 README.Rmd                           |   1 +
 README.md                            |   1 +
 _pkgdown.yml                         |   1 +
 man/limpiar_phones.Rd                |  75 +++++++++++++
 tests/testthat/test-limpiar_phones.R | 144 +++++++++++++++++++++++++
 8 files changed, 379 insertions(+), 1 deletion(-)
 create mode 100644 R/limpiar_phones.R
 create mode 100644 man/limpiar_phones.Rd
 create mode 100644 tests/testthat/test-limpiar_phones.R

diff --git a/DESCRIPTION b/DESCRIPTION
index ae79cb3..da6b643 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -13,7 +13,7 @@ License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Imports:
     dplyr (>= 1.0.10),
     rlang (>= 1.0.6),
diff --git a/NAMESPACE b/NAMESPACE
index 86d390d..81c758f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -11,6 +11,7 @@ export(limpiar_link_click)
 export(limpiar_link_click_reverse)
 export(limpiar_na_cols)
 export(limpiar_non_ascii)
+export(limpiar_phones)
 export(limpiar_pos_annotate)
 export(limpiar_pos_import_model)
 export(limpiar_pp_companies)
diff --git a/R/limpiar_phones.R b/R/limpiar_phones.R
new file mode 100644
index 0000000..7e3d7b1
--- /dev/null
+++ b/R/limpiar_phones.R
@@ -0,0 +1,155 @@
+#' Clean phone numbers from text
+#'
+#' Function creates a flag column for posts containing phone numbers. 
+#' Catches various phone number formats, e.g. US, UK and European.
+#' By default (`aggressive = TRUE`) plain digit sequences (7-15 digits) are also
+#' flagged; set `aggressive = FALSE` to match only the recognised formats.
+#' If `tag` is supplied, matched phone numbers are also replaced with that string.
+#'
+#' @param df Name of DataFrame or Tibble object
+#' @param text_var Name of text variable/character vector
+#' @param aggressive Bool: If TRUE, also catches plain digit sequences (7-15 digits)
+#' @param tag String: Default = "None", if supplied replaces phone numbers with string
+#'
+#' @return The DataFrame or Tibble object with phone number flag column
+#'
+#' @details
+#' Matches:
+#' \itemize{
+#'   \item International: +1 555-123-4567, +44 20 1234 5678
+#'   \item US/Canada: (555) 123-4567, 555-123-4567
+#'   \item UK: 07951 902 146, 01786 475545
+#'   \item European: 77 54 33 33
+#'   \item Latin American: 4782-0699
+#'   \item Local: 555-1234
+#' }
+#' Also matches when aggressive = TRUE:
+#' \itemize{
+#'   \item Plain digits: 07546104638, 1234567890
+#' }
+#'
+#' Avoids matching:
+#' \itemize{
+#'   \item 09:00-17:00
+#'   \item 192.168.1.1
+#'   \item $1,234,567
+#'   \item 1,000,000,000
+#'   \item 1995-2025
+#' }
+#'
+#' @examples
+#' # Example data
+#' phone_examples <- tibble::tibble(
+#'   id = 1:5,
+#'   text_var = c(
+#'     "Call me at 555-123-4567 or (555) 123-4568",
+#'     "WhatsApp +44 20 1234 5678",
+#'     "Contact: 07506308688",
+#'     "Meeting at 09:00-17:00, call 4782-0699",
+#'     "I earned £100,000,000 between 1995-2025"
+#'   )
+#' )
+#'
+#' # Default example
+#' phone_examples %>% 
+#'   limpiar_phones(text_var = text_var) %>% 
+#'   dplyr::select(text_var)
+#'
+#' # More aggressive version, catching sequences of digits between 7-15 in length
+#' phone_examples %>% 
+#'   limpiar_phones(text_var = text_var, aggressive = TRUE) %>% 
+#'   dplyr::select(text_var)
+#'
+#' @export
+#'
+limpiar_phones <- function(df, 
+                                   text_var = mention_content, 
+                                   aggressive = TRUE,
+                                   tag = "None") {
+  
+  # handle both quoted and unquoted column names
+  text_var <- rlang::ensym(text_var)
+
+  # check text var is correct type
+  col_data <- dplyr::pull(df, {{ text_var }})
+  
+  if (!is.character(col_data)) {
+    stop("Parameter 'text_var' must be a character vector (string type), but got type: ",
+         class(col_data)[1])
+  }
+
+  # phone number format patterns
+  patterns <- c(
+    # international with + prefix
+    # matches +[1-3 digits][7-12 more digits with optional separators]
+    "\\+\\d{1,3}(?:[\\s.-]?\\d){7,12}",
+    
+    # us/canada format with parentheses
+    # matches (XXX) XXX-XXXX or (XXX) XXX XXXX
+    "\\(\\d{3}\\)[\\s.-]?\\d{3}[\\s.-]?\\d{4}",
+    
+    # standard 10-digit with separators
+    # matches XXX-XXX-XXXX or XXX XXX XXXX or XXX.XXX.XXXX
+    "\\d{3}[\\s.-]\\d{3}[\\s.-]\\d{4}",
+    
+    # uk format (5-3-3, trailing group may run to 6 digits)
+    # matches 0XXXX XXX XXX, and up to 0XXXX XXX XXXXXX
+    "0\\d{4}\\s\\d{3}\\s\\d{3,6}",
+    
+    # uk format (5-6 with single space)
+    # matches 0XXXX XXXXXX
+    "0\\d{4}\\s\\d{6}",
+    
+    # european short format (8 digits with spaces)
+    # matches XX XX XX XX (exactly 8 digits)
+    "(?<!\\d)\\d{2}\\s\\d{2}\\s\\d{2}\\s\\d{2}(?!\\d)",
+    
+    # 8-digit format with hyphen (4-4 pattern)
+    # matches XXXX-XXXX where first digit is 3-9 (avoids year ranges like 1995-2025)
+    "(?<!\\d)[3-9]\\d{3}-\\d{4}(?!\\d)",
+    
+    # 7-digit local format with separator
+    # matches XXX-XXXX or XXX XXXX
+    "(?<!\\d)\\d{3}[\\s.-]\\d{4}(?!\\d)"
+  )
+
+  # currency symbols
+  currency_symbols <- "\\u00a3\\u20ac\\u00a5"
+  
+  # add aggressive pattern if enabled
+  if (aggressive) {
+    aggressive_pattern <- paste0(
+      "(?<![a-zA-Z.,$/", currency_symbols, "\\d])",
+      "\\d{7,15}",
+      "(?![a-zA-Z.,\\d])"
+    )
+    patterns <- c(patterns, aggressive_pattern)
+  }
+  
+  # combine patterns with negative lookarounds
+  full_pattern <- paste0(
+    "(?<!\\d)",  # not preceded by digit
+    "(?:", 
+    paste(patterns, collapse = "|"),  # combining format patterns
+    ")",
+    "(?![:/=?&\\d-])"   # not followed by colon, slash, =, ?, &, digit, or dash
+  )
+  
+  # output
+  if (tag == "None") {
+    # if tag not changed from default value, only create flag column
+    df <- df %>%
+      dplyr::mutate(
+        phone_number_flag = stringr::str_detect(!!text_var, full_pattern)
+      )
+  } else {
+    # create flag column and replace phone numbers with tag
+    df <- df %>%
+      dplyr::mutate(
+        phone_number_flag = stringr::str_detect(!!text_var, full_pattern),
+        !!text_var := stringr::str_replace_all(!!text_var, full_pattern, tag)
+      )
+  }
+  
+  return(df)
+}
\ No newline at end of file
diff --git a/README.Rmd b/README.Rmd
index 86fc087..788a985 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -65,6 +65,7 @@ Functions for removing unwanted posts entirely (rather than cleaning).
 | [limpiar_duplicates](https://jpcompartir.github.io/LimpiaR/reference/limpiar_duplicates.html) | Removes duplicate content | Language-agnostic | Data cleaning | Also removes protected content |
 | [limpiar_retweets](https://jpcompartir.github.io/LimpiaR/reference/limpiar_retweets.html) | Removes retweet content | Language-agnostic | Social media cleaning | Identifies RT patterns |
 | [limpiar_spam_grams](https://jpcompartir.github.io/LimpiaR/reference/limpiar_spam_grams.html) | Removes spam-like patterns | Language-agnostic | Content filtering | Uses n-gram analysis |
+| [limpiar_phones](https://jpcompartir.github.io/LimpiaR/reference/limpiar_phones.html) | Tags phone numbers in posts | Language-agnostic | Content filtering | For removing spam/advertising |
 
 ## Utility
 
diff --git a/README.md b/README.md
index 45e7b56..ffaaab3 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,7 @@ Functions for removing unwanted posts entirely (rather than cleaning).
 | [limpiar_duplicates](https://jpcompartir.github.io/LimpiaR/reference/limpiar_duplicates.html) | Removes duplicate content | Language-agnostic | Data cleaning | Also removes protected content |
 | [limpiar_retweets](https://jpcompartir.github.io/LimpiaR/reference/limpiar_retweets.html) | Removes retweet content | Language-agnostic | Social media cleaning | Identifies RT patterns |
 | [limpiar_spam_grams](https://jpcompartir.github.io/LimpiaR/reference/limpiar_spam_grams.html) | Removes spam-like patterns | Language-agnostic | Content filtering | Uses n-gram analysis |
+| [limpiar_phones](https://jpcompartir.github.io/LimpiaR/reference/limpiar_phones.html) | Tags phone numbers in posts | Language-agnostic | Content filtering | For removing spam/advertising |
 
 ## Utility
 
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 14f6566..70f99da 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -94,6 +94,7 @@ reference:
   - limpiar_duplicates
   - limpiar_retweets
   - limpiar_spam_grams
+  - limpiar_phones
 
 - title: Utility Functions
   desc: >
diff --git a/man/limpiar_phones.Rd b/man/limpiar_phones.Rd
new file mode 100644
index 0000000..354fb8d
--- /dev/null
+++ b/man/limpiar_phones.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/limpiar_phones.R
+\name{limpiar_phones}
+\alias{limpiar_phones}
+\title{Clean phone numbers from text}
+\usage{
+limpiar_phones(df, text_var = mention_content, aggressive = TRUE, tag = "None")
+}
+\arguments{
+\item{df}{Name of DataFrame or Tibble object}
+
+\item{text_var}{Name of text variable/character vector}
+
+\item{aggressive}{Bool: If TRUE, also catches plain digit sequences (7-15 digits)}
+
+\item{tag}{String: Default = "None", if supplied replaces phone numbers with string}
+}
+\value{
+The DataFrame or Tibble object with phone number flag column
+}
+\description{
+Function creates a flag column for posts containing phone numbers.
+Catches various phone number formats, e.g. US, UK and European.
+By default (\code{aggressive = TRUE}) plain digit sequences (7-15 digits) are also
+flagged; set \code{aggressive = FALSE} to match only the recognised formats.
+If \code{tag} is supplied, matched phone numbers are also replaced with that string.
+}
+\details{
+Matches:
+\itemize{
+\item International: +1 555-123-4567, +44 20 1234 5678
+\item US/Canada: (555) 123-4567, 555-123-4567
+\item UK: 07951 902 146, 01786 475545
+\item European: 77 54 33 33
+\item Latin American: 4782-0699
+\item Local: 555-1234
+}
+Also matches when aggressive = TRUE:
+\itemize{
+\item Plain digits: 07546104638, 1234567890
+}
+
+Avoids matching:
+\itemize{
+\item 09:00-17:00
+\item 192.168.1.1
+\item $1,234,567
+\item 1,000,000,000
+\item 1995-2025
+}
+}
+\examples{
+# Example data
+phone_examples <- tibble::tibble(
+  id = 1:5,
+  text_var = c(
+    "Call me at 555-123-4567 or (555) 123-4568",
+    "WhatsApp +44 20 1234 5678",
+    "Contact: 07506308688",
+    "Meeting at 09:00-17:00, call 4782-0699",
+    "I earned £100,000,000 between 1995-2025"
+  )
+)
+
+# Default example
+phone_examples \%>\% 
+  limpiar_phones(text_var = text_var) \%>\% 
+  dplyr::select(text_var)
+
+# More aggressive version, catching sequences of digits between 7-15 in length
+phone_examples \%>\% 
+  limpiar_phones(text_var = text_var, aggressive = TRUE) \%>\% 
+  dplyr::select(text_var)
+
+}
diff --git a/tests/testthat/test-limpiar_phones.R b/tests/testthat/test-limpiar_phones.R
new file mode 100644
index 0000000..961bdc1
--- /dev/null
+++ b/tests/testthat/test-limpiar_phones.R
@@ -0,0 +1,144 @@
+input_example_1 <- tibble::tibble(
+   id = 1:5,
+   text_var = c(
+     "Call me at 555-123-4567 or (555) 123-4568",
+     "WhatsApp +44 20 1234 5678",
+     "Contact: 07506308688",
+     "Meeting at 09:00-17:00, call 4782-0699",
+     "I earned £100,000,000 between 1995-2025")
+)
+
+output_example_1a <- tibble::tibble(
+   id = 1:5,
+   text_var = c(
+     "Call me at phone_number or phone_number",
+     "WhatsApp phone_number",
+     "Contact: 07506308688",
+     "Meeting at 09:00-17:00, call phone_number",
+     "I earned £100,000,000 between 1995-2025"),
+   phone_number_flag = c(
+    TRUE,
+    TRUE,
+    FALSE,
+    TRUE,
+    FALSE
+   )
+)
+
+output_example_1b <- tibble::tibble(
+   id = 1:5,
+   text_var = c(
+     "Call me at phone_number or phone_number",
+     "WhatsApp phone_number",
+     "Contact: phone_number",
+     "Meeting at 09:00-17:00, call phone_number",
+     "I earned £100,000,000 between 1995-2025"),
+   phone_number_flag = c(
+    TRUE,
+    TRUE,
+    TRUE,
+    TRUE,
+    FALSE
+   )
+)
+
+output_example_1c <- tibble::tibble(
+   id = 1:5,
+   text_var = c(
+     "Call me at 555-123-4567 or (555) 123-4568",
+     "WhatsApp +44 20 1234 5678",
+     "Contact: 07506308688",
+     "Meeting at 09:00-17:00, call 4782-0699",
+     "I earned £100,000,000 between 1995-2025"),
+   phone_number_flag = c(
+    TRUE,
+    TRUE,
+    FALSE,
+    TRUE,
+    FALSE
+   )
+)
+
+output_example_1d <- tibble::tibble(
+   id = 1:5,
+   text_var = c(
+     "Call me at 555-123-4567 or (555) 123-4568",
+     "WhatsApp +44 20 1234 5678",
+     "Contact: 07506308688",
+     "Meeting at 09:00-17:00, call 4782-0699",
+     "I earned £100,000,000 between 1995-2025"),
+   phone_number_flag = c(
+    TRUE,
+    TRUE,
+    TRUE,
+    TRUE,
+    FALSE
+   )
+)
+
+# Tests
+test_that("Example test", {
+  # test example behaviour
+  output <- limpiar_phones(
+    input_example_1,
+    text_var = text_var,
+    aggressive = FALSE,
+    tag = "phone_number"
+  );
+  expect_equal(output, output_example_1a);
+})
+
+test_that("Agressive test", {
+  # test aggressive works as expected
+  output <- limpiar_phones(
+    input_example_1,
+    text_var = text_var,
+    aggressive = TRUE,
+    tag = "phone_number"
+  );
+  
+  # Test that the result is the correct value
+  expect_equal(output, output_example_1b);
+})
+
+test_that("No tag test", {
+  # Test that no-tag behaviour is expected
+  output <- limpiar_phones(
+    input_example_1,
+    text_var = text_var
+  );
+  expect_equal(output, output_example_1d);
+})
+
+test_that("Data input test", {
+  # Test that data exists
+  expect_error(limpiar_phones(
+    "string",
+    text_var = text_var,
+    aggressive = FALSE,
+    tag = "phone_number",
+    remove = FALSE
+  ));
+})
+
+test_that("text_var input test", {
+  # Test that text_var exists as a column
+  expect_error(limpiar_phones(
+    input_example_1,
+    text_var = "not_a_column",
+    aggressive = FALSE,
+    tag = "phone_number",
+    remove = FALSE
+  ));
+})
+
+test_that("Agressive input test", {
+  # Test that aggressive must be a bool
+  expect_error(limpiar_phones(
+    input_example_1,
+    text_var = text_var,
+    aggressive = "string",
+    tag = "phone_number",
+    remove = FALSE
+  ));
+})
\ No newline at end of file

From 0273fc3c41a62ecd8d103a019f4b4bc4060d7b63 Mon Sep 17 00:00:00 2001
From: benjessup-samy <ben.jessup@samy.com>
Date: Mon, 2 Feb 2026 12:19:19 +0000
Subject: [PATCH 2/6] Update test-limpiar_phones.R

added test for string input of text_var
---
 tests/testthat/test-limpiar_phones.R | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/testthat/test-limpiar_phones.R b/tests/testthat/test-limpiar_phones.R
index 961bdc1..3484446 100644
--- a/tests/testthat/test-limpiar_phones.R
+++ b/tests/testthat/test-limpiar_phones.R
@@ -141,4 +141,15 @@ test_that("Agressive input test", {
     tag = "phone_number",
     remove = FALSE
   ));
+})
+
+test_that("text input as string test", {
+  # test example behaviour
+  output <- limpiar_phones(
+    input_example_1,
+    text_var = "text_var",
+    aggressive = FALSE,
+    tag = "phone_number"
+  );
+  expect_equal(output, output_example_1a);
 })
\ No newline at end of file

From 1672a309c3e1cf9b617273b3e41adc8143c7d088 Mon Sep 17 00:00:00 2001
From: benjessup-samy <ben.jessup@samy.com>
Date: Mon, 2 Feb 2026 14:29:10 +0000
Subject: [PATCH 3/6] small fixes

fixed consistency of text_var references in phone function

fixed tests with reference to redundant variable and added regexp to expect_error tests
---
 R/limpiar_phones.R                   | 36 +++++++++++++++++++++++-----
 man/limpiar_phones.Rd                |  8 ++++++-
 tests/testthat/test-limpiar_phones.R | 15 +++++-------
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/R/limpiar_phones.R b/R/limpiar_phones.R
index 7e3d7b1..c330068 100644
--- a/R/limpiar_phones.R
+++ b/R/limpiar_phones.R
@@ -52,13 +52,19 @@
 #'
 #' # Default example
 #' phone_examples %>% 
-#'   limpiar_phones(text_var = text_var) %>% 
+#'   limpiar_phones(text_var = text_var, aggressive = FALSE) %>% 
 #'   dplyr::select(text_var)
 #'
 #' # More aggressive version, catching sequences of digits between 7-15 in length
 #' phone_examples %>% 
 #'   limpiar_phones(text_var = text_var, aggressive = TRUE) %>% 
 #'   dplyr::select(text_var)
+#' 
+#' # Filter out rows containing phone numbers
+#' phone_examples %>% 
+#'   limpiar_phones(text_var = text_var, aggressive = FALSE) %>% 
+#'   dplyr::filter(phone_number_flag == FALSE) %>% 
+#'   dplyr::select(id, text_var)
 #'
 #' @export
 #'
@@ -67,11 +73,29 @@ limpiar_phones <- function(df,
                                    aggressive = TRUE,
                                    tag = "None") {
   
+  # check data exists and is correct type
+  if (!is.data.frame(df)) {
+    stop("'df' must be a data.frame or tibble, but got type: ",
+         class(df)[1])
+  }
+
+  # check aggressive is logical
+  if (!is.logical(aggressive)) {
+    stop("Parameter 'aggressive' must be logical (TRUE/FALSE), but got type: ",
+         class(aggressive)[1])
+  }
+
+  # check tag exists and is a string
+  if (!is.character(tag) || length(tag) != 1) {
+    stop("Parameter 'tag' must be a single character string, but got type: ",
+        class(tag)[1])
+  }
+
   # handle both quoted and unquoted column names
-  text_var <- rlang::ensym(text_var)
+  text_sym <- rlang::ensym(text_var)
 
   # check text var is correct type
-  col_data <- dplyr::pull(df, {{ text_var }})
+  col_data <- dplyr::pull(df, !!text_sym)
   
   if (!is.character(col_data)) {
     stop("Parameter 'text_var' must be a character vector (string type), but got type: ",
@@ -140,14 +164,14 @@ limpiar_phones <- function(df,
     # if tag not changed from default value, only create flag column
     df <- df %>%
       dplyr::mutate(
-        phone_number_flag = stringr::str_detect(!!text_var, full_pattern)
+        phone_number_flag = stringr::str_detect(!!text_sym, full_pattern)
       )
   } else {
     # create flag column and replace phone numbers with tag
     df <- df %>%
       dplyr::mutate(
-        phone_number_flag = stringr::str_detect(!!text_var, full_pattern),
-        !!text_var := stringr::str_replace_all(!!text_var, full_pattern, tag)
+        phone_number_flag = stringr::str_detect(!!text_sym, full_pattern),
+        !!text_sym := stringr::str_replace_all(!!text_sym, full_pattern, tag)
       )
   }
   
diff --git a/man/limpiar_phones.Rd b/man/limpiar_phones.Rd
index 354fb8d..7bd8dae 100644
--- a/man/limpiar_phones.Rd
+++ b/man/limpiar_phones.Rd
@@ -64,7 +64,7 @@ phone_examples <- tibble::tibble(
 
 # Default example
 phone_examples \%>\% 
-  limpiar_phones(text_var = text_var) \%>\% 
+  limpiar_phones(text_var = text_var, aggressive = FALSE) \%>\% 
   dplyr::select(text_var)
 
 # More aggressive version, catching sequences of digits between 7-15 in length
@@ -72,4 +72,10 @@ phone_examples \%>\%
   limpiar_phones(text_var = text_var, aggressive = TRUE) \%>\% 
   dplyr::select(text_var)
 
+# Filter out rows containing phone numbers
+phone_examples \%>\% 
+  limpiar_phones(text_var = text_var, aggressive = FALSE) \%>\% 
+  dplyr::filter(phone_number_flag == FALSE) \%>\% 
+  dplyr::select(id, text_var)
+
 }
diff --git a/tests/testthat/test-limpiar_phones.R b/tests/testthat/test-limpiar_phones.R
index 3484446..a20a4b2 100644
--- a/tests/testthat/test-limpiar_phones.R
+++ b/tests/testthat/test-limpiar_phones.R
@@ -116,9 +116,8 @@ test_that("Data input test", {
     "string",
     text_var = text_var,
     aggressive = FALSE,
-    tag = "phone_number",
-    remove = FALSE
-  ));
+    tag = "phone_number"
+  ), regexp = "'df' must be a data.frame or tibble, but got type: character");
 })
 
 test_that("text_var input test", {
@@ -127,9 +126,8 @@ test_that("text_var input test", {
     input_example_1,
     text_var = "not_a_column",
     aggressive = FALSE,
-    tag = "phone_number",
-    remove = FALSE
-  ));
+    tag = "phone_number"
+  ), regexp = "object 'not_a_column' not found");
 })
 
 test_that("Agressive input test", {
@@ -138,9 +136,8 @@ test_that("Agressive input test", {
     input_example_1,
     text_var = text_var,
     aggressive = "string",
-    tag = "phone_number",
-    remove = FALSE
-  ));
+    tag = "phone_number"
+  ), regexp = "Parameter 'aggressive' must be logical");
 })
 
 test_that("text input as string test", {

From 073a3b998b0094a974a55edbfe93bd72be569d00 Mon Sep 17 00:00:00 2001
From: benjessup-samy <ben.jessup@samy.com>
Date: Mon, 2 Feb 2026 14:50:37 +0000
Subject: [PATCH 4/6] changing name to phone_numbers

limpiar_phones -> limpiar_phone_numbers
---
 NAMESPACE                                       |  2 +-
 R/limpiar_phones.R                              |  8 ++++----
 README.Rmd                                      |  2 +-
 README.md                                       |  2 +-
 _pkgdown.yml                                    |  2 +-
 ...mpiar_phones.Rd => limpiar_phone_numbers.Rd} | 17 +++++++++++------
 tests/testthat/test-limpiar_phones.R            | 14 +++++++-------
 7 files changed, 26 insertions(+), 21 deletions(-)
 rename man/{limpiar_phones.Rd => limpiar_phone_numbers.Rd} (84%)

diff --git a/NAMESPACE b/NAMESPACE
index 81c758f..44d282d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -11,7 +11,7 @@ export(limpiar_link_click)
 export(limpiar_link_click_reverse)
 export(limpiar_na_cols)
 export(limpiar_non_ascii)
-export(limpiar_phones)
+export(limpiar_phone_numbers)
 export(limpiar_pos_annotate)
 export(limpiar_pos_import_model)
 export(limpiar_pp_companies)
diff --git a/R/limpiar_phones.R b/R/limpiar_phones.R
index c330068..e7cd1ec 100644
--- a/R/limpiar_phones.R
+++ b/R/limpiar_phones.R
@@ -52,23 +52,23 @@
 #'
 #' # Default example
 #' phone_examples %>% 
-#'   limpiar_phones(text_var = text_var, aggressive = FALSE) %>% 
+#'   limpiar_phone_numbers(text_var = text_var, aggressive = FALSE) %>% 
 #'   dplyr::select(text_var)
 #'
 #' # More aggressive version, catching sequences of digits between 7-15 in length
 #' phone_examples %>% 
-#'   limpiar_phones(text_var = text_var, aggressive = TRUE) %>% 
+#'   limpiar_phone_numbers(text_var = text_var, aggressive = TRUE) %>% 
 #'   dplyr::select(text_var)
 #' 
 #' # Filter out rows containing phone numbers
 #' phone_examples %>% 
-#'   limpiar_phones(text_var = text_var, aggressive = FALSE) %>% 
+#'   limpiar_phone_numbers(text_var = text_var, aggressive = FALSE) %>% 
 #'   dplyr::filter(phone_number_flag == FALSE) %>% 
 #'   dplyr::select(id, text_var)
 #'
 #' @export
 #'
-limpiar_phones <- function(df, 
+limpiar_phone_numbers <- function(df, 
                                    text_var = mention_content, 
                                    aggressive = TRUE,
                                    tag = "None") {
diff --git a/README.Rmd b/README.Rmd
index 788a985..8e66faf 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -65,7 +65,7 @@ Functions for removing unwanted posts entirely (rather than cleaning).
 | [limpiar_duplicates](https://jpcompartir.github.io/LimpiaR/reference/limpiar_duplicates.html) | Removes duplicate content | Language-agnostic | Data cleaning | Also removes protected content |
 | [limpiar_retweets](https://jpcompartir.github.io/LimpiaR/reference/limpiar_retweets.html) | Removes retweet content | Language-agnostic | Social media cleaning | Identifies RT patterns |
 | [limpiar_spam_grams](https://jpcompartir.github.io/LimpiaR/reference/limpiar_spam_grams.html) | Removes spam-like patterns | Language-agnostic | Content filtering | Uses n-gram analysis |
-| [limpiar_phones](https://jpcompartir.github.io/LimpiaR/reference/limpiar_phones.html) | Tags phone numbers in posts | Language-agnostic | Content filtering | For removing spam/advertising |
+| [limpiar_phone_numbers](https://jpcompartir.github.io/LimpiaR/reference/limpiar_phone_numbers.html) | Tags phone numbers in posts | Language-agnostic | Content filtering | For removing spam/advertising |
 
 ## Utility
 
diff --git a/README.md b/README.md
index ffaaab3..c7700bb 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ Functions for removing unwanted posts entirely (rather than cleaning).
 | [limpiar_duplicates](https://jpcompartir.github.io/LimpiaR/reference/limpiar_duplicates.html) | Removes duplicate content | Language-agnostic | Data cleaning | Also removes protected content |
 | [limpiar_retweets](https://jpcompartir.github.io/LimpiaR/reference/limpiar_retweets.html) | Removes retweet content | Language-agnostic | Social media cleaning | Identifies RT patterns |
 | [limpiar_spam_grams](https://jpcompartir.github.io/LimpiaR/reference/limpiar_spam_grams.html) | Removes spam-like patterns | Language-agnostic | Content filtering | Uses n-gram analysis |
-| [limpiar_phones](https://jpcompartir.github.io/LimpiaR/reference/limpiar_phones.html) | Tags phone numbers in posts | Language-agnostic | Content filtering | For removing spam/advertising |
+| [limpiar_phone_numbers](https://jpcompartir.github.io/LimpiaR/reference/limpiar_phone_numbers.html) | Tags phone numbers in posts | Language-agnostic | Content filtering | For removing spam/advertising |
 
 ## Utility
 
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 70f99da..a904fa4 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -94,7 +94,7 @@ reference:
   - limpiar_duplicates
   - limpiar_retweets
   - limpiar_spam_grams
-  - limpiar_phones
+  - limpiar_phone_numbers
 
 - title: Utility Functions
   desc: >
diff --git a/man/limpiar_phones.Rd b/man/limpiar_phone_numbers.Rd
similarity index 84%
rename from man/limpiar_phones.Rd
rename to man/limpiar_phone_numbers.Rd
index 7bd8dae..d974529 100644
--- a/man/limpiar_phones.Rd
+++ b/man/limpiar_phone_numbers.Rd
@@ -1,10 +1,15 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/limpiar_phones.R
-\name{limpiar_phones}
-\alias{limpiar_phones}
+\name{limpiar_phone_numbers}
+\alias{limpiar_phone_numbers}
 \title{Clean phone numbers from text}
 \usage{
-limpiar_phones(df, text_var = mention_content, aggressive = TRUE, tag = "None")
+limpiar_phone_numbers(
+  df,
+  text_var = mention_content,
+  aggressive = TRUE,
+  tag = "None"
+)
 }
 \arguments{
 \item{df}{Name of DataFrame or Tibble object}
@@ -64,17 +69,17 @@ phone_examples <- tibble::tibble(
 
 # Default example
 phone_examples \%>\% 
-  limpiar_phones(text_var = text_var, aggressive = FALSE) \%>\% 
+  limpiar_phone_numbers(text_var = text_var, aggressive = FALSE) \%>\% 
   dplyr::select(text_var)
 
 # More aggressive version, catching sequences of digits between 7-15 in length
 phone_examples \%>\% 
-  limpiar_phones(text_var = text_var, aggressive = TRUE) \%>\% 
+  limpiar_phone_numbers(text_var = text_var, aggressive = TRUE) \%>\% 
   dplyr::select(text_var)
 
 # Filter out rows containing phone numbers
 phone_examples \%>\% 
-  limpiar_phones(text_var = text_var, aggressive = FALSE) \%>\% 
+  limpiar_phone_numbers(text_var = text_var, aggressive = FALSE) \%>\% 
   dplyr::filter(phone_number_flag == FALSE) \%>\% 
   dplyr::select(id, text_var)
 
diff --git a/tests/testthat/test-limpiar_phones.R b/tests/testthat/test-limpiar_phones.R
index a20a4b2..123d3b9 100644
--- a/tests/testthat/test-limpiar_phones.R
+++ b/tests/testthat/test-limpiar_phones.R
@@ -79,7 +79,7 @@ output_example_1d <- tibble::tibble(
 # Tests
 test_that("Example test", {
   # test example behaviour
-  output <- limpiar_phones(
+  output <- limpiar_phone_numbers(
     input_example_1,
     text_var = text_var,
     aggressive = FALSE,
@@ -90,7 +90,7 @@ test_that("Example test", {
 
 test_that("Agressive test", {
   # test aggressive works as expected
-  output <- limpiar_phones(
+  output <- limpiar_phone_numbers(
     input_example_1,
     text_var = text_var,
     aggressive = TRUE,
@@ -103,7 +103,7 @@ test_that("Agressive test", {
 
 test_that("No tag test", {
   # Test that no-tag behaviour is expected
-  output <- limpiar_phones(
+  output <- limpiar_phone_numbers(
     input_example_1,
     text_var = text_var
   );
@@ -112,7 +112,7 @@ test_that("No tag test", {
 
 test_that("Data input test", {
   # Test that data exists
-  expect_error(limpiar_phones(
+  expect_error(limpiar_phone_numbers(
     "string",
     text_var = text_var,
     aggressive = FALSE,
@@ -122,7 +122,7 @@ test_that("Data input test", {
 
 test_that("text_var input test", {
   # Test that text_var exists as a column
-  expect_error(limpiar_phones(
+  expect_error(limpiar_phone_numbers(
     input_example_1,
     text_var = "not_a_column",
     aggressive = FALSE,
@@ -132,7 +132,7 @@ test_that("text_var input test", {
 
 test_that("Agressive input test", {
   # Test that aggressive must be a bool
-  expect_error(limpiar_phones(
+  expect_error(limpiar_phone_numbers(
     input_example_1,
     text_var = text_var,
     aggressive = "string",
@@ -142,7 +142,7 @@ test_that("Agressive input test", {
 
 test_that("text input as string test", {
   # test example behaviour
-  output <- limpiar_phones(
+  output <- limpiar_phone_numbers(
     input_example_1,
     text_var = "text_var",
     aggressive = FALSE,

From 8a5155c77cbab3119c1da7002eb67b920e01e738 Mon Sep 17 00:00:00 2001
From: jpcompartir <jp1689@my.bristol.ac.uk>
Date: Mon, 2 Feb 2026 15:34:31 +0000
Subject: [PATCH 5/6] Bump version in DESCRIPTION, add item to NEWS.md, have
 pkgdown render the news sidebar automatically

---
 DESCRIPTION  | 2 +-
 NEWS.md      | 3 +++
 _pkgdown.yml | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index da6b643..8f11663 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: LimpiaR
 Title: LimpiaR
-Version: 1.1.1
+Version: 1.1.2
 Authors@R: 
     as.person(c(
     "Jack Penzer <jack.penzer@sharecreative.com> [cre]",
diff --git a/NEWS.md b/NEWS.md
index f86f7a9..5a8bacb 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,6 @@
+# LimpiaR 1.1.2
+- `limpiar_phone_numbers()` function introduced - flags documents that have phone numbers in the text, see package's function reference section and README for more information
+
 # LimpiaR 1.1.1
 
 -   Small fix for tests failing following a package change in the CI/CD VM
diff --git a/_pkgdown.yml b/_pkgdown.yml
index a904fa4..c541c75 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -120,3 +120,5 @@ repo:
 
 development:
   mode: auto
+  version_label: default
+  version_tooltip: "Version"

From a7ba33541b729a676c264c51f574f02c4f4f8229 Mon Sep 17 00:00:00 2001
From: jpcompartir <jp1689@my.bristol.ac.uk>
Date: Mon, 2 Feb 2026 15:38:36 +0000
Subject: [PATCH 6/6] add Ben as a contributor, update my email, update
 copyright holder

---
 DESCRIPTION | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8f11663..e637ce8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -3,11 +3,12 @@ Title: LimpiaR
 Version: 1.1.2
 Authors@R: 
     as.person(c(
-    "Jack Penzer <jack.penzer@sharecreative.com> [cre]",
+    "Jack Penzer <jack.penzer@samy.com> [cre]",
     "Tim Mooney <tim.mooney@sharecreative.com> [aut]",
-    "SHARE Creative [cph]"
+    "Ben Jessup <ben.jessup@samy.com> [aut]",
+    "SAMY[cph]"
     ))
-Description: SHARE & SAMY Group's dedicated pre-processing suite for Spanish & English data sets. A range of functions for getting text data ready to be analysed.
+Description: SAMY Data Science's dedicated pre-processing suite for Spanish & English data sets. A range of functions for getting text data ready to be analysed.
 URL: https://jpcompartir.github.io/LimpiaR
 License: MIT + file LICENSE
 Encoding: UTF-8