From 821a7e0ade5afcfa56417dca1d98be4d02a81255 Mon Sep 17 00:00:00 2001 From: Matthew J Mucklo Date: Wed, 4 Feb 2026 00:20:25 -0800 Subject: [PATCH 1/4] back port separator changes --- .scrutinizer.yml | 29 ++++-- README.md | 58 +++++++++++- src/Parse.php | 26 ++--- src/ParseOptions.php | 62 +++++++++++- tests/ParseTest.php | 10 +- tests/testspec.yml | 220 ++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 372 insertions(+), 33 deletions(-) diff --git a/.scrutinizer.yml b/.scrutinizer.yml index 2cb12c9..53d3598 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -1,20 +1,29 @@ # .scrutinizer.yml +filter: + paths: + - 'src/*' + excluded_paths: + - 'tests/*' + - 'vendor/*' + checks: php: code_rating: true duplication: true + build: image: default-jammy nodes: analysis: - environment: - php: - version: 8.2 - tests: - override: - - - command: 'XDEBUG_MODE=coverage bin/phpunit --coverage-clover=.coverage' - coverage: - file: '.coverage' - format: 'clover' + tests: + override: + - php-scrutinizer-run + tests: + tests: + override: + - + command: 'XDEBUG_MODE=coverage bin/phpunit --coverage-clover=.coverage' + coverage: + file: '.coverage' + format: 'clover' diff --git a/README.md b/README.md index 19d0338..650382d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ email-parse =========== +[![Support on Patreon](https://img.shields.io/badge/Patreon-Support%20Me-f96854?logo=patreon)](https://www.patreon.com/cw/MatthewJMucklo) + [![CI](https://github.com/mmucklo/email-parse/workflows/CI/badge.svg)](https://github.com/mmucklo/email-parse/actions) [![codecov](https://codecov.io/gh/mmucklo/email-parse/branch/master/graph/badge.svg)](https://codecov.io/gh/mmucklo/email-parse) [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/mmucklo/email-parse/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/mmucklo/email-parse/?branch=master) @@ -11,7 +13,7 @@ email-parse Email\Parse is a multiple (and single) batch email address parser that is 
reasonably RFC822 / RFC2822 compliant. -It parses a list of 1 to n email addresses separated by space or comma +It parses a list of 1 to n email addresses separated by space, comma, or semicolon (configurable). Installation: ------------- @@ -27,12 +29,64 @@ Add this line to your composer.json "require" section: Usage: ------ +### Basic Usage + ```php use Email\Parse; $result = Parse::getInstance()->parse("a@aaa.com b@bbb.com"); ``` +### Advanced Usage with ParseOptions + +You can configure separator behavior and other parsing options using `ParseOptions`: + +```php +use Email\Parse; +use Email\ParseOptions; + +// Example 1: Use comma and semicolon as separators (default behavior includes whitespace) +$options = new ParseOptions([], [',', ';']); +$parser = new Parse(null, $options); +$result = $parser->parse("a@aaa.com; b@bbb.com, c@ccc.com"); + +// Example 2: Disable whitespace as separator (only comma and semicolon work) +$options = new ParseOptions([], [',', ';'], false); +$parser = new Parse(null, $options); +$result = $parser->parse("a@aaa.com; b@bbb.com"); // Works - uses semicolon +$result = $parser->parse("a@aaa.com b@bbb.com"); // Won't split - whitespace not a separator + +// Example 3: Names with spaces always work regardless of whitespace separator setting +$options = new ParseOptions([], [',', ';'], false); +$parser = new Parse(null, $options); +$result = $parser->parse("John Doe <john@example.com>, Jane Smith <jane@example.com>"); +// Returns 2 valid emails with names preserved +``` + +#### ParseOptions Constructor + +```php +/** + * @param array $bannedChars Array of characters to ban from email addresses (e.g., ['%', '!']) + * @param array $separators Array of separator characters (default: [',']) + * @param bool $useWhitespaceAsSeparator Whether to treat whitespace/newlines as separators (default: true) + */ +public function __construct( + array $bannedChars = [], + array $separators = [','], + bool $useWhitespaceAsSeparator = true +) +``` + +#### Supported Separators + +- **Comma
(`,`)** - Configured via `$separators` parameter +- **Semicolon (`;`)** - Configured via `$separators` parameter +- **Whitespace (space, tab, newlines)** - Controlled by `$useWhitespaceAsSeparator` parameter +- **Mixed separators** - All configured separators work together seamlessly + +**Note:** When `useWhitespaceAsSeparator` is `false`, whitespace is still properly cleaned up and names with spaces (like "John Doe") continue to work correctly. + Notes: ====== This should be RFC 2822 compliant, although it will let a few obsolete RFC 822 addresses through such as `test"test"test@xyz.com` (note the quoted string in the middle of the address, which may be obsolete as of RFC 2822). However it wont allow escaping outside of quotes such as `test@test@xyz.com`. This would have to be written as `"test@test"@xyz.com` @@ -51,7 +105,7 @@ how-about-comments(this is a comment!!)@xyz.com ```php /** * function parse($emails, $multiple = true, $encoding = 'UTF-8') - * @param string $emails List of Email addresses separated by comma or space if multiple + * @param string $emails List of Email addresses separated by configured separators (comma, semicolon, whitespace by default) * @param bool $multiple (optional, default: true) Whether to parse for multiple email addresses or not * @param string $encoding (optional, default: 'UTF-8')The encoding if not 'UTF-8' * @return: see below: */ diff --git a/src/Parse.php b/src/Parse.php index 85fa25a..d8bb6f3 100644 --- a/src/Parse.php +++ b/src/Parse.php @@ -292,12 +292,10 @@ public function parse($emails, $multiple = true, $encoding = 'UTF-8') case self::STATE_SKIP_AHEAD: // Skip ahead is set when a bad email address is encountered // It's supposed to skip to the next delimiter and continue parsing from there - if ($multiple && - (' ' == $curChar || - "\r" == $curChar || - "\n" == $curChar || - "\t" == $curChar || - ',' == $curChar)) { + $isWhitespaceSeparator = $this->options->getUseWhitespaceAsSeparator() && + (' ' == $curChar || "\r" 
== $curChar || "\n" == $curChar || "\t" == $curChar); + + if ($multiple && ($isWhitespaceSeparator || isset($this->options->getSeparators()[$curChar]))) { $state = self::STATE_END_ADDRESS; } else { $emailAddress['original_address'] .= $curChar; @@ -329,7 +327,7 @@ public function parse($emails, $multiple = true, $encoding = 'UTF-8') // Fall through // no break case self::STATE_ADDRESS: - if (',' != $curChar || !$multiple) { + if (!isset($this->options->getSeparators()[$curChar]) || !$multiple) { $emailAddress['original_address'] .= $curChar; } @@ -339,8 +337,8 @@ public function parse($emails, $multiple = true, $encoding = 'UTF-8') $commentNestLevel = 1; break; - } elseif (',' == $curChar) { - // Handle Comma + } elseif (isset($this->options->getSeparators()[$curChar])) { + // Handle separator (comma, semicolon, etc.) if ($multiple && (self::STATE_DOMAIN == $subState || self::STATE_AFTER_DOMAIN == $subState)) { // If we're already in the domain part, this should be the end of the address $state = self::STATE_END_ADDRESS; @@ -349,9 +347,9 @@ public function parse($emails, $multiple = true, $encoding = 'UTF-8') } else { $emailAddress['invalid'] = true; if ($multiple || ($i + 5) >= $len) { - $emailAddress['invalid_reason'] = 'Misplaced Comma or missing "@" symbol'; + $emailAddress['invalid_reason'] = 'Misplaced separator or missing "@" symbol'; } else { - $emailAddress['invalid_reason'] = 'Comma not permitted - only one email address allowed'; + $emailAddress['invalid_reason'] = 'Separator not permitted - only one email address allowed'; } } } elseif (' ' == $curChar || @@ -382,8 +380,10 @@ public function parse($emails, $multiple = true, $encoding = 'UTF-8') $emailAddress['invalid'] = true; $emailAddress['invalid_reason'] = 'Email Address contains whitespace'; } - } elseif (self::STATE_DOMAIN == $subState || self::STATE_AFTER_DOMAIN == $subState) { - // If we're already in the domain part, this should be the end of the whole address + } elseif 
($this->options->getUseWhitespaceAsSeparator() && + (self::STATE_DOMAIN == $subState || self::STATE_AFTER_DOMAIN == $subState)) { + // If we're already in the domain part and whitespace is a separator, + // this should be the end of the whole address $state = self::STATE_END_ADDRESS; break; diff --git a/src/ParseOptions.php b/src/ParseOptions.php index 3c7139c..2ec5a1d 100644 --- a/src/ParseOptions.php +++ b/src/ParseOptions.php @@ -5,19 +5,36 @@ class ParseOptions { /** - * @var array + * @var array */ private $bannedChars = []; - public function __construct(array $bannedChars = []) + /** + * @var array + */ + private $separators = []; + + /** + * @var bool + */ + private $useWhitespaceAsSeparator = true; + + /** + * @param array $bannedChars + * @param array $separators + * @param bool $useWhitespaceAsSeparator + */ + public function __construct(array $bannedChars = [], array $separators = [','], $useWhitespaceAsSeparator = true) { if ($bannedChars) { $this->setBannedChars($bannedChars); } + $this->setSeparators($separators); + $this->useWhitespaceAsSeparator = $useWhitespaceAsSeparator; } /** - * @param array $bannedChars + * @param array $bannedChars * @return void */ public function setBannedChars(array $bannedChars) @@ -29,10 +46,47 @@ public function setBannedChars(array $bannedChars) } /** - * @return array + * @return array */ public function getBannedChars() { return $this->bannedChars; } + + /** + * @param array $separators + * @return void + */ + public function setSeparators(array $separators) + { + $this->separators = []; + foreach ($separators as $separator) { + $this->separators[$separator] = true; + } + } + + /** + * @return array + */ + public function getSeparators() + { + return $this->separators; + } + + /** + * @param bool $useWhitespaceAsSeparator + * @return void + */ + public function setUseWhitespaceAsSeparator($useWhitespaceAsSeparator) + { + $this->useWhitespaceAsSeparator = $useWhitespaceAsSeparator; + } + + /** + * @return bool + */ + 
public function getUseWhitespaceAsSeparator() + { + return $this->useWhitespaceAsSeparator; + } } diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 549dc60..28ce1a3 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -6,6 +6,7 @@ require_once __DIR__.'/../src/Parse.php'; use Email\Parse; +use Email\ParseOptions; class ParseTest extends \PHPUnit\Framework\TestCase { @@ -18,7 +19,14 @@ public function testParseEmailAddresses() $multiple = $test['multiple']; $result = $test['result']; - $this->assertSame($result, Parse::getInstance()->parse($emails, $multiple)); + // Check if test specifies use_whitespace_as_separator option + $useWhitespaceAsSeparator = $test['use_whitespace_as_separator'] ?? true; + + // Configure Parse to support both comma and semicolon as separators + $options = new ParseOptions(['%', '!'], [',', ';'], $useWhitespaceAsSeparator); + $parser = new Parse(null, $options); + + $this->assertSame($result, $parser->parse($emails, $multiple)); } } } diff --git a/tests/testspec.yml b/tests/testspec.yml index 8f0e779..52d7285 100644 --- a/tests/testspec.yml +++ b/tests/testspec.yml @@ -274,7 +274,7 @@ domain: asdf.ghjkl.com ip: '' invalid: true - invalid_reason: 'Comma not permitted - only one email address allowed' + invalid_reason: 'Separator not permitted - only one email address allowed' - emails: 'tnam e@asdf.g asdfa hjkl.com, tn''''''ame@asdf.ghjkl.com, tname-test1@asdf.ghjkl.com' multiple: true @@ -307,7 +307,7 @@ domain: '' ip: '' invalid: true - invalid_reason: 'Misplaced Comma or missing "@" symbol' + invalid_reason: 'Misplaced separator or missing "@" symbol' - address: 'tn''''''ame@asdf.ghjkl.com' simple_address: 'tn''''''ame@asdf.ghjkl.com' @@ -1048,7 +1048,7 @@ domain: '' ip: 'IPv6:2001:4860:4860::8888' invalid: true - invalid_reason: 'Comma not permitted - only one email address allowed' + invalid_reason: 'Separator not permitted - only one email address allowed' - emails: 'testing@[0.0.0.0]' multiple: false @@ -1501,3 
+1501,217 @@ ip: '' invalid: false invalid_reason: null +- + emails: 'test1@example.com test2@example.com' + multiple: true + use_whitespace_as_separator: false + result: + success: false + reason: 'Invalid email address' + email_addresses: + - + address: '' + simple_address: '' + original_address: 'test1@example.com test2@example.com' + name: '' + name_parsed: '' + local_part: test1 + local_part_parsed: test1 + domain_part: example.comtest2 + domain: example.comtest2 + ip: '' + invalid: true + invalid_reason: 'Multiple at ''@'' symbols in email address' +- + emails: 'test1@example.com; test2@example.com' + multiple: true + use_whitespace_as_separator: false + result: + success: true + reason: null + email_addresses: + - + address: test1@example.com + simple_address: test1@example.com + original_address: test1@example.com + name: '' + name_parsed: '' + local_part: test1 + local_part_parsed: test1 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: test2@example.com + simple_address: test2@example.com + original_address: test2@example.com + name: '' + name_parsed: '' + local_part: test2 + local_part_parsed: test2 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null +- + emails: 'John Doe <john@example.com>, Jane Smith <jane@example.com>' + multiple: true + use_whitespace_as_separator: false + result: + success: true + reason: null + email_addresses: + - + address: 'John Doe <john@example.com>' + simple_address: john@example.com + original_address: 'John Doe <john@example.com>' + name: 'John Doe' + name_parsed: 'John Doe' + local_part: john + local_part_parsed: john + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: 'Jane Smith <jane@example.com>' + simple_address: jane@example.com + original_address: 'Jane Smith <jane@example.com>' + name: 'Jane Smith' + name_parsed: 'Jane Smith' + local_part: jane + local_part_parsed: jane + domain_part: example.com + domain: example.com + ip: '' + invalid: false +
invalid_reason: null +- + emails: 'test1@example.com; test2@example.com; test3@example.org' + multiple: true + result: + success: true + reason: null + email_addresses: + - + address: test1@example.com + simple_address: test1@example.com + original_address: test1@example.com + name: '' + name_parsed: '' + local_part: test1 + local_part_parsed: test1 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: test2@example.com + simple_address: test2@example.com + original_address: test2@example.com + name: '' + name_parsed: '' + local_part: test2 + local_part_parsed: test2 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: test3@example.org + simple_address: test3@example.org + original_address: test3@example.org + name: '' + name_parsed: '' + local_part: test3 + local_part_parsed: test3 + domain_part: example.org + domain: example.org + ip: '' + invalid: false + invalid_reason: null +- + emails: 'John Doe <john@example.com>; Jane Smith <jane@example.com>' + multiple: true + result: + success: true + reason: null + email_addresses: + - + address: 'John Doe <john@example.com>' + simple_address: john@example.com + original_address: 'John Doe <john@example.com>' + name: 'John Doe' + name_parsed: 'John Doe' + local_part: john + local_part_parsed: john + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: 'Jane Smith <jane@example.com>' + simple_address: jane@example.com + original_address: 'Jane Smith <jane@example.com>' + name: 'Jane Smith' + name_parsed: 'Jane Smith' + local_part: jane + local_part_parsed: jane + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null +- + emails: 'test1@example.com, test2@example.com; test3@example.org' + multiple: true + result: + success: true + reason: null + email_addresses: + - + address: test1@example.com + simple_address: test1@example.com + original_address: test1@example.com + name: '' + name_parsed: '' + local_part:
test1 + local_part_parsed: test1 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: test2@example.com + simple_address: test2@example.com + original_address: test2@example.com + name: '' + name_parsed: '' + local_part: test2 + local_part_parsed: test2 + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: test3@example.org + simple_address: test3@example.org + original_address: test3@example.org + name: '' + name_parsed: '' + local_part: test3 + local_part_parsed: test3 + domain_part: example.org + domain: example.org + ip: '' + invalid: false + invalid_reason: null From 8aecc6d86aa0af376e028718ee8bb82ab89e586b Mon Sep 17 00:00:00 2001 From: Matthew J Mucklo Date: Fri, 6 Feb 2026 23:26:00 -0800 Subject: [PATCH 2/4] Add exclusive separator and RFC 5322 quoted name tests - Add 7 new test cases for exclusive separator behavior - Test comma-only separator (semicolon doesn't work as separator) - Test semicolon-only separator (comma doesn't work as separator) - Test RFC 5322 compliant quoted display names with commas - Test unquoted display names with commas (correctly fails) - Add 'separators' configuration option to testspec.yml - Update ParseTest.php to read custom separator configurations from tests - All 92 tests passing (85 original + 7 new) --- tests/ParseTest.php | 7 +- tests/testspec.yml | 198 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 203 insertions(+), 2 deletions(-) diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 28ce1a3..523f649 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -22,8 +22,11 @@ public function testParseEmailAddresses() // Check if test specifies use_whitespace_as_separator option $useWhitespaceAsSeparator = $test['use_whitespace_as_separator'] ?? 
true; - // Configure Parse to support both comma and semicolon as separators - $options = new ParseOptions(['%', '!'], [',', ';'], $useWhitespaceAsSeparator); + // Check if test specifies custom separators + $separators = $test['separators'] ?? [',', ';']; + + // Configure Parse to support configured separators + $options = new ParseOptions(['%', '!'], $separators, $useWhitespaceAsSeparator); $parser = new Parse(null, $options); $this->assertSame($result, $parser->parse($emails, $multiple)); diff --git a/tests/testspec.yml b/tests/testspec.yml index 52d7285..f848c7f 100644 --- a/tests/testspec.yml +++ b/tests/testspec.yml @@ -1715,3 +1715,201 @@ ip: '' invalid: false invalid_reason: null +- + emails: 'john@example.com;jane@example.com' + multiple: true + separators: [','] + use_whitespace_as_separator: false + result: + success: false + reason: 'Invalid email address' + email_addresses: + - + address: '' + simple_address: '' + original_address: 'john@example.com;jane@example.com' + name: '' + name_parsed: '' + local_part: john + local_part_parsed: john + domain_part: 'example.com' + domain: 'example.com' + ip: '' + invalid: true + invalid_reason: 'Invalid character found in domain of email address (please put in quotes if needed): '';''' +- + emails: 'john@example.com, jane@example.com' + multiple: true + separators: [';'] + use_whitespace_as_separator: false + result: + success: false + reason: 'Invalid email address' + email_addresses: + - + address: '' + simple_address: '' + original_address: 'john@example.com, jane@example.com' + name: '' + name_parsed: '' + local_part: john + local_part_parsed: john + domain_part: 'example.com' + domain: 'example.com' + ip: '' + invalid: true + invalid_reason: 'Invalid character found in domain of email address (please put in quotes if needed): '',''' +- + emails: 'john@example.com, jane@example.com' + multiple: true + separators: [','] + result: + success: true + reason: null + email_addresses: + - + address: john@example.com 
+ simple_address: john@example.com + original_address: john@example.com + name: '' + name_parsed: '' + local_part: john + local_part_parsed: john + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: jane@example.com + simple_address: jane@example.com + original_address: jane@example.com + name: '' + name_parsed: '' + local_part: jane + local_part_parsed: jane + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null +- + emails: 'john@example.com; jane@example.com' + multiple: true + separators: [';'] + result: + success: true + reason: null + email_addresses: + - + address: john@example.com + simple_address: john@example.com + original_address: john@example.com + name: '' + name_parsed: '' + local_part: john + local_part_parsed: john + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null + - + address: jane@example.com + simple_address: jane@example.com + original_address: jane@example.com + name: '' + name_parsed: '' + local_part: jane + local_part_parsed: jane + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null +- + emails: '"Smith, John" <john@example.com>' + multiple: true + result: + success: true + reason: null + email_addresses: + - + address: '"Smith, John" <john@example.com>' + simple_address: john@example.com + original_address: '"Smith, John" <john@example.com>' + name: '"Smith, John"' + name_parsed: 'Smith, John' + local_part: john + local_part_parsed: john + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null +- + emails: '"Smith, John" <john@example.com>, "Doe, Jane" <jane@example.com>' + multiple: true + result: + success: true + reason: null + email_addresses: + - + address: '"Smith, John" <john@example.com>' + simple_address: john@example.com + original_address: '"Smith, John" <john@example.com>' + name: '"Smith, John"' + name_parsed: 'Smith, John' + local_part: john + local_part_parsed: john + domain_part: example.com + domain: example.com +
ip: '' + invalid: false + invalid_reason: null + - + address: '"Doe, Jane" <jane@example.com>' + simple_address: jane@example.com + original_address: '"Doe, Jane" <jane@example.com>' + name: '"Doe, Jane"' + name_parsed: 'Doe, Jane' + local_part: jane + local_part_parsed: jane + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null +- + emails: 'Smith, John <john@example.com>' + multiple: true + result: + success: false + reason: 'Invalid email address' + email_addresses: + - + address: '' + simple_address: '' + original_address: Smith + name: '' + name_parsed: '' + local_part: '' + local_part_parsed: '' + domain_part: '' + domain: '' + ip: '' + invalid: true + invalid_reason: 'Misplaced separator or missing "@" symbol' + - + address: 'John <john@example.com>' + simple_address: john@example.com + original_address: 'John <john@example.com>' + name: John + name_parsed: John + local_part: john + local_part_parsed: john + domain_part: example.com + domain: example.com + ip: '' + invalid: false + invalid_reason: null From 053d3ad3442421cebd1dc7c71f60cccd29568b6c Mon Sep 17 00:00:00 2001 From: Matthew J Mucklo Date: Fri, 6 Feb 2026 23:47:13 -0800 Subject: [PATCH 3/4] Enable CI for 7.1 branch --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8e19df6..99fc687 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ name: CI on: push: - branches: [ main, master, develop ] + branches: [ main, master, develop, 7.1 ] pull_request: branches: [ main, master, develop ] From cda822a483c59bea8f232b790fec3b040615b07c Mon Sep 17 00:00:00 2001 From: Matthew J Mucklo Date: Sat, 7 Feb 2026 01:34:49 -0800 Subject: [PATCH 4/4] add 2.2 branch to ci --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 99fc687 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name:
CI on: push: - branches: [ main, master, develop, 7.1 ] + branches: [ main, master, develop, 2.2 ] pull_request: - branches: [ main, master, develop ] + branches: [ main, master, develop, 2.2 ] jobs: tests: