Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions ext/standard/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -4964,6 +4964,9 @@ PHP_FUNCTION(array_unique)
bucket_compare_func_t cmp;
struct bucketindex *arTmp, *cmpdata, *lastkept;
uint32_t i, idx;
zend_long num_key;
zend_string *str_key;
zval *val;

ZEND_PARSE_PARAMETERS_START(1, 2)
Z_PARAM_ARRAY(array)
Expand All @@ -4976,6 +4979,131 @@ PHP_FUNCTION(array_unique)
return;
}

if (sort_type == PHP_SORT_REGULAR) {
	/* SORT_REGULAR de-duplication without sorting.
	 *
	 * A value is kept when cmp() reports it as different from every value
	 * kept before it, so the first occurrence (and its key) always wins.
	 *
	 * To avoid comparing every pair, values are spread over hash buckets.
	 * This is only valid if two values for which cmp() returns 0 always share
	 * a bucket, so the hash is derived from what loose comparison looks at:
	 *   - null, false, "" and the empty array all hash to 0;
	 *   - integers, finite floats and numeric strings hash by numeric value;
	 *   - non-numeric strings hash by content;
	 *   - arrays hash by element count only (equal arrays have equal counts).
	 * Types whose loose equality crosses those classes (true, objects, and
	 * anything else not listed above) are "loose": they are compared against
	 * every kept value, and every bucketed value is compared against the kept
	 * loose values in addition to its own bucket.
	 *
	 * NOTE(review): inputs dominated by loose values (e.g. arrays of objects)
	 * or by values sharing one hash (e.g. arrays of equal size) still need
	 * pairwise comparisons.
	 */
	enum {
		UNIQUE_MIN_BUCKETS = 16,
		UNIQUE_MAX_BUCKETS = 1 << 16
	};

	typedef struct {
		zval **values;     /* dereferenced zvals that live in the input array */
		uint32_t count;    /* used entries in values */
		uint32_t capacity; /* allocated entries in values */
	} value_bucket;

	/* Scale the table with the input so buckets stay short; the bucket count
	 * is a power of two so a hash can be reduced with a mask. */
	const uint32_t num_values = zend_hash_num_elements(Z_ARRVAL_P(array));
	uint32_t num_buckets = UNIQUE_MIN_BUCKETS;
	while (num_buckets < num_values && num_buckets < UNIQUE_MAX_BUCKETS) {
		num_buckets <<= 1;
	}

	value_bucket *buckets = ecalloc(num_buckets, sizeof(value_bucket));
	value_bucket loose = {NULL, 0, 0}; /* kept values that may equal values of any bucket */
	cmp = php_get_data_compare_func_unstable(sort_type, 0);
	array_init(return_value);

	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(array), num_key, str_key, val) {
		/* Classify and compare the referenced value, not the reference wrapper */
		zval *deref_val = val;
		ZVAL_DEREF(deref_val);

		zend_ulong hash = 0;
		double number = 0.0;
		bool is_numeric = false;
		bool is_loose = false;

		switch (Z_TYPE_P(deref_val)) {
			case IS_NULL:
			case IS_FALSE:
				/* Falsy values keep hash 0, like 0, "" and the empty array */
				break;
			case IS_LONG:
				is_numeric = true;
				number = (double) Z_LVAL_P(deref_val);
				break;
			case IS_DOUBLE:
				number = Z_DVAL_P(deref_val);
				if (zend_isnan(number) || zend_isinf(number)) {
					/* NOTE(review): non-finite floats are compared against
					 * everything because their comparison with non-numeric
					 * strings such as "INF" appears to be string based;
					 * confirm against zend_compare(). */
					is_loose = true;
				} else {
					is_numeric = true;
				}
				break;
			case IS_STRING: {
				zend_long lval;
				double dval;
				uint8_t type = is_numeric_string(Z_STRVAL_P(deref_val), Z_STRLEN_P(deref_val), &lval, &dval, 0);

				if (type == IS_LONG) {
					/* "5", "05" and 5 must share a bucket */
					is_numeric = true;
					number = (double) lval;
				} else if (type == IS_DOUBLE) {
					is_numeric = true;
					number = dval;
				} else if (Z_STRLEN_P(deref_val) != 0) {
					hash = zend_string_hash_val(Z_STR_P(deref_val));
				}
				/* else: the empty string keeps hash 0 (it may equal null/false) */
				break;
			}
			case IS_ARRAY:
				/* Only the element count is used: it does not depend on the
				 * array's internal pointer, key order or element types. */
				hash = zend_hash_num_elements(Z_ARRVAL_P(deref_val));
				break;
			default:
				/* true, objects and every remaining type */
				is_loose = true;
				break;
		}

		if (is_numeric) {
			/* Every numeric value goes through the same double -> integer
			 * conversion, so an integer, a float and a numeric string that
			 * compare equal get the same hash. Values that do not fit a
			 * zend_long keep hash 0 instead of being cast (which would be
			 * undefined behaviour). */
			if (number >= (double) ZEND_LONG_MIN && number < (double) ZEND_LONG_MAX) {
				hash = (zend_ulong) (zend_long) number;
			}
		}

		value_bucket *home;
		if (is_loose) {
			home = &loose;
		} else {
			/* Fold the high bits down so values that differ only above the
			 * mask still spread over the table. */
			hash ^= hash >> (sizeof(hash) * 4);
			hash ^= hash >> 16;
			hash ^= hash >> 8;
			home = &buckets[hash & (num_buckets - 1)];
		}

		bool is_duplicate = false;
		Bucket candidate = {.val = *deref_val};

		if (is_loose) {
			/* The result array holds exactly the values kept so far */
			zval *kept;
			ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(return_value), kept) {
				ZVAL_DEREF(kept);
				Bucket other = {.val = *kept};
				if (cmp(&candidate, &other) == 0) {
					is_duplicate = true;
					break;
				}
			} ZEND_HASH_FOREACH_END();
		} else {
			/* Probe the value's own bucket first, then the kept loose values */
			const value_bucket *probe = home;
			do {
				for (uint32_t k = 0; k < probe->count; k++) {
					Bucket other = {.val = *probe->values[k]};
					if (cmp(&candidate, &other) == 0) {
						is_duplicate = true;
						break;
					}
				}
				probe = (probe == home) ? &loose : NULL;
			} while (probe != NULL && !is_duplicate);
		}

		if (!is_duplicate) {
			/* Remember the value for later comparisons */
			if (home->count == home->capacity) {
				home->capacity = home->capacity ? home->capacity * 2 : 4;
				home->values = safe_erealloc(home->values, home->capacity, sizeof(zval *), 0);
			}
			home->values[home->count++] = deref_val;

			/* Add to result; a reference with a refcount of 1 is unwrapped first */
			if (UNEXPECTED(Z_ISREF_P(val) && Z_REFCOUNT_P(val) == 1)) {
				ZVAL_DEREF(val);
			}
			Z_TRY_ADDREF_P(val);

			if (str_key) {
				zend_hash_add_new(Z_ARRVAL_P(return_value), str_key, val);
			} else {
				zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, val);
			}
		}
	} ZEND_HASH_FOREACH_END();

	/* Release the lookup structures; the zvals themselves belong to the arrays */
	for (uint32_t b = 0; b < num_buckets; b++) {
		if (buckets[b].values) {
			efree(buckets[b].values);
		}
	}
	if (loose.values) {
		efree(loose.values);
	}
	efree(buckets);

	return;
}

if (sort_type == PHP_SORT_STRING) {
HashTable seen;
zend_long num_key;
Expand Down
214 changes: 214 additions & 0 deletions ext/standard/tests/array/array_unique_variation_sort_regular.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
--TEST--
Test array_unique() function : SORT_REGULAR type coercion behavior
--FILE--
<?php
echo "*** Testing array_unique() with SORT_REGULAR ***\n";

// Test 1: Integers and their string representations compare equal; the first occurrence is kept
echo "\n-- Integer and string coercion --\n";
var_dump(array_unique([1, "1", 2, "2"], SORT_REGULAR));

// Test 2: Booleans compare equal to the integers 1 and 0
echo "\n-- Boolean coercion --\n";
var_dump(array_unique([true, 1, false, 0], SORT_REGULAR));

// Test 3: NULL collapses "", false and 0 into one entry, while "0" is kept as a separate entry
echo "\n-- NULL coercion --\n";
var_dump(array_unique([null, "", false, 0, "0"], SORT_REGULAR));

// Test 4: Integer, float and numeric-string forms of 1 are all duplicates
echo "\n-- Float coercion --\n";
var_dump(array_unique([1, 1.0, "1", "1.0"], SORT_REGULAR));

// Test 5: Numeric strings compare equal to the numbers they represent
echo "\n-- Numeric strings --\n";
var_dump(array_unique(["10", 10, "10.0", 10.0], SORT_REGULAR));

// Test 6: Leading zeros do NOT make numeric strings distinct: "05", "5" and 5 collapse to one entry
echo "\n-- Leading zeros --\n";
var_dump(array_unique(["05", "5", 5], SORT_REGULAR));

// Test 7: Partial numeric strings don't coerce: "5abc" is kept next to "5"
echo "\n-- Partial numeric strings --\n";
var_dump(array_unique(["5abc", "5", 5], SORT_REGULAR));

// Test 8: Leading or trailing whitespace does not stop a numeric string from matching 5
echo "\n-- Whitespace in numeric strings --\n";
var_dump(array_unique(["5", " 5", "5 ", 5], SORT_REGULAR));

// Test 9: Non-numeric strings are compared case-sensitively, so all three are kept
echo "\n-- Case sensitivity --\n";
var_dump(array_unique(["abc", "ABC", "Abc"], SORT_REGULAR));

// Test 10: Exponential notation coerces to the same number
echo "\n-- Exponential notation --\n";
var_dump(array_unique([1000, "1e3", "1000", 1e3], SORT_REGULAR));

// Test 11: Negative numbers in integer, string and float form are duplicates
echo "\n-- Negative numbers --\n";
var_dump(array_unique([-5, "-5", -5.0], SORT_REGULAR));

// Test 12: Arrays as values: the repeated [1, 2] is removed, [1, 3] is kept
echo "\n-- Arrays --\n";
var_dump(array_unique([[1, 2], [1, 2], [1, 3]], SORT_REGULAR));

// Test 13: NaN handling (NaN != NaN), so both NAN entries are kept
echo "\n-- NaN handling --\n";
var_dump(array_unique([NAN, NAN, 1], SORT_REGULAR));

// Test 14: INF handling: INF and -INF are each de-duplicated but stay distinct from one another
echo "\n-- INF handling --\n";
var_dump(array_unique([INF, INF, -INF, -INF], SORT_REGULAR));

// Test 15: Bug GH-20262 - mixed numeric and alphanumeric strings; the repeated '5' must be removed
echo "\n-- Bug GH-20262 case --\n";
var_dump(array_unique(['5', '10', '3A', '5'], SORT_REGULAR));

// Test 16: SORT_REGULAR vs SORT_STRING comparison; both modes reduce this input to its first element
echo "\n-- SORT_REGULAR vs SORT_STRING --\n";
$input = [true, 1, "1"];
echo "SORT_REGULAR: ";
var_dump(array_unique($input, SORT_REGULAR));
echo "SORT_STRING: ";
var_dump(array_unique($input, SORT_STRING));

echo "\nDone\n";
?>
--EXPECT--
*** Testing array_unique() with SORT_REGULAR ***

-- Integer and string coercion --
array(2) {
[0]=>
int(1)
[2]=>
int(2)
}

-- Boolean coercion --
array(2) {
[0]=>
bool(true)
[2]=>
bool(false)
}

-- NULL coercion --
array(2) {
[0]=>
NULL
[4]=>
string(1) "0"
}

-- Float coercion --
array(1) {
[0]=>
int(1)
}

-- Numeric strings --
array(1) {
[0]=>
string(2) "10"
}

-- Leading zeros --
array(1) {
[0]=>
string(2) "05"
}

-- Partial numeric strings --
array(2) {
[0]=>
string(4) "5abc"
[1]=>
string(1) "5"
}

-- Whitespace in numeric strings --
array(1) {
[0]=>
string(1) "5"
}

-- Case sensitivity --
array(3) {
[0]=>
string(3) "abc"
[1]=>
string(3) "ABC"
[2]=>
string(3) "Abc"
}

-- Exponential notation --
array(1) {
[0]=>
int(1000)
}

-- Negative numbers --
array(1) {
[0]=>
int(-5)
}

-- Arrays --
array(2) {
[0]=>
array(2) {
[0]=>
int(1)
[1]=>
int(2)
}
[2]=>
array(2) {
[0]=>
int(1)
[1]=>
int(3)
}
}

-- NaN handling --
array(3) {
[0]=>
float(NAN)
[1]=>
float(NAN)
[2]=>
int(1)
}

-- INF handling --
array(2) {
[0]=>
float(INF)
[2]=>
float(-INF)
}

-- Bug GH-20262 case --
array(3) {
[0]=>
string(1) "5"
[1]=>
string(2) "10"
[2]=>
string(2) "3A"
}

-- SORT_REGULAR vs SORT_STRING --
SORT_REGULAR: array(1) {
[0]=>
bool(true)
}
SORT_STRING: array(1) {
[0]=>
bool(true)
}

Done
19 changes: 19 additions & 0 deletions ext/standard/tests/array/gh20262.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
--TEST--
Bug GH-20262 (array_unique() with SORT_REGULAR fails to remove duplicates with mixed strings)
--FILE--
<?php

// Original bug report case: '5' occurs twice, separated by '10' and the
// non-numeric '3A'; only the first '5' (key 0) may remain in the result.
$units = ['5', '10', '3A', '5'];
var_dump(array_unique($units, SORT_REGULAR));

?>
--EXPECT--
array(3) {
[0]=>
string(1) "5"
[1]=>
string(2) "10"
[2]=>
string(2) "3A"
}