Skip to content

Commit 1bdb1f8

Browse files
committed
Merge remote-tracking branch 'upstream/main' into aijams-take-function-invalid-dtype
2 parents 040c127 + e9e1b32 commit 1bdb1f8

File tree

13 files changed

+266
-310
lines changed

13 files changed

+266
-310
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ jobs:
399399
pyodide build
400400
401401
- name: Set up Node.js
402-
uses: actions/setup-node@v5
402+
uses: actions/setup-node@v6
403403
with:
404404
node-version: '20'
405405

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1041,6 +1041,7 @@ Indexing
10411041
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
10421042
- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
10431043
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
1044+
- Bug in :meth:`Index.get_indexer` not casting missing values correctly for the new string dtype (:issue:`55833`)
10441045
- Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :meth:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`)
10451046
- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
10461047

@@ -1147,7 +1148,6 @@ Groupby/resample/rolling
11471148
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
11481149
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
11491150
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
1150-
- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` where the end of window was not indexed correctly. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
11511151

11521152
Reshaping
11531153
^^^^^^^^^

pandas/_libs/include/pandas/portable.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,51 @@ The full license is in the LICENSE file, distributed with this software.
3535
do { \
3636
} while (0) /* fallthrough */
3737
#endif
38+
39+
#if defined(_WIN32)
40+
#ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS
41+
#define ENABLE_INTSAFE_SIGNED_FUNCTIONS
42+
#endif
43+
#include <intsafe.h>
44+
#define checked_add(a, b, res) \
45+
_Generic((res), \
46+
int *: IntAdd, \
47+
unsigned int *: UIntAdd, \
48+
long *: LongAdd, \
49+
unsigned long *: ULongAdd, \
50+
long long *: LongLongAdd, \
51+
unsigned long long *: ULongLongAdd, \
52+
short *: ShortAdd, \
53+
unsigned short *: UShortAdd)(a, b, res)
54+
55+
#define checked_sub(a, b, res) \
56+
_Generic((res), \
57+
int *: IntSub, \
58+
unsigned int *: UIntSub, \
59+
long *: LongSub, \
60+
unsigned long *: ULongSub, \
61+
long long *: LongLongSub, \
62+
unsigned long long *: ULongLongSub, \
63+
short *: ShortSub, \
64+
unsigned short *: UShortSub)(a, b, res)
65+
66+
#define checked_mul(a, b, res) \
67+
_Generic((res), \
68+
int *: IntMult, \
69+
unsigned int *: UIntMult, \
70+
long *: LongMult, \
71+
unsigned long *: ULongMult, \
72+
long long *: LongLongMult, \
73+
unsigned long long *: ULongLongMult, \
74+
short *: ShortMult, \
75+
unsigned short *: UShortMult)(a, b, res)
76+
77+
#elif (defined(__has_builtin) && __has_builtin(__builtin_add_overflow)) || \
78+
__GNUC__ > 7
79+
#define checked_add(a, b, res) __builtin_add_overflow(a, b, res)
80+
#define checked_sub(a, b, res) __builtin_sub_overflow(a, b, res)
81+
#define checked_mul(a, b, res) __builtin_mul_overflow(a, b, res)
82+
#else
83+
_Static_assert(0,
84+
"Overflow checking not detected; please try a newer compiler");
85+
#endif

pandas/_libs/src/vendored/numpy/datetime/np_datetime.c

Lines changed: 40 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -23,39 +23,11 @@ This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt
2323
#include "pandas/vendored/numpy/datetime/np_datetime.h"
2424
#define NO_IMPORT_ARRAY
2525
#define PY_ARRAY_UNIQUE_SYMBOL PANDAS_DATETIME_NUMPY
26+
#include "pandas/portable.h"
2627
#include <numpy/ndarrayobject.h>
2728
#include <numpy/npy_common.h>
2829
#include <stdbool.h>
2930

30-
#if defined(_WIN32)
31-
#ifndef ENABLE_INTSAFE_SIGNED_FUNCTIONS
32-
#define ENABLE_INTSAFE_SIGNED_FUNCTIONS
33-
#endif
34-
#include <intsafe.h>
35-
#define checked_int64_add(a, b, res) LongLongAdd(a, b, res)
36-
#define checked_int64_sub(a, b, res) LongLongSub(a, b, res)
37-
#define checked_int64_mul(a, b, res) LongLongMult(a, b, res)
38-
#else
39-
#if defined __has_builtin
40-
#if __has_builtin(__builtin_add_overflow)
41-
#define checked_int64_add(a, b, res) __builtin_add_overflow(a, b, res)
42-
#define checked_int64_sub(a, b, res) __builtin_sub_overflow(a, b, res)
43-
#define checked_int64_mul(a, b, res) __builtin_mul_overflow(a, b, res)
44-
#else
45-
_Static_assert(0,
46-
"Overflow checking not detected; please try a newer compiler");
47-
#endif
48-
// __has_builtin was added in gcc 10, but our muslinux_1_1 build environment
49-
// only has gcc-9.3, so fall back to __GNUC__ macro as long as we have that
50-
#elif __GNUC__ > 7
51-
#define checked_int64_add(a, b, res) __builtin_add_overflow(a, b, res)
52-
#define checked_int64_sub(a, b, res) __builtin_sub_overflow(a, b, res)
53-
#define checked_int64_mul(a, b, res) __builtin_mul_overflow(a, b, res)
54-
#else
55-
_Static_assert(0, "__has_builtin not detected; please try a newer compiler");
56-
#endif
57-
#endif
58-
5931
#define XSTR(a) STR(a)
6032
#define STR(a) #a
6133

@@ -140,53 +112,53 @@ npy_int64 get_datetimestruct_days(const npy_datetimestruct *dts) {
140112
npy_int64 year, days = 0;
141113
const int *month_lengths;
142114

143-
PD_CHECK_OVERFLOW(checked_int64_sub(dts->year, 1970, &year));
144-
PD_CHECK_OVERFLOW(checked_int64_mul(year, 365, &days));
115+
PD_CHECK_OVERFLOW(checked_sub(dts->year, 1970, &year));
116+
PD_CHECK_OVERFLOW(checked_mul(year, 365, &days));
145117

146118
/* Adjust for leap years */
147119
if (days >= 0) {
148120
/*
149121
* 1968 is the closest leap year before 1970.
150122
* Exclude the current year, so add 1.
151123
*/
152-
PD_CHECK_OVERFLOW(checked_int64_add(year, 1, &year));
124+
PD_CHECK_OVERFLOW(checked_add(year, 1, &year));
153125
/* Add one day for each 4 years */
154-
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days));
126+
PD_CHECK_OVERFLOW(checked_add(days, year / 4, &days));
155127
/* 1900 is the closest previous year divisible by 100 */
156-
PD_CHECK_OVERFLOW(checked_int64_add(year, 68, &year));
128+
PD_CHECK_OVERFLOW(checked_add(year, 68, &year));
157129
/* Subtract one day for each 100 years */
158-
PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days));
130+
PD_CHECK_OVERFLOW(checked_sub(days, year / 100, &days));
159131
/* 1600 is the closest previous year divisible by 400 */
160-
PD_CHECK_OVERFLOW(checked_int64_add(year, 300, &year));
132+
PD_CHECK_OVERFLOW(checked_add(year, 300, &year));
161133
/* Add one day for each 400 years */
162-
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days));
134+
PD_CHECK_OVERFLOW(checked_add(days, year / 400, &days));
163135
} else {
164136
/*
165137
* 1972 is the closest leap year after 1970.
166138
* Include the current year, so subtract 2.
167139
*/
168-
PD_CHECK_OVERFLOW(checked_int64_sub(year, 2, &year));
140+
PD_CHECK_OVERFLOW(checked_sub(year, 2, &year));
169141
/* Subtract one day for each 4 years */
170-
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 4, &days));
142+
PD_CHECK_OVERFLOW(checked_add(days, year / 4, &days));
171143
/* 2000 is the closest later year divisible by 100 */
172-
PD_CHECK_OVERFLOW(checked_int64_sub(year, 28, &year));
144+
PD_CHECK_OVERFLOW(checked_sub(year, 28, &year));
173145
/* Add one day for each 100 years */
174-
PD_CHECK_OVERFLOW(checked_int64_sub(days, year / 100, &days));
146+
PD_CHECK_OVERFLOW(checked_sub(days, year / 100, &days));
175147
/* 2000 is also the closest later year divisible by 400 */
176148
/* Subtract one day for each 400 years */
177-
PD_CHECK_OVERFLOW(checked_int64_add(days, year / 400, &days));
149+
PD_CHECK_OVERFLOW(checked_add(days, year / 400, &days));
178150
}
179151

180152
month_lengths = days_per_month_table[is_leapyear(dts->year)];
181153
month = dts->month - 1;
182154

183155
/* Add the months */
184156
for (i = 0; i < month; ++i) {
185-
PD_CHECK_OVERFLOW(checked_int64_add(days, month_lengths[i], &days));
157+
PD_CHECK_OVERFLOW(checked_add(days, month_lengths[i], &days));
186158
}
187159

188160
/* Add the days */
189-
PD_CHECK_OVERFLOW(checked_int64_add(days, dts->day - 1, &days));
161+
PD_CHECK_OVERFLOW(checked_add(days, dts->day - 1, &days));
190162

191163
return days;
192164
}
@@ -341,11 +313,11 @@ PyObject *extract_utc_offset(PyObject *obj) {
341313
}
342314

343315
static inline int scaleYearToEpoch(int64_t year, int64_t *result) {
344-
return checked_int64_sub(year, 1970, result);
316+
return checked_sub(year, 1970, result);
345317
}
346318

347319
static inline int scaleYearsToMonths(int64_t years, int64_t *result) {
348-
return checked_int64_mul(years, 12, result);
320+
return checked_mul(years, 12, result);
349321
}
350322

351323
static inline int scaleDaysToWeeks(int64_t days, int64_t *result) {
@@ -355,7 +327,7 @@ static inline int scaleDaysToWeeks(int64_t days, int64_t *result) {
355327
} else {
356328
int res;
357329
int64_t checked_days;
358-
if ((res = checked_int64_sub(days, 6, &checked_days))) {
330+
if ((res = checked_sub(days, 6, &checked_days))) {
359331
return res;
360332
}
361333

@@ -365,43 +337,43 @@ static inline int scaleDaysToWeeks(int64_t days, int64_t *result) {
365337
}
366338

367339
static inline int scaleDaysToHours(int64_t days, int64_t *result) {
368-
return checked_int64_mul(days, 24, result);
340+
return checked_mul(days, 24, result);
369341
}
370342

371343
static inline int scaleHoursToMinutes(int64_t hours, int64_t *result) {
372-
return checked_int64_mul(hours, 60, result);
344+
return checked_mul(hours, 60, result);
373345
}
374346

375347
static inline int scaleMinutesToSeconds(int64_t minutes, int64_t *result) {
376-
return checked_int64_mul(minutes, 60, result);
348+
return checked_mul(minutes, 60, result);
377349
}
378350

379351
static inline int scaleSecondsToMilliseconds(int64_t seconds, int64_t *result) {
380-
return checked_int64_mul(seconds, 1000, result);
352+
return checked_mul(seconds, 1000, result);
381353
}
382354

383355
static inline int scaleSecondsToMicroseconds(int64_t seconds, int64_t *result) {
384-
return checked_int64_mul(seconds, 1000000, result);
356+
return checked_mul(seconds, 1000000, result);
385357
}
386358

387359
static inline int scaleMicrosecondsToNanoseconds(int64_t microseconds,
388360
int64_t *result) {
389-
return checked_int64_mul(microseconds, 1000, result);
361+
return checked_mul(microseconds, 1000, result);
390362
}
391363

392364
static inline int scaleMicrosecondsToPicoseconds(int64_t microseconds,
393365
int64_t *result) {
394-
return checked_int64_mul(microseconds, 1000000, result);
366+
return checked_mul(microseconds, 1000000, result);
395367
}
396368

397369
static inline int64_t scalePicosecondsToFemtoseconds(int64_t picoseconds,
398370
int64_t *result) {
399-
return checked_int64_mul(picoseconds, 1000, result);
371+
return checked_mul(picoseconds, 1000, result);
400372
}
401373

402374
static inline int64_t scalePicosecondsToAttoseconds(int64_t picoseconds,
403375
int64_t *result) {
404-
return checked_int64_mul(picoseconds, 1000000, result);
376+
return checked_mul(picoseconds, 1000000, result);
405377
}
406378

407379
/*
@@ -422,8 +394,8 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
422394
PD_CHECK_OVERFLOW(scaleYearsToMonths(years, &months));
423395

424396
int64_t months_adder;
425-
PD_CHECK_OVERFLOW(checked_int64_sub(dts->month, 1, &months_adder));
426-
PD_CHECK_OVERFLOW(checked_int64_add(months, months_adder, &months));
397+
PD_CHECK_OVERFLOW(checked_sub(dts->month, 1, &months_adder));
398+
PD_CHECK_OVERFLOW(checked_add(months, months_adder, &months));
427399

428400
if (base == NPY_FR_M) {
429401
return months;
@@ -452,23 +424,23 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
452424

453425
int64_t hours;
454426
PD_CHECK_OVERFLOW(scaleDaysToHours(days, &hours));
455-
PD_CHECK_OVERFLOW(checked_int64_add(hours, dts->hour, &hours));
427+
PD_CHECK_OVERFLOW(checked_add(hours, dts->hour, &hours));
456428

457429
if (base == NPY_FR_h) {
458430
return hours;
459431
}
460432

461433
int64_t minutes;
462434
PD_CHECK_OVERFLOW(scaleHoursToMinutes(hours, &minutes));
463-
PD_CHECK_OVERFLOW(checked_int64_add(minutes, dts->min, &minutes));
435+
PD_CHECK_OVERFLOW(checked_add(minutes, dts->min, &minutes));
464436

465437
if (base == NPY_FR_m) {
466438
return minutes;
467439
}
468440

469441
int64_t seconds;
470442
PD_CHECK_OVERFLOW(scaleMinutesToSeconds(minutes, &seconds));
471-
PD_CHECK_OVERFLOW(checked_int64_add(seconds, dts->sec, &seconds));
443+
PD_CHECK_OVERFLOW(checked_add(seconds, dts->sec, &seconds));
472444

473445
if (base == NPY_FR_s) {
474446
return seconds;
@@ -477,15 +449,14 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
477449
if (base == NPY_FR_ms) {
478450
int64_t milliseconds;
479451
PD_CHECK_OVERFLOW(scaleSecondsToMilliseconds(seconds, &milliseconds));
480-
PD_CHECK_OVERFLOW(
481-
checked_int64_add(milliseconds, dts->us / 1000, &milliseconds));
452+
PD_CHECK_OVERFLOW(checked_add(milliseconds, dts->us / 1000, &milliseconds));
482453

483454
return milliseconds;
484455
}
485456

486457
int64_t microseconds;
487458
PD_CHECK_OVERFLOW(scaleSecondsToMicroseconds(seconds, &microseconds));
488-
PD_CHECK_OVERFLOW(checked_int64_add(microseconds, dts->us, &microseconds));
459+
PD_CHECK_OVERFLOW(checked_add(microseconds, dts->us, &microseconds));
489460

490461
if (base == NPY_FR_us) {
491462
return microseconds;
@@ -499,21 +470,20 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
499470
if (microseconds == min_nanoseconds / 1000 - 1) {
500471
// For values within one microsecond of min_nanoseconds, use it as base
501472
// and offset it with nanosecond delta to avoid overflow during scaling.
502-
PD_CHECK_OVERFLOW(checked_int64_add(
473+
PD_CHECK_OVERFLOW(checked_add(
503474
min_nanoseconds, (dts->ps - _NS_MIN_DTS.ps) / 1000, &nanoseconds));
504475
} else {
505476
PD_CHECK_OVERFLOW(
506477
scaleMicrosecondsToNanoseconds(microseconds, &nanoseconds));
507-
PD_CHECK_OVERFLOW(
508-
checked_int64_add(nanoseconds, dts->ps / 1000, &nanoseconds));
478+
PD_CHECK_OVERFLOW(checked_add(nanoseconds, dts->ps / 1000, &nanoseconds));
509479
}
510480

511481
return nanoseconds;
512482
}
513483

514484
int64_t picoseconds;
515485
PD_CHECK_OVERFLOW(scaleMicrosecondsToPicoseconds(microseconds, &picoseconds));
516-
PD_CHECK_OVERFLOW(checked_int64_add(picoseconds, dts->ps, &picoseconds));
486+
PD_CHECK_OVERFLOW(checked_add(picoseconds, dts->ps, &picoseconds));
517487

518488
if (base == NPY_FR_ps) {
519489
return picoseconds;
@@ -523,15 +493,14 @@ npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
523493
int64_t femtoseconds;
524494
PD_CHECK_OVERFLOW(
525495
scalePicosecondsToFemtoseconds(picoseconds, &femtoseconds));
526-
PD_CHECK_OVERFLOW(
527-
checked_int64_add(femtoseconds, dts->as / 1000, &femtoseconds));
496+
PD_CHECK_OVERFLOW(checked_add(femtoseconds, dts->as / 1000, &femtoseconds));
528497
return femtoseconds;
529498
}
530499

531500
if (base == NPY_FR_as) {
532501
int64_t attoseconds;
533502
PD_CHECK_OVERFLOW(scalePicosecondsToAttoseconds(picoseconds, &attoseconds));
534-
PD_CHECK_OVERFLOW(checked_int64_add(attoseconds, dts->as, &attoseconds));
503+
PD_CHECK_OVERFLOW(checked_add(attoseconds, dts->as, &attoseconds));
535504
return attoseconds;
536505
}
537506

pandas/_libs/window/aggregations.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
442442

443443
# Over the first window, observations can only be added
444444
# never removed
445-
if i == 0 or not is_monotonic_increasing_bounds or s < end[i]:
445+
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
446446

447447
prev_value = values[s]
448448
num_consecutive_same_value = 0

pandas/core/indexes/base.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6619,6 +6619,14 @@ def _maybe_cast_listlike_indexer(self, target) -> Index:
66196619
# If we started with a list-like, avoid inference to string dtype if self
66206620
# is object dtype (coercing to string dtype will alter the missing values)
66216621
target_index = Index(target, dtype=self.dtype)
6622+
elif (
6623+
not hasattr(target, "dtype")
6624+
and isinstance(self.dtype, StringDtype)
6625+
and self.dtype.na_value is np.nan
6626+
and using_string_dtype()
6627+
):
6628+
# Fill missing values to ensure consistent missing value representation
6629+
target_index = target_index.fillna(np.nan)
66226630
return target_index
66236631

66246632
@final

0 commit comments

Comments
 (0)