pandas-dev · eicchen · Oct 18, 2025 · Oct 19, 2025 · Oct 19, 2025 · Oct 19, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -170,6 +170,7 @@ Other enhancements
 - :func:`read_spss` now supports kwargs to be passed to pyreadstat (:issue:`56356`)
 - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
 - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`).
+- :meth:`DataFrame.corr` now uses two pass Welford's Method to improve numerical stability with precision for very large/small values (:issue:`59652`)
 - :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`)
 - :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`)
 - Added missing parameter ``weights`` in :meth:`DataFrame.plot.kde` for the estimation of the PDF (:issue:`59337`)

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -345,7 +345,8 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
         float64_t[:, ::1] result
         uint8_t[:, :] mask
         int64_t nobs = 0
-        float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy, val
+        float64_t vx, vy, meanx, meany, divisor, ssqdmx, ssqdmy, cxy, val
+        float64_t sumx, sumy
 
     N, K = (<object>mat).shape
     if minp is None:
@@ -358,39 +359,42 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
 
     with nogil:
         for xi in range(K):
-            for yi in range(xi + 1):
+            for yi in range(xi+1):
                 # Welford's method for the variance-calculation
                 # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
-                nobs = ssqdmx = ssqdmy = covxy = meanx = meany = 0
+                # Changed to Welford's two-pass for improved numeric stability
+                nobs = ssqdmx = ssqdmy = cxy = meanx = meany = 0
+                sumx = sumy = 0
                 for i in range(N):
                     if mask[i, xi] and mask[i, yi]:
-                        vx = mat[i, xi]
-                        vy = mat[i, yi]
+                        sumx += mat[i, xi]
+                        sumy += mat[i, yi]
                         nobs += 1
-                        dx = vx - meanx
-                        dy = vy - meany
-                        meanx += 1. / nobs * dx
-                        meany += 1. / nobs * dy
-                        ssqdmx += (vx - meanx) * dx
-                        ssqdmy += (vy - meany) * dy
-                        covxy += (vx - meanx) * dy
-
                 if nobs < minpv:
                     result[xi, yi] = result[yi, xi] = NaN
+                    continue
+                meanx = sumx / nobs
+                meany = sumy / nobs
+                for i in range(N):
+                    if mask[i, xi] and mask[i, yi]:
+                        vx = mat[i, xi] - meanx
+                        vy = mat[i, yi] - meany
+                        cxy += vx * vy
+                        ssqdmx += vx * vx
+                        ssqdmy += vy * vy
+                divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy)
+
+                # clip `covxy / divisor` to ensure coeff is within bounds
+                if divisor != 0:
+                    val = cxy / divisor
+                    if not cov:
+                        if val > 1.0:
+                            val = 1.0
+                        elif val < -1.0:
+                            val = -1.0
+                    result[xi, yi] = result[yi, xi] = val
                 else:
-                    divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy)
-
-                    # clip `covxy / divisor` to ensure coeff is within bounds
-                    if divisor != 0:
-                        val = covxy / divisor
-                        if not cov:
-                            if val > 1.0:
-                                val = 1.0
-                            elif val < -1.0:
-                                val = -1.0
-                        result[xi, yi] = result[yi, xi] = val
-                    else:
-                        result[xi, yi] = result[yi, xi] = NaN
+                    result[xi, yi] = result[yi, xi] = NaN
 
     return result.base
 

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -210,11 +210,15 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method):
     @pytest.mark.parametrize("length", [2, 20, 200, 2000])
     def test_corr_for_constant_columns(self, length):
         # GH: 37448
+        # now matches numpy behavior
         df = DataFrame(length * [[0.4, 0.1]], columns=["A", "B"])
         result = df.corr()
-        expected = DataFrame(
-            {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, index=["A", "B"]
-        )
+        if length == 2:
+            expected = DataFrame(
+                {"A": [np.nan, np.nan], "B": [np.nan, np.nan]}, index=["A", "B"]
+            )
+        else:
+            expected = DataFrame({"A": [1.0, 1.0], "B": [1.0, 1.0]}, index=["A", "B"])
         tm.assert_frame_equal(result, expected)
 
     def test_calc_corr_small_numbers(self):
@@ -493,3 +497,12 @@ def test_cov_with_missing_values(self):
         result2 = df.dropna().cov()
         tm.assert_frame_equal(result1, expected)
         tm.assert_frame_equal(result2, expected)
+
+    def test_close_corr(self):
+        values = np.array(
+            [[30.0, 30.100000381469727], [116.80000305175781, 116.8000030517578]]
+        )
+        df = DataFrame(values.T)
+        result = df.corr(method="pearson")
+        expected = DataFrame(np.corrcoef(values[0], values[1]))
+        tm.assert_frame_equal(result, expected)