From 62e84ec5894824b7443f6be8e97f249ba1655293 Mon Sep 17 00:00:00 2001
From: Keith Battocchi <kebatt@microsoft.com>
Date: Fri, 22 Mar 2024 23:04:41 -0400
Subject: [PATCH 01/10] Use environments for additional deployment safety

Signed-off-by: Keith Battocchi <kebatt@microsoft.com>
Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 .github/workflows/ci.yml                    |  2 +-
 .github/workflows/publish-documentation.yml |  3 ++-
 .github/workflows/publish-package.yml       | 13 +++++++------
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6d9df3844..1ae55a779 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -346,7 +346,7 @@ jobs:
     uses: ./.github/workflows/publish-package.yml
     with:
       publish: false
-      repository: testpypi
+      environment: test
       # don't have access to env context here for some reason
       ref: ${{ github.event_name == 'workflow_dispatch' && inputs.ref || null }}
       # can't use env context here so need to duplicate expression, but these are true boolean values so don't need extra string logic
diff --git a/.github/workflows/publish-documentation.yml b/.github/workflows/publish-documentation.yml
index 499764cd6..0f9731433 100644
--- a/.github/workflows/publish-documentation.yml
+++ b/.github/workflows/publish-documentation.yml
@@ -91,11 +91,12 @@ jobs:
         
   publish-docs:
     name: Publish documentation
-    runs-on: ubuntu-latest
     needs: create_docs
     permissions:
       id-token: write # needed to publish to Azure
+    environment: ${{ inputs.environment }}
     if: ${{ inputs.publish }}
+    runs-on: ubuntu-latest
     steps:
       - name: Download docs artifact
         uses: actions/download-artifact@v3
diff --git a/.github/workflows/publish-package.yml b/.github/workflows/publish-package.yml
index 7cd4322c2..d9dfbbdca 100644
--- a/.github/workflows/publish-package.yml
+++ b/.github/workflows/publish-package.yml
@@ -8,12 +8,12 @@ on:
         required: false
         default: true
         type: boolean
-      repository:
+      environment:
         description: 'Whether to publish to production PyPI or test PyPI'
         required: false
-        default: pypi
+        default: prod
         type: choice
-        options: [pypi, testpypi]
+        options: [prod, test]
       ref:
         description: 'The git ref to build the package for'
         required: false
@@ -34,10 +34,10 @@ on:
         default: true
         type: boolean
       # choice type only supported for workflow_dispatch, not workflow_call
-      repository:
+      environment:
         description: 'Whether to publish to production PyPI or test PyPI'
         required: false
-        default: pypi
+        default: prod
         type: string
       ref:
         description: 'The git ref to build the package for'
@@ -119,6 +119,7 @@ jobs:
     needs: [merge]
     permissions:
       id-token: write
+    environment: ${{ inputs.environment }}
     if: ${{ inputs.publish }}
     runs-on: ubuntu-latest
     steps:
@@ -130,4 +131,4 @@ jobs:
       - name: Upload wheels and sdist to package index
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
-          repository-url: ${{ inputs.repository == 'testpypi' && 'https://test.pypi.org/legacy/' || '' }}
+          repository-url: ${{ inputs.environment == 'test' && 'https://test.pypi.org/legacy/' || '' }}

From 4309e847cd1602680940a9ad2b9201ba07575de3 Mon Sep 17 00:00:00 2001
From: Gabriel Daiha <gabriel.alves@picpay.com>
Date: Fri, 10 May 2024 13:38:29 -0300
Subject: [PATCH 02/10] Avoid double impurity calculation

Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 econml/tree/_criterion.pyx | 24 ++++++++++++++----------
 econml/tree/_splitter.pyx  | 15 ++++++++++-----
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/econml/tree/_criterion.pyx b/econml/tree/_criterion.pyx
index d0b12a92f..c0b00d351 100644
--- a/econml/tree/_criterion.pyx
+++ b/econml/tree/_criterion.pyx
@@ -209,7 +209,9 @@ cdef class Criterion:
         return (- self.weighted_n_right * impurity_right
                 - self.weighted_n_left * impurity_left)
 
-    cdef double impurity_improvement(self, double impurity) nogil:
+    cdef double impurity_improvement(self, float64_t impurity_parent,
+                                     float64_t impurity_left,
+                                     float64_t impurity_right) nogil:
         """Compute the improvement in impurity
         This method computes the improvement in impurity when a split occurs.
         The weighted impurity improvement equation is the following:
@@ -218,22 +220,24 @@ cdef class Criterion:
         where N is the total number of samples, N_t is the number of samples
         at the current node, N_t_L is the number of samples in the left child,
         and N_t_R is the number of samples in the right child,
+
         Parameters
         ----------
-        impurity : double
-            The initial impurity of the node before the split
+        impurity_parent : float64_t
+            The initial impurity of the parent node before the split
+
+        impurity_left : float64_t
+            The impurity of the left child
+
+        impurity_right : float64_t
+            The impurity of the right child
+
         Return
         ------
         double : improvement in impurity after the split occurs
         """
-
-        cdef double impurity_left
-        cdef double impurity_right
-
-        self.children_impurity(&impurity_left, &impurity_right)
-
         return ((self.weighted_n_node_samples / self.weighted_n_samples) *
-                (impurity - (self.weighted_n_right / 
+                (impurity_parent - (self.weighted_n_right / 
                              self.weighted_n_node_samples * impurity_right)
                           - (self.weighted_n_left / 
                              self.weighted_n_node_samples * impurity_left)))
diff --git a/econml/tree/_splitter.pyx b/econml/tree/_splitter.pyx
index 557dc6e59..fb2d74fb0 100644
--- a/econml/tree/_splitter.pyx
+++ b/econml/tree/_splitter.pyx
@@ -615,11 +615,6 @@ cdef class BestSplitter(Splitter):
             if self.honest:
                 self.criterion_val.reset()
                 self.criterion_val.update(best.pos_val)
-            # Calculate a more accurate version of impurity improvement using the input baseline impurity
-            # passed here by the TreeBuilder. The TreeBuilder uses the proxy_node_impurity() to calculate
-            # this baseline if self.is_children_impurity_proxy(), else uses the call to children_impurity()
-            # on the parent node, when that node was split.
-            best.improvement = self.criterion.impurity_improvement(impurity)
             # if we need children impurities by the builder, then we populate these entries
             # otherwise, we leave them blank to avoid the extra computation.
             if not self.is_children_impurity_proxy():
@@ -630,6 +625,16 @@ cdef class BestSplitter(Splitter):
                 else:
                     best.impurity_left_val = best.impurity_left
                     best.impurity_right_val = best.impurity_right
+                
+                # Calculate a more accurate version of impurity improvement using the input baseline impurity
+                # passed here by the TreeBuilder. The TreeBuilder uses the proxy_node_impurity() to calculate
+                # this baseline if self.is_children_impurity_proxy(), else uses the call to children_impurity()
+                # on the parent node, when that node was split.
+                best.improvement = self.criterion.impurity_improvement(impurity, 
+                best.impurity_left, best.impurity_right)
+
+                if self.honest:
+                    
 
         # Respect invariant for constant features: the original order of
         # element in features[:n_known_constants] must be preserved for sibling

From c00b8cf5e07893a4de227cdb7da428d850759c5e Mon Sep 17 00:00:00 2001
From: Gabriel Daiha <gabriel.alves@picpay.com>
Date: Fri, 10 May 2024 13:41:04 -0300
Subject: [PATCH 03/10] moved comment inside criterion.pyx

Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 econml/tree/_splitter.pyx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/econml/tree/_splitter.pyx b/econml/tree/_splitter.pyx
index fb2d74fb0..426a7e7b4 100644
--- a/econml/tree/_splitter.pyx
+++ b/econml/tree/_splitter.pyx
@@ -615,6 +615,10 @@ cdef class BestSplitter(Splitter):
             if self.honest:
                 self.criterion_val.reset()
                 self.criterion_val.update(best.pos_val)
+            # Calculate a more accurate version of impurity improvement using the input baseline impurity
+            # passed here by the TreeBuilder. The TreeBuilder uses the proxy_node_impurity() to calculate
+            # this baseline if self.is_children_impurity_proxy(), else uses the call to children_impurity()
+            # on the parent node, when that node was split.
             # if we need children impurities by the builder, then we populate these entries
             # otherwise, we leave them blank to avoid the extra computation.
             if not self.is_children_impurity_proxy():
@@ -626,10 +630,6 @@ cdef class BestSplitter(Splitter):
                     best.impurity_left_val = best.impurity_left
                     best.impurity_right_val = best.impurity_right
                 
-                # Calculate a more accurate version of impurity improvement using the input baseline impurity
-                # passed here by the TreeBuilder. The TreeBuilder uses the proxy_node_impurity() to calculate
-                # this baseline if self.is_children_impurity_proxy(), else uses the call to children_impurity()
-                # on the parent node, when that node was split.
                 best.improvement = self.criterion.impurity_improvement(impurity, 
                 best.impurity_left, best.impurity_right)
 

From 3164b7c12fef41403431ffa53ecbeb6186290163 Mon Sep 17 00:00:00 2001
From: Gabriel Daiha <gabriel.alves@picpay.com>
Date: Fri, 10 May 2024 13:42:10 -0300
Subject: [PATCH 04/10] removed unused self.honest condition

Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 econml/tree/_splitter.pyx | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/econml/tree/_splitter.pyx b/econml/tree/_splitter.pyx
index 426a7e7b4..dd30e7586 100644
--- a/econml/tree/_splitter.pyx
+++ b/econml/tree/_splitter.pyx
@@ -631,10 +631,7 @@ cdef class BestSplitter(Splitter):
                     best.impurity_right_val = best.impurity_right
                 
                 best.improvement = self.criterion.impurity_improvement(impurity, 
-                best.impurity_left, best.impurity_right)
-
-                if self.honest:
-                    
+                best.impurity_left, best.impurity_right)                    
 
         # Respect invariant for constant features: the original order of
         # element in features[:n_known_constants] must be preserved for sibling

From c88a3488eb0d1c97a1305bcab94cf71815a69ba4 Mon Sep 17 00:00:00 2001
From: Keith Battocchi <kebatt@microsoft.com>
Date: Fri, 3 May 2024 00:26:52 -0400
Subject: [PATCH 05/10] Use macos-12 instead of macos-latest to avoid ARM until
 dependencies are available.

Signed-off-by: Keith Battocchi <kebatt@microsoft.com>
Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 1ae55a779..4e02aa507 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -201,12 +201,12 @@ jobs:
     if: ${{ needs.eval.outputs.testCode == 'True' }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        os: [ubuntu-latest, windows-latest, macos-12]
         python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
         kind: [serial, other, dml, main, treatment, ray]
         exclude:
           # Serial tests fail randomly on mac sometimes, so we don't run them there
-          - os: macos-latest
+          - os: macos-12
             kind: serial
           # Ray tests run out of memory on Windows
           - os: windows-latest

From 390287c03dbc60ed796fb8b56a3289aa23a46b64 Mon Sep 17 00:00:00 2001
From: gdaiha <110778471+gdaiha@users.noreply.github.com>
Date: Tue, 7 May 2024 05:23:11 -0300
Subject: [PATCH 06/10] Optimizing NormalInferenceResults confidence interval
 method speed (#879)

* Fixed normal inference results confidence interval unnecessary loop

Signed-off-by: gdaiha <110778471+gdaiha@users.noreply.github.com>
Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 econml/inference/_inference.py            | 17 +++------------
 econml/sklearn_extensions/linear_model.py | 26 +++++++++--------------
 2 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/econml/inference/_inference.py b/econml/inference/_inference.py
index 23658f846..438151943 100644
--- a/econml/inference/_inference.py
+++ b/econml/inference/_inference.py
@@ -1066,14 +1066,9 @@ def conf_int(self, alpha=0.05):
         """
         if self.stderr is None:
             raise AttributeError("Only point estimates are available!")
-        if np.isscalar(self.point_estimate):
+        else:
             return _safe_norm_ppf(alpha / 2, loc=self.point_estimate, scale=self.stderr), \
                 _safe_norm_ppf(1 - alpha / 2, loc=self.point_estimate, scale=self.stderr)
-        else:
-            return np.array([_safe_norm_ppf(alpha / 2, loc=p, scale=err)
-                             for p, err in zip(self.point_estimate, self.stderr)]), \
-                np.array([_safe_norm_ppf(1 - alpha / 2, loc=p, scale=err)
-                          for p, err in zip(self.point_estimate, self.stderr)])
 
     def pvalue(self, value=0):
         """
@@ -1398,14 +1393,8 @@ def conf_int_mean(self, *, alpha=None):
         alpha = self.alpha if alpha is None else alpha
         mean_point = self.mean_point
         stderr_mean = self.stderr_mean
-        if np.isscalar(mean_point):
-            return (_safe_norm_ppf(alpha / 2, loc=mean_point, scale=stderr_mean),
-                    _safe_norm_ppf(1 - alpha / 2, loc=mean_point, scale=stderr_mean))
-        else:
-            return np.array([_safe_norm_ppf(alpha / 2, loc=p, scale=err)
-                             for p, err in zip(mean_point, stderr_mean)]), \
-                np.array([_safe_norm_ppf(1 - alpha / 2, loc=p, scale=err)
-                          for p, err in zip(mean_point, stderr_mean)])
+        return (_safe_norm_ppf(alpha / 2, loc=mean_point, scale=stderr_mean),
+                _safe_norm_ppf(1 - alpha / 2, loc=mean_point, scale=stderr_mean))
 
     @property
     def std_point(self):
diff --git a/econml/sklearn_extensions/linear_model.py b/econml/sklearn_extensions/linear_model.py
index c95f392de..a16b3d852 100644
--- a/econml/sklearn_extensions/linear_model.py
+++ b/econml/sklearn_extensions/linear_model.py
@@ -1627,10 +1627,8 @@ def coef__interval(self, alpha=0.05):
         coef__interval : {tuple ((p, d) array, (p,d) array), tuple ((d,) array, (d,) array)}
             The lower and upper bounds of the confidence interval of the coefficients
         """
-        return np.array([_safe_norm_ppf(alpha / 2, loc=p, scale=err)
-                         for p, err in zip(self.coef_, self.coef_stderr_)]), \
-            np.array([_safe_norm_ppf(1 - alpha / 2, loc=p, scale=err)
-                      for p, err in zip(self.coef_, self.coef_stderr_)])
+        return (_safe_norm_ppf(alpha / 2, loc=self.coef_, scale=self.coef_stderr_),
+                _safe_norm_ppf(1 - alpha / 2, loc=self.coef_, scale=self.coef_stderr_))
 
     def intercept__interval(self, alpha=0.05):
         """
@@ -1651,14 +1649,8 @@ def intercept__interval(self, alpha=0.05):
             return (0 if self._n_out == 0 else np.zeros(self._n_out)), \
                 (0 if self._n_out == 0 else np.zeros(self._n_out))
 
-        if self._n_out == 0:
-            return _safe_norm_ppf(alpha / 2, loc=self.intercept_, scale=self.intercept_stderr_), \
-                _safe_norm_ppf(1 - alpha / 2, loc=self.intercept_, scale=self.intercept_stderr_)
-        else:
-            return np.array([_safe_norm_ppf(alpha / 2, loc=p, scale=err)
-                             for p, err in zip(self.intercept_, self.intercept_stderr_)]), \
-                np.array([_safe_norm_ppf(1 - alpha / 2, loc=p, scale=err)
-                          for p, err in zip(self.intercept_, self.intercept_stderr_)])
+        return (_safe_norm_ppf(alpha / 2, loc=self.intercept_, scale=self.intercept_stderr_),
+                _safe_norm_ppf(1 - alpha / 2, loc=self.intercept_, scale=self.intercept_stderr_))
 
     def predict_interval(self, X, alpha=0.05):
         """
@@ -1677,10 +1669,12 @@ def predict_interval(self, X, alpha=0.05):
         prediction_intervals : {tuple ((n,) array, (n,) array), tuple ((n,p) array, (n,p) array)}
             The lower and upper bounds of the confidence intervals of the predicted mean outcomes
         """
-        return np.array([_safe_norm_ppf(alpha / 2, loc=p, scale=err)
-                         for p, err in zip(self.predict(X), self.prediction_stderr(X))]), \
-            np.array([_safe_norm_ppf(1 - alpha / 2, loc=p, scale=err)
-                      for p, err in zip(self.predict(X), self.prediction_stderr(X))])
+
+        pred = self.predict(X)
+        pred_stderr = self.prediction_stderr(X)
+
+        return (_safe_norm_ppf(alpha / 2, loc=pred, scale=pred_stderr),
+                _safe_norm_ppf(1 - alpha / 2, loc=pred, scale=pred_stderr))
 
 
 class StatsModelsLinearRegression(_StatsModelsWrapper):

From 546ecc8f8bb9788ea1b7fd348dd41b119d9c7db0 Mon Sep 17 00:00:00 2001
From: Kevin Gao <kevin.leo.gao@gmail.com>
Date: Tue, 7 May 2024 14:24:43 -0400
Subject: [PATCH 07/10] Fix flaky RScorer test (#876)

Increase sample size to reduce errors

Signed-off-by: kgao <kevin.leo.gao@gmail.com>
Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 econml/tests/test_rscorer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/econml/tests/test_rscorer.py b/econml/tests/test_rscorer.py
index 9ca2d5c50..4f41b5016 100644
--- a/econml/tests/test_rscorer.py
+++ b/econml/tests/test_rscorer.py
@@ -21,9 +21,9 @@ def _fit_model(name, model, Y, T, X):
 class TestRScorer(unittest.TestCase):
 
     def _get_data(self):
-        X = np.random.normal(0, 1, size=(2000, 2))
-        T = np.random.binomial(1, .5, size=(2000,))
-        y = X[:, 0] * T + np.random.normal(size=(2000,))
+        X = np.random.normal(0, 1, size=(100000, 2))
+        T = np.random.binomial(1, .5, size=(100000,))
+        y = X[:, 0] * T + np.random.normal(size=(100000,))
         return y, T, X, X[:, 0]
 
     def test_comparison(self):

From 727bba92cf9ee6cd59618b985ffa9adbf040e184 Mon Sep 17 00:00:00 2001
From: Gabriel Daiha <gabriel.alves@picpay.com>
Date: Fri, 10 May 2024 13:59:05 -0300
Subject: [PATCH 08/10] fixed criterion.pxd with new signature

Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 econml/tree/_criterion.pxd | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/econml/tree/_criterion.pxd b/econml/tree/_criterion.pxd
index 2eb6b8ba3..c0125d788 100644
--- a/econml/tree/_criterion.pxd
+++ b/econml/tree/_criterion.pxd
@@ -78,7 +78,9 @@ cdef class Criterion:
     cdef void node_value(self, double* dest) nogil
     cdef void node_jacobian(self, double* dest) nogil
     cdef void node_precond(self, double* dest) nogil
-    cdef double impurity_improvement(self, double impurity) nogil
+    cdef double impurity_improvement(self, double impurity_parent,
+                                     double impurity_left,
+                                     double impurity_right) nogil
     cdef double proxy_impurity_improvement(self) nogil
     cdef double min_eig_left(self) nogil
     cdef double min_eig_right(self) nogil

From 0df03b6d2d3ccb94294792ce16368a009f856d6a Mon Sep 17 00:00:00 2001
From: Gabriel Daiha <gabriel.alves@picpay.com>
Date: Fri, 10 May 2024 14:01:47 -0300
Subject: [PATCH 09/10] fixed types in impurity_improvement definition in
 criterion

Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 econml/tree/_criterion.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/econml/tree/_criterion.pyx b/econml/tree/_criterion.pyx
index c0b00d351..a2f9acdb3 100644
--- a/econml/tree/_criterion.pyx
+++ b/econml/tree/_criterion.pyx
@@ -209,9 +209,9 @@ cdef class Criterion:
         return (- self.weighted_n_right * impurity_right
                 - self.weighted_n_left * impurity_left)
 
-    cdef double impurity_improvement(self, float64_t impurity_parent,
-                                     float64_t impurity_left,
-                                     float64_t impurity_right) nogil:
+    cdef double impurity_improvement(self, double impurity_parent,
+                                     double impurity_left,
+                                     double impurity_right) nogil:
         """Compute the improvement in impurity
         This method computes the improvement in impurity when a split occurs.
         The weighted impurity improvement equation is the following:

From 1d44cef36fe2736d333f52f5bccbc87b70a41c2c Mon Sep 17 00:00:00 2001
From: Gabriel Daiha <gabriel.alves@picpay.com>
Date: Fri, 10 May 2024 14:32:18 -0300
Subject: [PATCH 10/10] removed best.improvement from
 is_children_impurity_proxy condition

Signed-off-by: Gabriel Daiha <gabriel.alves@picpay.com>
---
 econml/tree/_splitter.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/econml/tree/_splitter.pyx b/econml/tree/_splitter.pyx
index dd30e7586..d022c2038 100644
--- a/econml/tree/_splitter.pyx
+++ b/econml/tree/_splitter.pyx
@@ -630,8 +630,8 @@ cdef class BestSplitter(Splitter):
                     best.impurity_left_val = best.impurity_left
                     best.impurity_right_val = best.impurity_right
                 
-                best.improvement = self.criterion.impurity_improvement(impurity, 
-                best.impurity_left, best.impurity_right)                    
+            best.improvement = self.criterion.impurity_improvement(impurity, 
+            best.impurity_left, best.impurity_right)                    
 
         # Respect invariant for constant features: the original order of
         # element in features[:n_known_constants] must be preserved for sibling