diff --git a/ffx/core/build_strategy.py b/ffx/core/build_strategy.py
index 8132441..6da7654 100644
--- a/ffx/core/build_strategy.py
+++ b/ffx/core/build_strategy.py
@@ -26,6 +26,9 @@ def __init__(self, approach):
         # eps -- Length of the path. eps=1e-3 means that alpha_min / alpha_max
         # = 1e-3.
         self._eps = 1e-70
+
+        # Optimization strategy: 'pathwise' (original) or 'cv' (improved)
+        self._optimization_strategy = 'cv'
 
         # will use all if 'nonlin1', else []
         self.all_nonlin_ops = [OP_ABS, OP_LOG10]
@@ -63,3 +66,7 @@ def l1_ratio(self):
     @property
     def num_alphas(self):
         return self._num_alphas
+
+    @property
+    def optimization_strategy(self):
+        return self._optimization_strategy
diff --git a/ffx/core/model_factories.py b/ffx/core/model_factories.py
index e639281..9ea6a74 100644
--- a/ffx/core/model_factories.py
+++ b/ffx/core/model_factories.py
@@ -21,6 +21,7 @@
 )
 from .models import ConstantModel, FFXModel
 from .utils import ElasticNetWithTimeout, nmse, nondominated_indices_2d, y_is_poor
+from sklearn.linear_model import ElasticNetCV, LassoCV
 
 
 class MultiFFXModelFactory:
@@ -444,13 +445,23 @@ def _pathwiseLearn(
         verbose=False,
         **fit_params,
     ):
-        """Adapted from enet_path() in sklearn.linear_model.
-        http://scikit-learn.sourceforge.net/modules/linear_model.html
-        Compute Elastic-Net path with coordinate descent.
-        Returns list of model (or None if failure)."""
+        """Pathwise learning with a CV-based or traditional strategy.
+
+        Uses cross-validation for alpha selection when ss.optimization_strategy
+        is 'cv' (better convergence in fewer iterations); falls back to the
+        traditional pathwise approach when it is 'pathwise'.
+        Returns a list of models, or None on failure."""
         if verbose:
-            print("  Pathwise learn: begin. max_num_bases=%d" % max_num_bases)
-        max_iter = 5000  # default 5000. magic number.
+            print("  Pathwise learn: begin. max_num_bases=%d, strategy=%s" %
+                  (max_num_bases, ss.optimization_strategy))
+
+        # Use the improved CV-based optimization if enabled
+        if ss.optimization_strategy == 'cv':
+            return self._cvBasedLearn(
+                ss, varnames, bases, X_orig_regress, y_orig,
+                max_num_bases, target_nmse, verbose, **fit_params
+            )
+        max_iter = 10000  # increased from 5000 to help convergence
 
         # Condition X and y:
         # -"unbias" = rescale so that (mean=0, stddev=1) -- subtract each row's
@@ -496,6 +507,7 @@ def _pathwiseLearn(
             l1_ratio=ss.l1_ratio,
             fit_intercept=False,
             max_iter=max_iter,
+            tol=1e-2,  # looser tolerance: each path step only needs a rough fit
             **fit_params,
         )
         try:
@@ -579,6 +591,155 @@ def _pathwiseLearn(
             print("  Pathwise learn: done")
         return models
 
+    def _cvBasedLearn(
+        self,
+        ss,
+        varnames,
+        bases,
+        X_orig_regress,
+        y_orig,
+        max_num_bases,
+        target_nmse,
+        verbose=False,
+        **fit_params,
+    ):
+        """Improved optimization using cross-validation for better convergence.
+
+        This method replaces the traditional pathwise approach with:
+        1. Cross-validation for automatic alpha selection
+        2. An adaptive tolerance based on data characteristics
+        3. Fewer iterations to convergence
+        4. A less aggressive L1 ratio for numerical stability
+        """
+        if verbose:
+            print("  CV-based learn: begin. max_num_bases=%d" % max_num_bases)
+
+        # Condition X and y (same as the pathwise method)
+        (X_unbiased, y_unbiased, X_avgs, X_stds, y_avg, y_std) = self._unbiasedXy(
+            X_orig_regress, y_orig
+        )
+        X_unbiased = numpy.asfortranarray(X_unbiased)
+
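+        # Heuristic: scale the coordinate-descent tolerance with the spread of
+        # y and a rough conditioning estimate, clamped to [1e-6, 1e-2], so that
+        # well-conditioned problems get a tight tol while ill-conditioned ones
+        # are allowed to stop earlier.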
max_num_bases=%d" % max_num_bases) + + # Condition X and y (same as original method) + (X_unbiased, y_unbiased, X_avgs, X_stds, y_avg, y_std) = self._unbiasedXy( + X_orig_regress, y_orig + ) + X_unbiased = numpy.asfortranarray(X_unbiased) + + # Compute adaptive tolerance based on data characteristics + data_scale = numpy.std(y_unbiased) + n_samples, n_features = X_unbiased.shape + condition_est = min(1e6, n_features * 10) # Simple condition number estimate + adaptive_tol = max(1e-6, min(1e-2, 1e-4 * data_scale * numpy.sqrt(condition_est / 100))) + + if verbose: + print(f" CV-based learn: adaptive tolerance = {adaptive_tol:.2e}") + + # Use less aggressive L1 ratio for better convergence + adjusted_l1_ratio = min(0.7, ss.l1_ratio) + if verbose and adjusted_l1_ratio != ss.l1_ratio: + print(f" CV-based learn: adjusted l1_ratio from {ss.l1_ratio} to {adjusted_l1_ratio}") + + # Prepare fit params + if "precompute" not in fit_params or fit_params["precompute"] is True: + fit_params["precompute"] = numpy.dot(X_unbiased.T, X_unbiased) + + # Create alpha range - fewer, smarter alphas + n_samples = X_unbiased.shape[0] + vals = numpy.dot(X_unbiased.T, y_unbiased) + vals = [val for val in vals if not numpy.isnan(val)] + if vals: + alpha_max = numpy.abs(max(vals) / (n_samples * adjusted_l1_ratio)) + else: + alpha_max = 1.0 + + # Use logarithmic spacing with focus on transition region + alphas = numpy.logspace(numpy.log10(alpha_max * 1e-4), numpy.log10(alpha_max), 50) + + start_time = time.time() + try: + # Choose CV method based on L1 ratio + if adjusted_l1_ratio >= 0.99: + # Use LassoCV for pure L1 case + cv_model = LassoCV( + alphas=alphas, + cv=5, + tol=adaptive_tol, + max_iter=10000, + fit_intercept=False + ) + else: + # Use ElasticNetCV for mixed case + cv_model = ElasticNetCV( + alphas=alphas, + l1_ratio=adjusted_l1_ratio, + cv=5, + tol=adaptive_tol, + max_iter=10000, + fit_intercept=False + ) + + if verbose: + print(f" CV-based learn: fitting with {len(alphas)} alphas") + + cv_model.fit(X_unbiased, y_unbiased) + + # Get coefficients and create model + cur_unbiased_coefs = cv_model.coef_.copy() + if cur_unbiased_coefs.shape == tuple(): + cur_unbiased_coefs = cur_unbiased_coefs.reshape((1,)) + + # Check if we meet stopping criteria + num_bases = len(numpy.nonzero(cur_unbiased_coefs)[0]) + nmse_unbiased = nmse( + numpy.dot(cur_unbiased_coefs, X_unbiased.T), + y_unbiased, + min(y_unbiased), + max(y_unbiased), + ) + + if verbose: + print(f" CV-based learn: best alpha = {cv_model.alpha_:.6e}") + print(f" CV-based learn: num_bases = {num_bases}, nmse = {nmse_unbiased:.6f}") + print(f" CV-based learn: time = {time.time() - start_time:.2f}s") + + # Early stopping checks + if numpy.isinf(nmse_unbiased): + if verbose: + print(" CV-based learn: Early stop because nmse is inf") + return None + + if nmse_unbiased < target_nmse: + if verbose: + print(" CV-based learn: Target NMSE achieved") + + if num_bases > max_num_bases: + if verbose: + print(f" CV-based learn: Warning: {num_bases} bases > max {max_num_bases}") + + # Convert to FFX model format + coefs = self._rebiasCoefs( + [0.0] + list(cur_unbiased_coefs), X_stds, X_avgs, y_std, y_avg + ) + (coefs_n, bases_n, coefs_d, bases_d) = self._allocateToNumerDenom( + ss, bases, coefs + ) + model = FFXModel(coefs_n, bases_n, coefs_d, bases_d, varnames) + + if verbose: + print(" CV-based learn: done") + + return [model] + + except Exception as e: + if verbose: + print(f" CV-based learn failed: {e}") + return None + def _allocateToNumerDenom(self, ss, bases, coefs): 
"""Prune out nonzero coefficients/bases. Allocate to numerator vs. denominator.""" if ss.include_denominator():