From 4b8b8341bc66183f67d89506a691e1a00e99a8d6 Mon Sep 17 00:00:00 2001 From: Noah Amsel Date: Wed, 8 Apr 2026 09:58:52 -0700 Subject: [PATCH] revert to use_gram_newton_schulz = False due to bug --- dion/dion2.py | 2 +- dion/megabatch_base.py | 2 +- dion/muon.py | 2 +- dion/normuon.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dion/dion2.py b/dion/dion2.py index 8837aea..3ad2c93 100644 --- a/dion/dion2.py +++ b/dion/dion2.py @@ -61,7 +61,7 @@ def __init__( flatten: bool = False, use_triton: bool = False, use_polar_express: bool = True, - use_gram_newton_schulz: bool = True, + use_gram_newton_schulz: bool = False, newton_schulz_func: Optional[Callable] = None, verbose: bool = False, ): diff --git a/dion/megabatch_base.py b/dion/megabatch_base.py index 519e6ce..47e8062 100644 --- a/dion/megabatch_base.py +++ b/dion/megabatch_base.py @@ -34,7 +34,7 @@ def __init__( distributed_mesh: Optional[Union[DeviceMesh, ProcessGroup]], algo_name: str, defaults: dict, - use_gram_newton_schulz: bool = True, + use_gram_newton_schulz: bool = False, use_triton: bool = False, use_polar_express: bool = True, newton_schulz_func: Optional[Callable] = None, diff --git a/dion/muon.py b/dion/muon.py index a320052..4a96b9d 100644 --- a/dion/muon.py +++ b/dion/muon.py @@ -61,7 +61,7 @@ def __init__( nesterov: bool = False, adjust_lr: Optional[str] = "spectral_norm", flatten: bool = False, - use_gram_newton_schulz: bool = True, + use_gram_newton_schulz: bool = False, use_triton: bool = False, use_polar_express: bool = True, newton_schulz_func: Optional[Callable] = None, diff --git a/dion/normuon.py b/dion/normuon.py index a419186..34ccce3 100644 --- a/dion/normuon.py +++ b/dion/normuon.py @@ -64,7 +64,7 @@ def __init__( nesterov: bool = False, adjust_lr: Optional[str] = "spectral_norm", flatten: bool = False, - use_gram_newton_schulz: bool = True, + use_gram_newton_schulz: bool = False, use_triton: bool = False, use_polar_express: bool = True, newton_schulz_func: Optional[Callable] = None,