From abe7d049baec6cb17c7062585f4a52a777cf9151 Mon Sep 17 00:00:00 2001
From: EC2 Default User <ec2-user@ip-172-31-41-80.ec2.internal>
Date: Tue, 27 May 2025 05:23:46 +0000
Subject: [PATCH] Add Weights & Biases (wandb) logging to model comparison baseline

---
 .gitignore                              |   3 +
 requirements.txt                        | 159 ++++++++++++++++-------
 results_metrics_finetune.json           |  32 +++--
 results_metrics_linear_probe.json       |  33 +++--
 src/models/model_comparison_baseline.py | 161 ++++++++++++++++++++----
 5 files changed, 289 insertions(+), 99 deletions(-)

diff --git a/.gitignore b/.gitignore
index e82aa1e..cfcf652 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,9 @@ simclr_linear_probe/
 **/rng_state.pth
 **/training_args.bin
 
+# Weights & Biases
+wandb/
+
 # Python
 __pycache__/
 *.py[cod]
diff --git a/requirements.txt b/requirements.txt
index d842fa4..6a2ce92 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,60 +1,125 @@
-asttokens
-colorama
-comm
+accelerate==1.7.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.0
+aiosignal==1.3.2
+annotated-types==0.7.0
+asttokens==3.0.0
+async-timeout==5.0.1
+attrs==25.3.0
+certifi==2025.4.26
+charset-normalizer==3.4.2
+click==8.1.8
+colorama==0.4.6
+comm==0.2.2
 contourpy==1.3.0
 cycler==0.12.1
-debugpy
-decorator
-exceptiongroup
-executing
+datasets==3.6.0
+debugpy==1.8.14
+decorator==5.2.1
+dill==0.3.8
+docker-pycreds==0.4.0
+eval_type_backport==0.2.2
+exceptiongroup==1.3.0
+executing==2.2.0
+filelock==3.18.0
 fonttools==4.54.1
-importlib_metadata
-ipykernel
-ipython
-jedi
+frozenlist==1.6.0
+fsspec==2025.3.0
+gitdb==4.0.12
+GitPython==3.1.44
+hf-xet==1.1.2
+huggingface-hub==0.32.0
+idna==3.10
+importlib_metadata==8.7.0
+importlib_resources==6.5.2
+iniconfig==2.1.0
+ipykernel==6.29.5
+ipython==8.18.1
+jedi==0.19.2
+Jinja2==3.1.6
 joblib==1.4.2
-jupyter_client
-jupyter_core
+jupyter_client==8.6.3
+jupyter_core==5.7.2
 kiwisolver==1.4.7
+MarkupSafe==3.0.2
 matplotlib==3.9.2
-matplotlib-inline
-nest_asyncio
-numpy
-packaging
+matplotlib-inline==0.1.7
+mpmath==1.3.0
+multidict==6.4.4
+multiprocess==0.70.16
+nest-asyncio==1.6.0
+networkx==3.2.1
+numpy==2.0.2
+nvidia-cublas-cu12==12.6.4.1
+nvidia-cuda-cupti-cu12==12.6.80
+nvidia-cuda-nvrtc-cu12==12.6.77
+nvidia-cuda-runtime-cu12==12.6.77
+nvidia-cudnn-cu12==9.5.1.17
+nvidia-cufft-cu12==11.3.0.4
+nvidia-cufile-cu12==1.11.1.6
+nvidia-curand-cu12==10.3.7.77
+nvidia-cusolver-cu12==11.7.1.2
+nvidia-cusparse-cu12==12.5.4.2
+nvidia-cusparselt-cu12==0.6.3
+nvidia-ml-py==12.575.51
+nvidia-nccl-cu12==2.26.2
+nvidia-nvjitlink-cu12==12.6.85
+nvidia-nvtx-cu12==12.6.77
+packaging==25.0
 pandas==2.2.3
-parso
-pickleshare
+parso==0.8.4
+pexpect==4.9.0
+pickleshare==0.7.5
 pillow==11.0.0
-platformdirs
-prompt_toolkit
-psutil
-pure_eval
-Pygments
+platformdirs==4.3.8
+pluggy==1.6.0
+prompt_toolkit==3.0.51
+propcache==0.3.1
+protobuf==6.31.0
+psutil==7.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==20.0.0
+pydantic==2.11.5
+pydantic_core==2.33.2
+Pygments==2.19.1
+pynvml==12.0.0
 pyparsing==3.2.0
-python-dateutil
+pytest==8.3.5
+python-dateutil==2.9.0.post0
 pytz==2024.2
-pyzmq
+PyYAML==6.0.2
+pyzmq==26.4.0
+regex==2024.11.6
+requests==2.32.3
+safetensors==0.5.3
 scikit-learn==1.5.2
-scipy
-setuptools==75.1.0
-six
-stack-data
+scipy==1.13.1
+seaborn==0.13.2
+sentry-sdk==2.29.1
+setproctitle==1.3.6
+six==1.17.0
+smmap==5.0.2
+stack-data==0.6.3
+sympy==1.14.0
+thop==0.1.1.post2209072238
 threadpoolctl==3.5.0
-tornado
-traitlets
-typing_extensions
+timm==1.0.15
+tokenizers==0.21.1
+tomli==2.2.1
+torch==2.7.0
+torchvision==0.22.0
+tornado==6.5.1
+tqdm==4.67.1
+traitlets==5.14.3
+transformers==4.52.3
+triton==3.3.0
+typing-inspection==0.4.1
+typing_extensions==4.13.2
 tzdata==2024.2
-wcwidth
-wheel==0.44.0
-zipp
-# External/ML packages
-torch>=2.0.0
-torchvision>=0.15.0
-transformers>=4.36.0
-datasets>=2.14.0
-timm>=0.9.0
-seaborn>=0.12.0
-pynvml>=11.5.0
-thop>=0.1.1
-Pillow>=9.0.0  # included again for compatibility
-accelerate
\ No newline at end of file
+urllib3==2.4.0
+wandb==0.19.11
+wcwidth==0.2.13
+xxhash==3.5.0
+yarl==1.20.0
+zipp==3.21.0
diff --git a/results_metrics_finetune.json b/results_metrics_finetune.json
index 7b44e37..46f9038 100644
--- a/results_metrics_finetune.json
+++ b/results_metrics_finetune.json
@@ -1,18 +1,24 @@
 {
-    "simclr": {
-        "peak_memory_mb": 989.1875,
-        "flops_giga": 4.131698688,
-        "train_time_seconds": 417.5733857154846,
-        "eval_time_seconds": 25.519327640533447,
+    "vit": {
+        "model_name": "vit",
+        "model_type": "vit",
+        "peak_memory_mb": 3645.1875,
+        "flops_giga": 16.862863872,
+        "train_time_seconds": 454.5931091308594,
+        "eval_time_seconds": 33.34872579574585,
         "eval_metrics": {
-            "eval_loss": 0.6880730390548706,
-            "eval_accuracy": 0.6410256410256411,
-            "eval_f1": 0.6350267379679144,
-            "eval_auc": 0.6370808678500987,
-            "eval_runtime": 25.5184,
-            "eval_samples_per_second": 3.057,
-            "eval_steps_per_second": 0.196,
-            "epoch": 3.0
+            "eval_loss": 0.6044296026229858,
+            "eval_accuracy": 0.7435897435897436,
+            "eval_f1": 0.7420634920634921,
+            "eval_auc": 0.8060486522024982,
+            "eval_runtime": 33.3459,
+            "eval_samples_per_second": 2.339,
+            "eval_steps_per_second": 0.15,
+            "epoch": 3.0,
+            "model": "vit",
+            "phase": "finetune",
+            "gpu_memory_mb": 3645.1875,
+            "best_accuracy": 0.7435897435897436
         }
     }
 }
\ No newline at end of file
diff --git a/results_metrics_linear_probe.json b/results_metrics_linear_probe.json
index 8b7c417..f093390 100644
--- a/results_metrics_linear_probe.json
+++ b/results_metrics_linear_probe.json
@@ -1,18 +1,25 @@
 {
-    "simclr": {
-        "peak_memory_mb": 1003.1875,
-        "flops_giga": 4.131698688,
-        "train_time_seconds": 128.30324125289917,
-        "eval_time_seconds": 25.598750591278076,
+    "vit": {
+        "model_name": "vit",
+        "model_type": "vit",
+        "phase": "linear_probe",
+        "peak_memory_mb": 3645.1875,
+        "flops_giga": 16.862863872,
+        "train_time_seconds": 186.49300956726074,
+        "eval_time_seconds": 40.83929514884949,
         "eval_metrics": {
-            "eval_loss": 0.6896047592163086,
-            "eval_accuracy": 0.5512820512820513,
-            "eval_f1": 0.49019607843137253,
-            "eval_auc": 0.5746219592373438,
-            "eval_runtime": 25.5978,
-            "eval_samples_per_second": 3.047,
-            "eval_steps_per_second": 0.195,
-            "epoch": 1.0
+            "eval_loss": 0.7638523578643799,
+            "eval_accuracy": 0.46153846153846156,
+            "eval_f1": 0.415,
+            "eval_auc": 0.38067061143984227,
+            "eval_runtime": 40.8364,
+            "eval_samples_per_second": 1.91,
+            "eval_steps_per_second": 0.122,
+            "epoch": 1.0,
+            "model": "vit",
+            "phase": "linear_probe",
+            "gpu_memory_mb": 1165.1875,
+            "best_accuracy": 0.46153846153846156
         }
     }
 }
\ No newline at end of file
diff --git a/src/models/model_comparison_baseline.py b/src/models/model_comparison_baseline.py
index e3b2962..e11ea11 100644
--- a/src/models/model_comparison_baseline.py
+++ b/src/models/model_comparison_baseline.py
@@ -8,8 +8,6 @@
 # Environment Setup
 import os
 
-os.environ["WANDB_DISABLED"] = "true"  # Disable Weights & Biases logging
-
 # Standard Libraries
 import io
 import json
@@ -40,6 +38,8 @@
 )
 from datasets import load_dataset, ClassLabel
 
+# Weights & Biases
+import wandb
 
 # Metrics
 from sklearn.metrics import (
@@ -75,7 +75,6 @@
 # GPU Memory Monitoring (optional)
 try:
     import pynvml
-
     pynvml.nvmlInit()
     GPU_AVAILABLE = True
 except ImportError:
@@ -91,13 +90,63 @@
 )
 os.environ["HF_HOME"] = os.getenv("HF_HOME", "~/.cache/huggingface")
 
+class WandbCallback(TrainerCallback):
+    """Forward Trainer logs and evaluation metrics to wandb, tagged with model name and phase."""
+
+    def __init__(self, model_name, phase):
+        self.model_name = model_name
+        self.phase = phase
+        self.best_accuracy = 0.0  # running maximum of eval_accuracy seen by on_evaluate
+
+    def on_log(self, args, state, control, logs=None, **kwargs):
+        """Log a tagged copy of `logs` to wandb; the dict passed in is left unmodified."""
+        if logs is None:
+            return
+        # Copy first: writing into `logs` leaked these keys into eval_metrics in the results JSON.
+        payload = {**logs, "model": self.model_name, "phase": self.phase}
+        if GPU_AVAILABLE:
+            payload["gpu_memory_mb"] = get_gpu_memory()
+        wandb.log(payload)
+
+    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
+        """Log a copy of `metrics`, plus `best_accuracy` when `eval_accuracy` is present."""
+        if metrics is None:
+            return
+        payload = dict(metrics)
+        if "eval_accuracy" in payload:
+            # Track the best accuracy across every evaluation this callback instance observes.
+            self.best_accuracy = max(self.best_accuracy, payload["eval_accuracy"])
+            payload["best_accuracy"] = self.best_accuracy
+        wandb.log(payload)
+
+def main(num_train_images=5000, proportion_per_transform=0.2, resolution=224):
+    
+    # Simple batch size configuration
+    batch_size = 64
     
+    # Initialize wandb
+    wandb_config = {
+        "num_train_images": num_train_images,
+        "proportion_per_transform": proportion_per_transform,
+        "resolution": resolution,
+        "batch_size": batch_size,
+        "num_epochs": 3,
+        "warmup_steps": 500,
+        "weight_decay": 0.01,
+        "gpu_available": GPU_AVAILABLE,
+    }
+    
+    wandb.init(
+        entity="ericcui-use-stanford-university",
+        project="CS231N Test",
+        config=wandb_config,
+        tags=["baseline", "model-comparison"]
+    )
 
-def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
     models = [
-        # {"name": "vit", "model_id": "google/vit-base-patch16-224", "type": "vit"},
+        {"name": "vit", "model_id": "google/vit-base-patch16-224", "type": "vit"},
         # {"name": "dinov2", "model_id": "facebook/dinov2-base", "type": "dinov2"},
-        {"name": "simclr", "model_id": "resnet50", "type": "simclr"},
+        # {"name": "simclr", "model_id": "resnet50", "type": "simclr"},
     ]
 
     results = {m["name"]: {} for m in models}
@@ -116,6 +165,7 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
         i for i, label in enumerate(dataset["label"])
         if str(label) in FILTERED_CLASSES  # Convert to string for comparison
     ]
+    
     # Select only those indices
     dataset = dataset.select(filtered_indices)
     print(f"Number of images after filtering for classes {FILTERED_CLASSES}: {len(dataset)}")
@@ -270,8 +320,8 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
         train_args = TrainingArguments(
             output_dir=os.path.join(env_path("TRAIN_OUTPUT_DIR", "."), f"{name}"),
             num_train_epochs=3,
-            per_device_train_batch_size=16,
-            per_device_eval_batch_size=16,
+            per_device_train_batch_size=batch_size,
+            per_device_eval_batch_size=batch_size,
             warmup_steps=500,
             weight_decay=0.01,
             logging_dir=os.path.join(env_path("LOG_DIR", "."), f"{name}"),
@@ -294,7 +344,8 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
                     log_dir=env_path("LOG_DIR", "./logs"),
                     phase="finetune",
                     model_name=name,
-                )
+                ),
+                WandbCallback(name, "finetune"),
             ],
         )
 
@@ -302,6 +353,12 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
         start_time = time.time()
         peak_memory = get_gpu_memory() if GPU_AVAILABLE else -1
 
+        # Log model architecture
+        if typ in HF_MODELS:
+            wandb.watch(model, log="all", log_freq=100)
+        elif typ == SSL_MODEL:
+            wandb.watch(model.backbone, log="all", log_freq=100)
+
         trainer.train()
 
         current_memory = get_gpu_memory() if GPU_AVAILABLE else -1
@@ -312,6 +369,18 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
         eval_time = time.time() - eval_start_time
         train_time = time.time() - start_time - eval_time
 
+        # Log model-specific metrics
+        model_metrics = {
+            "model_name": name,
+            "model_type": typ,
+            "peak_memory_mb": peak_memory,
+            "flops_giga": flops,
+            "train_time_seconds": train_time,
+            "eval_time_seconds": eval_time,
+            "eval_metrics": eval_results,
+        }
+        wandb.log(model_metrics)
+
         model_dir = os.path.join(
             env_path("MODEL_DIR", "."), f"{name}"
         )
@@ -334,19 +403,30 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
                     f,
                 )
 
-        results[name] = {
-            "peak_memory_mb": peak_memory,
-            "flops_giga": flops,
-            "train_time_seconds": train_time,
-            "eval_time_seconds": eval_time,
-            "eval_metrics": eval_results,
-        }
+        # Save model as wandb artifact
+        artifact = wandb.Artifact(
+            name=f"{name}_model",
+            type="model",
+            description=f"Trained {name} model with {typ} architecture"
+        )
+        artifact.add_dir(model_dir)
+        wandb.log_artifact(artifact)
+
+        results[name] = model_metrics
 
         print(
             f"[Finetune] {name}: {results[name]}"
         )
 
         # ---- LINEAR PROBE PHASE ----
+        # Start a separate wandb run for the linear probe. reinit=True finishes the run that
+        # is still active; without it wandb.init() would return that run instead of a new one.
+        wandb.init(
+            project="model-comparison-baseline", config=wandb_config,
+            tags=["baseline", "model-comparison", "linear-probe"],
+            name=f"{name}_linear_probe", reinit=True,
+        )
+
         if typ == "vit":
             model = ViTForImageClassification.from_pretrained(
                 model_id, num_labels=NUM_FILTERED_CLASSES, ignore_mismatched_sizes=True
@@ -362,14 +442,20 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
         model.to(device)
         freeze_backbone(model, typ)
 
+        # Log model architecture for linear probe
+        if typ in HF_MODELS:
+            wandb.watch(model, log="all", log_freq=100)
+        elif typ == SSL_MODEL:
+            wandb.watch(model.backbone, log="all", log_freq=100)
+
         linear_args = TrainingArguments(
             output_dir=os.path.join(
                 env_path("TRAIN_OUTPUT_DIR", "."),
                 f"{name}_linear_probe",
             ),
             num_train_epochs=1,
-            per_device_train_batch_size=16,
-            per_device_eval_batch_size=16,
+            per_device_train_batch_size=batch_size,
+            per_device_eval_batch_size=batch_size,
             warmup_steps=100,
             weight_decay=0.01,
             logging_dir=os.path.join(
@@ -394,7 +480,8 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
                     log_dir=env_path("LOG_DIR", "./logs"),
                     phase="linear_probe",
                     model_name=name,
-                )
+                ),
+                WandbCallback(name, "linear_probe"),
             ],
         )
 
@@ -409,6 +496,19 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
         eval_time = time.time() - eval_start_time
         train_time = time.time() - start_time - eval_time
 
+        # Log linear probe metrics
+        linear_probe_metrics = {
+            "model_name": name,
+            "model_type": typ,
+            "phase": "linear_probe",
+            "peak_memory_mb": peak_memory,
+            "flops_giga": flops,
+            "train_time_seconds": train_time,
+            "eval_time_seconds": eval_time,
+            "eval_metrics": eval_results,
+        }
+        wandb.log(linear_probe_metrics)
+
         model_dir = os.path.join(
             env_path("MODEL_DIR", "."), f"{name}_linear_probe"
         )
@@ -431,18 +531,27 @@ def main(num_train_images=1000, proportion_per_transform=0.2, resolution=224):
                     f,
                 )
 
-        results_linear_probe[name] = {
-            "peak_memory_mb": peak_memory,
-            "flops_giga": flops,
-            "train_time_seconds": train_time,
-            "eval_time_seconds": eval_time,
-            "eval_metrics": eval_results,
-        }
+        # Save linear probe model as wandb artifact
+        artifact = wandb.Artifact(
+            name=f"{name}_linear_probe_model",
+            type="model",
+            description=f"Linear probe {name} model with {typ} architecture"
+        )
+        artifact.add_dir(model_dir)
+        wandb.log_artifact(artifact)
+
+        results_linear_probe[name] = linear_probe_metrics
 
         print(
             f"[LinearProbe] {name}: {results_linear_probe[name]}"
         )
 
+        # Close the wandb run for linear probe
+        wandb.finish()
+
+    # Close the main wandb run
+    wandb.finish()
+
     with open(
         os.path.join(
             env_path("TRAIN_OUTPUT_DIR", "."), "results_metrics_finetune.json"