Fall2024CS584 · Hj006 · Oct 3, 2024 · Oct 3, 2024 · Oct 3, 2024 · Oct 3, 2024
diff --git a/.DS_Store b/.DS_Store
diff --git a/1.png b/1.png
diff --git a/2.png b/2.png
diff --git a/3.png b/3.png
diff --git a/4.png b/4.png
diff --git a/5.png b/5.png
diff --git a/6.png b/6.png
diff --git a/README.md b/README.md
diff --git a/elasticnet/models/ElasticNet.py b/elasticnet/models/ElasticNet.py
@@ -1,17 +1,92 @@
-
+import numpy as np
 
 class ElasticNetModel():
-    def __init__(self):
+    """
+    Linear regression with a combined L1/L2 (elastic net) penalty,
+    fitted by batch gradient descent on the mean squared error.
+    The intercept is fitted but not penalized.
+    """
+
+    def __init__(self,lambdas,thresh = 0.5,max_iter=1000, tol=1e-4,learning_rate=0.01):
+        """
+        Parameters:
+            lambdas - Overall regularization strength
+            thresh - Mix between the penalties, in [0, 1]:
+                     1 means only L1 regularization, 0 means only L2
+            max_iter - Maximum number of gradient descent iterations
+            tol - Stop once the norm of a weight update falls below this
+            learning_rate - Gradient descent step size
+        """
+        self.lambdas = lambdas
+        self.thresh = thresh
+        self.max_iter = max_iter
+        self.tol = tol
+        self.learning_rate = learning_rate
+        self.w = None  # Weights with the intercept at index 0; set by fit()
+        self.intercept_ = None  # Same value as ``intercept``; set by fit()
+        self.intercept = None
+        self.coef = None
         pass
 
 
     def fit(self, X, y):
-        return ElasticNetModelResults()
+        """
+        Fit the model and return an ElasticNetResults holding the solution.
+
+        Parameters:
+            X - Feature matrix of shape (n_samples, n_features)
+            y - Targets of shape (n_samples,) or (n_samples, 1)
+
+        Raises:
+            ValueError - If X and y disagree on the number of samples
+        """
+        X = np.asarray(X, dtype=float)
+        # Flatten y: an (n, 1) target would broadcast the residual to (n, n)
+        y = np.asarray(y, dtype=float).ravel()
+        if X.shape[0] != y.shape[0]:
+            raise ValueError("X and y must contain the same number of samples")
+
+        # Add intercept term as a leading column of ones
+        X = np.c_[np.ones(X.shape[0]), X]
+        self.w = np.zeros(X.shape[1])  # Initialize weights including intercept
+
+        # The intercept (index 0) is excluded from the L1/L2 penalty
+        penalized = np.ones(X.shape[1])
+        penalized[0] = 0.0
+
+        for _ in range(self.max_iter):
+            # Compute predictions and residuals
+            residual = X @ self.w - y
+
+            gradient = (2 / X.shape[0]) * (X.T @ residual)  # MSE gradient
+            l1_grad = self.lambdas * self.thresh * np.sign(self.w)  # L1 penalty gradient
+            l2_grad = 2 * self.lambdas * (1 - self.thresh) * self.w  # L2 penalty gradient
+            gradient += penalized * (l1_grad + l2_grad)
+
+            # Update weights
+            step = self.learning_rate * gradient
+            self.w -= step
+
+            # Check convergence
+            if np.linalg.norm(step) < self.tol:
+                break
+
+        # Split intercept and coefficients
+        self.intercept = self.w[0]
+        self.coef = self.w[1:]
+        self.intercept_ = self.intercept
+        return ElasticNetResults(self.coef, self.intercept)
+
 
 
-class ElasticNetModelResults():
-    def __init__(self):
+class ElasticNetResults():
+    """Fitted coefficients and intercept of an ElasticNetModel."""
+
+    def __init__(self, coef, intercept):
+        self.coef = coef
+        self.intercept = intercept
         pass
 
-    def predict(self, x):
-        return 0.5
+    def predict(self, X):
+        """Return predictions ``X @ coef + intercept`` for feature matrix X."""
+        return np.asarray(X, dtype=float) @ self.coef + self.intercept
diff --git a/elasticnet/tests/test_ElasticNetModel.py b/elasticnet/tests/test_ElasticNetModel.py
@@ -1,19 +1,147 @@
+import sys
 import csv
-
-import numpy
-
+import numpy as np
+import os
+'''
+import io
+# Add the project root directory to sys.path
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
+sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+'''
 from elasticnet.models.ElasticNet import ElasticNetModel
-
-def test_predict():
-    model = ElasticNetModel()
+from generate_positive_regression_data import generate_rotated_positive_data
+from generate_nagetive_regression_data import generate_negative_data
+import matplotlib.pyplot as plt
+from sklearn.metrics import mean_absolute_error, r2_score
+from sklearn.model_selection import train_test_split
+
+
+def _print_metrics(y_true, y_pred):
+    """Print MSE, RMSE, MAE and R-squared of y_pred against y_true."""
+    mse = np.mean((y_true - y_pred) ** 2)  # Mean Squared Error
+    rmse = np.sqrt(mse)  # Root Mean Squared Error
+    mae = mean_absolute_error(y_true, y_pred)  # Mean Absolute Error
+    r2 = r2_score(y_true, y_pred)  # R-squared
+
+    print(f"Mean Squared Error (MSE): {mse:.4f}")
+    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
+    print(f"Mean Absolute Error (MAE): {mae:.4f}")
+    print(f"R-squared (R²): {r2:.4f}")
+
+
+# Original test code part
+def test_predict_with_csv():
+    """Fit on 80% of small_test.csv and report metrics on the remaining 20%."""
+    # Initialize the ElasticNet model with the given parameters
+    model = ElasticNetModel(lambdas=0.1, thresh=0.6, max_iter=1000, tol=1e-4, learning_rate=0.01)
+
+    # Read the CSV file and load data
     data = []
+    #with open("elasticnet/tests/small_test.csv", "r") as file:
     with open("small_test.csv", "r") as file:
         reader = csv.DictReader(file)
         for row in reader:
             data.append(row)
 
-    X = numpy.array([[v for k,v in datum.items() if k.startswith('x')] for datum in data])
-    y = numpy.array([[v for k,v in datum.items() if k=='y'] for datum in data])
-    results = model.fit(X,y)
-    preds = results.predict(X)
-    assert preds == 0.5
+    # Extract features (X) and target (y) from the data
+    X = np.array([[v for k, v in datum.items() if k.startswith('x')] for datum in data], dtype=float)
+    y = np.array([float(datum['y']) for datum in data], dtype=float)  # Convert 'y' to a 1D array
+
+    # Split the data into 80% training and 20% testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Fit the model on the training data
+    results = model.fit(X_train, y_train)
+
+    # Make predictions on the test data
+    preds = results.predict(X_test)
+
+    # The model must return exactly one prediction per test sample
+    assert preds.shape == y_test.shape
+
+    _print_metrics(y_test, preds)
+
+
+def test_with_generated_data(data_type = 0):
+    """
+    Train on generated data, plot it, and report metrics on a noisier test set.
+
+    data_type == 1 uses generate_negative_data; any other value uses
+    generate_rotated_positive_data.
+    """
+    if data_type == 1:
+        # Generate training data
+        X, y = generate_negative_data(range_x=[-40, 40], noise_scale=5, size=300, num_features=6, seed=42)
+        # Generate test dataset
+        X_test, y_test = generate_negative_data(range_x=[-40, 40], noise_scale=50, size=50, num_features=6, seed=526)
+        # A smaller learning rate is required for this data set
+        model = ElasticNetModel(lambdas=0.1, thresh=0.5, max_iter=1000, tol=1e-4, learning_rate=0.0001)
+    else:
+        X, y = generate_rotated_positive_data(range_x=[-30, 30], noise_scale=2, size=200, num_features=6, seed=42, rotation_angle=45, mode=0)
+
+        X_test, y_test = generate_rotated_positive_data(range_x=[-30, 30], noise_scale=50, size=50, num_features=6, seed=100, rotation_angle=45, mode=1)
+
+        model = ElasticNetModel(lambdas=0.1, thresh=0.5, max_iter=1000, tol=1e-4, learning_rate=0.01)
+
+    # Visualize the generated data
+    plt.figure(figsize=(15, 6))
+    for i in range(X.shape[1]):
+        plt.subplot(1, X.shape[1], i + 1)
+        plt.scatter(X[:, i], y, label=f'Feature {i + 1}', alpha=0.6)
+        plt.xlabel(f'Feature {i + 1}')
+        plt.ylabel('Y_Train')
+        plt.title(f'Train Feature {i + 1}')
+        plt.grid(True)
+    plt.tight_layout()
+    plt.show()
+    # Visualize the test data
+    plt.figure(figsize=(15, 6))
+    for i in range(X_test.shape[1]):
+        plt.subplot(1, X_test.shape[1], i + 1)
+        plt.scatter(X_test[:, i], y_test, label=f'Test Feature {i + 1}', alpha=0.6, color='orange')
+        plt.xlabel(f'Feature {i + 1}')
+        plt.ylabel('Y_test')
+        plt.title(f'Test Feature {i + 1}')
+        plt.grid(True)
+    plt.tight_layout()
+    plt.show()
+
+    model_results = model.fit(X, y)
+    # Predict on the test data
+    y_pred = model_results.predict(X_test)
+
+    # Visualize actual and predicted values on a fresh figure whose grid
+    # is sized from the feature count rather than fixed at 2 x 3
+    n_features = X_test.shape[1]
+    n_cols = 3
+    n_rows = max(1, (n_features + n_cols - 1) // n_cols)
+    plt.figure(figsize=(15, 4 * n_rows))
+    for i in range(n_features):
+        plt.subplot(n_rows, n_cols, i + 1)
+        plt.scatter(X_test[:, i], y_test, color='orange', alpha=0.6, label='Actual Values')
+        plt.scatter(X_test[:, i], y_pred, color='blue', alpha=0.6, label='Predicted Values')
+
+        # Least-squares line through the actual values of this feature
+        z = np.polyfit(X_test[:, i], y_test, 1)
+        p = np.poly1d(z)
+        sorted_indices = np.argsort(X_test[:, i])
+        X_sorted = X_test[:, i][sorted_indices]
+        y_fit = p(X_sorted)
+        plt.plot(X_sorted, y_fit, "r--", label='Fit Line')
+
+        plt.xlabel(f'Feature {i + 1}')
+        plt.ylabel('Target')
+        plt.title(f'Feature {i + 1} vs. Actual and Predicted Target')
+        plt.grid(True)
+        plt.legend()
+
+    plt.tight_layout()
+    plt.show()
+
+    _print_metrics(y_test, y_pred)
+
+# Run the tests
+if __name__ == "__main__":
+    test_predict_with_csv()  # Test the original CSV code
+    test_with_generated_data(data_type=0)  # Test with generated positive regression data
+    test_with_generated_data(data_type=1) # Test with generated half positive and half negative regression data
diff --git a/generate_nagetive_regression_data.py b/generate_nagetive_regression_data.py
@@ -0,0 +1,57 @@
+import numpy as np
+
+def generate_negative_data(range_x, noise_scale, size, num_features, seed):
+    """
+    Generate synthetic regression data with negatively weighted features.
+
+    Every feature column is a noisy linear ramp over ``range_x``:
+    - The first ``num_features // 2`` columns (X1) run from ``range_x[1]``
+      down to ``range_x[0]``.
+    - The remaining columns (X2) are built the same way and then reversed
+      along the sample axis, so they run from low to high.
+
+    The target is ``2*X1[:, 0] + 3*X1[:, 1] + 4*X1[:, 2] - 4*sum(X2) + noise``.
+    Weights are applied only to the X1 columns that exist, so fewer than
+    six features do not raise ``IndexError``; X1 columns beyond the third
+    do not contribute to ``y``.
+
+    Parameters:
+        range_x - Range of feature values (min, max)
+        noise_scale - Standard deviation of the noise
+        size - Number of samples
+        num_features - Number of features
+        seed - Random seed for reproducibility
+
+    Returns:
+        X - Feature matrix of shape (size, num_features)
+        y - Target values of shape (size,)
+    """
+    rng = np.random.default_rng(seed=seed)
+    half_features = num_features // 2  # Number of columns in X1
+    low, high = range_x[0], range_x[1]
+
+    # X1: descending ramp (high to low) plus independent noise per column
+    X1 = np.zeros((size, half_features))
+    for i in range(half_features):
+        X1[:, i] = np.linspace(high, low, size) + rng.normal(loc=0, scale=noise_scale, size=size)
+
+    # X2: the same construction for the remaining columns
+    X2 = np.zeros((size, num_features - half_features))
+    for i in range(X2.shape[1]):
+        X2[:, i] = np.linspace(high, low, size)
+        X2[:, i] += rng.normal(loc=0, scale=noise_scale, size=size)
+
+    # Reverse the sample order so X2 rises where X1 falls
+    X2 = np.flip(X2, axis=0)
+
+    # Combine X1 and X2
+    X = np.hstack((X1, X2))
+
+    # Weights 2, 3, 4 for up to the first three X1 columns, added left to right
+    y = np.zeros(size)
+    for weight, column in zip((2, 3, 4), X1.T):
+        y = y + weight * column
+    # Strong negative contribution from X2, then observation noise
+    y = y - 4 * np.sum(X2, axis=1) + rng.normal(loc=0, scale=noise_scale, size=size)
+
+    return X, y
diff --git a/generate_positive_regression_data.py b/generate_positive_regression_data.py
@@ -0,0 +1,89 @@
+import numpy as np
+
+def generate_rotated_positive_data(range_x, noise_scale, size, num_features, seed, rotation_angle=45, mode=0):
+    """
+    Generate synthetic regression data with positively weighted features.
+
+    - The first ``num_features // 2`` columns (X1) are uniform draws from
+      ``range_x``; random groups of rows are then shrunk by fixed factors.
+    - The remaining columns (X2) are uniform draws mapped through
+      ``10*sin(x/2) + 0.5*x`` (a slanted S-shape) and shrunk the same way;
+      the first two of them are rotated by ``rotation_angle``.
+
+    The target is ``2*X1[:, 0] + 3*X1[:, 1] + 4*X1[:, 2] + sum(sin(X2)) + noise``.
+    Weights are applied only to the X1 columns that exist and the rotation
+    is skipped when X2 has fewer than two columns, so fewer than six
+    features do not raise.
+
+    Parameters:
+        range_x - Range of feature values (min, max)
+        noise_scale - Standard deviation of the noise
+        size - Number of samples
+        num_features - Number of features
+        seed - Random seed for reproducibility
+        rotation_angle - Rotation angle in degrees for the first two X2 columns
+        mode - 0 scales row groups by [0.5, 0.7, 0.3]; any other value uses [0.8, 0.4]
+
+    Returns:
+        X - Feature matrix of shape (size, num_features)
+        y - Target values of shape (size,)
+    """
+
+    def scale_random_rows(X, size, scale_factors, seed=None):
+        """
+        Scale disjoint random groups of rows of X in place.
+
+        Each factor is applied to ``size // len(scale_factors)`` rows drawn
+        without replacement from the rows not yet scaled; leftover rows
+        keep their values.
+
+        Parameters:
+            X - Input matrix (modified in place)
+            size - Number of rows in the matrix, i.e., number of samples
+            scale_factors - A list of scaling factors, e.g., [0.5, 0.7, 0.3]
+            seed - Random seed for reproducibility
+
+        Returns:
+            The same matrix X
+        """
+        rng = np.random.default_rng(seed=seed)
+        group_size = size // len(scale_factors)
+        remaining_indices = np.arange(size)  # Rows not scaled yet
+
+        for scale in scale_factors:
+            selected_indices = rng.choice(remaining_indices, group_size, replace=False)
+            X[selected_indices, :] *= scale
+            remaining_indices = np.setdiff1d(remaining_indices, selected_indices)
+
+        return X
+
+    rng = np.random.default_rng(seed=seed)
+    half_features = num_features // 2  # Number of columns in X1
+    scale_factors = [0.5, 0.7, 0.3] if mode == 0 else [0.8, 0.4]
+
+    # Generate X1 data
+    X1 = rng.uniform(low=range_x[0], high=range_x[1], size=(size, half_features))
+    X1 = scale_random_rows(X1, X1.shape[0], scale_factors, seed=42)
+
+    # Generate S-shaped data for the remaining columns
+    X2 = rng.uniform(low=range_x[0], high=range_x[1], size=(size, num_features - half_features))
+    X2 = np.sin(X2 / 2) * 10 + 0.5 * X2
+    X2 = scale_random_rows(X2, X2.shape[0], scale_factors, seed=52)
+
+    # Rotate the first two X2 columns to slant the S-shape; needs two columns
+    if X2.shape[1] >= 2:
+        theta = np.radians(rotation_angle)
+        rotation_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
+        X2[:, :2] = X2[:, :2] @ rotation_matrix
+
+    # Combine the two parts
+    X = np.hstack((X1, X2))
+
+    # Weights 2, 3, 4 for up to the first three X1 columns, added left to right
+    y = np.zeros(size)
+    for weight, column in zip((2, 3, 4), X1.T):
+        y = y + weight * column
+    # Contribution of the S-shaped data, then observation noise
+    y = y + np.sum(np.sin(X2), axis=1) + rng.normal(loc=0, scale=noise_scale, size=size)
+
+    return X, y