From 2081323ed81db4f1cd5089b6358501410b5d913b Mon Sep 17 00:00:00 2001
From: Gary Hutson <44023992+StatsGary@users.noreply.github.com>
Date: Tue, 20 May 2025 20:24:20 +0100
Subject: [PATCH] Added deep learning and regression utils

---
 .gitignore                  |   3 +-
 modelviz/__init__.py        |   2 +-
 modelviz/deep_learning.py   | 130 ++++++++++++++++++++++++++++++++++++
 modelviz/regression.py      | 100 +++++++++++++++++++++++++++
 tests/test_deep_learning.py |  40 +++++++++++
 tests/test_regression.py    |  44 ++++++++++++
 6 files changed, 317 insertions(+), 2 deletions(-)
 create mode 100644 modelviz/deep_learning.py
 create mode 100644 modelviz/regression.py
 create mode 100644 tests/test_deep_learning.py
 create mode 100644 tests/test_regression.py

diff --git a/.gitignore b/.gitignore
index 4f9d09c..bbb4ece 100644
--- a/.gitignore
+++ b/.gitignore
@@ -110,4 +110,5 @@ local_settings.py
 file.txt
 build_package.sh
 token.txt
-test.ipynb
\ No newline at end of file
+test.ipynb
+.trunk/
diff --git a/modelviz/__init__.py b/modelviz/__init__.py
index 67265f9..6be9ee7 100644
--- a/modelviz/__init__.py
+++ b/modelviz/__init__.py
@@ -1,2 +1,2 @@
-__version__ = "2.2.0"
+__version__ = "2.2.1"
 __author__ = "Gary Hutson"
\ No newline at end of file
diff --git a/modelviz/deep_learning.py b/modelviz/deep_learning.py
new file mode 100644
index 0000000..7de726d
--- /dev/null
+++ b/modelviz/deep_learning.py
@@ -0,0 +1,130 @@
+import matplotlib.pyplot as plt
+from typing import List, Tuple
+
+
+def draw_mlp(
+    layer_sizes: List[int],
+    show_bias: bool = True,
+    activation: str = 'σ',
+    figsize: Tuple[int, int] = (11, 7),
+    neuron_radius: float = 0.24,
+    h_spacing: float = 2.7,
+    v_spacing: float = 1.5,
+    input_color: str = '#eef4fa',
+    edge_color: str = '#336699',
+    hidden_text_color: str = '#336699',
+    conn_color: str = '#444',
+    conn_alpha: float = 0.4,
+    weight_color: str = '#ba2222',
+    weight_fontsize: int = 10,
+    weight_box_color: str = 'white',
+    bias_color: str = '#fcf7cd',
+    bias_edge_color: str = '#998a26',
+    bias_line_color: str = '#998a26',
+    bias_box_alpha: float = 0.7,
+    activation_text_color: str = '#008488'
+) -> None:
+    """
+    Visualize a multilayer perceptron (MLP) architecture and display it with plt.show().
+
+    Parameters:
+    - layer_sizes: A list of integers indicating the number of neurons per layer.
+    - show_bias: Whether to show bias nodes and their connections.
+    - activation: Activation function symbol to display between layers.
+    - figsize: Size of the matplotlib figure.
+    - neuron_radius: Radius of each neuron circle.
+    - h_spacing: Horizontal spacing between layers.
+    - v_spacing: Vertical spacing between neurons in a layer.
+    - input_color: Fill color for neuron circles.
+    - edge_color: Edge color for neuron circles.
+    - hidden_text_color: Font color for hidden neurons.
+    - conn_color: Color of the lines connecting neurons.
+    - conn_alpha: Transparency of connection lines.
+    - weight_color: Color of the weight text.
+    - weight_fontsize: Font size for weight labels.
+    - weight_box_color: Background color for weight label boxes.
+    - bias_color: Color of bias nodes.
+    - bias_edge_color: Edge color of bias nodes (also used for the bias label text).
+    - bias_line_color: Color of dashed lines from bias to neuron.
+    - bias_box_alpha: Transparency of the weight label boxes (name kept for compatibility).
+    - activation_text_color: Color of activation function label.
+
+    Raises:
+    - ValueError: If layer_sizes is empty or any layer has fewer than one neuron.
+    """
+    # Validate before creating the figure so bad input does not leave a blank figure open.
+    if not layer_sizes or min(layer_sizes) < 1:
+        raise ValueError("layer_sizes must be a non-empty list of positive integers")
+
+    fig, ax = plt.subplots(figsize=figsize)
+    ax.axis('off')
+    n_layers = len(layer_sizes)
+
+    # Vertical positions of the neurons in each layer, centred on y = 0
+    y_offset = [[j * v_spacing - v_spacing * (n - 1) / 2 for j in range(n)]
+                for n in layer_sizes]
+
+    # Draw neurons
+    for i, layer in enumerate(layer_sizes):
+        for j in range(layer):
+            # fc= rather than color=: a patch's color= overrides ec= and dropped edge_color.
+            ax.add_patch(plt.Circle((i * h_spacing, y_offset[i][j]), neuron_radius, lw=2.2,
+                                    fc=input_color, ec=edge_color, zorder=3, alpha=0.97))
+            # Add labels
+            if i == 0:
+                ax.text(i * h_spacing - 0.5, y_offset[i][j], f"$x_{{{j+1}}}$", fontsize=16,
+                        va='center', ha='right')
+            elif i == n_layers - 1:
+                ax.text(i * h_spacing + 0.5, y_offset[i][j], f"$y_{{{j+1}}}$", fontsize=16,
+                        va='center', ha='left', fontweight='bold')
+            else:
+                ax.text(i * h_spacing, y_offset[i][j], f"$h_{{{i},{j+1}}}$", fontsize=16,
+                        ha='center', va='center', color=hidden_text_color, fontweight='bold')
+
+    # Draw connections and weights
+    for i in range(n_layers - 1):
+        for j, y1 in enumerate(y_offset[i]):
+            for k, y2 in enumerate(y_offset[i + 1]):
+                ax.add_line(plt.Line2D([i * h_spacing, (i + 1) * h_spacing], [y1, y2],
+                                       color=conn_color, lw=1, alpha=conn_alpha, zorder=1))
+                # Add weight label
+                x_mid = (i * h_spacing + (i + 1) * h_spacing) / 2
+                y_mid = (y1 + y2) / 2
+                ax.text(x_mid, y_mid + 0.18, f"$w^{{({i+1})}}_{{{k+1},{j+1}}}$", ha='center',
+                        va='bottom', fontsize=weight_fontsize, color=weight_color, alpha=0.95,
+                        bbox=dict(boxstyle="round,pad=0.12", fc=weight_box_color,
+                                  ec='none', alpha=bias_box_alpha))
+
+        # Activation function label
+        if i < n_layers - 2:
+            ax.text((i + 0.5) * h_spacing, max(y_offset[i + 1]) + 0.7,
+                    f"Activation: ${activation}$", fontsize=13,
+                    ha='center', color=activation_text_color, alpha=0.7)
+
+        # Bias nodes
+        if show_bias:
+            bias_y = max(y_offset[i + 1]) + 0.75
+            ax.scatter((i + 1) * h_spacing, bias_y, s=200, marker='s',
+                       color=bias_color, edgecolors=bias_edge_color, zorder=4)
+            ax.text((i + 1) * h_spacing + 0.3, bias_y,
+                    f"$b^{{({i+1})}}$", fontsize=13, color=bias_edge_color, va='center')
+            for y2 in y_offset[i + 1]:
+                ax.plot([(i + 1) * h_spacing, (i + 1) * h_spacing], [bias_y, y2 - 0.1],
+                        color=bias_line_color, lw=1.2, ls='dashed', alpha=0.7, zorder=1)
+
+    # Layer labels
+    ax.text(-0.1, max(y_offset[0]) + 1.1, "Input\nLayer", ha='center',
+            fontsize=15, fontweight='bold', color='#222')
+    for i in range(1, n_layers - 1):
+        ax.text(i * h_spacing, max(y_offset[i]) + 1.1, f"Hidden\nLayer {i}", ha='center',
+                fontsize=15, color='#0084e6', fontweight='bold')
+    ax.text((n_layers - 1) * h_spacing, max(y_offset[-1]) + 1.1, "Output\nLayer",
+            ha='center', fontsize=15, color='#222', fontweight='bold')
+
+    # Set axis limits
+    ax.set_xlim(-1.5, n_layers * h_spacing)
+    ax.set_ylim(-max(layer_sizes) * v_spacing / 1.5 - 1,
+                max(layer_sizes) * v_spacing / 1.5 + 2)
+
+    plt.tight_layout()
+    plt.show()
\ No newline at end of file
diff --git a/modelviz/regression.py b/modelviz/regression.py
new file mode 100644
index 0000000..6f1fc63
--- /dev/null
+++ b/modelviz/regression.py
@@ -0,0 +1,100 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy.stats as stats
+
+def regression_diagnostics_panel(y_test,
+                                  y_pred,
+                                  figsize=(18, 5),
+                                  font_size=12,
+                                  hist_bins=30,
+                                  hist_alpha=0.7,
+                                  hist_color='grey',
+                                  hist_edgecolor='black',
+                                  vline_color='black',
+                                  vline_style='--',
+                                  vline_width=1,
+                                  scatter_alpha=0.5,
+                                  scatter_color='grey',
+                                  line_color='black',
+                                  line_style='--',
+                                  line_width=1,
+                                  qq_line_color='red',
+                                  qq_point_color='blue',
+                                  qq_point_size=20,
+                                  qq_line_style='-',
+                                  show_grid=True):
+    """
+    Creates a 3-panel diagnostic plot:
+    - Histogram of residuals (y_test - y_pred)
+    - Actual vs. Predicted
+    - Q-Q plot of residuals against a normal distribution
+
+    Parameters:
+    - y_test, y_pred: true and predicted values; array-likes of equal size, flattened to 1-D
+    - figsize: tuple of figure size
+    - font_size: int for axis and title labels
+    - hist_bins: number of bins in histogram
+    - hist_alpha: alpha for histogram bars
+    - hist_color, hist_edgecolor: fill and edge color for histogram bars
+    - vline_color, vline_style, vline_width: vertical line over histogram at 0
+    - scatter_alpha, scatter_color: actual vs. predicted plot
+    - line_color, line_style, line_width: y=x reference line in actual vs. predicted
+    - qq_line_color, qq_point_color, qq_point_size, qq_line_style: Q-Q plot styling
+    - show_grid: bool for showing grid on all subplots
+
+    Returns:
+    - None: displays the plots
+
+    Raises:
+    - ValueError: if y_test and y_pred are empty or do not have the same number of elements
+
+    Example:
+    >>> from sklearn.model_selection import train_test_split
+    >>> from sklearn.linear_model import LinearRegression
+    >>> from sklearn.datasets import make_regression
+    >>> X, y = make_regression(n_samples=100, n_features=1, noise=0.1)
+    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    >>> model = LinearRegression()
+    >>> model.fit(X_train, y_train)
+    >>> y_pred = model.predict(X_test)
+    >>> regression_diagnostics_panel(y_test, y_pred)
+    """
+    # Flatten to 1-D: a column vector minus a 1-D array would broadcast to an (n, n) matrix.
+    y_test, y_pred = np.ravel(y_test), np.ravel(y_pred)
+    if y_test.size == 0 or y_test.size != y_pred.size:
+        raise ValueError("y_test and y_pred must be non-empty and have the same length")
+    residuals = y_test - y_pred
+    fig, axes = plt.subplots(1, 3, figsize=figsize)
+
+    # 1. Distribution of Residuals
+    axes[0].hist(residuals, bins=hist_bins, alpha=hist_alpha,
+                 color=hist_color, edgecolor=hist_edgecolor)
+    axes[0].axvline(x=0, color=vline_color, linestyle=vline_style, lw=vline_width)
+    axes[0].set_title('Distribution of Residuals', fontsize=font_size)
+    axes[0].set_xlabel('Residuals', fontsize=font_size)
+    axes[0].set_ylabel('Frequency', fontsize=font_size)
+
+    # 2. Actual vs. Predicted (the dashed y = x line spans the range of the actual values)
+    axes[1].scatter(y_test, y_pred, alpha=scatter_alpha, color=scatter_color)
+    axes[1].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()],
+                 color=line_color, linestyle=line_style, lw=line_width)
+    axes[1].set_title('Actual vs. Predicted', fontsize=font_size)
+    axes[1].set_xlabel('Actual', fontsize=font_size)
+    axes[1].set_ylabel('Predicted', fontsize=font_size)
+
+    # 3. Q-Q Plot: ordered residuals against normal quantiles plus the least-squares fit line
+    (osm, osr), (slope, intercept, _) = stats.probplot(residuals, dist="norm")
+    axes[2].plot(osm, slope * osm + intercept,
+                 qq_line_style, color=qq_line_color, label='Q-Q Line')
+    axes[2].scatter(osm, osr, color=qq_point_color, s=qq_point_size, alpha=0.6, label='Residuals')
+    axes[2].set_title('Q-Q Plot of Residuals', fontsize=font_size)
+    axes[2].set_xlabel('Theoretical Quantiles', fontsize=font_size)
+    axes[2].set_ylabel('Ordered Values', fontsize=font_size)
+    axes[2].legend()
+
+    if show_grid:
+        axes[0].grid(True)
+        axes[1].grid(True)
+        axes[2].grid(True, linestyle='--', alpha=0.5)
+    plt.tight_layout()
+    plt.show()
\ No newline at end of file
diff --git a/tests/test_deep_learning.py b/tests/test_deep_learning.py
new file mode 100644
index 0000000..9eed1d7
--- /dev/null
+++ b/tests/test_deep_learning.py
@@ -0,0 +1,40 @@
+import pytest
+from matplotlib import pyplot as plt
+from modelviz.deep_learning import draw_mlp  
+
+@pytest.fixture(autouse=True)
+def no_show(monkeypatch):
+    monkeypatch.setattr(plt, "show", lambda: None)  # stub plt.show() with a no-op for every test
+
+def test_basic_network_runs():
+    """A three-layer MLP (3 inputs, 4 hidden, 2 outputs) is drawn without raising."""
+    draw_mlp([3, 4, 2])
+
+def test_single_layer_input_output():
+    """An input layer wired straight to the output layer (no hidden layers) is drawn."""
+    draw_mlp([5, 1])
+
+def test_deep_network():
+    """A six-layer MLP with four hidden layers is drawn without raising."""
+    draw_mlp([3, 5, 4, 3, 2, 1])
+
+def test_with_bias_disabled():
+    """Drawing succeeds when bias nodes are switched off via show_bias=False."""
+    draw_mlp([4, 4, 2], show_bias=False)
+
+def test_custom_styling_runs():
+    """Overriding the styling keyword arguments must not break drawing."""
+    styling = dict(
+        activation='ReLU',
+        neuron_radius=0.3,
+        input_color='#ffeecc',
+        edge_color='#333333',
+        conn_color='gray',
+        weight_color='blue',
+        weight_fontsize=8,
+        weight_box_color='#eeeeee',
+        bias_color='red',
+        bias_edge_color='darkred',
+        activation_text_color='purple',
+    )
+    draw_mlp([2, 3, 1], **styling)
diff --git a/tests/test_regression.py b/tests/test_regression.py
new file mode 100644
index 0000000..067c9c5
--- /dev/null
+++ b/tests/test_regression.py
@@ -0,0 +1,44 @@
+import pytest
+import numpy as np
+from modelviz.regression import regression_diagnostics_panel
+
+# Generate fake regression data
+@pytest.fixture
+def sample_data():
+    """Return (y_true, y_pred): 100 normal draws and the same values plus N(0, 5) noise."""
+    rng = np.random.default_rng(42)  # local generator: leaves the global NumPy RNG untouched
+    y_true = rng.normal(loc=100, scale=10, size=100)
+    noise = rng.normal(loc=0, scale=5, size=100)
+    return y_true, y_true + noise
+
+def test_regression_diagnostics_runs(sample_data, monkeypatch):
+    """The panel is drawn with default styling without raising."""
+    monkeypatch.setattr("matplotlib.pyplot.show", lambda: None)  # never block on a GUI window
+    regression_diagnostics_panel(*sample_data)
+
+def test_custom_plot_args(sample_data, monkeypatch):
+    """Overriding style parameters must not break plotting."""
+    monkeypatch.setattr("matplotlib.pyplot.show", lambda: None)  # never block on a GUI window
+    y_true, y_pred = sample_data
+    regression_diagnostics_panel(
+        y_test=y_true, y_pred=y_pred,
+        font_size=10,
+        figsize=(12, 4),
+        hist_bins=20,
+        hist_color='skyblue',
+        scatter_color='black',
+        qq_point_color='green',
+        show_grid=False
+    )
+
+def test_handles_different_input_shapes(monkeypatch):
+    """Column vectors flattened by the caller are plotted like plain 1-D arrays."""
+    monkeypatch.setattr("matplotlib.pyplot.show", lambda: None)  # never block on a GUI window
+    y_true, y_pred = np.array([[10], [20], [30]]), np.array([[12], [19], [29]])
+    regression_diagnostics_panel(y_true.flatten(), y_pred.flatten())
+
+def test_fails_with_mismatched_shapes():
+    """Inputs of different lengths (3 vs. 2) must be rejected with ValueError."""
+    actual, predicted = np.array([10, 20, 30]), np.array([12, 19])
+    with pytest.raises(ValueError):
+        regression_diagnostics_panel(actual, predicted)