From 2081323ed81db4f1cd5089b6358501410b5d913b Mon Sep 17 00:00:00 2001
From: Gary Hutson <44023992+StatsGary@users.noreply.github.com>
Date: Tue, 20 May 2025 20:24:20 +0100
Subject: [PATCH] Added deep learning and regression utils

---
 .gitignore                  |   3 +-
 modelviz/__init__.py        |   2 +-
 modelviz/deep_learning.py   | 146 ++++++++++++++++++++++++++++++++++++
 modelviz/regression.py      | 119 +++++++++++++++++++++++++++++
 tests/test_deep_learning.py |  49 ++++++++++++
 tests/test_regression.py    |  56 ++++++++++++++
 6 files changed, 373 insertions(+), 2 deletions(-)
 create mode 100644 modelviz/deep_learning.py
 create mode 100644 modelviz/regression.py
 create mode 100644 tests/test_deep_learning.py
 create mode 100644 tests/test_regression.py

diff --git a/.gitignore b/.gitignore
index 4f9d09c..bbb4ece 100644
--- a/.gitignore
+++ b/.gitignore
@@ -110,4 +110,5 @@ local_settings.py
 file.txt
 build_package.sh
 token.txt
-test.ipynb
\ No newline at end of file
+test.ipynb
+.trunk/
diff --git a/modelviz/__init__.py b/modelviz/__init__.py
index 67265f9..6be9ee7 100644
--- a/modelviz/__init__.py
+++ b/modelviz/__init__.py
@@ -1,2 +1,2 @@
-__version__ = "2.2.0"
+__version__ = "2.2.1"
 __author__ = "Gary Hutson"
\ No newline at end of file
diff --git a/modelviz/deep_learning.py b/modelviz/deep_learning.py
new file mode 100644
index 0000000..7de726d
--- /dev/null
+++ b/modelviz/deep_learning.py
@@ -0,0 +1,146 @@
+import matplotlib.pyplot as plt
+from typing import List, Tuple
+
+
+def draw_mlp(
+    layer_sizes: List[int],
+    show_bias: bool = True,
+    activation: str = 'σ',
+    figsize: Tuple[int, int] = (11, 7),
+    neuron_radius: float = 0.24,
+    h_spacing: float = 2.7,
+    v_spacing: float = 1.5,
+    input_color: str = '#eef4fa',
+    edge_color: str = '#336699',
+    hidden_text_color: str = '#336699',
+    conn_color: str = '#444',
+    conn_alpha: float = 0.4,
+    weight_color: str = '#ba2222',
+    weight_fontsize: int = 10,
+    weight_box_color: str = 'white',
+    bias_color: str = '#fcf7cd',
+    bias_edge_color: str = '#998a26',
+    bias_line_color: str = '#998a26',
+    bias_box_alpha: float = 0.7,
+    activation_text_color: str = '#008488'
+) -> None:
+    """
+    Visualize a multilayer perceptron (MLP) architecture.
+
+    Parameters:
+    - layer_sizes: A list of integers indicating the number of neurons per layer.
+      Must be non-empty, and every entry must be at least 1.
+    - show_bias: Whether to show bias nodes and their connections.
+    - activation: Activation function symbol to display between layers.
+    - figsize: Size of the matplotlib figure.
+    - neuron_radius: Radius of each neuron circle.
+    - h_spacing: Horizontal spacing between layers.
+    - v_spacing: Vertical spacing between neurons in a layer.
+    - input_color: Fill color for neuron circles.
+    - edge_color: Edge color for neuron circles.
+    - hidden_text_color: Font color for hidden neurons.
+    - conn_color: Color of the lines connecting neurons.
+    - conn_alpha: Transparency of connection lines.
+    - weight_color: Color of the weight text.
+    - weight_fontsize: Font size for weight labels.
+    - weight_box_color: Background color for weight label boxes.
+    - bias_color: Color of bias nodes.
+    - bias_edge_color: Edge color of bias nodes.
+    - bias_line_color: Color of dashed lines from bias to neuron.
+    - bias_box_alpha: Transparency of the background box behind each weight
+      label (the parameter name is kept for backward compatibility).
+    - activation_text_color: Color of activation function label.
+
+    Returns:
+    - None: displays the figure
+
+    Raises:
+    - ValueError: if layer_sizes is empty or contains an entry below 1.
+    """
+    layer_sizes = list(layer_sizes)
+    if not layer_sizes:
+        raise ValueError("layer_sizes must contain at least one layer")
+    if any(size < 1 for size in layer_sizes):
+        raise ValueError("every entry in layer_sizes must be at least 1")
+
+    _, ax = plt.subplots(figsize=figsize)
+    ax.axis('off')
+    n_layers = len(layer_sizes)
+
+    # Calculate vertical positions for neurons in each layer (centred on y = 0)
+    y_offset = []
+    for n in layer_sizes:
+        total_h = v_spacing * (n - 1)
+        y_offset.append([i * v_spacing - total_h / 2 for i in range(n)])
+
+    # Draw neurons
+    for i, layer in enumerate(layer_sizes):
+        for j in range(layer):
+            circle = plt.Circle((i * h_spacing, y_offset[i][j]), neuron_radius,
+                                facecolor=input_color, edgecolor=edge_color,
+                                lw=2.2, zorder=3, alpha=0.97)
+            ax.add_patch(circle)
+            # Add labels
+            if i == 0:
+                ax.text(i * h_spacing - 0.5, y_offset[i][j], f"$x_{{{j+1}}}$", fontsize=16,
+                        va='center', ha='right')
+            elif i == n_layers - 1:
+                ax.text(i * h_spacing + 0.5, y_offset[i][j], f"$y_{{{j+1}}}$", fontsize=16,
+                        va='center', ha='left', fontweight='bold')
+            else:
+                ax.text(i * h_spacing, y_offset[i][j], f"$h_{{{i},{j+1}}}$", fontsize=16,
+                        ha='center', va='center', color=hidden_text_color, fontweight='bold')
+
+    # Draw connections and weights
+    for i in range(n_layers - 1):
+        for j, y1 in enumerate(y_offset[i]):
+            for k, y2 in enumerate(y_offset[i + 1]):
+                line = plt.Line2D([i * h_spacing, (i + 1) * h_spacing], [y1, y2],
+                                  color=conn_color, lw=1, alpha=conn_alpha, zorder=1)
+                ax.add_line(line)
+                # Add weight label
+                x_mid = (i * h_spacing + (i + 1) * h_spacing) / 2
+                y_mid = (y1 + y2) / 2
+                ax.text(x_mid, y_mid + 0.18,
+                        f"$w^{{({i+1})}}_{{{k+1},{j+1}}}$",
+                        fontsize=weight_fontsize, color=weight_color, alpha=0.95,
+                        ha='center', va='bottom',
+                        bbox=dict(boxstyle="round,pad=0.12", fc=weight_box_color,
+                                  ec='none', alpha=bias_box_alpha))
+
+        # Activation function label
+        if i < n_layers - 2:
+            ax.text((i + 0.5) * h_spacing, max(y_offset[i + 1]) + 0.7,
+                    f"Activation: ${activation}$", fontsize=13,
+                    ha='center', color=activation_text_color, alpha=0.7)
+
+        # Bias nodes
+        if show_bias:
+            bias_y = max(y_offset[i + 1]) + 0.75
+            ax.scatter((i + 1) * h_spacing, bias_y, s=200, marker='s',
+                       color=bias_color, edgecolors=bias_edge_color, zorder=4)
+            ax.text((i + 1) * h_spacing + 0.3, bias_y,
+                    f"$b^{{({i+1})}}$", fontsize=13, color=bias_edge_color, va='center')
+            for y2 in y_offset[i + 1]:
+                ax.plot([(i + 1) * h_spacing, (i + 1) * h_spacing],
+                        [bias_y, y2 - 0.1],
+                        color=bias_line_color, lw=1.2, ls='dashed', alpha=0.7, zorder=1)
+
+    # Layer labels
+    ax.text(-0.1, max(y_offset[0]) + 1.1, "Input\nLayer", ha='center',
+            fontsize=15, fontweight='bold', color='#222')
+    for i in range(1, n_layers - 1):
+        ax.text(i * h_spacing, max(y_offset[i]) + 1.1,
+                f"Hidden\nLayer {i}", ha='center', fontsize=15,
+                color='#0084e6', fontweight='bold')
+    ax.text((n_layers - 1) * h_spacing, max(y_offset[-1]) + 1.1,
+            "Output\nLayer", ha='center', fontsize=15,
+            color='#222', fontweight='bold')
+
+    # Set axis limits
+    ax.set_xlim(-1.5, n_layers * h_spacing)
+    ax.set_ylim(-max(layer_sizes) * v_spacing / 1.5 - 1,
+                max(layer_sizes) * v_spacing / 1.5 + 2)
+
+    plt.tight_layout()
+    plt.show()
diff --git a/modelviz/regression.py b/modelviz/regression.py
new file mode 100644
index 0000000..6f1fc63
--- /dev/null
+++ b/modelviz/regression.py
@@ -0,0 +1,119 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import scipy.stats as stats
+
+def regression_diagnostics_panel(y_test,
+                                 y_pred,
+                                 figsize=(18, 5),
+                                 font_size=12,
+                                 hist_bins=30,
+                                 hist_alpha=0.7,
+                                 hist_color='grey',
+                                 hist_edgecolor='black',
+                                 vline_color='black',
+                                 vline_style='--',
+                                 vline_width=1,
+                                 scatter_alpha=0.5,
+                                 scatter_color='grey',
+                                 line_color='black',
+                                 line_style='--',
+                                 line_width=1,
+                                 qq_line_color='red',
+                                 qq_point_color='blue',
+                                 qq_point_size=20,
+                                 qq_line_style='-',
+                                 show_grid=True):
+    """
+    Creates a 3-panel diagnostic plot:
+    - Histogram of residuals
+    - Actual vs. Predicted
+    - Q-Q plot of residuals
+
+    Parameters:
+    - y_test, y_pred: true and predicted values; any array-like of equal
+      length (both are flattened to 1-D before plotting)
+    - figsize: tuple of figure size
+    - font_size: int for axis and title labels
+    - hist_bins: number of bins in histogram
+    - hist_alpha: alpha for histogram bars
+    - hist_color: fill color for histogram
+    - hist_edgecolor: edge color for histogram bars
+    - vline_color, vline_style, vline_width: vertical line over histogram at 0
+    - scatter_alpha, scatter_color: actual vs. predicted plot
+    - line_color, line_style, line_width: y=x reference line in actual vs. predicted
+    - qq_line_color, qq_point_color, qq_point_size, qq_line_style: Q-Q plot styling
+    - show_grid: bool for showing grid on all subplots
+
+    Returns:
+    - None: displays the plots
+
+    Raises:
+    - ValueError: if y_test and y_pred are empty or differ in length
+
+    Example:
+    >>> from sklearn.model_selection import train_test_split
+    >>> from sklearn.linear_model import LinearRegression
+    >>> from sklearn.datasets import make_regression
+    >>> X, y = make_regression(n_samples=100, n_features=1, noise=0.1)
+    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    >>> model = LinearRegression()
+    >>> model.fit(X_train, y_train)
+    >>> y_pred = model.predict(X_test)
+    >>> regression_diagnostics_panel(y_test, y_pred)
+    >>> # This will display the diagnostic plots for the regression model.
+    >>> # Note: Ensure that you have matplotlib and scipy installed in your environment.
+    >>> # You can customize the appearance of the plots using the parameters.
+    """
+    # Flatten to 1-D float arrays so plain lists work and an (n, 1) column
+    # vector cannot silently broadcast against an (n,) vector into (n, n).
+    y_test = np.asarray(y_test, dtype=float).ravel()
+    y_pred = np.asarray(y_pred, dtype=float).ravel()
+    if y_test.size == 0 or y_pred.size == 0:
+        raise ValueError("y_test and y_pred must not be empty")
+    if y_test.shape != y_pred.shape:
+        raise ValueError(
+            "y_test and y_pred must have the same length, "
+            f"got {y_test.size} and {y_pred.size}"
+        )
+
+    residuals = y_test - y_pred
+    _, axes = plt.subplots(1, 3, figsize=figsize)
+
+    # 1. Distribution of Residuals
+    axes[0].hist(residuals, bins=hist_bins, alpha=hist_alpha,
+                 color=hist_color, edgecolor=hist_edgecolor)
+    axes[0].axvline(x=0, color=vline_color, linestyle=vline_style, lw=vline_width)
+    axes[0].set_title('Distribution of Residuals', fontsize=font_size)
+    axes[0].set_xlabel('Residuals', fontsize=font_size)
+    axes[0].set_ylabel('Frequency', fontsize=font_size)
+    if show_grid:
+        axes[0].grid(True)
+
+    # 2. Actual vs. Predicted
+    axes[1].scatter(y_test, y_pred, alpha=scatter_alpha, color=scatter_color)
+    # The y = x reference line spans both series so it is not cut short
+    # when predictions fall outside the range of the actual values.
+    lo = min(y_test.min(), y_pred.min())
+    hi = max(y_test.max(), y_pred.max())
+    axes[1].plot([lo, hi], [lo, hi],
+                 color=line_color, linestyle=line_style, lw=line_width)
+    axes[1].set_title('Actual vs. Predicted', fontsize=font_size)
+    axes[1].set_xlabel('Actual', fontsize=font_size)
+    axes[1].set_ylabel('Predicted', fontsize=font_size)
+    if show_grid:
+        axes[1].grid(True)
+
+    # 3. Q-Q Plot
+    (osm, osr), (slope, intercept, _) = stats.probplot(residuals, dist="norm")
+    axes[2].plot(osm, slope * np.array(osm) + intercept,
+                 qq_line_style, color=qq_line_color, label='Q-Q Line')
+    axes[2].scatter(osm, osr, color=qq_point_color, s=qq_point_size, alpha=0.6, label='Residuals')
+    axes[2].set_title('Q-Q Plot of Residuals', fontsize=font_size)
+    axes[2].set_xlabel('Theoretical Quantiles', fontsize=font_size)
+    axes[2].set_ylabel('Ordered Values', fontsize=font_size)
+    axes[2].legend()
+    if show_grid:
+        axes[2].grid(True, linestyle='--', alpha=0.5)
+
+    plt.tight_layout()
+    plt.show()
diff --git a/tests/test_deep_learning.py b/tests/test_deep_learning.py
new file mode 100644
index 0000000..9eed1d7
--- /dev/null
+++ b/tests/test_deep_learning.py
@@ -0,0 +1,49 @@
+import pytest
+from matplotlib import pyplot as plt
+from modelviz.deep_learning import draw_mlp
+
+@pytest.fixture(autouse=True)
+def no_show(monkeypatch):
+    """Stub out plt.show and close figures so tests neither block nor leak."""
+    monkeypatch.setattr(plt, 'show', lambda *args, **kwargs: None)
+    yield
+    plt.close('all')
+
+def test_basic_network_runs():
+    """Check a simple 3-layer MLP runs without error."""
+    draw_mlp(layer_sizes=[3, 4, 2])
+
+def test_single_layer_input_output():
+    """Check a network with input and output only (no hidden layers)."""
+    draw_mlp(layer_sizes=[5, 1])
+
+def test_deep_network():
+    """Check a deeper MLP with several hidden layers."""
+    draw_mlp(layer_sizes=[3, 5, 4, 3, 2, 1])
+
+def test_with_bias_disabled():
+    """Ensure function runs with bias disabled."""
+    draw_mlp(layer_sizes=[4, 4, 2], show_bias=False)
+
+def test_custom_styling_runs():
+    """Test that passing various styling options doesn't break the function."""
+    draw_mlp(
+        layer_sizes=[2, 3, 1],
+        activation='ReLU',
+        neuron_radius=0.3,
+        input_color='#ffeecc',
+        edge_color='#333333',
+        conn_color='gray',
+        weight_color='blue',
+        weight_fontsize=8,
+        weight_box_color='#eeeeee',
+        bias_color='red',
+        bias_edge_color='darkred',
+        activation_text_color='purple'
+    )
+
+@pytest.mark.parametrize("layer_sizes", [[], [3, 0, 2]])
+def test_invalid_layer_sizes_raise(layer_sizes):
+    """Empty networks and empty layers are rejected with a ValueError."""
+    with pytest.raises(ValueError):
+        draw_mlp(layer_sizes=layer_sizes)
diff --git a/tests/test_regression.py b/tests/test_regression.py
new file mode 100644
index 0000000..067c9c5
--- /dev/null
+++ b/tests/test_regression.py
@@ -0,0 +1,56 @@
+import pytest
+import numpy as np
+from matplotlib import pyplot as plt
+from modelviz.regression import regression_diagnostics_panel
+
+@pytest.fixture(autouse=True)
+def no_show(monkeypatch):
+    """Stub out plt.show and close figures so tests neither block nor leak."""
+    monkeypatch.setattr(plt, 'show', lambda *args, **kwargs: None)
+    yield
+    plt.close('all')
+
+# Generate fake regression data
+@pytest.fixture
+def sample_data():
+    np.random.seed(42)
+    y_true = np.random.normal(loc=100, scale=10, size=100)
+    noise = np.random.normal(loc=0, scale=5, size=100)
+    y_pred = y_true + noise
+    return y_true, y_pred
+
+def test_regression_diagnostics_runs(sample_data):
+    y_true, y_pred = sample_data
+    # Should run without error
+    regression_diagnostics_panel(y_true, y_pred)
+
+def test_custom_plot_args(sample_data):
+    y_true, y_pred = sample_data
+    # Try changing style params
+    regression_diagnostics_panel(
+        y_test=y_true,
+        y_pred=y_pred,
+        font_size=10,
+        figsize=(12, 4),
+        hist_bins=20,
+        hist_color='skyblue',
+        scatter_color='black',
+        qq_point_color='green',
+        show_grid=False
+    )
+
+def test_handles_different_input_shapes():
+    # Column vector inputs
+    y_true = np.array([[10], [20], [30]])
+    y_pred = np.array([[12], [19], [29]])
+    regression_diagnostics_panel(y_true.flatten(), y_pred.flatten())
+
+def test_accepts_column_vectors_and_lists():
+    # The function flattens array-likes itself, so no pre-processing is needed
+    regression_diagnostics_panel(np.array([[10], [20], [30]]), [12, 19, 29])
+
+def test_fails_with_mismatched_shapes():
+    y_true = np.array([10, 20, 30])
+    y_pred = np.array([12, 19])  # wrong shape
+    with pytest.raises(ValueError):
+        regression_diagnostics_panel(y_true, y_pred)