Complaints Analysis Dashboard
+Santander UK Customer Complaints - Live Data Analysis
+diff --git a/dashboard/.env.example b/dashboard/.env.example new file mode 100644 index 00000000..6d082e41 --- /dev/null +++ b/dashboard/.env.example @@ -0,0 +1,4 @@ +# Supabase Configuration +# Get these values from your Supabase project settings +SUPABASE_URL=https://your-project.supabase.co +SUPABASE_KEY=your-anon-key diff --git a/dashboard/.gitkeep b/dashboard/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/dashboard/README.md b/dashboard/README.md new file mode 100644 index 00000000..e68c9426 --- /dev/null +++ b/dashboard/README.md @@ -0,0 +1,56 @@ +# Complaints Analysis Dashboard + +An interactive Streamlit dashboard for analyzing Santander UK customer complaints data, connected to Supabase. + +## Features + +- **Live Data Connection**: Connects directly to Supabase with 5-minute caching +- **Interactive Filters**: Filter by category, severity, status, date range, and customer segment +- **Dynamic Statistics**: KPI cards that update based on applied filters +- **Interactive Charts**: + - Time series with daily/weekly/monthly toggle + - Complaints by category bar chart + - Severity distribution donut chart + - Resolution time distribution +- **Outlier Highlights**: Identifies extended resolution times, high compensation, and same-day resolutions +- **Repeat Complainers Analysis**: Customers with 3+ complaints and their patterns + +## Setup + +1. Install dependencies: + ```bash + pip install -r requirements.txt + ``` + +2. Configure environment variables: + ```bash + cp .env.example .env + # Edit .env with your Supabase credentials + ``` + +3. Run the dashboard: + ```bash + streamlit run app.py + ``` + +## Environment Variables + +| Variable | Description | +|----------|-------------| +| `SUPABASE_URL` | Your Supabase project URL | +| `SUPABASE_KEY` | Your Supabase anon/public key | + +## Data Requirements + +The dashboard expects a `santander_customer_complaints` table in Supabase with the following columns: + +- `complaint_id` - Unique complaint identifier +- `customer_id` - Customer identifier +- `category` - Complaint category +- `severity` - Low, Medium, High, or Critical +- `status` - Complaint status +- `complaint_date` - Date of complaint +- `resolution_date` - Date of resolution +- `resolution_days` - Days to resolve +- `compensation_amount` - Compensation paid (e.g., "£100.00") +- `customer_segment` - Customer segment classification diff --git a/dashboard/app.py b/dashboard/app.py new file mode 100644 index 00000000..7b9cdc7a --- /dev/null +++ b/dashboard/app.py @@ -0,0 +1,695 @@ +""" +Santander UK Complaints Analysis Dashboard + +An interactive Streamlit dashboard connected to Supabase for analyzing +customer complaints data with filtering, dynamic statistics, and charts. +""" + +import os +from datetime import datetime, timedelta +from typing import Optional + +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import streamlit as st +from supabase import create_client, Client + + +# Page configuration +st.set_page_config( + page_title="Complaints Analysis Dashboard", + page_icon="📊", + layout="wide", + initial_sidebar_state="expanded" +) + +# Custom CSS for Santander branding +st.markdown(""" + +""", unsafe_allow_html=True) + + +def get_supabase_client() -> Client: + """Initialize and return Supabase client. + + Returns: + Supabase client instance. + + Raises: + ValueError: If Supabase credentials are not configured. + """ + url = os.environ.get("SUPABASE_URL") + key = os.environ.get("SUPABASE_KEY") + + if not url or not key: + st.error("Supabase credentials not configured. Please set SUPABASE_URL and SUPABASE_KEY.") + st.stop() + + return create_client(url, key) + + +@st.cache_data(ttl=300) +def load_complaints_data() -> pd.DataFrame: + """Load complaints data from Supabase with caching. + + Data is cached for 5 minutes (300 seconds) to reduce API calls. + + Returns: + DataFrame containing all complaints data. + """ + try: + supabase = get_supabase_client() + response = supabase.table("santander_customer_complaints").select("*").execute() + df = pd.DataFrame(response.data) + + # Convert date columns + df['complaint_date'] = pd.to_datetime(df['complaint_date']) + df['resolution_date'] = pd.to_datetime(df['resolution_date'], errors='coerce') + + # Convert compensation to numeric + df['compensation_numeric'] = df['compensation_amount'].str.replace('£', '').astype(float) + + return df + except Exception as e: + st.error(f"Error loading data from Supabase: {str(e)}") + st.stop() + + +def apply_filters( + df: pd.DataFrame, + categories: list[str], + severities: list[str], + statuses: list[str], + segments: list[str], + date_range: tuple[datetime, datetime] +) -> pd.DataFrame: + """Apply filters to the complaints DataFrame. + + Args: + df: Original DataFrame. + categories: List of selected categories. + severities: List of selected severities. + statuses: List of selected statuses. + segments: List of selected customer segments. + date_range: Tuple of (start_date, end_date). + + Returns: + Filtered DataFrame. + """ + filtered_df = df.copy() + + if categories: + filtered_df = filtered_df[filtered_df['category'].isin(categories)] + + if severities: + filtered_df = filtered_df[filtered_df['severity'].isin(severities)] + + if statuses: + filtered_df = filtered_df[filtered_df['status'].isin(statuses)] + + if segments: + filtered_df = filtered_df[filtered_df['customer_segment'].isin(segments)] + + if date_range[0] and date_range[1]: + start_date = pd.to_datetime(date_range[0]) + end_date = pd.to_datetime(date_range[1]) + filtered_df = filtered_df[ + (filtered_df['complaint_date'] >= start_date) & + (filtered_df['complaint_date'] <= end_date) + ] + + return filtered_df + + +def calculate_statistics(df: pd.DataFrame) -> dict: + """Calculate summary statistics for the filtered data. + + Args: + df: Filtered DataFrame. + + Returns: + Dictionary containing calculated statistics. + """ + if df.empty: + return { + 'total_complaints': 0, + 'avg_resolution_days': 0, + 'repeat_complainers': 0, + 'pct_with_compensation': 0, + 'total_compensation': 0, + 'median_resolution_days': 0, + 'max_resolution_days': 0, + 'avg_compensation': 0 + } + + repeat_complainers = df.groupby('customer_id').size() + repeat_count = (repeat_complainers >= 3).sum() + + complaints_with_comp = (df['compensation_numeric'] > 0).sum() + pct_with_comp = (complaints_with_comp / len(df) * 100) if len(df) > 0 else 0 + + return { + 'total_complaints': len(df), + 'avg_resolution_days': df['resolution_days'].mean(), + 'repeat_complainers': repeat_count, + 'pct_with_compensation': pct_with_comp, + 'total_compensation': df['compensation_numeric'].sum(), + 'median_resolution_days': df['resolution_days'].median(), + 'max_resolution_days': df['resolution_days'].max(), + 'avg_compensation': df['compensation_numeric'].mean() + } + + +def create_time_series_chart(df: pd.DataFrame, aggregation: str) -> go.Figure: + """Create time series chart with configurable aggregation. + + Args: + df: Filtered DataFrame. + aggregation: One of 'Daily', 'Weekly', or 'Monthly'. + + Returns: + Plotly figure object. + """ + if df.empty: + fig = go.Figure() + fig.add_annotation(text="No data available", xref="paper", yref="paper", + x=0.5, y=0.5, showarrow=False) + return fig + + df_copy = df.copy() + + if aggregation == 'Daily': + df_copy['period'] = df_copy['complaint_date'].dt.date + elif aggregation == 'Weekly': + df_copy['period'] = df_copy['complaint_date'].dt.to_period('W').apply(lambda x: x.start_time) + else: # Monthly + df_copy['period'] = df_copy['complaint_date'].dt.to_period('M').apply(lambda x: x.start_time) + + time_series = df_copy.groupby('period').size().reset_index(name='count') + time_series['period'] = pd.to_datetime(time_series['period']) + + fig = px.line( + time_series, + x='period', + y='count', + title=f'Complaints Over Time ({aggregation})', + labels={'period': 'Date', 'count': 'Number of Complaints'} + ) + + fig.update_traces( + line_color='#ec0000', + fill='tozeroy', + fillcolor='rgba(236, 0, 0, 0.1)' + ) + + fig.update_layout( + hovermode='x unified', + plot_bgcolor='white', + paper_bgcolor='white', + xaxis=dict(showgrid=False), + yaxis=dict(showgrid=True, gridcolor='rgba(0,0,0,0.1)') + ) + + return fig + + +def create_category_chart(df: pd.DataFrame) -> go.Figure: + """Create bar chart showing complaints by category. + + Args: + df: Filtered DataFrame. + + Returns: + Plotly figure object. + """ + if df.empty: + fig = go.Figure() + fig.add_annotation(text="No data available", xref="paper", yref="paper", + x=0.5, y=0.5, showarrow=False) + return fig + + category_counts = df['category'].value_counts().reset_index() + category_counts.columns = ['category', 'count'] + + fig = px.bar( + category_counts, + x='category', + y='count', + title='Complaints by Category', + labels={'category': 'Category', 'count': 'Number of Complaints'}, + color='count', + color_continuous_scale=['#ffcccc', '#ec0000'] + ) + + fig.update_layout( + plot_bgcolor='white', + paper_bgcolor='white', + xaxis=dict(showgrid=False, tickangle=45), + yaxis=dict(showgrid=True, gridcolor='rgba(0,0,0,0.1)'), + showlegend=False, + coloraxis_showscale=False + ) + + return fig + + +def create_severity_chart(df: pd.DataFrame) -> go.Figure: + """Create donut chart showing complaints by severity. + + Args: + df: Filtered DataFrame. + + Returns: + Plotly figure object. + """ + if df.empty: + fig = go.Figure() + fig.add_annotation(text="No data available", xref="paper", yref="paper", + x=0.5, y=0.5, showarrow=False) + return fig + + severity_counts = df['severity'].value_counts().reset_index() + severity_counts.columns = ['severity', 'count'] + + colors = {'Low': '#059669', 'Medium': '#2563eb', 'High': '#d97706', 'Critical': '#dc2626'} + severity_counts['color'] = severity_counts['severity'].map(colors) + + fig = go.Figure(data=[go.Pie( + labels=severity_counts['severity'], + values=severity_counts['count'], + hole=0.5, + marker_colors=severity_counts['color'] + )]) + + fig.update_layout( + title='Complaints by Severity', + plot_bgcolor='white', + paper_bgcolor='white' + ) + + return fig + + +def create_resolution_distribution_chart(df: pd.DataFrame) -> go.Figure: + """Create bar chart showing resolution time distribution. + + Args: + df: Filtered DataFrame. + + Returns: + Plotly figure object. + """ + if df.empty: + fig = go.Figure() + fig.add_annotation(text="No data available", xref="paper", yref="paper", + x=0.5, y=0.5, showarrow=False) + return fig + + def categorize_resolution(days: int) -> str: + if days == 0: + return 'Same day' + elif days <= 7: + return '1-7 days' + elif days <= 14: + return '8-14 days' + elif days <= 30: + return '15-30 days' + elif days <= 60: + return '31-60 days' + else: + return '60+ days' + + df_copy = df.copy() + df_copy['resolution_bucket'] = df_copy['resolution_days'].apply(categorize_resolution) + + bucket_order = ['Same day', '1-7 days', '8-14 days', '15-30 days', '31-60 days', '60+ days'] + bucket_counts = df_copy['resolution_bucket'].value_counts().reindex(bucket_order, fill_value=0) + + colors = ['#059669', '#10b981', '#34d399', '#fbbf24', '#f97316', '#dc2626'] + + fig = go.Figure(data=[go.Bar( + x=bucket_counts.index, + y=bucket_counts.values, + marker_color=colors + )]) + + fig.update_layout( + title='Resolution Time Distribution', + xaxis_title='Resolution Time', + yaxis_title='Number of Complaints', + plot_bgcolor='white', + paper_bgcolor='white', + xaxis=dict(showgrid=False), + yaxis=dict(showgrid=True, gridcolor='rgba(0,0,0,0.1)') + ) + + return fig + + +def display_outliers(df: pd.DataFrame) -> None: + """Display outlier highlights section. + + Args: + df: Filtered DataFrame. + """ + st.subheader("Outlier Highlights") + + col1, col2, col3 = st.columns(3) + + with col1: + extended_resolution = df[df['resolution_days'] > 60] + st.markdown(f""" +
{len(extended_resolution)} cases
+Complaints taking longer than 60 days to resolve
+{len(high_compensation)} cases
+Complaints with compensation exceeding £300
+{len(same_day)} cases
+May indicate quick fixes or data quality issues
+Santander UK Customer Complaints - Live Data Analysis
+" + f"Generated on {datetime.now().strftime('%B %d, %Y')} | " + f"Data Source: Supabase - santander_customer_complaints | " + f"Last refresh: {datetime.now().strftime('%H:%M:%S')}" + f"
", + unsafe_allow_html=True + ) + + +if __name__ == "__main__": + main() diff --git a/dashboard/requirements.txt b/dashboard/requirements.txt new file mode 100644 index 00000000..e98b62a4 --- /dev/null +++ b/dashboard/requirements.txt @@ -0,0 +1,5 @@ +streamlit>=1.28.0 +supabase>=2.0.0 +pandas>=2.0.0 +plotly>=5.18.0 +python-dotenv>=1.0.0