From e6d49b13a3908a03a0da08eee535cfe841f03658 Mon Sep 17 00:00:00 2001
From: Dev-Jeff28 <71960243+Dev-Jeff28@users.noreply.github.com>
Date: Sat, 25 Oct 2025 09:22:51 +0530
Subject: [PATCH 1/5] Create .gitkeep

---
 Data_Science/streamlit_ml_app/.gitkeep | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 Data_Science/streamlit_ml_app/.gitkeep

diff --git a/Data_Science/streamlit_ml_app/.gitkeep b/Data_Science/streamlit_ml_app/.gitkeep
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/Data_Science/streamlit_ml_app/.gitkeep
@@ -0,0 +1 @@
+

From 1bb6c4978c682b3d5b695c081487a7bf0c7621b8 Mon Sep 17 00:00:00 2001
From: Dev-Jeff28 <71960243+Dev-Jeff28@users.noreply.github.com>
Date: Sat, 25 Oct 2025 09:23:39 +0530
Subject: [PATCH 2/5] Add files via upload

---
 Data_Science/streamlit_ml_app/Readme.md |  26 ++
 .../streamlit_ml_app/requirements.txt   |   7 +
 .../streamlit_ml_app/streamlit_app.py   | 140 ++++++++++++++++++
 3 files changed, 173 insertions(+)
 create mode 100644 Data_Science/streamlit_ml_app/Readme.md
 create mode 100644 Data_Science/streamlit_ml_app/requirements.txt
 create mode 100644 Data_Science/streamlit_ml_app/streamlit_app.py

diff --git a/Data_Science/streamlit_ml_app/Readme.md b/Data_Science/streamlit_ml_app/Readme.md
new file mode 100644
index 0000000..0194ffe
--- /dev/null
+++ b/Data_Science/streamlit_ml_app/Readme.md
@@ -0,0 +1,26 @@
+R# 🧠 Interactive Streamlit ML App
+
+A small Streamlit web application that allows users to **upload a CSV file**, **choose a machine learning model**, and **view model performance metrics interactively**.
+
+This fulfills the GitHub issue:
+
+> **"Build a small Streamlit app in `/Data_Science/` that allows uploading a CSV, choosing a model, and viewing metrics interactively."**
+
+---
+
+## 🚀 Features
+
+- 📂 Upload any CSV dataset
+- 🎯 Select your **target (label) column**
+- ⚙️ Choose from built-in ML models (Logistic Regression, Random Forest, or a baseline Dummy classifier)
+- 📊 View metrics such as Accuracy, Precision, Recall, F1-Score, and Confusion Matrix
+- 🧩 Adjustable train-test split and random seed
+- 🧰 Supports **classification** targets (regression is not yet implemented)
+
+---
+
+## 🧰 Installation & Requirements
+pip install -r requirements.txt
+
+▶️ Running the App
+streamlit run streamlit_app.py
diff --git a/Data_Science/streamlit_ml_app/requirements.txt b/Data_Science/streamlit_ml_app/requirements.txt
new file mode 100644
index 0000000..f12f2d8
--- /dev/null
+++ b/Data_Science/streamlit_ml_app/requirements.txt
@@ -0,0 +1,7 @@
+streamlit>=1.20
+pandas
+scikit-learn
+matplotlib
+seaborn
+numpy
+plotly
diff --git a/Data_Science/streamlit_ml_app/streamlit_app.py b/Data_Science/streamlit_ml_app/streamlit_app.py
new file mode 100644
index 0000000..607d221
--- /dev/null
+++ b/Data_Science/streamlit_ml_app/streamlit_app.py
@@ -0,0 +1,140 @@
+# data_science/streamlit_app.py
+import streamlit as st
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import Pipeline
+from sklearn.linear_model import LogisticRegression
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.dummy import DummyClassifier
+from sklearn.metrics import (
+    accuracy_score, precision_score, recall_score, f1_score,
+    confusion_matrix, classification_report, roc_auc_score, roc_curve
+)
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+st.set_page_config(page_title="Data Science Demo", layout="wide")
+
+st.title("Small Streamlit Data Science App")
+st.markdown("Upload a CSV, pick the target column, choose a model, and view metrics.")
+
+uploaded = st.file_uploader("Upload a CSV file", type=["csv"])
+if uploaded is None:
+    st.info("Upload a CSV to get started. Example: a classification dataset with a target column.")
+    st.stop()
+
+# read csv
+df = pd.read_csv(uploaded)
+st.write("### Preview of uploaded data", df.head())
+
+# choose target
+all_columns = df.columns.tolist()
+target = st.selectbox("Select target column (label)", options=all_columns)
+
+# simple features selection: drop non-numeric by default but allow user to choose
+st.write("Select feature columns (default: numeric columns excluding target)")
+numeric = df.select_dtypes(include=[np.number]).columns.tolist()
+default_features = [c for c in numeric if c != target]
+features = st.multiselect("Features", options=all_columns, default=default_features)
+
+if len(features) == 0:
+    st.error("Please select at least one feature column.")
+    st.stop()
+
+# task type detection (very naive)
+unique_vals = df[target].nunique()
+task_type = "classification" if unique_vals <= 20 else "regression (not implemented)"
+st.write(f"Detected: **{task_type}** (unique labels: {unique_vals})")
+
+if task_type != "classification":
+    st.warning("This demo only supports classification. Choose a categorical/binary target.")
+    st.stop()
+
+# train/test split params
+test_size = st.sidebar.slider("Test size (%)", min_value=10, max_value=50, value=25) / 100.0
+random_state = st.sidebar.number_input("Random state", min_value=0, max_value=9999, value=42)
+
+# model selection
+model_name = st.selectbox("Choose model", ["Logistic Regression", "Random Forest", "Baseline Dummy"])
+if model_name == "Logistic Regression":
+    model = LogisticRegression(max_iter=1000)
+elif model_name == "Random Forest":
+    model = RandomForestClassifier(n_estimators=100, random_state=random_state)
+else:
+    model = DummyClassifier(strategy="most_frequent")
+
+# prepare data
+X = df[features].copy()
+y = df[target].copy()
+
+# basic imputing and scaling pipeline
+pipeline = Pipeline([
+    ("imputer", SimpleImputer(strategy="mean")),
+    ("scaler", StandardScaler()),
+    ("clf", model)
+])
+
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)
+
+with st.spinner("Training model..."):
+    pipeline.fit(X_train, y_train)
+
+y_pred = pipeline.predict(X_test)
+metrics = {
+    "accuracy": accuracy_score(y_test, y_pred),
+    "precision_macro": precision_score(y_test, y_pred, average="macro", zero_division=0),
+    "recall_macro": recall_score(y_test, y_pred, average="macro", zero_division=0),
+    "f1_macro": f1_score(y_test, y_pred, average="macro", zero_division=0)
+}
+
+st.subheader("Metrics")
+col1, col2, col3, col4 = st.columns(4)
+col1.metric("Accuracy", f"{metrics['accuracy']:.4f}")
+col2.metric("Precision (macro)", f"{metrics['precision_macro']:.4f}")
+col3.metric("Recall (macro)", f"{metrics['recall_macro']:.4f}")
+col4.metric("F1 (macro)", f"{metrics['f1_macro']:.4f}")
+
+st.subheader("Classification report")
+st.text(classification_report(y_test, y_pred, zero_division=0))
+
+st.subheader("Confusion matrix")
+cm = confusion_matrix(y_test, y_pred)
+fig, ax = plt.subplots()
+sns.heatmap(cm, annot=True, fmt="d", ax=ax)
+ax.set_xlabel("Predicted")
+ax.set_ylabel("Actual")
+st.pyplot(fig)
+
+# ROC AUC for binary problems
+if len(np.unique(y_test)) == 2:
+    try:
+        y_score = pipeline.predict_proba(X_test)[:, 1]
+        auc = roc_auc_score(y_test, y_score)
+        st.write(f"ROC AUC: **{auc:.4f}**")
+        fpr, tpr, _ = roc_curve(y_test, y_score)
+        fig2, ax2 = plt.subplots()
+        ax2.plot(fpr, tpr)
+        ax2.plot([0,1],[0,1],"--")
+        ax2.set_xlabel("FPR")
+        ax2.set_ylabel("TPR")
+        ax2.set_title("ROC curve")
+        st.pyplot(fig2)
+    except Exception as e:
+        st.info("Model does not provide probability predictions to compute ROC AUC.")
+
+# feature importance (if model supports it)
+st.subheader("Feature importances (if available)")
+base_model = pipeline.named_steps["clf"]
+if hasattr(base_model, "feature_importances_"):
+    importances = base_model.feature_importances_
+    fi = pd.Series(importances, index=features).sort_values(ascending=False)
+    st.bar_chart(fi)
+elif hasattr(base_model, "coef_"):
+    coefs = np.abs(base_model.coef_).mean(axis=0)  # mean |coefficient| per feature across classes
+    fi = pd.Series(coefs, index=features).sort_values(ascending=False)
+    st.bar_chart(fi)
+else:
+    st.info("Selected model has no feature_importances_ or coef_.")

From 51fb603f1d6b68ea0752057c9141cc08e59ebaf9 Mon Sep 17 00:00:00 2001
From: Dev-Jeff28 <71960243+Dev-Jeff28@users.noreply.github.com>
Date: Sat, 25 Oct 2025 09:24:24 +0530
Subject: [PATCH 3/5] Update Readme.md

---
 Data_Science/streamlit_ml_app/Readme.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Data_Science/streamlit_ml_app/Readme.md b/Data_Science/streamlit_ml_app/Readme.md
index 0194ffe..ff7b146 100644
--- a/Data_Science/streamlit_ml_app/Readme.md
+++ b/Data_Science/streamlit_ml_app/Readme.md
@@ -1,4 +1,4 @@
-R# 🧠 Interactive Streamlit ML App
+## 🧠 Interactive Streamlit ML App
 
 A small Streamlit web application that allows users to **upload a CSV file**, **choose a machine learning model**, and **view model performance metrics interactively**.
 
@@ -24,3 +24,4 @@ pip install -r requirements.txt
 
 ▶️ Running the App
 streamlit run streamlit_app.py
+

From 14a9a39468302a82a97e20a23180ab39e78b5186 Mon Sep 17 00:00:00 2001
From: Dev-Jeff28 <71960243+Dev-Jeff28@users.noreply.github.com>
Date: Sat, 25 Oct 2025 09:25:06 +0530
Subject: [PATCH 4/5] Update Readme.md

---
 Data_Science/streamlit_ml_app/Readme.md | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/Data_Science/streamlit_ml_app/Readme.md b/Data_Science/streamlit_ml_app/Readme.md
index ff7b146..8326d56 100644
--- a/Data_Science/streamlit_ml_app/Readme.md
+++ b/Data_Science/streamlit_ml_app/Readme.md
@@ -2,11 +2,6 @@
 
 A small Streamlit web application that allows users to **upload a CSV file**, **choose a machine learning model**, and **view model performance metrics interactively**.
 
-This fulfills the GitHub issue:
-
-> **"Build a small Streamlit app in `/Data_Science/` that allows uploading a CSV, choosing a model, and viewing metrics interactively."**
-
----
 
 ## 🚀 Features
 
@@ -25,3 +20,4 @@ pip install -r requirements.txt
 ▶️ Running the App
 streamlit run streamlit_app.py
 
+

From 56e3697200bdc6f055edee1a11131b144178f013 Mon Sep 17 00:00:00 2001
From: Dev-Jeff28 <71960243+Dev-Jeff28@users.noreply.github.com>
Date: Sat, 25 Oct 2025 09:31:42 +0530
Subject: [PATCH 5/5] added a test.csv file for testing

---
 Data_Science/streamlit_ml_app/test.csv | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 Data_Science/streamlit_ml_app/test.csv

diff --git a/Data_Science/streamlit_ml_app/test.csv b/Data_Science/streamlit_ml_app/test.csv
new file mode 100644
index 0000000..a0da63d
--- /dev/null
+++ b/Data_Science/streamlit_ml_app/test.csv
@@ -0,0 +1,21 @@
+age,income,loan_approved
+25,40000,0
+35,60000,1
+45,80000,1
+30,50000,0
+50,90000,1
+28,42000,0
+42,75000,1
+39,65000,1
+33,48000,0
+55,100000,1
+26,41000,0
+31,53000,0
+48,85000,1
+29,46000,0
+41,70000,1
+38,62000,1
+52,95000,1
+34,56000,0
+47,88000,1
+40,72000,1
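
For reviewers who want to sanity-check the series without launching the UI, a minimal sketch of the same impute -> scale -> fit -> metrics flow the app implements, run non-interactively against the bundled test.csv. This is a hypothetical helper (smoke_test.py), not part of the patches: it assumes it is executed from Data_Science/streamlit_ml_app/ with pandas and scikit-learn installed, and the Random Forest choice and the 42 seed are illustrative defaults mirroring the app.

# smoke_test.py -- hypothetical helper, not included in the patch series.
# Reproduces the app's core pipeline on test.csv and prints a few metrics.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

df = pd.read_csv("test.csv")
X = df[["age", "income"]]   # feature columns from the sample dataset
y = df["loan_approved"]     # target column from the sample dataset

# Same preprocessing + model structure as streamlit_app.py
pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler()),
    ("clf", RandomForestClassifier(n_estimators=100, random_state=42)),
])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

print("accuracy:", accuracy_score(y_test, y_pred))
print("f1 (macro):", f1_score(y_test, y_pred, average="macro"))
print("confusion matrix:\n", confusion_matrix(y_test, y_pred))

With only 20 rows in test.csv this is a smoke test rather than an evaluation, so expect noisy metrics from the 5-row test split.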