From 838743a358d2c8a85ad381dafb1913523de7787a Mon Sep 17 00:00:00 2001
From: Desmond <deskaygraphics@gmail.com>
Date: Fri, 18 Apr 2025 01:18:52 -0500
Subject: [PATCH 1/4] add ignore new

---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 15201ac..88432a6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,6 @@ __pycache__/
 
 # C extensions
 *.so
-
 # Distribution / packaging
 .Python
 build/
@@ -169,3 +168,4 @@ cython_debug/
 
 # PyPI configuration file
 .pypirc
+data/Extracted_values_4yearys.csv

From 25c41a6c2d5b70b4e685985ab06533bdfd52ed61 Mon Sep 17 00:00:00 2001
From: Desmond <deskaygraphics@gmail.com>
Date: Fri, 18 Apr 2025 01:19:11 -0500
Subject: [PATCH 2/4] add pre-commit config

---
 .pre-commit-config.yaml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..84af8e5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,32 @@
+repos:
+    - repo: https://github.com/pre-commit/pre-commit-hooks
+      rev: v5.0.0
+      hooks:
+          - id: check-toml
+          - id: check-yaml
+          - id: end-of-file-fixer
+            types: [python]
+          - id: trailing-whitespace
+          - id: requirements-txt-fixer
+          - id: check-added-large-files
+            args: ["--maxkb=500"]
+
+    - repo: https://github.com/psf/black
+      rev: 25.1.0
+      hooks:
+          - id: black-jupyter
+
+    - repo: https://github.com/codespell-project/codespell
+      rev: v2.4.1
+      hooks:
+          - id: codespell
+            args:
+                [
+                    "--ignore-words-list=aci,acount,acounts,fallow,ges,hart,hist,nd,ned,ois,wqs,watermask,tre,mape",
+                    "--skip=*.csv,*.geojson,*.json,*.yml,*.js,*.html,*cff,*.pdf",
+                ]
+
+    - repo: https://github.com/kynan/nbstripout
+      rev: 0.8.1
+      hooks:
+          - id: nbstripout

From 915fe7f0ec8eb350444f9433de6854545ae649a8 Mon Sep 17 00:00:00 2001
From: Desmond <deskaygraphics@gmail.com>
Date: Sun, 20 Apr 2025 04:08:07 -0500
Subject: [PATCH 3/4] update code

---
 data/insar.ipynb | 502 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 502 insertions(+)
 create mode 100644 data/insar.ipynb

diff --git a/data/insar.ipynb b/data/insar.ipynb
new file mode 100644
index 0000000..9b051b2
--- /dev/null
+++ b/data/insar.ipynb
@@ -0,0 +1,502 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import geopandas as gpd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import matplotlib.dates as mdates"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data = pd.read_csv(\n",
+    "    \"/home/kangah/Desktop/GIS_programming/Geospatial/data/Extracted_values_4yearys.csv\"\n",
+    ")\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Real = data.iloc[:, 3:13]\n",
+    "Real"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "missing_percent = (Real.isnull().sum() / len(Real)) * 100\n",
+    "print(missing_percent)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "Real_clean = Real.dropna()\n",
+    "Real_clean"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Real_clean.columns = ['Velocity', 'Top_Wetness_Index', 'Precipitation', 'LULC', 'DistanceFromFault', 'DistanceFromRoad', 'DistanceFromRiver','DEM' 'Geology', 'Aspect']\n",
+    "Real_clean.rename(\n",
+    "    columns={\n",
+    "        \"velocity\": \"Velocity\",\n",
+    "        \"TWI\": \"Top_Wetness_Index\",\n",
+    "        \"extract_prec1\": \"Precipitation\",\n",
+    "        \"extract_lulc1\": \"LULC\",\n",
+    "        \"eucdist_faul1\": \"DistanceFromFault\",\n",
+    "        \"distanceFromRoad\": \"DistanceFromRoad\",\n",
+    "        \"distanceFromriver\": \"DistanceFromRiver\",\n",
+    "        \"dem\": \"DEM\",\n",
+    "        \"Geology_CONUS_Clip_PolygonToRaster1\": \"Geology\",\n",
+    "        \"Aspect_DEM2\": \"Aspect\",\n",
+    "    },\n",
+    "    inplace=True,\n",
+    ")\n",
+    "Real_clean"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pearson_correlation_matrix = Real_clean.corr(method=\"pearson\")\n",
+    "\n",
+    "plt.figure(figsize=(12, 10))\n",
+    "sns.heatmap(\n",
+    "    pearson_correlation_matrix, annot=True, fmt=\".2f\", cmap=\"coolwarm\", cbar=True\n",
+    ")\n",
+    "plt.title(\"Pearson Correlation Matrix Heatmap\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.preprocessing import StandardScaler"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = Real_clean.drop(columns=[\"Velocity\"])\n",
+    "y = Real_clean[\"Velocity\"]\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=42\n",
+    ")\n",
+    "scaler = StandardScaler()\n",
+    "\n",
+    "## By Kangah (Surveyor, Civil and Geospatial Engineer)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_train.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "11",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_test.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_test.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x_train = scaler.fit_transform(X_train)\n",
+    "x_test = scaler.transform(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "14",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import RandomForestRegressor\n",
+    "from sklearn.metrics import mean_squared_error, r2_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "RS_model = RandomForestRegressor(n_estimators=100, random_state=42)\n",
+    "RS_model.fit(x_train, y_train)\n",
+    "y_pred = RS_model.predict(x_test)\n",
+    "mse = mean_squared_error(y_test, y_pred)\n",
+    "r2 = r2_score(y_test, y_pred)\n",
+    "print(f\"Mean Squared Error: {mse}\")\n",
+    "print(f\"R^2 Score: {r2}\")  # By Kangah"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plotting the training and testing curves\n",
+    "plt.figure(figsize=(12, 6))\n",
+    "\n",
+    "# Training data\n",
+    "plt.scatter(\n",
+    "    y_train, RS_model.predict(x_train), color=\"blue\", alpha=0.5, label=\"Train Data\"\n",
+    ")\n",
+    "# Testing data\n",
+    "plt.scatter(y_test, y_pred, color=\"red\", alpha=0.5, label=\"Test Data\")\n",
+    "\n",
+    "# Plotting the ideal line\n",
+    "plt.plot([y.min(), y.max()], [y.min(), y.max()], \"k--\", lw=2, label=\"Ideal Fit\")\n",
+    "\n",
+    "# Labels and legend\n",
+    "plt.xlabel(\"True Values\")\n",
+    "plt.ylabel(\"Predicted Values\")\n",
+    "plt.title(\"Train and Test Curves\")\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "17",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get feature importances from the model\n",
+    "importances = RS_model.feature_importances_\n",
+    "feature_names = X.columns\n",
+    "\n",
+    "# Create DataFrame\n",
+    "gini_df = pd.DataFrame(\n",
+    "    {\"Feature\": feature_names, \"Importance\": importances}\n",
+    ").sort_values(by=\"Importance\", ascending=True)\n",
+    "\n",
+    "# Plot\n",
+    "fig, ax = plt.subplots(figsize=(10, 6))\n",
+    "bars = ax.barh(\n",
+    "    gini_df[\"Feature\"], gini_df[\"Importance\"], color=\"coral\", alpha=0.8, height=0.4\n",
+    ")\n",
+    "\n",
+    "# Add central black dot\n",
+    "for i, imp in enumerate(gini_df[\"Importance\"]):\n",
+    "    ax.plot(imp, i, \"ko\")\n",
+    "\n",
+    "# Add a box showing the method\n",
+    "ax.text(\n",
+    "    0.95,\n",
+    "    0.05,\n",
+    "    \"■ Mean Decrease Gini\",\n",
+    "    transform=ax.transAxes,\n",
+    "    fontsize=12,\n",
+    "    verticalalignment=\"bottom\",\n",
+    "    horizontalalignment=\"right\",\n",
+    "    color=\"OrangeRed\",\n",
+    ")\n",
+    "\n",
+    "# Labels\n",
+    "ax.set_xlabel(\"Mean Decrease in Gini (Feature Importance)\", fontsize=12)\n",
+    "ax.set_ylabel(\"Land Susceptibility Influencing Factors\", fontsize=12)\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.inspection import permutation_importance\n",
+    "\n",
+    "# Evaluate permutation importance\n",
+    "result = permutation_importance(\n",
+    "    RS_model, x_test, y_test, n_repeats=10, random_state=42, n_jobs=-1\n",
+    ")\n",
+    "\n",
+    "# Create DataFrame\n",
+    "perm_df = pd.DataFrame(\n",
+    "    {\n",
+    "        \"Feature\": X.columns,\n",
+    "        \"Importance\": result.importances_mean,\n",
+    "        \"Std\": result.importances_std,\n",
+    "    }\n",
+    ").sort_values(by=\"Importance\", ascending=True)\n",
+    "\n",
+    "# Create plot\n",
+    "fig, ax = plt.subplots(figsize=(10, 6))\n",
+    "\n",
+    "# Plot bars with error bars\n",
+    "ax.barh(\n",
+    "    perm_df[\"Feature\"],\n",
+    "    perm_df[\"Importance\"],\n",
+    "    xerr=perm_df[\"Std\"],\n",
+    "    alpha=0.7,\n",
+    "    height=0.4,\n",
+    "    color=\"coral\",\n",
+    ")\n",
+    "\n",
+    "# Add label with square bullet\n",
+    "ax.text(\n",
+    "    0.95,\n",
+    "    0.05,\n",
+    "    \"■ Mean Decrease Accuracy\",\n",
+    "    transform=ax.transAxes,\n",
+    "    fontsize=12,\n",
+    "    verticalalignment=\"bottom\",\n",
+    "    horizontalalignment=\"right\",\n",
+    "    color=\"OrangeRed\",\n",
+    ")\n",
+    "\n",
+    "# Labels\n",
+    "ax.set_xlabel(\"Mean Decrease in Accuracy (Permutation Importance)\")\n",
+    "ax.set_title(\"Permutation Feature Importance\")\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.svm import SVR\n",
+    "\n",
+    "# Initialize the SVR model\n",
+    "svr_model = SVR(kernel=\"rbf\", C=1.0, epsilon=0.1)\n",
+    "\n",
+    "# Fit the model to the training data\n",
+    "svr_model.fit(x_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "20",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred_svr = svr_model.predict(x_test)\n",
+    "mse_svr = mean_squared_error(y_test, y_pred_svr)\n",
+    "r2_svr = r2_score(y_test, y_pred_svr)\n",
+    "print(f\"SVR Mean Squared Error: {mse_svr}\")\n",
+    "print(f\"SVR R^2 Score: {r2_svr}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from xgboost import XGBRegressor\n",
+    "\n",
+    "# Initialize the XGBoost Regressor\n",
+    "xgb_model = XGBRegressor(\n",
+    "    objective=\"reg:squarederror\", n_estimators=100, random_state=42\n",
+    ")\n",
+    "\n",
+    "# Fit the model to the training data\n",
+    "xgb_model.fit(x_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_pred_xgb = xgb_model.predict(x_test)\n",
+    "mse_xgb = mean_squared_error(y_test, y_pred_xgb)\n",
+    "r2_xgb = r2_score(y_test, y_pred_xgb)\n",
+    "print(f\"XGBoost Mean Squared Error: {mse_xgb}\")\n",
+    "print(f\"XGBoost R^2 Score: {r2_xgb}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import roc_curve, auc\n",
+    "from sklearn.preprocessing import Binarizer\n",
+    "\n",
+    "# Binarize the y_test and predictions\n",
+    "threshold = 0  # Define a threshold for binarization\n",
+    "binarizer = Binarizer(threshold=threshold)\n",
+    "\n",
+    "y_test_binary = binarizer.fit_transform(y_test.values.reshape(-1, 1)).ravel()\n",
+    "y_pred_binary = binarizer.transform(y_pred.reshape(-1, 1)).ravel()\n",
+    "y_pred_svr_binary = binarizer.transform(y_pred_svr.reshape(-1, 1)).ravel()\n",
+    "y_pred_xgb_binary = binarizer.transform(y_pred_xgb.reshape(-1, 1)).ravel()\n",
+    "\n",
+    "# Compute ROC curve and AUC for Random Forest\n",
+    "fpr_rf, tpr_rf, _ = roc_curve(y_test_binary, y_pred_binary)\n",
+    "roc_auc_rf = auc(fpr_rf, tpr_rf)\n",
+    "\n",
+    "# Compute ROC curve and AUC for SVR\n",
+    "fpr_svr, tpr_svr, _ = roc_curve(y_test_binary, y_pred_svr_binary)\n",
+    "roc_auc_svr = auc(fpr_svr, tpr_svr)\n",
+    "\n",
+    "# Compute ROC curve and AUC for XGBoost\n",
+    "fpr_xgb, tpr_xgb, _ = roc_curve(y_test_binary, y_pred_xgb_binary)\n",
+    "roc_auc_xgb = auc(fpr_xgb, tpr_xgb)\n",
+    "\n",
+    "# Plot the ROC curves\n",
+    "plt.figure(figsize=(10, 6))\n",
+    "plt.plot(\n",
+    "    fpr_rf, tpr_rf, color=\"blue\", lw=2, label=f\"Random Forest (AUC = {roc_auc_rf:.2f})\"\n",
+    ")\n",
+    "plt.plot(fpr_svr, tpr_svr, color=\"green\", lw=2, label=f\"SVR (AUC = {roc_auc_svr:.2f})\")\n",
+    "plt.plot(\n",
+    "    fpr_xgb, tpr_xgb, color=\"red\", lw=2, label=f\"XGBoost (AUC = {roc_auc_xgb:.2f})\"\n",
+    ")\n",
+    "\n",
+    "# Plot the diagonal line\n",
+    "plt.plot([0, 1], [0, 1], \"k--\", lw=2)\n",
+    "\n",
+    "# Labels and legend\n",
+    "plt.xlabel(\"1 - False Positive Rate(Specificity)\")\n",
+    "plt.ylabel(\"Sensitivity (True Positive Rate)\")\n",
+    "plt.title(\"Receiver Operating Characteristic (ROC) Curves\")\n",
+    "plt.legend(loc=\"lower right\")\n",
+    "plt.legend(loc=\"lower right\")\n",
+    "plt.grid()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import shap"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "25",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "explainer = shap.TreeExplainer(RS_model)\n",
+    "shap_values = explainer.shap_values(X_test)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "geo",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 2933ca837091c2678cd1a2af42835cf446b02d84 Mon Sep 17 00:00:00 2001
From: Desmond <deskaygraphics@gmail.com>
Date: Wed, 14 May 2025 12:52:58 -0500
Subject: [PATCH 4/4] add code new

---
 .gitignore                          |   4 +
 data/{insar.ipynb => insar_2.ipynb} | 277 +++++++++++-----------------
 2 files changed, 110 insertions(+), 171 deletions(-)
 rename data/{insar.ipynb => insar_2.ipynb} (59%)

diff --git a/.gitignore b/.gitignore
index 88432a6..1d45c3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,7 @@ cython_debug/
 # PyPI configuration file
 .pypirc
 data/Extracted_values_4yearys.csv
+data/insar.ipynb
+data/final_results.csv
+data/data2.dbf-20250420T151602Z-001/data2.dbf
+data/insar.ipynb
diff --git a/data/insar.ipynb b/data/insar_2.ipynb
similarity index 59%
rename from data/insar.ipynb
rename to data/insar_2.ipynb
index 9b051b2..30c2a50 100644
--- a/data/insar.ipynb
+++ b/data/insar_2.ipynb
@@ -22,10 +22,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "data = pd.read_csv(\n",
-    "    \"/home/kangah/Desktop/GIS_programming/Geospatial/data/Extracted_values_4yearys.csv\"\n",
+    "data2 = gpd.read_file(\n",
+    "    \"/home/kangah/Desktop/GIS_programming/Geospatial/data/data2.dbf-20250420T151602Z-001/data2.dbf\"\n",
     ")\n",
-    "data"
+    "data2"
    ]
   },
   {
@@ -35,7 +35,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "Real = data.iloc[:, 3:13]\n",
+    "Real = data2.iloc[:, 2:13]\n",
     "Real"
    ]
   },
@@ -68,23 +68,23 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Real_clean.columns = ['Velocity', 'Top_Wetness_Index', 'Precipitation', 'LULC', 'DistanceFromFault', 'DistanceFromRoad', 'DistanceFromRiver','DEM' 'Geology', 'Aspect']\n",
-    "Real_clean.rename(\n",
-    "    columns={\n",
-    "        \"velocity\": \"Velocity\",\n",
-    "        \"TWI\": \"Top_Wetness_Index\",\n",
-    "        \"extract_prec1\": \"Precipitation\",\n",
-    "        \"extract_lulc1\": \"LULC\",\n",
-    "        \"eucdist_faul1\": \"DistanceFromFault\",\n",
-    "        \"distanceFromRoad\": \"DistanceFromRoad\",\n",
-    "        \"distanceFromriver\": \"DistanceFromRiver\",\n",
-    "        \"dem\": \"DEM\",\n",
-    "        \"Geology_CONUS_Clip_PolygonToRaster1\": \"Geology\",\n",
-    "        \"Aspect_DEM2\": \"Aspect\",\n",
-    "    },\n",
-    "    inplace=True,\n",
-    ")\n",
-    "Real_clean"
+    "# # Real_clean.columns = ['Velocity', 'Top_Wetness_Index', 'Precipitation', 'LULC', 'DistanceFromFault', 'DistanceFromRoad', 'DistanceFromRiver','DEM' 'Geology', 'Aspect']\n",
+    "# Real_clean.rename(\n",
+    "#     columns={\n",
+    "#         \"velocity\": \"Velocity\",\n",
+    "#         \"TWI\": \"Top_Wetness_Index\",\n",
+    "#         \"extract_prec1\": \"Precipitation\",\n",
+    "#         \"extract_lulc1\": \"LULC\",\n",
+    "#         \"eucdist_faul1\": \"DistanceFromFault\",\n",
+    "#         \"distanceFromRoad\": \"DistanceFromRoad\",\n",
+    "#         \"distanceFromriver\": \"DistanceFromRiver\",\n",
+    "#         \"dem\": \"DEM\",\n",
+    "#         \"Geology_CONUS_Clip_PolygonToRaster1\": \"Geology\",\n",
+    "#         \"Aspect_DEM2\": \"Aspect\",\n",
+    "#     },\n",
+    "#     inplace=True,\n",
+    "# )\n",
+    "# Real_clean"
    ]
   },
   {
@@ -122,8 +122,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "X = Real_clean.drop(columns=[\"Velocity\"])\n",
-    "y = Real_clean[\"Velocity\"]\n",
+    "X = Real_clean.drop(columns=[\"velocity\"])\n",
+    "y = Real_clean[\"velocity\"]\n",
     "X_train, X_test, y_train, y_test = train_test_split(\n",
     "    X, y, test_size=0.2, random_state=42\n",
     ")\n",
@@ -216,34 +216,6 @@
    "id": "16",
    "metadata": {},
    "outputs": [],
-   "source": [
-    "# Plotting the training and testing curves\n",
-    "plt.figure(figsize=(12, 6))\n",
-    "\n",
-    "# Training data\n",
-    "plt.scatter(\n",
-    "    y_train, RS_model.predict(x_train), color=\"blue\", alpha=0.5, label=\"Train Data\"\n",
-    ")\n",
-    "# Testing data\n",
-    "plt.scatter(y_test, y_pred, color=\"red\", alpha=0.5, label=\"Test Data\")\n",
-    "\n",
-    "# Plotting the ideal line\n",
-    "plt.plot([y.min(), y.max()], [y.min(), y.max()], \"k--\", lw=2, label=\"Ideal Fit\")\n",
-    "\n",
-    "# Labels and legend\n",
-    "plt.xlabel(\"True Values\")\n",
-    "plt.ylabel(\"Predicted Values\")\n",
-    "plt.title(\"Train and Test Curves\")\n",
-    "plt.legend()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "17",
-   "metadata": {},
-   "outputs": [],
    "source": [
     "# Get feature importances from the model\n",
     "importances = RS_model.feature_importances_\n",
@@ -287,57 +259,32 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "18",
+   "id": "17",
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.inspection import permutation_importance\n",
-    "\n",
-    "# Evaluate permutation importance\n",
-    "result = permutation_importance(\n",
-    "    RS_model, x_test, y_test, n_repeats=10, random_state=42, n_jobs=-1\n",
-    ")\n",
-    "\n",
-    "# Create DataFrame\n",
-    "perm_df = pd.DataFrame(\n",
+    "# Combine the actual and predicted values with coordinates\n",
+    "results_df = pd.DataFrame(\n",
     "    {\n",
-    "        \"Feature\": X.columns,\n",
-    "        \"Importance\": result.importances_mean,\n",
-    "        \"Std\": result.importances_std,\n",
+    "        \"Longitude\": data2.loc[y_test.index, \"long\"],\n",
+    "        \"Latitude\": data2.loc[y_test.index, \"lat\"],\n",
+    "        \"Actual Velocity\": y_test.values,\n",
+    "        \"Predicted Velocity\": y_pred,\n",
     "    }\n",
-    ").sort_values(by=\"Importance\", ascending=True)\n",
-    "\n",
-    "# Create plot\n",
-    "fig, ax = plt.subplots(figsize=(10, 6))\n",
-    "\n",
-    "# Plot bars with error bars\n",
-    "ax.barh(\n",
-    "    perm_df[\"Feature\"],\n",
-    "    perm_df[\"Importance\"],\n",
-    "    xerr=perm_df[\"Std\"],\n",
-    "    alpha=0.7,\n",
-    "    height=0.4,\n",
-    "    color=\"coral\",\n",
-    ")\n",
-    "\n",
-    "# Add label with square bullet\n",
-    "ax.text(\n",
-    "    0.95,\n",
-    "    0.05,\n",
-    "    \"■ Mean Decrease Accuracy\",\n",
-    "    transform=ax.transAxes,\n",
-    "    fontsize=12,\n",
-    "    verticalalignment=\"bottom\",\n",
-    "    horizontalalignment=\"right\",\n",
-    "    color=\"OrangeRed\",\n",
     ")\n",
     "\n",
-    "# Labels\n",
-    "ax.set_xlabel(\"Mean Decrease in Accuracy (Permutation Importance)\")\n",
-    "ax.set_title(\"Permutation Feature Importance\")\n",
-    "\n",
-    "plt.tight_layout()\n",
-    "plt.show()"
+    "# Display the DataFrame\n",
+    "print(results_df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results_df.to_csv(\"final_results.csv\", index=False)"
    ]
   },
   {
@@ -347,13 +294,53 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.svm import SVR\n",
+    "# from sklearn.inspection import permutation_importance\n",
     "\n",
-    "# Initialize the SVR model\n",
-    "svr_model = SVR(kernel=\"rbf\", C=1.0, epsilon=0.1)\n",
+    "# # Evaluate permutation importance\n",
+    "# result = permutation_importance(\n",
+    "#     RS_model, x_test, y_test, n_repeats=10, random_state=42, n_jobs=-1\n",
+    "# )\n",
     "\n",
-    "# Fit the model to the training data\n",
-    "svr_model.fit(x_train, y_train)"
+    "# # Create DataFrame\n",
+    "# perm_df = pd.DataFrame(\n",
+    "#     {\n",
+    "#         \"Feature\": X.columns,\n",
+    "#         \"Importance\": result.importances_mean,\n",
+    "#         \"Std\": result.importances_std,\n",
+    "#     }\n",
+    "# ).sort_values(by=\"Importance\", ascending=True)\n",
+    "\n",
+    "# # Create plot\n",
+    "# fig, ax = plt.subplots(figsize=(10, 6))\n",
+    "\n",
+    "# # Plot bars with error bars\n",
+    "# ax.barh(\n",
+    "#     perm_df[\"Feature\"],\n",
+    "#     perm_df[\"Importance\"],\n",
+    "#     xerr=perm_df[\"Std\"],\n",
+    "#     alpha=0.7,\n",
+    "#     height=0.4,\n",
+    "#     color=\"coral\",\n",
+    "# )\n",
+    "\n",
+    "# # Add label with square bullet\n",
+    "# ax.text(\n",
+    "#     0.95,\n",
+    "#     0.05,\n",
+    "#     \"■ Mean Decrease Accuracy\",\n",
+    "#     transform=ax.transAxes,\n",
+    "#     fontsize=12,\n",
+    "#     verticalalignment=\"bottom\",\n",
+    "#     horizontalalignment=\"right\",\n",
+    "#     color=\"OrangeRed\",\n",
+    "# )\n",
+    "\n",
+    "# # Labels\n",
+    "# ax.set_xlabel(\"Mean Decrease in Accuracy (Permutation Importance)\")\n",
+    "# ax.set_title(\"Permutation Feature Importance\")\n",
+    "\n",
+    "# plt.tight_layout()\n",
+    "# plt.show()"
    ]
   },
   {
@@ -363,11 +350,24 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "y_pred_svr = svr_model.predict(x_test)\n",
-    "mse_svr = mean_squared_error(y_test, y_pred_svr)\n",
-    "r2_svr = r2_score(y_test, y_pred_svr)\n",
-    "print(f\"SVR Mean Squared Error: {mse_svr}\")\n",
-    "print(f\"SVR R^2 Score: {r2_svr}\")"
+    "from sklearn.inspection import permutation_importance\n",
+    "\n",
+    "# Evaluate permutation importance\n",
+    "result = permutation_importance(\n",
+    "    RS_model, x_test, y_test, n_repeats=10, random_state=42, n_jobs=-1\n",
+    ")\n",
+    "\n",
+    "# Create DataFrame\n",
+    "perm_df = pd.DataFrame(\n",
+    "    {\n",
+    "        \"Feature\": X.columns,\n",
+    "        \"Importance\": result.importances_mean,\n",
+    "        \"Std\": result.importances_std,\n",
+    "    }\n",
+    ").sort_values(by=\"Importance\", ascending=True)\n",
+    "\n",
+    "# Display the DataFrame\n",
+    "print(perm_df)"
    ]
   },
   {
@@ -409,72 +409,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.metrics import roc_curve, auc\n",
-    "from sklearn.preprocessing import Binarizer\n",
-    "\n",
-    "# Binarize the y_test and predictions\n",
-    "threshold = 0  # Define a threshold for binarization\n",
-    "binarizer = Binarizer(threshold=threshold)\n",
-    "\n",
-    "y_test_binary = binarizer.fit_transform(y_test.values.reshape(-1, 1)).ravel()\n",
-    "y_pred_binary = binarizer.transform(y_pred.reshape(-1, 1)).ravel()\n",
-    "y_pred_svr_binary = binarizer.transform(y_pred_svr.reshape(-1, 1)).ravel()\n",
-    "y_pred_xgb_binary = binarizer.transform(y_pred_xgb.reshape(-1, 1)).ravel()\n",
-    "\n",
-    "# Compute ROC curve and AUC for Random Forest\n",
-    "fpr_rf, tpr_rf, _ = roc_curve(y_test_binary, y_pred_binary)\n",
-    "roc_auc_rf = auc(fpr_rf, tpr_rf)\n",
-    "\n",
-    "# Compute ROC curve and AUC for SVR\n",
-    "fpr_svr, tpr_svr, _ = roc_curve(y_test_binary, y_pred_svr_binary)\n",
-    "roc_auc_svr = auc(fpr_svr, tpr_svr)\n",
-    "\n",
-    "# Compute ROC curve and AUC for XGBoost\n",
-    "fpr_xgb, tpr_xgb, _ = roc_curve(y_test_binary, y_pred_xgb_binary)\n",
-    "roc_auc_xgb = auc(fpr_xgb, tpr_xgb)\n",
-    "\n",
-    "# Plot the ROC curves\n",
-    "plt.figure(figsize=(10, 6))\n",
-    "plt.plot(\n",
-    "    fpr_rf, tpr_rf, color=\"blue\", lw=2, label=f\"Random Forest (AUC = {roc_auc_rf:.2f})\"\n",
-    ")\n",
-    "plt.plot(fpr_svr, tpr_svr, color=\"green\", lw=2, label=f\"SVR (AUC = {roc_auc_svr:.2f})\")\n",
-    "plt.plot(\n",
-    "    fpr_xgb, tpr_xgb, color=\"red\", lw=2, label=f\"XGBoost (AUC = {roc_auc_xgb:.2f})\"\n",
-    ")\n",
-    "\n",
-    "# Plot the diagonal line\n",
-    "plt.plot([0, 1], [0, 1], \"k--\", lw=2)\n",
-    "\n",
-    "# Labels and legend\n",
-    "plt.xlabel(\"1 - False Positive Rate(Specificity)\")\n",
-    "plt.ylabel(\"Sensitivity (True Positive Rate)\")\n",
-    "plt.title(\"Receiver Operating Characteristic (ROC) Curves\")\n",
-    "plt.legend(loc=\"lower right\")\n",
-    "plt.legend(loc=\"lower right\")\n",
-    "plt.grid()\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "24",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import shap"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "25",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "explainer = shap.TreeExplainer(RS_model)\n",
-    "shap_values = explainer.shap_values(X_test)"
+    "y_train"
    ]
   }
  ],