diff --git a/Assignment 3/QV_Assignment3_Archi (1).ipynb b/Assignment 3/QV_Assignment3_Archi (1).ipynb new file mode 100644 index 00000000..9528b09e --- /dev/null +++ b/Assignment 3/QV_Assignment3_Archi (1).ipynb @@ -0,0 +1,333 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2cveR_jqy2Tm" + }, + "outputs": [], + "source": [ + "#Downloading dataset\n", + "import os\n", + "!mkdir -p ~/.kaggle\n", + "!cp kaggle.json ~/.kaggle/\n", + "!chmod 600 ~/.kaggle/kaggle.json\n", + "!kaggle datasets download -d raimiazeezbabatunde/candle-image-data\n", + "!unzip -q candle-image-data.zip -d ./candlestick_data\n", + "print(\"DONE\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "STEPS:\n", + "1. Data collection\n", + "2. Preprocessing\n", + "3. Building the model\n", + "4. Training the model\n", + "5. Evaluating its performance\n", + "6. 
Visualizing the results\n" + ], + "metadata": { + "id": "fa9SmoaX4bES" + } + }, + { + "cell_type": "code", + "source": [ + "# Importing Libraries\n", + "import torch\n", + "from torch import nn\n", + "from torch.utils.data import DataLoader, random_split\n", + "import torchvision.transforms as transforms\n", + "from torchvision.datasets import ImageFolder\n" + ], + "metadata": { + "id": "_-n46hUU7QJb" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Resizing data and Data Augmentation\n", + "train_transform = transforms.Compose([\n", + " transforms.Resize((64,64)),\n", + " transforms.ColorJitter(brightness=0.2),\n", + " transforms.RandomCrop(64, padding=4),\n", + " transforms.ToTensor()\n", + "])\n", + "\n", + "test_transform = transforms.Compose([\n", + " transforms.Resize((64,64)),\n", + " transforms.ToTensor()\n", + "])\n", + "\n", + "# Loading dataset\n", + "train_dataset = ImageFolder(\n", + " root = '/content/candlestick_data',\n", + " transform=train_transform\n", + ")\n", + "test_dataset = ImageFolder(\n", + " root = '/content/candlestick_data',\n", + " transform=test_transform\n", + ")\n", + "\n", + "# Train-Test split\n", + "train_size = int(0.8*len(train_dataset))\n", + "test_size = len(train_dataset) - train_size\n", + "\n", + "generator = torch.Generator().manual_seed(11)\n", + "\n", + "train_data, _ = random_split(train_dataset, [train_size, test_size], generator=generator)\n", + "_, test_data = random_split(train_dataset, [train_size, test_size], generator=generator)\n", + "\n", + "\n", + "print(f\"Training Data: {len(train_data)} images\")\n", + "print(f\"Testing Data: {len(test_data)} images\")" + ], + "metadata": { + "id": "ugQMZC8ahkXN" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "batch_size = 32\n", + "\n", + "train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)\n", + "test_dataloader = DataLoader(test_data, 
batch_size=batch_size, shuffle=True)\n", + "\n", + "for X,y in train_dataloader:\n", + " print(f\"Shape of X: {X.shape}\") #[batchsize, channels(rbg), height, width]\n", + " print(f\"Shape of y: {y.shape}\")\n", + " break" + ], + "metadata": { + "id": "KWvqtg-Ieiyb" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "import torch.nn as nn" + ], + "metadata": { + "id": "raHID3YHviqh" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "DEFINING THE CNN MODEL" + ], + "metadata": { + "id": "HmnK9wLo0dVy" + } + }, + { + "cell_type": "code", + "source": [ + "# Defining CNN model\n", + "class CNN(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " # Feature Extractor\n", + " self.feature_extractor = nn.Sequential(\n", + " # Layer 1\n", + " nn.Conv2d(3, 32, kernel_size=3, padding=1),\n", + " nn.ReLU(),\n", + " nn.MaxPool2d(kernel_size=2),\n", + " # Layer 2\n", + " nn.Conv2d(32, 64, kernel_size=3, padding=1),\n", + " nn.ReLU(),\n", + " nn.MaxPool2d(kernel_size=2),\n", + " # Layer 3\n", + " nn.Conv2d(64, 32, kernel_size=3, padding=1),\n", + " nn.ReLU(),\n", + " nn.MaxPool2d(kernel_size=2),\n", + " # Flattening\n", + " nn.Flatten()\n", + " )\n", + " # Classifier\n", + " self.classifier = nn.Sequential(\n", + " nn.Dropout(p=0.2),\n", + " nn.Linear(32*8*8, 2)\n", + " )\n", + " def forward(self, x):\n", + " x = self.feature_extractor(x)\n", + " x = self.classifier(x)\n", + " return x\n", + "\n" + ], + "metadata": { + "id": "h5N7SeVFj34n" + }, + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "TRAINING THE MODEL" + ], + "metadata": { + "id": "sgOfeqp-0hR-" + } + }, + { + "cell_type": "code", + "source": [ + "import torch.optim as optim" + ], + "metadata": { + "id": "Elu59fnQ1EeT" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Deciding values of 
Hyperparamters\n", + "lr = 0.001\n", + "num_epochs = 50" + ], + "metadata": { + "id": "fGqQwdfA2qwT" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Device configuration\n", + "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" + ], + "metadata": { + "id": "QWTok6-puSqf" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Training and Evaluation of model\n", + "train_losses = []\n", + "val_accuracies = []\n", + "\n", + "model = CNN().to(device)\n", + "class_weights = torch.tensor([0.83, 0.17]).to(device)\n", + "# Choosing loss function and optimizer\n", + "criterion = nn.CrossEntropyLoss(weight = class_weights)\n", + "optimizer = optim.Adam(model.parameters(), lr)\n", + "\n", + "for epoch in range(num_epochs):\n", + " model.train()\n", + " running_loss = 0.0\n", + " for X,y in train_dataloader:\n", + " X = X.to(device)\n", + " y = y.to(device)\n", + " # Forward pass\n", + " pred = model(X)\n", + " # Evaluate loss\n", + " loss = criterion(pred, y)\n", + " # Backpropagation\n", + " loss.backward()\n", + " optimizer.step()\n", + " optimizer.zero_grad()\n", + "\n", + " running_loss += loss.item()\n", + "\n", + " avg_loss = running_loss / len(train_dataloader)\n", + " train_losses.append(avg_loss)\n", + "\n", + " model.eval()\n", + " correct = 0\n", + " total = 0\n", + "\n", + " with torch.no_grad():\n", + " for X,y in test_dataloader:\n", + " X = X.to(device)\n", + " y = y.to(device)\n", + " output = model(X)\n", + " _, pred = torch.max(output.data, 1)\n", + " total += y.size(0)\n", + " correct += (pred == y).sum().item()\n", + "\n", + " accuracy = 100*correct/total\n", + " val_accuracies.append(accuracy)\n", + " print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Val Accuracy: {accuracy:.2f}%')" + ], + "metadata": { + "id": "sjVYD2yS0jX_", + "collapsed": true + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + 
"source": [ + "# Plotting results\n", + "import matplotlib.pyplot as plt\n", + "\n", + "def plot_results(train_losses, val_accuracies):\n", + " plt.figure(figsize=(12, 5))\n", + "\n", + " # Plotting Training loss curve\n", + " plt.subplot(1, 2, 1)\n", + " plt.plot(train_losses, label='Training Loss', color='tab:blue')\n", + " plt.title('Training Loss per Epoch')\n", + " plt.xlabel('Epoch')\n", + " plt.ylabel('Loss')\n", + " plt.grid()\n", + " plt.legend()\n", + "\n", + " # Plotting accuracy curve\n", + " plt.subplot(1, 2, 2)\n", + " plt.plot(val_accuracies, label='Validation Accuracy', color='tab:green')\n", + " plt.title('Validation Accuracy per Epoch')\n", + " plt.xlabel('Epoch')\n", + " plt.ylabel('Accuracy (%)')\n", + " plt.grid()\n", + " plt.legend()\n", + "\n", + " plt.tight_layout()\n", + " plt.show()\n", + "plot_results(train_losses, val_accuracies)\n", + "\n" + ], + "metadata": { + "id": "sW-ml_Hur_e6" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/MidEval Code/QV_MidEvals_ARCHI.ipynb b/MidEval Code/QV_MidEvals_ARCHI.ipynb new file mode 100644 index 00000000..e8c43aba --- /dev/null +++ b/MidEval Code/QV_MidEvals_ARCHI.ipynb @@ -0,0 +1,791 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "#DATA PREPROCESSING" + ], + "metadata": { + "id": "JXD-Zjoqg7ss" + } + }, + { + "cell_type": "code", + "source": [ + "# Importing libraries\n", + "\n", + "import pandas as pd\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.model_selection import train_test_split" + ], + "metadata": { + "id": "F7wA0pwYZS5n" + }, + "execution_count": 19, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "## Data loading\n", + "\n", + "df = 
pd.read_csv('quantvision_financial_dataset_200.csv')\n", + "print(df.head())\n", + "print(df.info())\n", + "# Assuming your dataframe is named 'df' or 'X_train'\n", + "print(df.columns)\n", + "\n", + "## One-Hot Encoding\n", + "\n", + "columns_to_encode = ['asset_type','market_regime']\n", + "df_encoded = pd.get_dummies(df, columns=columns_to_encode, dtype=int)\n", + "print(df_encoded.head())\n", + "\n", + "## Scaling Numerical features\n", + "\n", + "columns_to_scale = ['lookback_days','technical_score','edge_density','slope_strength','candlestick_variance','pattern_symmetry']\n", + "scaler = StandardScaler()\n", + "df_encoded[columns_to_scale] = scaler.fit_transform(df_encoded[columns_to_scale])\n", + "print(df_encoded.head())\n", + "\n", + "## Data Splitting\n", + "\n", + "X = df_encoded.drop('future_trend', axis=1)\n", + "y = df_encoded['future_trend']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=9, stratify=y)\n", + "\n", + "print(f\"Training data sample: {X_train.shape[0]}\")\n", + "print(f\"Testing data sample: {X_test.shape[0]}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "5ex89d4pebTp", + "outputId": "564128ef-18d1-4aec-a828-a89f88e47ec3" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " lookback_days asset_type market_regime high_volatility \\\n", + "0 48 equity bullish 0 \n", + "1 38 index bullish 1 \n", + "2 24 equity bullish 1 \n", + "3 52 equity bullish 0 \n", + "4 17 equity bullish 1 \n", + "\n", + " trend_continuation technical_score edge_density slope_strength \\\n", + "0 1 59.99 0.504 0.298 \n", + "1 1 78.54 0.559 0.037 \n", + "2 0 56.03 0.617 0.212 \n", + "3 0 66.51 0.360 0.347 \n", + "4 1 61.21 0.492 0.144 \n", + "\n", + " candlestick_variance pattern_symmetry future_trend \n", + "0 1.572 0.768 1 \n", + "1 0.692 0.538 1 \n", + "2 1.419 0.301 1 \n", + "3 0.699 
0.498 1 \n", + "4 2.520 0.828 1 \n", + "\n", + "RangeIndex: 200 entries, 0 to 199\n", + "Data columns (total 11 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 lookback_days 200 non-null int64 \n", + " 1 asset_type 200 non-null object \n", + " 2 market_regime 200 non-null object \n", + " 3 high_volatility 200 non-null int64 \n", + " 4 trend_continuation 200 non-null int64 \n", + " 5 technical_score 200 non-null float64\n", + " 6 edge_density 200 non-null float64\n", + " 7 slope_strength 200 non-null float64\n", + " 8 candlestick_variance 200 non-null float64\n", + " 9 pattern_symmetry 200 non-null float64\n", + " 10 future_trend 200 non-null int64 \n", + "dtypes: float64(5), int64(4), object(2)\n", + "memory usage: 17.3+ KB\n", + "None\n", + "Index(['lookback_days', 'asset_type', 'market_regime', 'high_volatility',\n", + " 'trend_continuation', 'technical_score', 'edge_density',\n", + " 'slope_strength', 'candlestick_variance', 'pattern_symmetry',\n", + " 'future_trend'],\n", + " dtype='object')\n", + " lookback_days high_volatility trend_continuation technical_score \\\n", + "0 48 0 1 59.99 \n", + "1 38 1 1 78.54 \n", + "2 24 1 0 56.03 \n", + "3 52 0 0 66.51 \n", + "4 17 1 1 61.21 \n", + "\n", + " edge_density slope_strength candlestick_variance pattern_symmetry \\\n", + "0 0.504 0.298 1.572 0.768 \n", + "1 0.559 0.037 0.692 0.538 \n", + "2 0.617 0.212 1.419 0.301 \n", + "3 0.360 0.347 0.699 0.498 \n", + "4 0.492 0.144 2.520 0.828 \n", + "\n", + " future_trend asset_type_crypto asset_type_equity asset_type_index \\\n", + "0 1 0 1 0 \n", + "1 1 0 0 1 \n", + "2 1 0 1 0 \n", + "3 1 0 1 0 \n", + "4 1 0 1 0 \n", + "\n", + " market_regime_bearish market_regime_bullish market_regime_sideways \n", + "0 0 1 0 \n", + "1 0 1 0 \n", + "2 0 1 0 \n", + "3 0 1 0 \n", + "4 0 1 0 \n", + " lookback_days high_volatility trend_continuation technical_score \\\n", + "0 0.914164 0 1 -0.123866 \n", + "1 0.230166 1 1 1.113556 \n", + "2 
-0.727433 1 0 -0.388027 \n", + "3 1.187764 0 0 0.311066 \n", + "4 -1.206232 1 1 -0.042483 \n", + "\n", + " edge_density slope_strength candlestick_variance pattern_symmetry \\\n", + "0 0.344636 0.076793 0.509942 0.890424 \n", + "1 0.819757 -0.345254 -1.022907 -0.437964 \n", + "2 1.320794 -0.062272 0.243436 -1.806780 \n", + "3 -0.899318 0.156028 -1.010714 -0.668988 \n", + "4 0.240973 -0.172231 2.161239 1.236960 \n", + "\n", + " future_trend asset_type_crypto asset_type_equity asset_type_index \\\n", + "0 1 0 1 0 \n", + "1 1 0 0 1 \n", + "2 1 0 1 0 \n", + "3 1 0 1 0 \n", + "4 1 0 1 0 \n", + "\n", + " market_regime_bearish market_regime_bullish market_regime_sideways \n", + "0 0 1 0 \n", + "1 0 1 0 \n", + "2 0 1 0 \n", + "3 0 1 0 \n", + "4 0 1 0 \n", + "Training data sample: 160\n", + "Testing data sample: 40\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# MODEL TRAINING & EVALUATION" + ], + "metadata": { + "id": "Efbj9Cm7odkN" + } + }, + { + "cell_type": "markdown", + "source": [ + "LOGISTIC REGRESSION" + ], + "metadata": { + "id": "jjUhwzlsokGp" + } + }, + { + "cell_type": "code", + "source": [ + "# Importing libraries\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix\n", + "import numpy as np" + ], + "metadata": { + "id": "suYvu7mqo17P" + }, + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Training logistic regression model\n", + "log_model = LogisticRegression(\n", + " solver='liblinear',\n", + " max_iter=100,\n", + " class_weight='balanced',\n", + " C=0.1,\n", + " random_state=9\n", + ")\n", + "\n", + "log_model.fit(X_train, y_train)\n", + "pred_log = log_model.predict(X_test)" + ], + "metadata": { + "id": "7pM2Z5Q_gDaX", + "collapsed": true + }, + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Defining function for model evaluation\n", + 
"def metrics(y_true,y_pred):\n", + " return{\n", + " \"Accuracy\":accuracy_score(y_true,y_pred),\n", + " \"Precision\":precision_score(y_true, y_pred),\n", + " \"recall\":recall_score(y_true,y_pred),\n", + " \"f1 score\":f1_score(y_true,y_pred),\n", + " \"Confusion matrix\":confusion_matrix(y_true,y_pred)\n", + " }" + ], + "metadata": { + "id": "hR8nSwdTXHxr" + }, + "execution_count": 23, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Evaluating logistic regression model\n", + "log_metrics = metrics(y_test,pred_log)" + ], + "metadata": { + "id": "w66W5nD0XJt2" + }, + "execution_count": 24, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "SIMPLE MLP" + ], + "metadata": { + "id": "-aqpCMi1ynV_" + } + }, + { + "cell_type": "code", + "source": [ + "# Importing libraries\n", + "\n", + "from sklearn.neural_network import MLPClassifier" + ], + "metadata": { + "id": "lxDBKd-KyoGu" + }, + "execution_count": 25, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Training neural network\n", + "mlp = MLPClassifier(\n", + " hidden_layer_sizes=(100, 50),\n", + " activation='relu',\n", + " solver='adam',\n", + " max_iter = 100,\n", + " random_state = 9\n", + ")\n", + "\n", + "mlp.fit(X_train, y_train)\n", + "pred_mlp = mlp.predict(X_test)" + ], + "metadata": { + "id": "1U5mWWV1zChY", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "outputId": "22f52c86-882e-4526-d996-d0ed994c1dc2" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (100) reached and the optimization hasn't converged yet.\n", + " warnings.warn(\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Evaluating neural network\n", + "mlp_metrics = metrics(y_test,pred_mlp)" + ], + "metadata": { + "id": 
"rVoxcTMSXP5w" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "#COMPARING BOTH MODELS" + ], + "metadata": { + "id": "EOSiMHgGX48Q" + } + }, + { + "cell_type": "code", + "source": [ + "# Comparing Logistic regression model and Neural Network\n", + "comparision_table = pd.DataFrame(\n", + " [log_metrics,mlp_metrics],\n", + " index = [\"Logistic Regression\", \"Neural Network\"]\n", + ")\n", + "comparision_table" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 125 + }, + "id": "8MoIGJ5nX7UI", + "outputId": "1692b42f-f3c1-415d-c6b1-e46cd4792cbb" + }, + "execution_count": 28, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Accuracy Precision recall f1 score \\\n", + "Logistic Regression 0.75 1.000000 0.729730 0.843750 \n", + "Neural Network 0.90 0.923077 0.972973 0.947368 \n", + "\n", + " Confusion matrix \n", + "Logistic Regression [[3, 0], [10, 27]] \n", + "Neural Network [[0, 3], [1, 36]] " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AccuracyPrecisionrecallf1 scoreConfusion matrix
Logistic Regression0.751.0000000.7297300.843750[[3, 0], [10, 27]]
Neural Network0.900.9230770.9729730.947368[[0, 3], [1, 36]]
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "comparision_table", + "summary": "{\n \"name\": \"comparision_table\",\n \"rows\": 2,\n \"fields\": [\n {\n \"column\": \"Accuracy\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.10606601717798214,\n \"min\": 0.75,\n \"max\": 0.9,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.9,\n 0.75\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Precision\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.054392829322042084,\n \"min\": 0.9230769230769231,\n \"max\": 1.0,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.9230769230769231,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"recall\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.17199894677510622,\n \"min\": 0.7297297297297297,\n \"max\": 0.972972972972973,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.972972972972973,\n 0.7297297297297297\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"f1 score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.07326928818215867,\n \"min\": 0.84375,\n \"max\": 0.9473684210526315,\n \"num_unique_values\": 2,\n \"samples\": [\n 0.9473684210526315,\n 0.84375\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Confusion matrix\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# ANALYSIS AND FINANCIAL INTERPRETATION" + ], + "metadata": { + "id": "UsQh03d9iA2M" + } + }, + { + "cell_type": "markdown", + "source": [ + "Q1) Why Logistic Regression performs reasonably good or bad?\n", + "\n", + "ANS: The logistic regression model was very cautious and performed very well on safety (100% precision) but because of 
this we missed some good opportunities (73% recall).\n", + "\n", + "Because it uses a linear boundary it couldn't perfectly separate the 'UP' days from the 'DOWN' days. It focused on catching all the 'DOWN' days correctly and in that process ended up catching 10 'UP' days as well.\n", + "\n" + ], + "metadata": { + "id": "qpxSWKzzl7_5" + } + }, + { + "cell_type": "markdown", + "source": [ + "Q2) Why Neural Network performs better or worse?\n", + "\n", + "ANS: The neural network was very greedy. It blindly assumes the market will go up. It captured almost all the profit but failed to predict a single market crash.\n", + "\n", + "Looking at the Confusion Matrix [0, 3] we can say that it failed to predict any of the 3 \"DOWN\" days (0 True Negatives).\n", + "\n", + "The neural network only focuses on minimizing the loss and here it did so by always predicting 'UP' because it realized that ignoring the down signals had very little impact on its overall accuracy score. This way it goes and settles down in a local minimum easily instead of doing the hard work of finding the global minimum.\n", + "\n" + ], + "metadata": { + "id": "imEXoHsgmHeZ" + } + }, + { + "cell_type": "markdown", + "source": [ + "Q3) The effect of volatility on predictions\n", + "\n", + "ANS: High volatility usually creates noise and confuses the model.\n", + "\n", + "The Logistic Regression model likely treated high volatility as a negative signal. It interpreted volatile data as risky, causing it to stay out of the market and miss 10 ups.\n", + "\n", + "But the neural network likely ignored volatility and focused mostly on trend continuation signals making it behave in a highly bullish way.\n" + ], + "metadata": { + "id": "cXo6qxoCmRoT" + } + }, + { + "cell_type": "markdown", + "source": [ + "Q4) The role of trend continuation\n", + "\n", + "ANS: Our dataset is heavily skewed towards 'UP' (37 vs 3). This implies that we have a strong up trend. 
The neural network likely assigned a massive weight to the trend continuation feature and thus learned that if the trend is continuing just keep buying. This works well here and in general in a Bullish market but the model completely breaks if we give it a different dataset where the trend is completely opposite." + ], + "metadata": { + "id": "CwaTbnLNmViN" + } + }, + { + "cell_type": "markdown", + "source": [ + "Q5) Situations where the model fails and why\n", + "\n", + "ANS: The logistic regression model fails in a Bullish market because the model is very cautious and says sell even if the stocks look slightly risky. This way we miss out on profit.\n", + "\n", + "The neural network fails when the market trend shifts because it keeps saying buy even when the market is going down because it has learned that saying up all the time works pretty well." + ], + "metadata": { + "id": "JLukkXwNmaNo" + } + } + ] +} \ No newline at end of file