From d0481d1dd27d9048a1b71257dedd3532205e536d Mon Sep 17 00:00:00 2001 From: rohinish103 Date: Mon, 22 Dec 2025 20:14:05 +0530 Subject: [PATCH 1/5] Mideval Code folder and file --- Mideval Code/mideval_Rohinish.py | 129 +++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 Mideval Code/mideval_Rohinish.py diff --git a/Mideval Code/mideval_Rohinish.py b/Mideval Code/mideval_Rohinish.py new file mode 100644 index 00000000..e8010310 --- /dev/null +++ b/Mideval Code/mideval_Rohinish.py @@ -0,0 +1,129 @@ +import pandas as pd +import numpy as np + +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler, OneHotEncoder +from sklearn.compose import ColumnTransformer +from sklearn.pipeline import Pipeline +from sklearn.neural_network import MLPClassifier +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import ( + classification_report, + confusion_matrix, + roc_auc_score +) + +# 1. Load data ---------------------------------------------------------- +csv_path = "quantvision_financial_dataset_200.csv" # adjust path if needed +df = pd.read_csv(csv_path) + +target_col = "future_trend" # label column in your CSV +X = df.drop(columns=[target_col]) +y = df[target_col].values + +# Identify numeric & categorical columns (from file header) +numeric_features = [ + "lookback_days", + "technical_score", + "edge_density", + "slope_strength", + "candlestick_variance", + "pattern_symmetry", +] +categorical_features = [ + "asset_type", + "market_regime", + "high_volatility", + "trend_continuation", +] + +# 2. Preprocessor (no leakage) ----------------------------------------- +numeric_transformer = StandardScaler() +categorical_transformer = OneHotEncoder(handle_unknown="ignore") + +preprocessor = ColumnTransformer( + transformers=[ + ("num", numeric_transformer, numeric_features), + ("cat", categorical_transformer, categorical_features), + ] +) + +# 3. 
Train–test split --------------------------------------------------- +X_train, X_test, y_train, y_test = train_test_split( + X, + y, + test_size=0.2, + random_state=42, + stratify=y +) + +# 4. Pipelines for both models ----------------------------------------- +nn_model = Pipeline([ + ("preprocess", preprocessor), + ("clf", MLPClassifier( + hidden_layer_sizes=(16, 8), + activation="relu", + solver="adam", + max_iter=1000, + random_state=42 + )) +]) + +log_model = Pipeline([ + ("preprocess", preprocessor), + ("clf", LogisticRegression( + max_iter=1000, + random_state=42 + )) +]) + +# 5. Fit models --------------------------------------------------------- +nn_model.fit(X_train, y_train) +log_model.fit(X_train, y_train) + +# 6. Evaluate NN -------------------------------------------------------- +print("=== Neural Network (MLPClassifier) ===") +y_pred_nn = nn_model.predict(X_test) +y_proba_nn = nn_model.predict_proba(X_test)[:, 1] + +print(classification_report(y_test, y_pred_nn, digits=3)) +print("Confusion matrix:") +print(confusion_matrix(y_test, y_pred_nn)) +print("ROC-AUC:", roc_auc_score(y_test, y_proba_nn)) +print() + +# 7. Evaluate Logistic Regression -------------------------------------- +print("=== Logistic Regression ===") +y_pred_log = log_model.predict(X_test) +y_proba_log = log_model.predict_proba(X_test)[:, 1] + +print(classification_report(y_test, y_pred_log, digits=3)) +print("Confusion matrix:") +print(confusion_matrix(y_test, y_pred_log)) +print("ROC-AUC:", roc_auc_score(y_test, y_proba_log)) +print() + +# 8. 
Simple comparison table ------------------------------------------- +def get_main_metrics(y_true, y_pred, y_proba): + from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score + return { + "accuracy": accuracy_score(y_true, y_pred), + "precision": precision_score(y_true, y_pred), + "recall": recall_score(y_true, y_pred), + "f1": f1_score(y_true, y_pred), + "roc_auc": roc_auc_score(y_true, y_proba), + } + +nn_metrics = get_main_metrics(y_test, y_pred_nn, y_proba_nn) +log_metrics = get_main_metrics(y_test, y_pred_log, y_proba_log) + +comparison_df = pd.DataFrame.from_dict( + { + "nn_model": nn_metrics, + "log_model": log_metrics, + }, + orient="index" +) + +print("=== Final Comparison Table ===") +print(comparison_df) From 3b56604ab5659d02c5c4e20c4de1308611582920 Mon Sep 17 00:00:00 2001 From: rohinish103 Date: Sat, 3 Jan 2026 20:59:38 +0530 Subject: [PATCH 2/5] Assignment3 uploaded --- .../rohinish_Assignment3.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 Assignment3/Assignment3_Rohinish/rohinish_Assignment3.py diff --git a/Assignment3/Assignment3_Rohinish/rohinish_Assignment3.py b/Assignment3/Assignment3_Rohinish/rohinish_Assignment3.py new file mode 100644 index 00000000..07da0bce --- /dev/null +++ b/Assignment3/Assignment3_Rohinish/rohinish_Assignment3.py @@ -0,0 +1,125 @@ +import os +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +import matplotlib.pyplot as plt + +# 1. Paths and basic config + +data_dir = r"/kaggle/input/candlestick-image-data" # <-- CHANGE THIS ON YOUR MACHINE +train_dir = os.path.join(data_dir, "train") +val_dir = os.path.join(data_dir, "test") # dataset already split into train/test + +img_size = (224, 224) +batch_size = 32 +seed = 42 + +# 2. 
# 2. Load datasets
# Directory layout: <dir>/<class_name>/*.png — labels inferred from folders.
train_ds = keras.utils.image_dataset_from_directory(
    train_dir,
    labels="inferred",
    label_mode="int",  # 0 or 1 for Down/Up
    image_size=img_size,
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

# Fix: the evaluation split must not be shuffled — a stable order keeps
# metrics reproducible and lets predictions be matched back to files.
val_ds = keras.utils.image_dataset_from_directory(
    val_dir,
    labels="inferred",
    label_mode="int",
    image_size=img_size,
    batch_size=batch_size,
    shuffle=False,
)

class_names = train_ds.class_names
num_classes = len(class_names)
print("Classes:", class_names)
print("Num classes:", num_classes)

# 3. Data augmentation (these layers are active only during training)
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
        layers.RandomZoom(0.1),
        layers.RandomTranslation(0.1, 0.1),
        layers.RandomContrast(0.2),
    ],
    name="data_augmentation",
)

# 4. Build the CNN model
inputs = keras.Input(shape=img_size + (3,))

x = data_augmentation(inputs)
x = layers.Rescaling(1.0 / 255)(x)  # scale uint8 pixels to [0, 1]

# Simple CNN backbone: three conv blocks, then global pooling instead of
# Flatten to keep the parameter count small.
x = layers.Conv2D(32, 3, padding="same", activation="relu")(x)
x = layers.MaxPooling2D()(x)

x = layers.Conv2D(64, 3, padding="same", activation="relu")(x)
x = layers.MaxPooling2D()(x)

x = layers.Conv2D(128, 3, padding="same", activation="relu")(x)
x = layers.GlobalAveragePooling2D()(x)

x = layers.Dropout(0.4)(x)
outputs = layers.Dense(num_classes, activation="softmax")(x)

model = keras.Model(inputs, outputs)

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",  # labels are integer 0/1
    metrics=["accuracy"],
)

model.summary()

# 5. Prefetch and train
AUTOTUNE = tf.data.AUTOTUNE

# NOTE: the dataset is already batched here, so .shuffle(1000) reorders
# whole batches between epochs; sample-level shuffling is already done by
# the loader above.
train_ds = train_ds.shuffle(1000).prefetch(AUTOTUNE)
val_ds = val_ds.prefetch(AUTOTUNE)

epochs = 20
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
)
Plot Training vs Validation Accuracy/Loss +acc = history.history["accuracy"] +val_acc = history.history["val_accuracy"] +loss = history.history["loss"] +val_loss = history.history["val_loss"] +epochs_range = range(1, epochs + 1) + +plt.figure(figsize=(12, 5)) + +# Accuracy plot +plt.subplot(1, 2, 1) +plt.plot(epochs_range, acc, label="Training Accuracy") +plt.plot(epochs_range, val_acc, label="Validation Accuracy") +plt.title("Training vs Validation Accuracy") +plt.xlabel("Epoch") +plt.ylabel("Accuracy") +plt.legend(loc="lower right") + +# Loss plot +plt.subplot(1, 2, 2) +plt.plot(epochs_range, loss, label="Training Loss") +plt.plot(epochs_range, val_loss, label="Validation Loss") +plt.title("Training vs Validation Loss") +plt.xlabel("Epoch") +plt.ylabel("Loss") +plt.legend(loc="upper right") + +plt.tight_layout() +plt.show() From c7cff9cc49095346b4f8df84b98271f1ff19d60d Mon Sep 17 00:00:00 2001 From: rohinish103 Date: Sun, 11 Jan 2026 21:20:58 +0530 Subject: [PATCH 3/5] Create Rohinish_Assignment4 --- Assignment 4/Rohinish_Assignment4 | 1 + 1 file changed, 1 insertion(+) create mode 100644 Assignment 4/Rohinish_Assignment4 diff --git a/Assignment 4/Rohinish_Assignment4 b/Assignment 4/Rohinish_Assignment4 new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/Assignment 4/Rohinish_Assignment4 @@ -0,0 +1 @@ + From a35720999f10dd26de81bef23800c0ed8e71756a Mon Sep 17 00:00:00 2001 From: rohinish103 Date: Sun, 11 Jan 2026 21:22:19 +0530 Subject: [PATCH 4/5] Add files via upload --- Assignment 4/rohinish_Assignment4.py | 267 +++++++++++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 Assignment 4/rohinish_Assignment4.py diff --git a/Assignment 4/rohinish_Assignment4.py b/Assignment 4/rohinish_Assignment4.py new file mode 100644 index 00000000..b2238f9b --- /dev/null +++ b/Assignment 4/rohinish_Assignment4.py @@ -0,0 +1,267 @@ +import os, math, glob, random, json +from collections import Counter +from datetime import datetime + +import 
numpy as np +import pandas as pd +import yfinance as yf +import mplfinance as mpf +import matplotlib.pyplot as plt +import tensorflow as tf +from tensorflow.keras import layers, models +from sklearn.metrics import classification_report, confusion_matrix + +# CONFIG +DATA_DIR = "data" +IMG_DIR = "images" +os.makedirs(DATA_DIR, exist_ok=True) +os.makedirs(IMG_DIR, exist_ok=True) + +# 5 requested assets +tickers = [ + "BTC-USD", # Bitcoin + "GC=F", # Gold futures + "SI=F", # Silver futures + "RELIANCE.NS", # Reliance Industries + "AAPL", # Apple Inc. +] + +start = "2022-01-01" +end = "2024-12-31" + +PATTERNS = ["none", "head_shoulders", "doji", "hammer"] +IMG_SIZE = (128, 128) +BATCH_SIZE = 32 + +# PART 1: DATA PIPELINE +def download_ohlc(ticker): + df = yf.download(ticker, start=start, end=end, interval="1d") + df = df[["Open","High","Low","Close","Volume"]].dropna() + df.to_csv(f"{DATA_DIR}/{ticker}.csv") + return df + +def save_candle_images(df, ticker, window=30, step=5): + out_dir = f"{IMG_DIR}/{ticker}" + os.makedirs(out_dir, exist_ok=True) + for i in range(window, len(df), step): + sub = df.iloc[i-window:i] + fname = f"{out_dir}/{ticker}_{i}.png" + mpf.plot( + sub, + type="candle", + style="charles", + volume=False, + axisoff=True, + savefig=dict(fname=fname, dpi=64, bbox_inches="tight", pad_inches=0), + ) + +def build_images(): + all_dfs = {} + for t in tickers: + print("Downloading:", t) + df = download_ohlc(t) + all_dfs[t] = df + print("Saving images:", t) + save_candle_images(df, t) + + all_imgs = glob.glob(f"{IMG_DIR}/**/*.png", recursive=True) + random.shuffle(all_imgs) + n = len(all_imgs) + train_files = all_imgs[: int(0.7*n)] + val_files = all_imgs[int(0.7*n): int(0.85*n)] + test_files = all_imgs[int(0.85*n):] + + splits = {"train": train_files, "val": val_files, "test": test_files} + with open("splits.json", "w") as f: + json.dump(splits, f, indent=2) + print("Total images:", n) + return splits + +# PART 2: LABELING & DATASET +def load_labels(): + # 
labels.csv: path,label with label in PATTERNS + labels = pd.read_csv("labels.csv") + label_to_idx = {p:i for i,p in enumerate(PATTERNS)} + labels["y"] = labels["label"].map(label_to_idx) + return labels, label_to_idx + +def compute_class_weights(labels): + counter = Counter(labels["y"]) + total = sum(counter.values()) + class_weights = {cls: total/(len(counter)*count) for cls, count in counter.items()} + print("Class weights:", class_weights) + return class_weights + +# TF DATASET HELPERS +def decode_img(path, label): + img = tf.io.read_file(path) + img = tf.image.decode_png(img, channels=3) + img = tf.image.resize(img, IMG_SIZE) + img = tf.cast(img, tf.float32)/255.0 + return img, label + +def augment(img, label): + img = tf.image.random_flip_left_right(img) + img = tf.image.random_brightness(img, 0.1) + img = tf.image.random_contrast(img, 0.9, 1.1) + return img, label + +def make_dataset(split_name, splits, labels_df): + split_files = splits[split_name] + df = pd.DataFrame({"path": split_files}) + df = df.merge(labels_df[["path","y"]], on="path") + paths = df["path"].values + ys = df["y"].values + ds = tf.data.Dataset.from_tensor_slices((paths, ys)) + ds = ds.map(lambda p,y: decode_img(p,y), num_parallel_calls=tf.data.AUTOTUNE) + if split_name == "train": + ds = ds.map(augment, num_parallel_calls=tf.data.AUTOTUNE) + ds = ds.shuffle(1000) + ds = ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE) + return ds + +# PART 3: CNN MODEL +def build_model(num_classes=len(PATTERNS)): + inputs = layers.Input(shape=(*IMG_SIZE, 3)) + x = layers.Conv2D(32, (3,3), activation="relu", padding="same")(inputs) + x = layers.MaxPooling2D(2)(x) + x = layers.Conv2D(64, (3,3), activation="relu", padding="same")(x) + x = layers.MaxPooling2D(2)(x) + x = layers.Conv2D(128, (3,3), activation="relu", padding="same")(x) + x = layers.MaxPooling2D(2)(x) + x = layers.Flatten()(x) + x = layers.Dense(256, activation="relu")(x) + x = layers.Dropout(0.5)(x) + outputs = layers.Dense(num_classes, 
activation="softmax")(x) + model = models.Model(inputs, outputs) + model.compile( + optimizer=tf.keras.optimizers.Adam(1e-3), + loss="sparse_categorical_crossentropy", + metrics=[ + "accuracy", + tf.keras.metrics.Precision(name="precision"), + tf.keras.metrics.Recall(name="recall"), + ], + ) + return model + +def train_model(splits, labels_df, class_weights): + train_ds = make_dataset("train", splits, labels_df) + val_ds = make_dataset("val", splits, labels_df) + + model = build_model() + model.summary() + + callbacks = [ + tf.keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.5, verbose=1), + tf.keras.callbacks.EarlyStopping(patience=7, restore_best_weights=True, verbose=1), + tf.keras.callbacks.ModelCheckpoint("best_model.h5", save_best_only=True), + ] + + history = model.fit( + train_ds, + validation_data=val_ds, + epochs=40, + class_weight=class_weights, + ) + return model, history + +def evaluate_model(model, splits, labels_df): + test_ds = make_dataset("test", splits, labels_df) + + test_imgs, test_labels = [], [] + for x, y in test_ds: + test_imgs.append(x) + test_labels.append(y) + test_imgs = tf.concat(test_imgs, axis=0) + test_labels = tf.concat(test_labels, axis=0) + + y_pred_prob = model.predict(test_imgs) + y_pred = np.argmax(y_pred_prob, axis=1) + + print(classification_report(test_labels, y_pred, target_names=PATTERNS)) + cm = confusion_matrix(test_labels, y_pred) + print("Confusion matrix:\n", cm) + return cm + +# PART 4: BACKTESTING +def backtest_pattern(df, img_paths, preds, bullish_labels, hold_days=3): + df = df.copy() + df["ret"] = df["Close"].pct_change() + signals = [] + + for path, pred in zip(img_paths, preds): + if PATTERNS[pred] in bullish_labels: + idx = int(path.split("_")[-1].split(".")[0]) + if idx >= len(df): + continue + entry = df.iloc[idx].name + exit_idx = min(idx+hold_days, len(df)-1) + exit_ = df.iloc[exit_idx].name + entry_price = df.loc[entry, "Open"] + exit_price = df.loc[exit_, "Close"] + r = (exit_price/entry_price) - 
1 + signals.append(r) + + if not signals: + return {"n_trades":0, "mean_ret":0, "win_rate":0, + "cum_ret":0, "sharpe":0} + + rets = np.array(signals) + win_rate = (rets > 0).mean() + cum_ret = (1 + rets).prod() - 1 + sharpe = rets.mean() / (rets.std() + 1e-8) * math.sqrt(252/hold_days) + return { + "n_trades": len(rets), + "mean_ret": rets.mean(), + "win_rate": win_rate, + "cum_ret": cum_ret, + "sharpe": sharpe + } + +def random_strategy(df, n_trades, hold_days=3): + idxs = np.random.randint(0, len(df)-hold_days, size=n_trades) + rets = [] + for idx in idxs: + entry_price = df["Open"].iloc[idx] + exit_price = df["Close"].iloc[idx+hold_days] + rets.append(exit_price/entry_price - 1) + rets = np.array(rets) + win_rate = (rets > 0).mean() + cum_ret = (1+rets).prod() - 1 + sharpe = rets.mean() / (rets.std()+1e-8) * math.sqrt(252/hold_days) + return {"n_trades":n_trades, "mean_ret":rets.mean(), + "win_rate":win_rate, "cum_ret":cum_ret, "sharpe":sharpe} + +def run_backtest(model, splits): + for ticker in tickers: + print("\n=== Backtest for", ticker, "===") + df = pd.read_csv(f"{DATA_DIR}/{ticker}.csv", index_col=0, parse_dates=True) + stock_imgs = [p for p in splits["test"] if ticker in p] + if not stock_imgs: + print("No test images for", ticker) + continue + ds = tf.data.Dataset.from_tensor_slices(stock_imgs) + ds = ds.map(lambda p: decode_img(p, 0)[0]).batch(BATCH_SIZE) + preds = np.argmax(model.predict(ds), axis=1) + + stats = backtest_pattern(df, stock_imgs, preds, + bullish_labels=["hammer","doji"]) + rand_stats = random_strategy(df, stats["n_trades"]) + print("Model strategy:", stats) + print("Random strategy:", rand_stats) + +# MAIN +if __name__ == "__main__": + if not os.path.exists("splits.json"): + splits = build_images() + else: + with open("splits.json") as f: + splits = json.load(f) + + labels_df, label_to_idx = load_labels() + class_weights = compute_class_weights(labels_df) + + model, history = train_model(splits, labels_df, class_weights) + cm = 
evaluate_model(model, splits, labels_df) + run_backtest(model, splits) From 43fcea633f95c229a4b3fdca34fc2dfaefe940c5 Mon Sep 17 00:00:00 2001 From: rohinish103 Date: Sun, 11 Jan 2026 21:22:52 +0530 Subject: [PATCH 5/5] Delete Assignment 4/Rohinish_Assignment4 --- Assignment 4/Rohinish_Assignment4 | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Assignment 4/Rohinish_Assignment4 diff --git a/Assignment 4/Rohinish_Assignment4 b/Assignment 4/Rohinish_Assignment4 deleted file mode 100644 index 8b137891..00000000 --- a/Assignment 4/Rohinish_Assignment4 +++ /dev/null @@ -1 +0,0 @@ -