diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml new file mode 100644 index 0000000000..1fe722d7b1 --- /dev/null +++ b/.github/workflows/style.yml @@ -0,0 +1,43 @@ +name: Style check + +on: + push: + branches: + - main + - master + + pull_request: + branches: + - main + - master + +jobs: + flake8_py3: + permissions: write-all + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install flake8 and plugins + run: | + pip install flake8 flake8-docstrings flake8-annotations + + - name: Configure Flake8 + run: | + echo "[flake8]" > .flake8 + echo "extend-ignore = E402,W504" >> .flake8 + echo "exclude = .github,autoop/tests,tests/" >> .flake8 + # exclude A101, A102, D100 and everything that starts with D2 and D4 + echo "ignore = ANN101,ANN102,D100,D2,D4,ANN002,ANN003" >> .flake8 + + - name: Run flake8 + uses: suo/flake8-github-action@releases/v1 + with: + checkName: "flake8_py3" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index ad6a340693..e08b61c21b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,20 @@ -Pipfile.lock .idea +.DS_Store +.env +.vscode/ +Pipfile.lock __pycache__/ .DS_Store venv/ -.env +project_name/data/full_data/ +project_name/data/subset_data/ +project_name/data/val/ +project_name/data/train/ +project_name/data/train_subset/ +project_name/data/__pycache__/ +project_name/data/val_subset/ +project_name/data/train_subset.7z +project_name/data/val_subset.7z +cnn_best.pth +project_name/trained_linear_model.pkl +/zoedepth/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 9de345d0fe..0000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks -repos: -- repo: 
https://github.com/pycqa/flake8 - rev: 6.1.0 - hooks: - - id: flake8 -- repo: local - hooks: - - id: run-unittests - name: Run unittests - entry: python -m unittest discover tests - language: python - pass_filenames: false diff --git a/FastAPI.py b/FastAPI.py new file mode 100644 index 0000000000..44bbc251af --- /dev/null +++ b/FastAPI.py @@ -0,0 +1,89 @@ +import io +import torch + +from fastapi import FastAPI, File, UploadFile, HTTPException +from fastapi.responses import StreamingResponse +from PIL import Image +from project_name.models.cnn import CNNBackbone +from project_name.models.Preprocessing_class import Preprocessing +from numpy import array + + +app = FastAPI(title="Depth Prediction API", + description="Uploads an image and " + "returns a predicted depth map.") + +# Setup +MODEL_PATH = "cnn_best.pth" +model = CNNBackbone(pretrained=False) +model.load_state_dict(torch.load(MODEL_PATH, map_location="cpu")) +model.eval() +preprocessor = Preprocessing(tile_size=(256, 256)) + + +def process_image(file_bytes: bytes, + model: torch.nn.Module, + preprocessor: Preprocessing) -> io.BytesIO: + """Function to process image + + Args: + file_bytes (bytes): file in byte representation + model (torch.nn.Module): neural network model + preprocessor (Preprocessing): preprocessing class + + Returns: + bytes: depth image in byte format + """ + img_array = preprocessor.load_image(io.BytesIO(file_bytes)) + tiles = preprocessor.tile_with_padding(img_array) + depth_tiles = [] + + for tile in tiles: + input_tensor = preprocessor.to_tensor(tile).unsqueeze(0) + with torch.no_grad(): + output = model(input_tensor) + depth_tiles.append(output.squeeze().cpu().numpy()) + + depth_map = preprocessor.reconstruct_depth(array(depth_tiles)) + depth_rgb = preprocessor.depth_to_rgb(depth_map, invert=True) + + result_image = Image.fromarray(depth_rgb) + byte_io = io.BytesIO() + result_image.save(byte_io, format="PNG") + byte_io.seek(0) + return byte_io + + +@app.post("/predict_depth/", 
summary="Predict depth from image") +async def predict_depth(file: UploadFile = File(...)) -> StreamingResponse: + """Function to generate depth from image given + + Args: + file (UploadFile, optional): image file that is uploaded. + Defaults to File(...). + + Raises: + HTTPException: image format is not supported + HTTPException: when there is an error processing the image + + Returns: + StreamingResponse: response with processed image + """ + if not (file.content_type or "").startswith("image/"): + raise HTTPException(status_code=400, detail="Invalid image format") + try: + contents = await file.read() + image_bytes = process_image(contents, model, preprocessor) + return StreamingResponse(image_bytes, media_type="image/png") + except Exception: + raise HTTPException(status_code=500, detail="Error processing image.") + + +@app.get("/", summary="Health check") +def read_root() -> dict: + """Health check function + + Returns: + dict: dict of the status + """ + return {"status": "healthy"} diff --git a/README.md b/README.md index fdf58bcafa..b34019471b 100644 --- a/README.md +++ b/README.md @@ -1,77 +1,68 @@ -# Applied ML Template 🛠️ +# Applied ML Team 16 -**Welcome to Applied Machine Learning!** This template is designed to streamline the development process and boost the quality of your code. +Welcome to the repository for our Applied Machine Learning project. -Before getting started with your projects, we encourage you to carefully read the sections below and familiarise yourselves with the proposed tools. +Our project is to train a model to approximate depth data from RGB images in a tiling pattern. ## Prerequisites Make sure you have the following software and tools installed: -- **PyCharm**: We recommend using PyCharm as your IDE, since it offers a highly tailored experience for Python development. You can get a free student license [here](https://www.jetbrains.com/community/education/#students/). - -- **Pipenv**: Pipenv is used for dependency management. 
This tools enables users to easily create and manage virtual environments. To install Pipenv, use the following command: - ```bash - $ pip install --user pipenv - ``` - For detailed installation instructions, [click here](https://pipenv.pypa.io/en/latest/installation.html). - -- **Git LFS**: Instead of committing large files to your repository, you should store and manage them using Git LFS. For installation information, [click here](https://github.com/git-lfs/git-lfs?utm_source=gitlfs_site&utm_medium=installation_link&utm_campaign=gitlfs#installing). +- **Conda**: Conda is used for dependency management. This tools is a standard for a lot of machine learning libraries and has support for pip packages as well. "conda install --yes --file conda_requirements.txt" +- **python 3.11**: a tested version of python that this repository works in. ## Getting Started -### Setting up your own repository -1. Fork this repository. -2. Clone your fork locally. -3. Configure a remote pointing to the upstream repository to sync changes between your fork and the original repository. - ```bash - git remote add upstream https://github.com/ivopascal/Applied-ML-Template - ``` - **Don't skip this step.** We might update the original repository, so you should be able to easily pull our changes. - - To update your forked repo follow these steps: - 1. `git fetch upstream` - 2. `git rebase upstream/main` - 3. `git push origin main` - - Sometimes you may need to use `git push --force origin main`. Only use this flag the first time you push after you rebased, and be careful as you might overwrite your teammates' changes. -### Git LFS -1. Set it up for your user account (only once, not each time you want to use it). - ```bash - git lfs install - ``` -2. Select the files that Git LFS should manage. To track all files of a certain type, you can use a wildcard as in the command below. - ```bash - git lfs track "*.psd" - ``` -3. Add _.gitattributes_ to the staging area. 
- ```bash - git add .gitattributes - ``` -That's all, you can commit and push as always. The tracked files will be automatically stored with Git LFS. - -### Pipenv -This tool is incredibly easy to use. Let's **install** our first package, which you will all need in your projects. - +### General +1. Clone this repository. +2. Get the training and validation data and place it in the data folder inside the project_name folder. + * Data used to train our model + 1. Download from Google Drive. [drive](https://drive.google.com/drive/folders/1k_WsVOjaULgb3N2JebxjTVqJjVsw85dP) + * Original data + 1. Download from the original data [website](https://diode-dataset.org/). + 2. Run subset_maker.py with the number of samples and "val" or "train" to get a workable data folder. +3. Create a conda environment with Python 3.11. +4. Install the packages from "conda_requirements.txt" using the command below. ```bash -pipenv install pre-commit +conda install --yes --file conda_requirements.txt ``` +5. Install PyTorch using the command from their [website](https://pytorch.org/). +6. Download the model from the release page and place it in the root folder of the repository. -After running this command, you will notice that two files were created, namely, _Pipfile_ and _Pipfile.lock_. _Pipfile_ is the configuration file that specifies all the dependencies in your virtual environment. +### Train and validate +Both commands below should be run in the root folder of the repository. -To **uninstall** a package, you can run the command: +* #### Train ```bash -pipenv uninstall +python main.py train --epochs (number of epochs) --batch-size (batch size) --lr (learning rate) --freeze (epochs before unfreezing the backbone) cnn ``` -To **activate** the virtual environment, run `pipenv shell`. You can now use the environment as you wish. To **deactivate** the environment run the command `exit`. 
+* #### Validate +```bash +python main.py evaluate (model file name with extension) --batch-size (batch size) +``` -If you **already have access to a Pipfile**, you can install the dependencies using `pipenv install`. +### API +1. Run the following command in the root directory of the repository. +```bash +uvicorn FastAPI:app --reload +``` +2. In a new tab in the terminal, run the following, replacing image_path.jpg with the path and file name for the input image: +``` +curl -X 'POST' \ + 'http://127.0.0.1:8000/predict_depth/' \ + -H 'accept: application/json' \ + -H 'Content-Type: multipart/form-data' \ + -F 'file=@image_path.jpg;type=image/jpeg' \ + --output output.png +``` -For a comprehensive list of commands, consult the [official documentation](https://pipenv.pypa.io/en/latest/cli.html). +### Streamlit +1. Run the following command in the root directory of the repository. +```bash +streamlit run streamlit_main.py +``` +2. Follow the instructions in the web demo. ### Unit testing -You are expected to test your code using unit testing, which is a technique where small individual components of your code are tested in isolation. - -An **example** is given in _tests/test_main.py_, which uses the standard _unittest_ Python module to test whether the function _hello_world_ from _main.py_ works as expected. To run all the tests developed using _unittest_, simply use: ```bash @@ -82,45 +73,36 @@ If you wish to see additional details, run it in verbose mode: python -m unittest discover -v tests ``` -### Pre-commit -Another good coding practice is using pre-commit hooks. This is used to inspect the code before committing to ensure it matches your standards. 
- -In this course, we will be using two hooks (already configured in _.pre-commit-config.yaml_): -- Unit testing -- Flake8 (checks your code for errors, styling issues and complexity) - -Since we have already configured the hooks, all you need to do is run: +repository map: ```bash -pre-commit install -``` -Now `pre-commit` will automatically run whenever you want to commit something to the repository. - -## Get Coding -You are now ready to start working on your projects. - -We recommend following the same folder structure as in the original repository. This will make it easier for you to have cleaner and consistent code, and easier for us to follow your progress and help you. - -Your repository should look something like this: -```bash -├───data # Stores .csv -├───models # Stores .pkl -├───notebooks # Contains experimental .ipynbs +├───.github +│ └────workflows +│ └──── style.yml ├───project_name -│ ├───data # For data processing, not storing .csv -│ ├───features -│ └───models # For model creation, not storing .pkl -├───reports +│ ├───data +│ │ ├───val_subset # only there when validating +│ │ ├───train_subset # only there when training +│ │ ├───data_loader.py +│ │ ├───data_test.py +│ │ ├───path_grapper.py +│ │ └───subset_maker.py +│ ├───models +│ │ ├───cnn.py +│ │ └───Preprocessing_class.py +│ └───Training +│ ├───Evaluation +│ │ ├───evaluate.py +│ │ └───validation.py +│ └───model_trainer.py ├───tests │ ├───data │ ├───features │ └───models ├───.gitignore -├───.pre-commit-config.yaml +├───cnn_best.pth # download from release page ├───main.py -├───train_model.py -├───Pipfile -├───Pipfile.lock -├───README.md +├───streamlit_main.py +├───FastAPI.py +├───conda_requirements.txt +└───README.md ``` - -**Good luck and happy coding! 
🚀** \ No newline at end of file diff --git a/cnn_best.pth b/cnn_best.pth new file mode 100644 index 0000000000..0a3aad194e Binary files /dev/null and b/cnn_best.pth differ diff --git a/conda_requirements.txt b/conda_requirements.txt new file mode 100644 index 0000000000..6733dbb85a Binary files /dev/null and b/conda_requirements.txt differ diff --git a/main.py b/main.py index e6e9af30ea..e98dde7daf 100644 --- a/main.py +++ b/main.py @@ -1,8 +1,49 @@ -# This is a sample Python script. +import sys +from pathlib import Path +import argparse +from project_name.Training.model_trainer import train_cnn +from project_name.Training.Evaluation.evaluate import run_evaluation -def hello_world(): - return "Hello, World!" +project_dir = Path(__file__).parent.resolve() +sys.path.insert(0, str(project_dir)) -if __name__ == '__main__': - hello_world() +def main() -> None: + """Main function to run or validate model. + """ + parser = argparse.ArgumentParser("Depth Estimation Pipeline") + subs = parser.add_subparsers(dest="cmd", required=True) + + # train + sub1 = subs.add_parser("train") + sub1.add_argument("model", choices=["cnn"], help="Which model to train") + sub1.add_argument("--epochs", "-e", type=int, default=20) + sub1.add_argument("--batch-size", "-b", type=int, default=8) + sub1.add_argument("--lr", type=float, default=1e-4) + sub1.add_argument("--freeze", type=int, default=5) + # python main.py train --epochs 20 --batch-size 8 --lr 1e-4 --freeze 5 cnn + + # evaluate + sub2 = subs.add_parser("evaluate") + sub2.add_argument("checkpoint", help="Path to .pth file") + sub2.add_argument("--batch-size", "-b", type=int, default=8) + # python main.py evaluate cnn_best.pth --batch-size 8 + + args = parser.parse_args() + + if args.cmd == "train": + train_cnn( + epochs=args.epochs, + batch_size=args.batch_size, + lr=args.lr, + freeze_epochs=args.freeze + ) + else: + run_evaluation( + checkpoint=args.checkpoint, + batch_size=args.batch_size + ) + + +if __name__ == "__main__": + main() 
diff --git a/project_name/Training/Evaluation/evaluate.py b/project_name/Training/Evaluation/evaluate.py new file mode 100644 index 0000000000..746356da46 --- /dev/null +++ b/project_name/Training/Evaluation/evaluate.py @@ -0,0 +1,71 @@ +import sys +from pathlib import Path + +import numpy as np +import torch +from torch.utils.data import DataLoader + +from ...models.cnn import CNNBackbone +from ...Training.model_trainer import CNNDataset +from ..Evaluation.validation import validate_model + +# Add project src to path +project_dir = Path(__file__).parent.parent +sys.path.insert(0, str(project_dir / 'src')) + + +def run_evaluation(checkpoint: str, batch_size: int) -> None: + """ + Evaluate CNN model against a mean-depth baseline on the validation set. + + Args: + checkpoint: Path to model checkpoint. + batch_size: Batch size for DataLoader. + """ + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + # Load CNN model + model = CNNBackbone(pretrained=False).to(device) + model.load_state_dict(torch.load(checkpoint, map_location=device)) + model.eval() + + # Prepare validation dataset and loader + val_ds = CNNDataset('val_subset') + loader = DataLoader(val_ds, batch_size=1, shuffle=False, num_workers=4) + + # Compute global mean ground-truth depth + all_gt = [] + for _, y in loader: + all_gt.append(y.numpy().ravel()) + mean_depth = np.concatenate(all_gt).mean() + + # Mean-depth predictor (the mean is a buffer so .to(device) moves it) + class MeanPredictor(torch.nn.Module): + def __init__(self, m: float) -> None: + super().__init__() + self.register_buffer("m", torch.tensor(m, dtype=torch.float32)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + batch, _, height, width = x.shape + return self.m.expand(batch, 1, height, width) + + baseline = MeanPredictor(mean_depth).to(device) + + # Run validation + metrics_base = validate_model(baseline, val_ds, batch_size, device) + metrics_cnn = validate_model(model, val_ds, batch_size, device) + + # Print results + print("\nBaseline (mean-depth) metrics:") + for 
name, value in metrics_base.items(): + if name.startswith('delta') or name.startswith('δ'): + print(f" {name:6} : {value * 100:5.1f}%") + else: + print(f" {name:6} : {value:7.4f}") + + print("\nCNN-backbone metrics:") + for name, value in metrics_cnn.items(): + if name.startswith('delta') or name.startswith('δ'): + print(f" {name:6} : {value * 100:5.1f}%") + else: + print(f" {name:6} : {value:7.4f}") diff --git a/project_name/Training/Evaluation/validation.py b/project_name/Training/Evaluation/validation.py new file mode 100644 index 0000000000..76b1252257 --- /dev/null +++ b/project_name/Training/Evaluation/validation.py @@ -0,0 +1,86 @@ +import numpy as np +import torch +from torch.utils.data import DataLoader +from typing import Dict + + +def validate_model( + model: torch.nn.Module, + dataset: torch.utils.data.Dataset, + batch_size: int, + device: torch.device, + loss_fn: torch.nn.Module = torch.nn.MSELoss(), + min_depth: float = 1e-3, +) -> Dict[str, float]: + """ + Evaluate depth estimation model on dataset. + + Returns metrics: mse, rmse, mae, absrel, delta thresholds. 
+ """ + model.eval() + loader = DataLoader( + dataset, + batch_size=batch_size, + shuffle=False, + num_workers=4, + ) + + total_mse = 0.0 + total_mae = 0.0 + total_absrel = 0.0 + total_delta1 = 0.0 + total_delta2 = 0.0 + total_delta3 = 0.0 + n_batches = 0 + + with torch.no_grad(): + for x, y in loader: + x, y = x.to(device), y.to(device) + pred = model(x) + + # Basic losses + mse = loss_fn(pred, y).item() + total_mse += mse + + pred_np = pred.cpu().numpy() + y_np = y.cpu().numpy() + + abs_err = np.abs(pred_np - y_np) + total_mae += abs_err.mean() + + # Mask invalid / zero depths + valid = y_np > min_depth + + # Absolute relative error on valid pixels + absrel_map = abs_err / (y_np + 1e-6) + total_absrel += absrel_map[valid].mean() + + # Threshold accuracies + ratio = np.maximum( + pred_np / (y_np + 1e-6), + y_np / (pred_np + 1e-6), + ) + total_delta1 += np.mean(ratio[valid] < 1.25) + total_delta2 += np.mean(ratio[valid] < 1.25 ** 2) + total_delta3 += np.mean(ratio[valid] < 1.25 ** 3) + + n_batches += 1 + + # Compute averages + avg_mse = total_mse / n_batches + rmse = np.sqrt(avg_mse) + mae = total_mae / n_batches + absrel = total_absrel / n_batches + delta1 = total_delta1 / n_batches + delta2 = total_delta2 / n_batches + delta3 = total_delta3 / n_batches + + return { + "mse": avg_mse, + "rmse": rmse, + "mae": mae, + "absrel": absrel, + "delta1": delta1, + "delta2": delta2, + "delta3": delta3, + } diff --git a/project_name/Training/__init__.py b/project_name/Training/__init__.py new file mode 100644 index 0000000000..2646965266 --- /dev/null +++ b/project_name/Training/__init__.py @@ -0,0 +1 @@ +"""make the function callable like modules""" diff --git a/project_name/Training/model_trainer.py b/project_name/Training/model_trainer.py new file mode 100644 index 0000000000..c52e739cfb --- /dev/null +++ b/project_name/Training/model_trainer.py @@ -0,0 +1,171 @@ +from pathlib import Path + +import numpy as np +import torch +import torch.nn as nn +import torch.optim as 
optim +from torch.utils.data import DataLoader, Dataset +from PIL import Image +from typing import Tuple + +from ..models.cnn import CNNBackbone +from ..models.Preprocessing_class import Preprocessing + +# Locate your Data folder +BASE_DIR = Path(__file__).parent.parent +DATA_DIR = BASE_DIR / "data" + + +class CNNDataset(Dataset): + """Class to load and preprocess images and depth maps for CNN training.""" + def __init__( + self, + split: str, + tile_size: Tuple[int, int] = (256, 256)) -> None: + """ + split: 'train_subset' or 'val_subset' folder under Data/ + """ + folder = DATA_DIR / split + self.samples = sorted(d for d in folder.iterdir() if d.is_dir()) + preprocessing = Preprocessing(tile_size) + self.tileer = preprocessing.tile_with_padding + self.normalizer = preprocessing.normalize + self.tile_h, self.tile_w = tile_size + + def __len__(self) -> int: + """Returns the number of samples in the dataset.""" + return len(self.samples) + + def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]: + """returns a tuple (image, depth) for the given index.""" + sample_dir = self.samples[idx] + image_path = next(sample_dir.glob("*.png")) + depth_path = next(sample_dir.glob("*_depth.npy")) + + image = np.array(Image.open(image_path)) + depth = np.load(depth_path).astype(np.float32) + + # 1. Tile & normalize image as before + img_tile = self.tileer(image)[0] + + # 2. Normalize depth to [0,1] + depth_norm = self.normalizer(depth) + + # Print debugging information for shape + # print(f"Depth shape before normalization: {depth.shape}") + # print(f"Depth shape after normalization: {depth_norm.shape}") + + # Ensure depth_norm is 2D + if depth_norm.ndim > 2: + depth_norm = np.squeeze(depth_norm) + if depth_norm.ndim > 2: + depth_norm = depth_norm[:, :, 0] + + h, w = depth_norm.shape + + # 3. 
Pad depth so dims % tile == 0 + pad_h = (self.tile_h - (h % self.tile_h)) % self.tile_h + pad_w = (self.tile_w - (w % self.tile_w)) % self.tile_w + + depth_padded = np.pad( + depth_norm, + ((0, pad_h), (0, pad_w)), + mode="constant", + ) + + # 4. Extract first tile + depth_tile = depth_padded[: self.tile_h, : self.tile_w] + + # 5. Convert to tensors + x = torch.from_numpy(img_tile).permute(2, 0, 1).float() + y = torch.from_numpy(depth_tile).unsqueeze(0).float() + + return x, y + + +def train_cnn( + epochs: int, + batch_size: int, + lr: float, + freeze_epochs: int +) -> None: + """Method to train the CNN model.""" + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print( + f"Training CNN: epochs={epochs}, " + f"bs={batch_size}, lr={lr}, freeze={freeze_epochs}" + ) + + model = CNNBackbone(pretrained=True).to(device) + + # Freeze backbone initially + for param in model.backbone.parameters(): + param.requires_grad = False + + optimizer = optim.Adam( + [ + {"params": model.head.parameters(), "lr": lr}, + {"params": model.backbone.parameters(), "lr": lr * 0.1}, + ] + ) + loss_fn = nn.MSELoss() + + train_ds = CNNDataset("train_subset") + val_ds = CNNDataset("val_subset") + train_dl = DataLoader( + train_ds, + batch_size=batch_size, + shuffle=True, + num_workers=4, + ) + val_dl = DataLoader( + val_ds, + batch_size=batch_size, + shuffle=False, + num_workers=4, + ) + + best_val = float("inf") + best_ckpt = None + + for epoch in range(1, epochs + 1): + if epoch == freeze_epochs + 1: + for param in model.backbone.parameters(): + param.requires_grad = True + print("Backbone unfrozen") + + # Training + model.train() + total_loss = 0.0 + for xb, yb in train_dl: + xb, yb = xb.to(device), yb.to(device) + optimizer.zero_grad() + loss = loss_fn(model(xb), yb) + loss.backward() + optimizer.step() + total_loss += loss.item() + train_loss = total_loss / len(train_dl) + + # Validation + model.eval() + total_val_loss = 0.0 + with torch.no_grad(): + for xb, yb in 
val_dl: + xb, yb = xb.to(device), yb.to(device) + total_val_loss += loss_fn(model(xb), yb).item() + val_loss = total_val_loss / len(val_dl) + + print( + f"Epoch {epoch}/{epochs} " + f"train={train_loss:.4f} val={val_loss:.4f}" + ) + + if val_loss < best_val: + best_val = val_loss + best_ckpt = {k: v.clone() for k, v in model.state_dict().items()} + + # Restore best weights (keep current ones if none was recorded) & save + model.load_state_dict(best_ckpt or model.state_dict()) + output_path = BASE_DIR / "cnn_best.pth" + torch.save(model.state_dict(), output_path) + print(f"Saved best model to {output_path}") diff --git a/project_name/__init__.py b/project_name/__init__.py index e69de29bb2..2646965266 100644 --- a/project_name/__init__.py +++ b/project_name/__init__.py @@ -0,0 +1 @@ +"""make the function callable like modules""" diff --git a/project_name/data/data_loader.py b/project_name/data/data_loader.py new file mode 100644 index 0000000000..5da9565cdc --- /dev/null +++ b/project_name/data/data_loader.py @@ -0,0 +1,75 @@ +import numpy as np +import os +from PIL import Image +from typing import Literal + + +class DataLoader: + """ + Data loader class for loading data from the subsetted data. + """ + def __init__(self, folder: Literal["train", "val"]) -> None: + """Initialise the class + + Args: + folder (Literal["train", "val"]): + which subset folder to get the data from. + """ + self.folder = folder + "_subset" + file_directory = __file__ + data_directory = os.path.join(os.path.split(file_directory)[0], + self.folder) + self.file_endings = [".png", "_depth.npy"] + + self.data_paths = [os.path.join(data_directory, _) for _ in + os.listdir(data_directory)] + + self.data_index = 0 + + def increment_index(self) -> None: + """Increments the index of the data path list. + makes sure it doesn't go over the index limit. + """ + self.data_index += 1 + self.data_index %= len(self.data_paths) + + def get_data(self) -> list[np.ndarray]: + """Gets the image and depth data from the list of subset data. 
+ + Returns: + list[np.ndarray]: a size 2 list of image data and then depth data. + """ + current_data_path = self.data_paths[self.data_index] + self.increment_index() + + data = [os.path.join(current_data_path, _) for _ in + os.listdir(current_data_path)] + + return_list: list[np.ndarray] = [] + for file in data: + if file.endswith(self.file_endings[0]): + image_data = np.asarray(Image.open(file)) + if len(return_list) > 0: + return_list.insert(0, image_data) + else: + return_list.append(image_data) + elif file.endswith(self.file_endings[1]): + depth_data = np.load(file) + return_list.append(depth_data) + + return return_list + + +def main() -> None: + """ + simple test and run data loader class + """ + data_load = DataLoader("val") + + data = data_load.get_data() + + print([_.shape for _ in data]) + + +if __name__ == '__main__': + main() diff --git a/project_name/data/data_test.py b/project_name/data/data_test.py new file mode 100644 index 0000000000..69be2e56c1 --- /dev/null +++ b/project_name/data/data_test.py @@ -0,0 +1,69 @@ +import matplotlib.pyplot as plt +from numpy import array, ndarray, load, concatenate +from scipy.stats import normaltest + + +multithread_data_test_output: list[tuple[str, ndarray]] = [] + + +def test_dataset_normality(data: list[str], name: str) -> None: + """Gets the data from given list of data point and plots a histogram and + normality on them. 
+ + Args: + data (list[str]): list of path names to data points + name (str): what name to give the plot + """ + whole_data = array([]) + max_n = 0 + + for data_point in data: + matrix: ndarray = load(data_point + "_depth.npy") + max_n = max(max_n, matrix.max()) + whole_data = concatenate([whole_data, + matrix[matrix < 100]]) + + normality = normaltest(whole_data) + + plt.hist(whole_data) + plt.xlabel("Depth") + plt.ylabel("Frequency") + plt.title(f"{name} | norm: stat:{normality.statistic:0.2f}, " + + f"p:{normality.pvalue:0.2f}") + plt.show() + + +def threaded_make_data_array(data: list[str], name: str) -> None: + """Get the data from list of path names. This is for threaded work. + + Args: + data (list[str]): list of path names + name (str): what name to give the plot that follows. + """ + whole_data = array([]) + max_n = 0 + + for data_point in data: + matrix: ndarray = load(data_point + "_depth.npy") + max_n = max(max_n, matrix.max()) + whole_data = concatenate([whole_data, + matrix[matrix < 100]]) + + multithread_data_test_output.append((name, whole_data)) + + +def test_data(data: ndarray, name: str) -> None: + """test normality and plot the histogram + + Args: + data (ndarray): numpy array of 1 dimension of depth + name (str): name of the data + """ + normality = normaltest(data) + + plt.hist(data) + plt.xlabel("Depth") + plt.ylabel("Frequency") + plt.title(f"{name} | norm: stat:{normality.statistic:0.2f}, " + + f"p:{normality.pvalue:0.2f}") + plt.show() diff --git a/project_name/data/path_grapper.py b/project_name/data/path_grapper.py new file mode 100644 index 0000000000..c80377d71d --- /dev/null +++ b/project_name/data/path_grapper.py @@ -0,0 +1,95 @@ +import os +import threading + +path_multithread_output: list[list[str]] = [] + + +def grab_data_from_folder(main_folder: str) -> None: + """Grabs the datapoint names from the major folder given + + Args: + main_folder (str): the major folder like 'indoor' or 'outdoor' + """ + 
datapoint_directories: list[str] = [main_folder] + for _ in range(3): + new_datapoint_directories = [] + for path in datapoint_directories: + sub_folders = os.listdir(path) + for folder in sub_folders: + new_datapoint_directories.append(os.path.join(path, folder)) + + # continue with the entries found one directory level deeper + datapoint_directories = new_datapoint_directories + + # get rid of file extension and type description. + for ending in [".png", "_depth.npy", "_depth_mask.npy"]: + datapoint_directories = [_.removesuffix(ending) + for _ in datapoint_directories] + + path_multithread_output.append(sorted(set(datapoint_directories))) + + +def get_all_data_path_names(data_folder: str) -> list[list[str]]: + """Gets all data point paths from train or val folders. + + Args: + data_folder (str): the main folder to + get the path names like train/val. + + Returns: + list[list[str]]: two list that contain all data point path names. + """ + file_directory = __file__ + data_directory = os.path.join(os.path.split(file_directory)[0], + data_folder) + + datapoint_directories_main = [] + + for folder in os.listdir(data_directory): + datapoint_directories_main.append(os.path.join(data_directory, + folder)) + + threads: list[threading.Thread] = [] + for main_folder in datapoint_directories_main: + threads.append(threading.Thread(target=grab_data_from_folder, + args=(main_folder, ))) + + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + + all_datapoint_folder_pathnames = path_multithread_output.copy() + path_multithread_output.clear() + + return all_datapoint_folder_pathnames + + +def get_train_data_folders() -> list[str]: + """Get the data folders path names in the subsetted data. + + Returns: + list[str]: what subset to the get like 'train' or 'val'. 
+ """ + file_directory = __file__ + data_directory = os.path.join(os.path.split(file_directory)[0], + "subset_data") + + train_data_folders = [] + + for folder in os.listdir(data_directory): + train_data_folders.append(os.path.join(data_directory, folder)) + + return train_data_folders + + +def main() -> None: + """ + To run stand alone functions + """ + print(get_all_data_path_names("train")) + + +if __name__ == '__main__': + main() diff --git a/project_name/data/subset_maker.py b/project_name/data/subset_maker.py new file mode 100644 index 0000000000..9f587a1089 --- /dev/null +++ b/project_name/data/subset_maker.py @@ -0,0 +1,110 @@ +import os +import random +import shutil +import threading +import numpy as np +from psutil import virtual_memory +from shutil import rmtree +from data_test import (test_dataset_normality, + threaded_make_data_array, + test_data, + multithread_data_test_output) +from path_grapper import get_all_data_path_names + + +def subset_full_dataset(amount_samples: int, full_data_folder: str) -> None: + """Subset given major data folder. + + Args: + amount_samples (int): amount of sample you want in the end. + full_data_folder (str): folder name to get the data from + """ + # list all file endings. + file_directory = __file__ + data_directory = os.path.join(os.path.split(file_directory)[0], + full_data_folder) + + print("getting data points") + all_data_points_folder_path_names = get_all_data_path_names( + full_data_folder) + + print("sampling data points") + amount_sample_per_data_folder = (amount_samples // + len(all_data_points_folder_path_names)) + selected_data: list[list[str]] = [ + random.sample(main_folder, amount_sample_per_data_folder) for + main_folder in all_data_points_folder_path_names] + + print("flattening data points") + flatten_selected_data: list[str] = [] + + for foldered_data in selected_data: + flatten_selected_data += foldered_data + + print("running normality tests") + # test normality. 
+ # check if there is enough memory to do the tests. + if virtual_memory().total > 1600000000: + threads: list[threading.Thread] = [] + # inti threads with data. + for folder in selected_data: + threads.append(threading.Thread(target=threaded_make_data_array, + args=(folder, "sub folder",))) + + threads.append(threading.Thread(target=threaded_make_data_array, + args=(flatten_selected_data, + "whole data",))) + + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + + for thread_output in multithread_data_test_output: + name, data = thread_output + + test_data(data, name) + + test_data(np.concat([_[1] for _ in multithread_data_test_output]), + "whole data") + else: + for folder in selected_data: + test_dataset_normality(folder, "sub folder") + + test_dataset_normality(flatten_selected_data, "whole selected data") + + print("copying over data to subset data") + output_data_directory = data_directory + "_subset" + try: + os.mkdir(output_data_directory) + print(f"made data folder {full_data_folder + '_subset'}") + except FileExistsError: + # carfull will delete any path given + rmtree(output_data_directory) + os.mkdir(output_data_directory) + + # copy over the subset data in their own folders. 
class Preprocessing:
    """
    Preprocessing class for preprocessing image and depth data.

    Bundles image loading, normalization, tensor/array conversion,
    tiling with padding, reconstruction of a depth map from tiles and
    colour-mapping of a depth map.
    """
    def __init__(self, tile_size: Tuple[int, int] = (256, 256)) -> None:
        """
        Initialize the Preprocessing class with a tile size.

        Args:
            tile_size (Tuple[int, int]): (height, width) of one tile.
        """
        self.tile_size = tile_size
        # Written by tile_with_padding and read by reconstruct_depth;
        # keyed by the position of the array in the tiled input.
        self.last_padding_info: dict[int, dict] = {}

    def load_image(self,
                   img: "Union[str, Image.Image, BytesIO]") -> np.ndarray:
        """
        Load a PIL image, a path to image, or a BytesIO stream, and
        convert to numpy array (uint8).

        Args:
            img: a PIL image, a path to an image file or a BytesIO stream.

        Raises:
            TypeError: if the input is none of the supported types.

        Returns:
            np.ndarray: the image converted to RGB.
        """
        # paths and byte streams are both opened the same way
        if isinstance(img, (str, BytesIO)):
            img = Image.open(img)
        if not isinstance(img, Image.Image):
            raise TypeError("Input must be a PIL Image, "
                            "a BytesIO stream, or a path to one.")

        return np.array(img.convert("RGB"))

    def is_8_bit(self, np_array: np.ndarray) -> bool:
        """
        Check if a numpy array is of type uint8.
        """
        return np_array.dtype == np.uint8

    def normalize(self, np_array: np.ndarray) -> np.ndarray:
        """
        Scale a numpy array by the maximum of its dtype and, for
        3-channel images, standardize with the ImageNet mean and std.

        Arrays that do not have a trailing channel axis of size 3
        (e.g. 2-D grayscale or depth arrays) are only scaled, because
        the per-channel statistics do not apply to them.

        Args:
            np_array (np.ndarray): image of shape (H, W, 3) or (H, W).

        Returns:
            np.ndarray: the normalized array.
        """
        if self.is_8_bit(np_array):
            normalized = np_array.astype(np.float32) / 255.0
        elif np.issubdtype(np_array.dtype, np.integer):
            # np.finfo only accepts floating dtypes; integer images
            # (e.g. uint16) must be scaled by their integer maximum.
            normalized = (np_array /
                          np.iinfo(np_array.dtype).max).astype(np.float32)
        else:
            # NOTE(review): dividing a float image by the dtype maximum
            # maps it to ~0; kept as in the original, confirm intent.
            normalized = (np_array /
                          np.finfo(np_array.dtype).max).astype(np.float32)

        if normalized.ndim == 3 and normalized.shape[-1] == 3:
            # Apply ImageNet mean and std (as used during training)
            mean = np.array([0.485, 0.456, 0.406])
            std = np.array([0.229, 0.224, 0.225])
            normalized = (normalized - mean) / std
        return normalized

    def to_tensor(self, np_image: np.ndarray) -> torch.Tensor:
        """
        Convert a numpy image (H, W, C) or (H, W) to a
        PyTorch tensor (C, H, W) or (1, H, W).
        Normalizes to float32.

        Raises:
            ValueError: if the array is neither 2-D nor 3-D.
        """
        norm = self.normalize(np_image)

        if norm.ndim == 2:  # grayscale
            return torch.from_numpy(norm).unsqueeze(0).float()
        if norm.ndim == 3:  # RGB
            return torch.from_numpy(norm).permute(2, 0, 1).float()
        raise ValueError("Invalid input shape for tensor conversion")

    def to_numpy(self, tensor: torch.Tensor) -> np.ndarray:
        """
        Convert a PyTorch tensor (C, H, W) or
        (1, H, W) to a numpy array (H, W, C) or (H, W).
        Assumes tensor is already on CPU and detached.

        Raises:
            ValueError: if the tensor is neither 2-D nor 3-D.
        """
        if tensor.ndim == 3:
            if tensor.shape[0] == 1:
                return tensor.squeeze(0).numpy()
            return tensor.permute(1, 2, 0).numpy()
        if tensor.ndim == 2:
            return tensor.numpy()
        raise ValueError("Invalid tensor shape for numpy conversion")

    def tile_with_padding(self,
                          np_arrays: Union[np.ndarray, list[np.ndarray]],
                          pad_mode: str = 'constant') -> np.ndarray:
        """Split input arrays into tiles, padding the bottom and right
        edges so that every tile has the full tile size.

        The padding applied to each array is remembered in
        ``last_padding_info`` so reconstruct_depth can crop it away.

        Args:
            np_arrays: one ndarray or a list/tuple of ndarrays, each of
                shape (H, W) or (H, W, C).
            pad_mode (str, optional): mode of padding passed to np.pad.
                Defaults to 'constant'.

        Raises:
            TypeError: if a given item is not a ndarray

        Returns:
            np.ndarray: all tiles, row by row, array after array.
        """
        if not isinstance(np_arrays, (list, tuple)):
            np_arrays = [np_arrays]

        tile_h, tile_w = self.tile_size
        all_tiles = []
        for idx, np_array in enumerate(np_arrays):
            if not isinstance(np_array, np.ndarray):
                raise TypeError("Input must be numpy array")
            original_shape = np_array.shape
            h, w = original_shape[:2]

            # amount needed to reach the next multiple of the tile size
            pad_h = (tile_h - (h % tile_h)) % tile_h
            pad_w = (tile_w - (w % tile_w)) % tile_w

            # Store padding info before any processing
            self.last_padding_info[idx] = {
                'original_shape': original_shape,
                'pad_h': pad_h,
                'pad_w': pad_w,
                'is_grayscale': len(original_shape) == 2
            }

            # never pad the channel axis
            if len(original_shape) == 3:
                pad_width = ((0, pad_h), (0, pad_w), (0, 0))
            else:
                pad_width = ((0, pad_h), (0, pad_w))

            padded = np.pad(np_array, pad_width, mode=pad_mode)

            # padded dimensions are exact multiples of the tile size, so
            # every slice below is a full tile and needs no extra padding
            for i in range(0, padded.shape[0], tile_h):
                for j in range(0, padded.shape[1], tile_w):
                    all_tiles.append(padded[i:i + tile_h, j:j + tile_w])

        return np.array(all_tiles)

    def reconstruct_depth(self, depth_tiles: np.ndarray,
                          original_idx: int = 0) -> np.ndarray:
        """
        Special reconstruction for 1-channel depth outputs

        Stitches tiles (in the row-by-row order produced by
        tile_with_padding) back together and crops the padding.

        Args:
            depth_tiles (np.ndarray): sequence of 2-D depth tiles.
            original_idx (int): key in ``last_padding_info`` of the
                array the tiles belong to. Defaults to 0.

        Returns:
            np.ndarray: float32 depth map with the original height and
                width; positions without a tile stay zero.
        """
        info = self.last_padding_info[original_idx]
        h, w = info['original_shape'][:2]
        tile_h, tile_w = self.tile_size

        # Create padded canvas
        padded_h = h + info['pad_h']
        padded_w = w + info['pad_w']
        reconstructed = np.zeros((padded_h, padded_w), dtype=np.float32)
        cols = padded_w // tile_w
        rows = padded_h // tile_h

        # Reconstruct depth map
        for i in range(rows):
            for j in range(cols):
                idx = i * cols + j
                if idx >= len(depth_tiles):
                    break
                y_start = i * tile_h
                x_start = j * tile_w
                # the canvas is a whole number of tiles, so the target
                # window is always exactly one tile in size
                reconstructed[y_start:y_start + tile_h,
                              x_start:x_start + tile_w] =\
                    depth_tiles[idx][:tile_h, :tile_w]

        # Crop to original dimensions
        return reconstructed[:h, :w]

    def depth_to_rgb(self,
                     depth_map: np.ndarray,
                     cmap: str = 'plasma',
                     invert: bool = False) -> np.ndarray:
        """
        Depth map to RGB.

        Args:
            depth_map (np.ndarray): depth values to visualise.
            cmap (str): name of the matplotlib colormap.
            invert (bool): flip the scaled values before colouring.

        Returns:
            np.ndarray: uint8 array with a trailing RGB axis.
        """
        # Normalize based on percentiles (robust to outliers)
        p1, p99 = np.percentile(depth_map, [1, 99])
        span = p99 - p1
        if span > 0:
            scaled = np.clip((depth_map - p1) / span, 0, 1)
        else:
            # constant (or non-finite) depth map: avoid dividing by
            # zero and render a uniform image instead of NaNs
            scaled = np.zeros(np.shape(depth_map), dtype=np.float32)

        if invert:
            scaled = 1 - scaled

        colormap = plt.get_cmap(cmap)
        return (colormap(scaled)[..., :3] * 255).astype(np.uint8)
def evaluate_zoedepth_model(model: torch.nn.Module,
                            dataloader: "torch.utils.data.DataLoader",
                            device: torch.device) -> dict:
    """Evaluate a depth model batch by batch and average the metrics.

    For every batch the prediction is resized to the ground-truth
    resolution when needed, pixels where either the ground truth or the
    prediction is not positive are dropped, and the error metrics are
    computed on the remaining pixels. A side-by-side image of the first
    sample of each batch is written through ``save_prediction_images``
    into the folder 'save_dir'.

    Args:
        model (torch.nn.Module): the model to be evaluated. Its output
            may be a tensor, a dict holding "metric_depth", or a
            list/tuple whose first item is the prediction.
        dataloader: iterable yielding (images, depths_gt) batches.
        device (torch.device): device to run the model on

    Returns:
        dict: the per-batch means of "MAE", "MSE", "RMSE", "AbsRel",
            "Delta1", "Delta2", "Delta3" and "Inference Time (s)".
    """
    model.eval()
    model.to(device)

    maes, rmses, mses, absrels = [], [], [], []
    delta1s, delta2s, delta3s = [], [], []
    times = []

    with torch.no_grad():
        for batch_idx, (images, depths_gt) in enumerate(dataloader):
            print(f"\n--- Batch {batch_idx} ---")

            images = images.to(device)
            depths_gt = depths_gt.to(device)

            # NOTE(review): wall-clock time around the forward call only;
            # on CUDA this may not include asynchronous kernel time.
            start = time.time()
            preds = model(images)
            times.append(time.time() - start)

            if isinstance(preds, dict):
                preds = preds["metric_depth"]
            elif isinstance(preds, (list, tuple)):
                preds = preds[0]

            if preds.shape[-2:] != depths_gt.shape[-2:]:
                # mode / align_corners must be keywords: the third
                # positional parameter of interpolate is scale_factor.
                preds = F.interpolate(preds,
                                      size=depths_gt.shape[-2:],
                                      mode="bilinear",
                                      align_corners=False)

            preds_np = preds.cpu().numpy().reshape(-1)
            gt_np = depths_gt.cpu().numpy().reshape(-1)

            # keep only pixels where both depths are positive so the
            # ratios below are well defined
            valid_mask = (gt_np > 0) & (preds_np > 0)
            gt_np = gt_np[valid_mask]
            preds_np = preds_np[valid_mask]

            if gt_np.size:
                abs_err = np.abs(gt_np - preds_np)
                mse = np.mean((gt_np - preds_np) ** 2)
                thresh = np.maximum(gt_np / preds_np, preds_np / gt_np)

                maes.append(np.mean(abs_err))
                mses.append(mse)
                rmses.append(np.sqrt(mse))
                absrels.append(np.mean(abs_err / gt_np))
                delta1s.append(np.mean(thresh < 1.25))
                delta2s.append(np.mean(thresh < 1.25 ** 2))
                delta3s.append(np.mean(thresh < 1.25 ** 3))
            else:
                # a mean over zero pixels is NaN and would poison the
                # averages returned below
                print("No valid pixels in batch; skipping metrics.")

            img, pred = images[0].cpu(), preds[0].cpu()
            # the batch index (not a rebound loop variable) names the file
            save_prediction_images(img, pred,
                                   index=batch_idx, save_dir='save_dir')

    return {
        "MAE": np.mean(maes),
        "MSE": np.mean(mses),
        "RMSE": np.mean(rmses),
        "AbsRel": np.mean(absrels),
        "Delta1": np.mean(delta1s),
        "Delta2": np.mean(delta2s),
        "Delta3": np.mean(delta3s),
        "Inference Time (s)": np.mean(times),
    }
class ZoeDepthEvaluator:
    """Loads the ZoeDepth model and a dataset split and evaluates them.
    """
    def __init__(
        self,
        split: str = "val",
        batch_size: int = 1,
        num_workers: int = 4
    ) -> None:
        """Pick the device, then load the model and the data loader.

        Args:
            split (str, optional): dataset split to load.
                Defaults to "val".
            batch_size (int, optional): batch size of the data loader.
                Defaults to 1.
            num_workers (int, optional): worker count of the data loader.
                Defaults to 4.
        """
        # prefer the GPU whenever torch reports one
        device_name = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)

        self.split = split
        self.batch_size = batch_size
        self.num_workers = num_workers

        print(f"Using device: {self.device}")
        self.model = self._load_model()
        self.dataloader = self._load_data()

    def _load_model(self) -> torch.nn.Module:
        """Fetch the pretrained ZoeD_NK model through torch.hub.

        Returns:
            torch.nn.Module: the zoe model
        """
        print("Loading ZoeDepth model from torch.hub...")
        return torch.hub.load("isl-org/ZoeDepth", "ZoeD_NK", pretrained=True)

    def _load_data(self) -> DataLoader:
        """Wrap the requested dataset split in an unshuffled DataLoader.

        Returns:
            DataLoader: the dataloader for model
        """
        print(f"Loading dataset split: {self.split}")
        loader_options = {
            "batch_size": self.batch_size,
            "shuffle": False,
            "num_workers": self.num_workers,
        }
        return DataLoader(ZoeDepthDataset(self.split), **loader_options)

    def evaluate(self) -> dict:
        """Run the evaluation of the loaded model on the loaded data.

        Returns:
            dict: results
        """
        print("Evaluating model...")
        return evaluate_zoedepth_model(
            self.model, self.dataloader, self.device
        )
"AbsRel", "Delta1", + "Delta2", "Delta3", "Inference Time (s)" + ]: + value = results.get(metric) + try: + value_float = float(value) + print(f"{metric}: {value_float:.4f}") + except (ValueError, TypeError): + print(f"{metric}: {value}") + + +if __name__ == "__main__": + main() diff --git a/project_name/models/baseline/linear_regression/data_loader.py b/project_name/models/baseline/linear_regression/data_loader.py new file mode 100644 index 0000000000..af9de5ee5e --- /dev/null +++ b/project_name/models/baseline/linear_regression/data_loader.py @@ -0,0 +1,65 @@ +from typing import Tuple, Any +import numpy as np +import cv2 # type: ignore +from data.data_loader import DataLoader + + +class LinearRegressionDataset: + """Dataset loader for Linear Regression training & testing data.""" + + def __init__(self, + split: str, + tile_size: Tuple[int, int] = (64, 64)) -> None: + """ + Args: + split (str): "train" or "val" + tile_size (tuple): (height, width) of tiles + """ + mapped_split = "Val" if split.lower() == "val" else "train" + self.data_loader = DataLoader(mapped_split) + + self.X = [] + self.y = [] + self.original_images = [] + self.original_depth_maps = [] + + resize_dim = (384, 512) + tile_h, tile_w = tile_size + + for _ in range(len(self.data_loader.data_paths)): + image, depth = self.data_loader.get_data() + + i = cv2.INTER_AREA + image_small = cv2.resize(image, resize_dim, i) + depth_small = cv2.resize(depth, resize_dim, i) + + self.original_images.append(image_small) + self.original_depth_maps.append(depth_small) + + img_h, img_w = image_small.shape[:2] + + assert img_h % tile_h == 0 and img_w % tile_w == 0, \ + f"Image {img_h}x{img_w} not divisible by {tile_h}x{tile_w}" + + for y in range(0, img_h, tile_h): + for x in range(0, img_w, tile_w): + img_tile = image_small[y:y + tile_h, x:x + tile_w] + depth_tile = depth_small[y:y + tile_h, x:x + tile_w] + + self.X.append(img_tile.flatten()) + self.y.append(depth_tile.flatten()) + + self.X = np.array(self.X) + 
self.y = np.array(self.y) + + def __len__(self) -> int: + """Return the length of the dataset (number of tiles).""" + return len(self.X) + + def __getitem__(self, idx: int) -> Tuple[Any, Any]: + """Return an item at a specified index.""" + return self.X[idx], self.y[idx] + + def get_all(self) -> Tuple[np.ndarray, np.ndarray]: + """Return full dataset as (X, y) numpy arrays.""" + return self.X, self.y diff --git a/project_name/models/baseline/linear_regression/main.py b/project_name/models/baseline/linear_regression/main.py new file mode 100644 index 0000000000..a6b961272c --- /dev/null +++ b/project_name/models/baseline/linear_regression/main.py @@ -0,0 +1,175 @@ +from .model import LinearModelHandler +from .data_loader import LinearRegressionDataset + +import numpy as np +import matplotlib.pyplot as plt + + +class LinearRegressionPipeline: + """ + Pipeline to handle data loading, training, + and evaluation of a linear regression model. + """ + + def __init__(self, tile_size: tuple[int, int] = (64, 64)) -> None: + """init pipeline + + Args: + tile_size (tuple, optional): tile size. Defaults to (64, 64). 
+ """ + self.model_handler = LinearModelHandler() + self.X_train: np.ndarray = None + self.y_train: np.ndarray = None + self.X_test: np.ndarray = None + self.y_test: np.ndarray = None + self.tile_size = tile_size + + def load_data(self) -> None: + """load data + """ + print("Loading training data...") + ts = self.tile_size + train_dataset = LinearRegressionDataset("train", tile_size=ts) + + self.X_train, self.y_train = train_dataset.get_all() + + print("Loading test data...") + test_dataset = LinearRegressionDataset("val", tile_size=self.tile_size) + self.X_test, self.y_test = test_dataset.get_all() + + def train_model(self) -> None: + """train model + """ + print("Training model...") + self.model_handler.train(self.X_train, self.y_train) + self.model_handler.save_model("trained_linear_model.pkl") + + def evaluate_model(self) -> None: + """eval model + """ + print("Evaluating model...") + ( + mse_score, rmse_score, mae_score, abs_rel, + delta1, delta2, delta3, inference_time + ) = self.model_handler.evaluate( + self.X_test, self.y_test) + print(f"MSE: {mse_score:.4f}") + print(f"RMSE: {rmse_score:.4f}") + print(f"MAE: {mae_score:.4f}") + print(f"Absolute Relative Error: {abs_rel:.4f}") + print(f"d1={delta1:.3f}, d2={delta2:.3f}, d3={delta3:.3f}") + print(f"Inference Time: {inference_time:.4f} s") + + ts = self.tile_size + self.visualize_images_and_depths(num_samples=5, tile_size=ts) + + def run(self) -> None: + """run pipeline + """ + self.load_data() + self.train_model() + self.evaluate_model() + + def load_and_evaluate_saved_model(self, model_path: str) -> None: + """Load a saved model and evaluate it on test data.""" + print("Loading test data for evaluation...") + test_dataset = LinearRegressionDataset("val", tile_size=self.tile_size) + self.X_test, self.y_test = test_dataset.get_all() + + self.model_handler.load_model(model_path) + self.evaluate_model() + + def visualize_images_and_depths(self, + num_samples: int = 5, + tile_size: tuple[int, int] = (64, 64) + 
) -> None: + """predict and visualize output + + Args: + num_samples (int, optional): amount samples. Defaults to 5. + tile_size (tuple, optional): tile size. Defaults to (64, 64). + """ + if self.model_handler.model is None: + raise ValueError("Model not trained or loaded.") + if self.X_test is None or self.y_test is None: + raise ValueError("Test data not loaded.") + + y_pred = self.model_handler.predict(self.X_test) + + test_dataset = LinearRegressionDataset("val", tile_size=tile_size) + + n_images = len(test_dataset.original_images) + if num_samples > n_images: + num_samples = n_images + + sample_image_indices = np.random.choice( + n_images, num_samples, replace=False + ) + + tile_h, tile_w = tile_size + img_h, img_w = test_dataset.original_images[0].shape[:2] + tiles_per_row = img_w // tile_w + tiles_per_col = img_h // tile_h + tiles_per_image = tiles_per_row * tiles_per_col + + for img_idx in sample_image_indices: + # Tiles corresponding to this image + start_tile_idx = img_idx * tiles_per_image + end_tile_idx = start_tile_idx + tiles_per_image + + pred_tiles = y_pred[start_tile_idx:end_tile_idx] + + # Rebuild predicted depth map from tiles + pred_depth_map = np.zeros((img_h, img_w)) + tile_idx = 0 + for row in range(tiles_per_col): + for col in range(tiles_per_row): + tile_pred_flat = pred_tiles[tile_idx] + tile_pred = tile_pred_flat.reshape((tile_h, tile_w)) + y_start = row * tile_h + y_end = y_start + tile_h + x_start = col * tile_w + x_end = x_start + tile_w + + pred_depth_map[y_start:y_end, x_start:x_end] = tile_pred + tile_idx += 1 + + img = test_dataset.original_images[img_idx] + true_depth = test_dataset.original_depth_maps[img_idx] + + plt.figure(figsize=(12, 4)) + + plt.subplot(1, 3, 1) + plt.imshow(img, cmap='gray') + plt.title("Input Image") + plt.axis('off') + + plt.subplot(1, 3, 2) + plt.imshow(true_depth, cmap='inferno') + plt.title("Ground Truth Depth Map") + plt.colorbar(shrink=0.6) + plt.axis('off') + + plt.subplot(1, 3, 3) + 
class LinearModelHandler:
    """Wrapper around a scikit-learn LinearRegression model that covers
    training, prediction, evaluation and saving/loading with joblib."""
    def __init__(self) -> None:
        """Start without a model; train or load one before use.
        """
        self.model: LinearRegression = None

    def _require_model(self) -> None:
        """Raise ValueError when no model has been trained or loaded."""
        if self.model is None:
            raise ValueError("Model has not been trained yet.")

    def train(self, X_train: np.ndarray, y_train: np.ndarray) -> None:
        """Fit a fresh LinearRegression on the given data.

        Args:
            X_train (np.ndarray): input
            y_train (np.ndarray): ground truth
        """
        self.model = LinearRegression()
        self.model.fit(X_train, y_train)

    def predict(self, X_test: np.ndarray) -> np.ndarray:
        """Predict with the current model.

        Args:
            X_test (np.ndarray): observation

        Raises:
            ValueError: if no model is available.

        Returns:
            np.ndarray: prediction
        """
        self._require_model()
        return self.model.predict(X_test)

    def evaluate(
        self,
        X_test: np.ndarray,
        y_test: np.ndarray
    ) -> Tuple[float, float, float, float, float, float, float, float]:
        """Predict on the test data and compute the error metrics.

        Args:
            X_test (np.ndarray): observation
            y_test (np.ndarray): ground truth

        Raises:
            ValueError: if no model is available.

        Returns:
            Tuple[float, float, float, float, float, float, float, float]:
                MSE, RMSE, MAE, absolute relative error, the three
                threshold accuracies (1.25, 1.25^2, 1.25^3) and the
                prediction time in seconds.
        """
        self._require_model()

        # only the predict call itself is timed
        tic = time.time()
        predicted = self.model.predict(X_test)
        toc = time.time()

        mse_score = mean_squared_error(y_test, predicted)
        mae_score = mean_absolute_error(y_test, predicted)
        rmse_score = np.sqrt(mse_score)

        # small epsilon keeps the divisions below away from zero
        eps = 1e-8
        relative_error = np.abs(predicted - y_test) / (np.abs(y_test) + eps)
        abs_rel = np.mean(relative_error)

        # Threshold accuracy: share of values whose worse ratio
        # (pred/true or true/pred) stays below each bound
        ratios = np.maximum(predicted / (y_test + eps),
                            y_test / (predicted + eps))
        delta1 = np.mean(ratios < 1.25)
        delta2 = np.mean(ratios < 1.25 ** 2)
        delta3 = np.mean(ratios < 1.25 ** 3)

        return (mse_score, rmse_score, mae_score,
                abs_rel, delta1, delta2, delta3, toc - tic)

    def save_model(self, path: str = "trained_linear_model.pkl") -> None:
        """Write the current model to disk with joblib.

        Args:
            path (str, optional): path to save.
                Defaults to "trained_linear_model.pkl".

        Raises:
            ValueError: if no model is available.
        """
        self._require_model()
        joblib.dump(self.model, path)
        print(f"Model saved to {path}")

    def load_model(self, path: str = "trained_linear_model.pkl") -> None:
        """Replace the current model with one read from disk.

        Args:
            path (str, optional): path to load model.
                Defaults to "trained_linear_model.pkl".
        """
        # NOTE(review): joblib files are pickle-based; only load files
        # from a trusted source.
        self.model = joblib.load(path)
        print(f"Model loaded from {path}")
+ """ + + def __init__(self, pretrained: bool = True) -> None: + """initialize the CNN backbone.""" + super().__init__() + # Load ResNet-34 and remove classifier + resnet = models.resnet34(pretrained=pretrained) + modules = list(resnet.children())[:-2] + self.backbone = nn.Sequential(*modules) + + # Head + self.head = nn.Sequential( + nn.Upsample( + scale_factor=2, + mode="bilinear", + align_corners=False + ), + nn.Conv2d( + in_channels=512, + out_channels=256, + kernel_size=3, + padding=1 + ), + nn.ReLU(inplace=True), + nn.Upsample( + scale_factor=2, + mode="bilinear", + align_corners=False + ), + nn.Conv2d( + in_channels=256, + out_channels=128, + kernel_size=3, + padding=1 + ), + nn.ReLU(inplace=True), + nn.Upsample( + scale_factor=4, + mode="bilinear", + align_corners=False + ), + nn.Conv2d( + in_channels=128, + out_channels=1, + kernel_size=3, + padding=1 + ) + ) + + def forward(self, x: Tensor) -> Tensor: + """ + Forward pass through the backbone and head. + Ensures output matches input resolution. 
+ """ + features = self.backbone(x) + output = self.head(features) + + # Resize to match input resolution if needed + if output.shape[2:] != x.shape[2:]: + output = nn.functional.interpolate( + output, + size=x.shape[2:], + mode="bilinear", + align_corners=False + ) + + return output diff --git a/project_name/save_dir/combined_0.png b/project_name/save_dir/combined_0.png new file mode 100644 index 0000000000..02d31ec932 Binary files /dev/null and b/project_name/save_dir/combined_0.png differ diff --git a/project_name/save_dir/combined_1.png b/project_name/save_dir/combined_1.png new file mode 100644 index 0000000000..e7bf48713d Binary files /dev/null and b/project_name/save_dir/combined_1.png differ diff --git a/project_name/save_dir/combined_10.png b/project_name/save_dir/combined_10.png new file mode 100644 index 0000000000..546522bad2 Binary files /dev/null and b/project_name/save_dir/combined_10.png differ diff --git a/project_name/save_dir/combined_11.png b/project_name/save_dir/combined_11.png new file mode 100644 index 0000000000..43297c9799 Binary files /dev/null and b/project_name/save_dir/combined_11.png differ diff --git a/project_name/save_dir/combined_12.png b/project_name/save_dir/combined_12.png new file mode 100644 index 0000000000..f51c82ecc0 Binary files /dev/null and b/project_name/save_dir/combined_12.png differ diff --git a/project_name/save_dir/combined_13.png b/project_name/save_dir/combined_13.png new file mode 100644 index 0000000000..d165acdde4 Binary files /dev/null and b/project_name/save_dir/combined_13.png differ diff --git a/project_name/save_dir/combined_14.png b/project_name/save_dir/combined_14.png new file mode 100644 index 0000000000..a33968a954 Binary files /dev/null and b/project_name/save_dir/combined_14.png differ diff --git a/project_name/save_dir/combined_15.png b/project_name/save_dir/combined_15.png new file mode 100644 index 0000000000..b6d33aab31 Binary files /dev/null and b/project_name/save_dir/combined_15.png differ 
diff --git a/project_name/save_dir/combined_16.png b/project_name/save_dir/combined_16.png new file mode 100644 index 0000000000..3a7b04fb20 Binary files /dev/null and b/project_name/save_dir/combined_16.png differ diff --git a/project_name/save_dir/combined_17.png b/project_name/save_dir/combined_17.png new file mode 100644 index 0000000000..9e4f31c6a2 Binary files /dev/null and b/project_name/save_dir/combined_17.png differ diff --git a/project_name/save_dir/combined_18.png b/project_name/save_dir/combined_18.png new file mode 100644 index 0000000000..d5c5b50250 Binary files /dev/null and b/project_name/save_dir/combined_18.png differ diff --git a/project_name/save_dir/combined_19.png b/project_name/save_dir/combined_19.png new file mode 100644 index 0000000000..5e42708532 Binary files /dev/null and b/project_name/save_dir/combined_19.png differ diff --git a/project_name/save_dir/combined_2.png b/project_name/save_dir/combined_2.png new file mode 100644 index 0000000000..d4a1aecc8c Binary files /dev/null and b/project_name/save_dir/combined_2.png differ diff --git a/project_name/save_dir/combined_20.png b/project_name/save_dir/combined_20.png new file mode 100644 index 0000000000..e2e1896293 Binary files /dev/null and b/project_name/save_dir/combined_20.png differ diff --git a/project_name/save_dir/combined_21.png b/project_name/save_dir/combined_21.png new file mode 100644 index 0000000000..fbac013961 Binary files /dev/null and b/project_name/save_dir/combined_21.png differ diff --git a/project_name/save_dir/combined_22.png b/project_name/save_dir/combined_22.png new file mode 100644 index 0000000000..3be43ef63a Binary files /dev/null and b/project_name/save_dir/combined_22.png differ diff --git a/project_name/save_dir/combined_23.png b/project_name/save_dir/combined_23.png new file mode 100644 index 0000000000..3bced0c438 Binary files /dev/null and b/project_name/save_dir/combined_23.png differ diff --git a/project_name/save_dir/combined_24.png 
b/project_name/save_dir/combined_24.png new file mode 100644 index 0000000000..8db194ed03 Binary files /dev/null and b/project_name/save_dir/combined_24.png differ diff --git a/project_name/save_dir/combined_25.png b/project_name/save_dir/combined_25.png new file mode 100644 index 0000000000..9dc5ba285e Binary files /dev/null and b/project_name/save_dir/combined_25.png differ diff --git a/project_name/save_dir/combined_26.png b/project_name/save_dir/combined_26.png new file mode 100644 index 0000000000..7ef7dff0a0 Binary files /dev/null and b/project_name/save_dir/combined_26.png differ diff --git a/project_name/save_dir/combined_27.png b/project_name/save_dir/combined_27.png new file mode 100644 index 0000000000..b8e26e31ce Binary files /dev/null and b/project_name/save_dir/combined_27.png differ diff --git a/project_name/save_dir/combined_28.png b/project_name/save_dir/combined_28.png new file mode 100644 index 0000000000..5547f170ad Binary files /dev/null and b/project_name/save_dir/combined_28.png differ diff --git a/project_name/save_dir/combined_29.png b/project_name/save_dir/combined_29.png new file mode 100644 index 0000000000..1b5e38fe8f Binary files /dev/null and b/project_name/save_dir/combined_29.png differ diff --git a/project_name/save_dir/combined_3.png b/project_name/save_dir/combined_3.png new file mode 100644 index 0000000000..38ed7b7ded Binary files /dev/null and b/project_name/save_dir/combined_3.png differ diff --git a/project_name/save_dir/combined_30.png b/project_name/save_dir/combined_30.png new file mode 100644 index 0000000000..06c44e57f1 Binary files /dev/null and b/project_name/save_dir/combined_30.png differ diff --git a/project_name/save_dir/combined_31.png b/project_name/save_dir/combined_31.png new file mode 100644 index 0000000000..02d31ec932 Binary files /dev/null and b/project_name/save_dir/combined_31.png differ diff --git a/project_name/save_dir/combined_32.png b/project_name/save_dir/combined_32.png new file mode 100644 index 
0000000000..17a9c3ec71 Binary files /dev/null and b/project_name/save_dir/combined_32.png differ diff --git a/project_name/save_dir/combined_33.png b/project_name/save_dir/combined_33.png new file mode 100644 index 0000000000..c6f173e6d0 Binary files /dev/null and b/project_name/save_dir/combined_33.png differ diff --git a/project_name/save_dir/combined_34.png b/project_name/save_dir/combined_34.png new file mode 100644 index 0000000000..b10d891fbc Binary files /dev/null and b/project_name/save_dir/combined_34.png differ diff --git a/project_name/save_dir/combined_35.png b/project_name/save_dir/combined_35.png new file mode 100644 index 0000000000..170626d044 Binary files /dev/null and b/project_name/save_dir/combined_35.png differ diff --git a/project_name/save_dir/combined_36.png b/project_name/save_dir/combined_36.png new file mode 100644 index 0000000000..aa284c3af7 Binary files /dev/null and b/project_name/save_dir/combined_36.png differ diff --git a/project_name/save_dir/combined_37.png b/project_name/save_dir/combined_37.png new file mode 100644 index 0000000000..3b559bcb19 Binary files /dev/null and b/project_name/save_dir/combined_37.png differ diff --git a/project_name/save_dir/combined_38.png b/project_name/save_dir/combined_38.png new file mode 100644 index 0000000000..1862e93c9b Binary files /dev/null and b/project_name/save_dir/combined_38.png differ diff --git a/project_name/save_dir/combined_39.png b/project_name/save_dir/combined_39.png new file mode 100644 index 0000000000..92155d8d55 Binary files /dev/null and b/project_name/save_dir/combined_39.png differ diff --git a/project_name/save_dir/combined_4.png b/project_name/save_dir/combined_4.png new file mode 100644 index 0000000000..aa284c3af7 Binary files /dev/null and b/project_name/save_dir/combined_4.png differ diff --git a/project_name/save_dir/combined_40.png b/project_name/save_dir/combined_40.png new file mode 100644 index 0000000000..21fcfbd8cf Binary files /dev/null and 
b/project_name/save_dir/combined_40.png differ diff --git a/project_name/save_dir/combined_41.png b/project_name/save_dir/combined_41.png new file mode 100644 index 0000000000..94c4c77233 Binary files /dev/null and b/project_name/save_dir/combined_41.png differ diff --git a/project_name/save_dir/combined_42.png b/project_name/save_dir/combined_42.png new file mode 100644 index 0000000000..005355cc0d Binary files /dev/null and b/project_name/save_dir/combined_42.png differ diff --git a/project_name/save_dir/combined_43.png b/project_name/save_dir/combined_43.png new file mode 100644 index 0000000000..729a1e0f4c Binary files /dev/null and b/project_name/save_dir/combined_43.png differ diff --git a/project_name/save_dir/combined_44.png b/project_name/save_dir/combined_44.png new file mode 100644 index 0000000000..74fb5b0224 Binary files /dev/null and b/project_name/save_dir/combined_44.png differ diff --git a/project_name/save_dir/combined_45.png b/project_name/save_dir/combined_45.png new file mode 100644 index 0000000000..1d1dd06d5b Binary files /dev/null and b/project_name/save_dir/combined_45.png differ diff --git a/project_name/save_dir/combined_46.png b/project_name/save_dir/combined_46.png new file mode 100644 index 0000000000..853496e31f Binary files /dev/null and b/project_name/save_dir/combined_46.png differ diff --git a/project_name/save_dir/combined_47.png b/project_name/save_dir/combined_47.png new file mode 100644 index 0000000000..4e208ec98f Binary files /dev/null and b/project_name/save_dir/combined_47.png differ diff --git a/project_name/save_dir/combined_48.png b/project_name/save_dir/combined_48.png new file mode 100644 index 0000000000..4cc0422d98 Binary files /dev/null and b/project_name/save_dir/combined_48.png differ diff --git a/project_name/save_dir/combined_49.png b/project_name/save_dir/combined_49.png new file mode 100644 index 0000000000..28743444a2 Binary files /dev/null and b/project_name/save_dir/combined_49.png differ diff --git 
a/project_name/save_dir/combined_5.png b/project_name/save_dir/combined_5.png new file mode 100644 index 0000000000..9dee582864 Binary files /dev/null and b/project_name/save_dir/combined_5.png differ diff --git a/project_name/save_dir/combined_50.png b/project_name/save_dir/combined_50.png new file mode 100644 index 0000000000..8b13b741b2 Binary files /dev/null and b/project_name/save_dir/combined_50.png differ diff --git a/project_name/save_dir/combined_51.png b/project_name/save_dir/combined_51.png new file mode 100644 index 0000000000..a05cf6f341 Binary files /dev/null and b/project_name/save_dir/combined_51.png differ diff --git a/project_name/save_dir/combined_52.png b/project_name/save_dir/combined_52.png new file mode 100644 index 0000000000..0b8527e224 Binary files /dev/null and b/project_name/save_dir/combined_52.png differ diff --git a/project_name/save_dir/combined_53.png b/project_name/save_dir/combined_53.png new file mode 100644 index 0000000000..e437f58e68 Binary files /dev/null and b/project_name/save_dir/combined_53.png differ diff --git a/project_name/save_dir/combined_54.png b/project_name/save_dir/combined_54.png new file mode 100644 index 0000000000..6ed9a433df Binary files /dev/null and b/project_name/save_dir/combined_54.png differ diff --git a/project_name/save_dir/combined_55.png b/project_name/save_dir/combined_55.png new file mode 100644 index 0000000000..9dca41d60e Binary files /dev/null and b/project_name/save_dir/combined_55.png differ diff --git a/project_name/save_dir/combined_56.png b/project_name/save_dir/combined_56.png new file mode 100644 index 0000000000..d00f686e60 Binary files /dev/null and b/project_name/save_dir/combined_56.png differ diff --git a/project_name/save_dir/combined_57.png b/project_name/save_dir/combined_57.png new file mode 100644 index 0000000000..24ed7604ac Binary files /dev/null and b/project_name/save_dir/combined_57.png differ diff --git a/project_name/save_dir/combined_58.png 
b/project_name/save_dir/combined_58.png new file mode 100644 index 0000000000..cf58349fd1 Binary files /dev/null and b/project_name/save_dir/combined_58.png differ diff --git a/project_name/save_dir/combined_59.png b/project_name/save_dir/combined_59.png new file mode 100644 index 0000000000..3660c09b66 Binary files /dev/null and b/project_name/save_dir/combined_59.png differ diff --git a/project_name/save_dir/combined_6.png b/project_name/save_dir/combined_6.png new file mode 100644 index 0000000000..9e160b3ac0 Binary files /dev/null and b/project_name/save_dir/combined_6.png differ diff --git a/project_name/save_dir/combined_60.png b/project_name/save_dir/combined_60.png new file mode 100644 index 0000000000..bedb053e35 Binary files /dev/null and b/project_name/save_dir/combined_60.png differ diff --git a/project_name/save_dir/combined_61.png b/project_name/save_dir/combined_61.png new file mode 100644 index 0000000000..09d27d7845 Binary files /dev/null and b/project_name/save_dir/combined_61.png differ diff --git a/project_name/save_dir/combined_62.png b/project_name/save_dir/combined_62.png new file mode 100644 index 0000000000..b4599d9660 Binary files /dev/null and b/project_name/save_dir/combined_62.png differ diff --git a/project_name/save_dir/combined_63.png b/project_name/save_dir/combined_63.png new file mode 100644 index 0000000000..205e7a8423 Binary files /dev/null and b/project_name/save_dir/combined_63.png differ diff --git a/project_name/save_dir/combined_64.png b/project_name/save_dir/combined_64.png new file mode 100644 index 0000000000..9b3612fd7d Binary files /dev/null and b/project_name/save_dir/combined_64.png differ diff --git a/project_name/save_dir/combined_65.png b/project_name/save_dir/combined_65.png new file mode 100644 index 0000000000..5c43387807 Binary files /dev/null and b/project_name/save_dir/combined_65.png differ diff --git a/project_name/save_dir/combined_66.png b/project_name/save_dir/combined_66.png new file mode 100644 index 
0000000000..100432c3dc Binary files /dev/null and b/project_name/save_dir/combined_66.png differ diff --git a/project_name/save_dir/combined_67.png b/project_name/save_dir/combined_67.png new file mode 100644 index 0000000000..9ffbacdef9 Binary files /dev/null and b/project_name/save_dir/combined_67.png differ diff --git a/project_name/save_dir/combined_68.png b/project_name/save_dir/combined_68.png new file mode 100644 index 0000000000..6dc4ed7038 Binary files /dev/null and b/project_name/save_dir/combined_68.png differ diff --git a/project_name/save_dir/combined_69.png b/project_name/save_dir/combined_69.png new file mode 100644 index 0000000000..4c7a648624 Binary files /dev/null and b/project_name/save_dir/combined_69.png differ diff --git a/project_name/save_dir/combined_7.png b/project_name/save_dir/combined_7.png new file mode 100644 index 0000000000..5e19be8c20 Binary files /dev/null and b/project_name/save_dir/combined_7.png differ diff --git a/project_name/save_dir/combined_70.png b/project_name/save_dir/combined_70.png new file mode 100644 index 0000000000..81c31f5233 Binary files /dev/null and b/project_name/save_dir/combined_70.png differ diff --git a/project_name/save_dir/combined_71.png b/project_name/save_dir/combined_71.png new file mode 100644 index 0000000000..a1e778d50d Binary files /dev/null and b/project_name/save_dir/combined_71.png differ diff --git a/project_name/save_dir/combined_72.png b/project_name/save_dir/combined_72.png new file mode 100644 index 0000000000..29e9c778d1 Binary files /dev/null and b/project_name/save_dir/combined_72.png differ diff --git a/project_name/save_dir/combined_73.png b/project_name/save_dir/combined_73.png new file mode 100644 index 0000000000..05d8bb6ea0 Binary files /dev/null and b/project_name/save_dir/combined_73.png differ diff --git a/project_name/save_dir/combined_74.png b/project_name/save_dir/combined_74.png new file mode 100644 index 0000000000..e01580ebbf Binary files /dev/null and 
b/project_name/save_dir/combined_74.png differ diff --git a/project_name/save_dir/combined_75.png b/project_name/save_dir/combined_75.png new file mode 100644 index 0000000000..82a47d15c6 Binary files /dev/null and b/project_name/save_dir/combined_75.png differ diff --git a/project_name/save_dir/combined_76.png b/project_name/save_dir/combined_76.png new file mode 100644 index 0000000000..b0c1b9d740 Binary files /dev/null and b/project_name/save_dir/combined_76.png differ diff --git a/project_name/save_dir/combined_77.png b/project_name/save_dir/combined_77.png new file mode 100644 index 0000000000..4528ff6532 Binary files /dev/null and b/project_name/save_dir/combined_77.png differ diff --git a/project_name/save_dir/combined_78.png b/project_name/save_dir/combined_78.png new file mode 100644 index 0000000000..0f962f0604 Binary files /dev/null and b/project_name/save_dir/combined_78.png differ diff --git a/project_name/save_dir/combined_79.png b/project_name/save_dir/combined_79.png new file mode 100644 index 0000000000..06feb860cb Binary files /dev/null and b/project_name/save_dir/combined_79.png differ diff --git a/project_name/save_dir/combined_8.png b/project_name/save_dir/combined_8.png new file mode 100644 index 0000000000..7de97343be Binary files /dev/null and b/project_name/save_dir/combined_8.png differ diff --git a/project_name/save_dir/combined_80.png b/project_name/save_dir/combined_80.png new file mode 100644 index 0000000000..d64f6c53aa Binary files /dev/null and b/project_name/save_dir/combined_80.png differ diff --git a/project_name/save_dir/combined_81.png b/project_name/save_dir/combined_81.png new file mode 100644 index 0000000000..b1906bb4a2 Binary files /dev/null and b/project_name/save_dir/combined_81.png differ diff --git a/project_name/save_dir/combined_82.png b/project_name/save_dir/combined_82.png new file mode 100644 index 0000000000..3d24058dcf Binary files /dev/null and b/project_name/save_dir/combined_82.png differ diff --git 
a/project_name/save_dir/combined_83.png b/project_name/save_dir/combined_83.png new file mode 100644 index 0000000000..2a319cbf03 Binary files /dev/null and b/project_name/save_dir/combined_83.png differ diff --git a/project_name/save_dir/combined_84.png b/project_name/save_dir/combined_84.png new file mode 100644 index 0000000000..e16ff8642d Binary files /dev/null and b/project_name/save_dir/combined_84.png differ diff --git a/project_name/save_dir/combined_85.png b/project_name/save_dir/combined_85.png new file mode 100644 index 0000000000..fccaf67eb2 Binary files /dev/null and b/project_name/save_dir/combined_85.png differ diff --git a/project_name/save_dir/combined_86.png b/project_name/save_dir/combined_86.png new file mode 100644 index 0000000000..2587aff604 Binary files /dev/null and b/project_name/save_dir/combined_86.png differ diff --git a/project_name/save_dir/combined_87.png b/project_name/save_dir/combined_87.png new file mode 100644 index 0000000000..a86fc4aba4 Binary files /dev/null and b/project_name/save_dir/combined_87.png differ diff --git a/project_name/save_dir/combined_88.png b/project_name/save_dir/combined_88.png new file mode 100644 index 0000000000..bda8c8164a Binary files /dev/null and b/project_name/save_dir/combined_88.png differ diff --git a/project_name/save_dir/combined_89.png b/project_name/save_dir/combined_89.png new file mode 100644 index 0000000000..6d6a384cad Binary files /dev/null and b/project_name/save_dir/combined_89.png differ diff --git a/project_name/save_dir/combined_9.png b/project_name/save_dir/combined_9.png new file mode 100644 index 0000000000..a8585c42fc Binary files /dev/null and b/project_name/save_dir/combined_9.png differ diff --git a/project_name/save_dir/combined_90.png b/project_name/save_dir/combined_90.png new file mode 100644 index 0000000000..f3b948ec36 Binary files /dev/null and b/project_name/save_dir/combined_90.png differ diff --git a/project_name/save_dir/combined_91.png 
b/project_name/save_dir/combined_91.png new file mode 100644 index 0000000000..c0ca64602c Binary files /dev/null and b/project_name/save_dir/combined_91.png differ diff --git a/project_name/save_dir/combined_92.png b/project_name/save_dir/combined_92.png new file mode 100644 index 0000000000..70489181c5 Binary files /dev/null and b/project_name/save_dir/combined_92.png differ diff --git a/project_name/save_dir/combined_93.png b/project_name/save_dir/combined_93.png new file mode 100644 index 0000000000..105720244e Binary files /dev/null and b/project_name/save_dir/combined_93.png differ diff --git a/project_name/save_dir/combined_94.png b/project_name/save_dir/combined_94.png new file mode 100644 index 0000000000..4d0212e181 Binary files /dev/null and b/project_name/save_dir/combined_94.png differ diff --git a/project_name/save_dir/combined_95.png b/project_name/save_dir/combined_95.png new file mode 100644 index 0000000000..e28d3e5125 Binary files /dev/null and b/project_name/save_dir/combined_95.png differ diff --git a/project_name/save_dir/combined_96.png b/project_name/save_dir/combined_96.png new file mode 100644 index 0000000000..c8dd145711 Binary files /dev/null and b/project_name/save_dir/combined_96.png differ diff --git a/project_name/save_dir/combined_97.png b/project_name/save_dir/combined_97.png new file mode 100644 index 0000000000..6774ec8a8f Binary files /dev/null and b/project_name/save_dir/combined_97.png differ diff --git a/project_name/save_dir/combined_98.png b/project_name/save_dir/combined_98.png new file mode 100644 index 0000000000..c090313842 Binary files /dev/null and b/project_name/save_dir/combined_98.png differ diff --git a/project_name/save_dir/combined_99.png b/project_name/save_dir/combined_99.png new file mode 100644 index 0000000000..fccaf67eb2 Binary files /dev/null and b/project_name/save_dir/combined_99.png differ diff --git a/project_name/data/__init__.py b/project_name/temp.py similarity index 100% rename from 
project_name/data/__init__.py rename to project_name/temp.py diff --git a/streamlit_main.py b/streamlit_main.py new file mode 100644 index 0000000000..fa051262a2 --- /dev/null +++ b/streamlit_main.py @@ -0,0 +1,138 @@ +import streamlit as st +import numpy as np +import torch +from PIL import Image +from io import BytesIO +from os.path import split, join + +from project_name.models.cnn import CNNBackbone +from project_name.models.Preprocessing_class import Preprocessing + + +def main() -> None: + """ + Main function where the stream lit app runs. + """ + if not ("device" in st.session_state): + st.session_state["device"] = torch.device("cuda" + if torch.cuda.is_available() + else "cpu") + + if not ("model" in st.session_state): + file_location = split(__file__)[0] + st.session_state["model"] = CNNBackbone(pretrained=True).to( + st.session_state["device"] + ) + st.session_state["model"].load_state_dict(torch.load( + join(file_location, "cnn_best.pth"), weights_only=True)) + st.session_state["model"].eval() + + if not ("preprocess" in st.session_state): + st.session_state["preprocess"] = Preprocessing((200, 200)) + + st.title("Streamlit demo for Applied Machine Learning: Depth prediction.") + st.divider() + intro_paragraph = """ + This is a stream lit demo for the AML project of group 16. + The project is a depth estimating model from a RGB image. + + In this demo you will be allowed to upload an image covert it using our + model and download the resulting depth image from it. + """ + st.markdown(intro_paragraph) + st.divider() + upload_text = """ + Here you will upload an RGb 8-bit colour image for depth estimation. 
+ """ + st.markdown(upload_text) + st.session_state["upload_image"] = st.file_uploader(label="Upload image", + type=[".png", ".jpeg"]) + + if ("upload_image" in st.session_state and + st.session_state["upload_image"] is not None): + rgb_image = np.array(Image.open(st.session_state["upload_image"])) + image_size = rgb_image.shape + st.image(rgb_image) + st.write(f"height: {image_size[0]}, width: {image_size[1]}, " + + f"channels: {image_size[2]}") + st.session_state["rgb_image"] = rgb_image + + if image_size[2] > 3: + st.error("We do not accept RGBa images.") + + st.divider() + if (not ("upload_image" in st.session_state) or + st.session_state["upload_image"] is None): + if "depth_output" in st.session_state: + st.session_state.pop("depth_output") + return + + tile_size = st.number_input("tiling size of model", + 1, + step=1, + value=200) + st.write(f" the current tile size is {tile_size}") + + if st.session_state["preprocess"].tile_size[0] != tile_size: + st.session_state["preprocess"].tile_size = (tile_size, tile_size) + + run_model = st.button("Start conversion.") + + if run_model and "rgb_image" in st.session_state: + st.write("work in progress") + + model = st.session_state["model"] + input_image = st.session_state["rgb_image"] + pre_post_process = st.session_state["preprocess"] + + tiles = pre_post_process.tile_with_padding(input_image) + + tensor_input_image = torch.tensor( + tiles, + device=st.session_state["device"], + dtype=torch.float).permute(0, 3, 1, 2) + + with torch.no_grad(): + tiles_output = model(tensor_input_image) + + depth_output = pre_post_process.reconstruct_depth( + tiles_output.squeeze().cpu().numpy()) + + st.session_state["depth_output"] = depth_output + + if "depth_output" in st.session_state: + pre_post_process = st.session_state["preprocess"] + image_output = pre_post_process.depth_to_rgb( + st.session_state["depth_output"]) + + st.image(image_output, clamp=True) + + depth_image_size = st.session_state["depth_output"].shape + 
st.write(f"height: {depth_image_size[0]}," + + f" width: {depth_image_size[1]}, " + + "channels: 1") + + st.divider() + if not ('depth_output' in st.session_state): + return + + file_name = st.text_input("file name", + "depth_map") + prepare_export = st.button("prepare depth map export") + + if file_name == "": + st.error("empty name can not generate file") + return + + if prepare_export and st.session_state["depth_output"].any(): + with BytesIO() as buffer: + np.save(buffer, st.session_state["depth_output"]) + st.download_button("download depth map", + buffer, + file_name + ".npy") + + st.divider() + + +if __name__ == '__main__': + main() diff --git a/project_name/features/__init__.py b/tests/data/temp.py similarity index 100% rename from project_name/features/__init__.py rename to tests/data/temp.py diff --git a/tests/data/test_dataloader.py b/tests/data/test_dataloader.py new file mode 100644 index 0000000000..93480db737 --- /dev/null +++ b/tests/data/test_dataloader.py @@ -0,0 +1,21 @@ +from project_name.data.data_loader import DataLoader +import unittest + + +class TestDataloader(unittest.TestCase): + def setUp(self): + self.dataloader = DataLoader("val") + + def test_index(self): + current_index = self.dataloader.data_index + self.dataloader.increment_index() + self.assertEqual((current_index + 1) % len(self.dataloader.data_paths), + self.dataloader.data_index) + + def test_data_getting(self): + data = self.dataloader.get_data() + self.assertEqual(len(data), 2) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/__init__.py b/tests/features/temp.py similarity index 100% rename from tests/__init__.py rename to tests/features/temp.py diff --git a/tests/models/__init__.py b/tests/models/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/data/__init__.py b/tests/models/temp.py similarity index 100% rename from tests/data/__init__.py rename to tests/models/temp.py diff --git 
a/tests/models/unittest_preprocessing.py b/tests/models/unittest_preprocessing.py new file mode 100644 index 0000000000..b346403e88 --- /dev/null +++ b/tests/models/unittest_preprocessing.py @@ -0,0 +1,103 @@ +import unittest +import numpy as np +from project_name.models.Preprocessing_class import Preprocessing + + +class TestPreprocessing(unittest.TestCase): + def setUp(self): + self.tile_size = (256, 256) + self.preprocessor = Preprocessing(tile_size=self.tile_size) + self.rgb_image = np.random.randint( + 0, 256, (510, 510, 3), dtype=np.uint8 + ) + self.gray_image = np.random.randint( + 0, 256, (510, 510), dtype=np.uint8 + ) + + def test_is_8_bit(self): + self.assertTrue(self.preprocessor.is_8_bit(self.rgb_image)) + self.assertFalse( + self.preprocessor.is_8_bit( + self.rgb_image.astype(np.float32) + ) + ) + + def test_normalize_uint8(self): + norm = self.preprocessor.normalize(self.rgb_image) + self.assertTrue(np.all(norm >= 0.0) and np.all(norm <= 1.0)) + self.assertEqual(norm.dtype, np.float32) + + def test_normalize_float(self): + float_array = self.rgb_image.astype(np.float32) / 255.0 + norm = self.preprocessor.normalize(float_array) + self.assertTrue(np.allclose(float_array, norm)) + self.assertEqual(norm.dtype, np.float32) + + def test_tile_with_padding_rgb(self): + tiles = self.preprocessor.tile_with_padding(self.rgb_image) + expected_num_tiles = ( + (510 + (256 - 510 % 256)) // 256 + ) ** 2 # 2x2 = 4 + self.assertEqual(tiles.shape[0], expected_num_tiles) + self.assertEqual(tiles.shape[1:], (256, 256, 3)) + + def test_tile_with_padding_grayscale(self): + tiles = self.preprocessor.tile_with_padding(self.gray_image) + expected_num_tiles = ( + (510 + (256 - 510 % 256)) // 256 + ) ** 2 # 2x2 = 4 + self.assertEqual(tiles.shape[0], expected_num_tiles) + self.assertEqual(tiles.shape[1:], (256, 256)) + + def test_reconstruction_rgb(self): + tiles = self.preprocessor.tile_with_padding(self.rgb_image) + recon = self.preprocessor.reconstruct_image(tiles) + 
self.assertEqual(recon.shape, self.rgb_image.shape) + self.assertTrue(np.allclose(recon, self.rgb_image, atol=2)) + + def test_reconstruction_grayscale(self): + tiles = self.preprocessor.tile_with_padding(self.gray_image) + recon = self.preprocessor.reconstruct_image(tiles) + self.assertEqual(recon.shape, self.gray_image.shape) + self.assertTrue(np.allclose(recon, self.gray_image, atol=2)) + + def test_invalid_input_type(self): + with self.assertRaises(TypeError): + self.preprocessor.tile_with_padding("not an array") + + def test_invalid_input_dtype(self): + with self.assertRaises(ValueError): + self.preprocessor.tile_with_padding( + np.ones((100, 100), dtype=np.float32) + ) + + def test_missing_padding_info(self): + with self.assertRaises(ValueError): + self.preprocessor.reconstruct_image( + np.zeros((4, 256, 256, 3)) + ) + + def test_depth_to_rgb(self): + # Create a mock depth map with float32 values from 0 to 10 + depth_map = np.random.uniform( + low=0.0, high=10.0, size=(480, 640) + ).astype(np.float32) + rgb_depth = self.preprocessor.depth_to_rgb( + depth_map, cmap='plasma' + ) + + self.assertEqual(rgb_depth.shape, (480, 640, 3)) + self.assertEqual(rgb_depth.dtype, np.uint8) + self.assertTrue( + np.all(rgb_depth >= 0) and np.all(rgb_depth <= 255) + ) + self.assertGreater(np.std(rgb_depth), 0) + + with self.assertRaises(ValueError): + self.preprocessor.depth_to_rgb( + np.ones((10, 10, 3)) # Not 2D + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/features/__init__.py b/tests/temp.py similarity index 100% rename from tests/features/__init__.py rename to tests/temp.py diff --git a/tests/test_main.py b/tests/test_main.py deleted file mode 100644 index 1a0f0a6355..0000000000 --- a/tests/test_main.py +++ /dev/null @@ -1,11 +0,0 @@ -import unittest -from main import hello_world - - -class MainTest(unittest.TestCase): - def test_hello(self): - self.assertEqual(hello_world(), "Hello, World!") - - -if __name__ == '__main__': - unittest.main()