diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..51e98b1
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,64 @@
+name: CI for Similar-Design-Finder
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true
+      matrix:
+        python-version: ["3.11.2"]
+
+    steps:
+      - name: Checkout to mlsd-infra
+        uses: actions/checkout@v4
+
+      - name: Checkout to Similar-Design-Finder
+        uses: actions/checkout@v4
+        with:
+          repository: SimulatorML/Similar-Design-Finder  # must be owner/repo, not a full URL
+          path: repoB
+          token: ${{ secrets.REPO_B_ACCESS_TOKEN }}
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python3 -
+          echo "$HOME/.local/bin" >> "$GITHUB_PATH"  # GITHUB_PATH extends PATH; an `export` line in GITHUB_ENV does not
+
+      - name: Install dependencies
+        working-directory: repoB
+        run: |
+          poetry install
+
+      - name: Format with ruff (black replacement)
+        working-directory: repoB
+        run: |
+          poetry run ruff check . --fix
+
+      - name: Sort and clean imports with isort
+        working-directory: repoB
+        run: |
+          poetry run isort .
+
+      - name: Set PYTHONPATH
+        run: echo "PYTHONPATH=$GITHUB_WORKSPACE/repoB" >> $GITHUB_ENV
+
+      - name: Test with pytest
+        working-directory: repoB
+        run: |
+          poetry run pytest
+
+      - name: Run pre-commit checks
+        working-directory: repoB
+        run: |
+          poetry run pre-commit run --all-files
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2934357
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,109 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# DotEnv configuration
+.env
+
+# Database
+*.db
+*.rdb
+
+# Pycharm
+.idea
+
+# VS Code
+.vscode/
+
+# Spyder
+.spyproject/
+
+# Jupyter NB Checkpoints
+.ipynb_checkpoints/
+
+# exclude data from source control by default
+# /data/
+
+# Mac OS-specific storage files
+.DS_Store
+
+# vim
+*.swp
+*.swo
+
+# Mypy cache
+.mypy_cache/
+
+# pyenv local version file (previously mislabeled "Pyre cache")
+.python-version
+
+# Venv environment
+/venv/
+
+# Model folder
+/models/
+/data/
+# data/*
+# !data/processed/
+# data/processed/*
+# !data/processed/predictions.csv
+
+
+
+
+# MLFlow
+/mlruns/
diff --git a/README.md b/README.md
index eafe9f5..191762d 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,9 @@
-# mlsd-infra
\ No newline at end of file
+# MLSD-Infra
+This repository contains the infrastructure code for the MLSD project.
+
+## Requirements
+- Two databases: one for development and one for production
+- One S3-compatible object store (MinIO)
+
+
+We also have backups that are made once a day at 3 AM using cron jobs and rclone, sending the backups to Google Drive.
diff --git a/backup/backup_and_upload.sh b/backup/backup_and_upload.sh
new file mode 100644
index 0000000..4086e95
--- /dev/null
+++ b/backup/backup_and_upload.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+# Date and time for the filename
+timestamp=$(date +'%Y%m%d_%H%M%S')
+
+# Path to the directory where the script is located
+backup_dir="$(dirname "$0")"
+
+# Path to the log file
+log_file="$backup_dir/backup_and_upload.log"
+
+# Function to write messages to the log file
+log_message() {
+    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" >> "$log_file"
+}
+
+# Create the log file if it doesn't exist
+touch "$log_file"
+
+# Log the start of the backup process
+log_message "Starting backup process."
+
+# Create a backup of the Similar_Design_Finder_Dev database (no -t: a TTY would add \r to the redirected dump)
+log_message "Creating backup of Similar_Design_Finder_Dev database."
+docker exec $(docker-compose -f "$backup_dir/../docker-compose.yaml" ps -q postgres_dev) pg_dumpall -U admin > "$backup_dir/pg_cluster_backup_dev_$timestamp.sql"
+if [ $? -eq 0 ]; then
+    log_message "Successfully created backup of Similar_Design_Finder_Dev database."
+else
+    log_message "Error creating backup of Similar_Design_Finder_Dev database."
+fi
+
+# Create a backup of the Similar_Design_Finder_Prod database (no -t, as above)
+log_message "Creating backup of Similar_Design_Finder_Prod database."
+docker exec $(docker-compose -f "$backup_dir/../docker-compose.yaml" ps -q postgres_prod) pg_dumpall -U admin > "$backup_dir/pg_cluster_backup_prod_$timestamp.sql"
+if [ $? -eq 0 ]; then
+    log_message "Successfully created backup of Similar_Design_Finder_Prod database."
+else
+    log_message "Error creating backup of Similar_Design_Finder_Prod database."
+fi
+
+# Create a backup of the MinIO data (quiet tar: verbose file listing would spam the cron log)
+log_message "Creating backup of MinIO data."
+docker run --rm --volumes-from $(docker-compose -f "$backup_dir/../docker-compose.yaml" ps -q minio) -v "$backup_dir":/backup busybox tar czf /backup/minio_backup_$timestamp.tar.gz /data
+if [ $? -eq 0 ]; then
+    log_message "Successfully created backup of MinIO data."
+else
+    log_message "Error creating backup of MinIO data."
+fi
+
+# Upload backups to Google Drive
+log_message "Uploading Similar_Design_Finder_Dev database backup to Google Drive."
+rclone copy "$backup_dir/pg_cluster_backup_dev_$timestamp.sql" gdrive:
+if [ $? -eq 0 ]; then
+    log_message "Successfully uploaded Similar_Design_Finder_Dev database backup to Google Drive."
+else
+    log_message "Error uploading Similar_Design_Finder_Dev database backup to Google Drive."
+fi
+
+log_message "Uploading Similar_Design_Finder_Prod database backup to Google Drive."
+rclone copy "$backup_dir/pg_cluster_backup_prod_$timestamp.sql" gdrive:
+if [ $? -eq 0 ]; then
+    log_message "Successfully uploaded Similar_Design_Finder_Prod database backup to Google Drive."
+else
+    log_message "Error uploading Similar_Design_Finder_Prod database backup to Google Drive."
+fi
+
+log_message "Uploading MinIO data backup to Google Drive."
+rclone copy "$backup_dir/minio_backup_$timestamp.tar.gz" gdrive:
+if [ $? -eq 0 ]; then
+    log_message "Successfully uploaded MinIO data backup to Google Drive."
+else
+    log_message "Error uploading MinIO data backup to Google Drive."
+fi
+
+# Delete local backups after uploading (TODO(review): these run even if an upload failed — gate on rclone status)
+log_message "Deleting local backups."
+rm -f "$backup_dir/pg_cluster_backup_dev_$timestamp.sql"
+rm -f "$backup_dir/pg_cluster_backup_prod_$timestamp.sql"
+rm -f "$backup_dir/minio_backup_$timestamp.tar.gz"
+
+# Log the completion of the backup process
+log_message "Backup process completed."
+
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..2a400ae
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,45 @@
+services:
+  postgres_dev:
+    image: pgvector/pgvector:pg16
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER}
+      POSTGRES_PASSWORD: ${POSTGRES_DB_DEV_PASSWORD}
+      POSTGRES_DB: ${POSTGRES_DB_DEV}
+    volumes:
+      - pgdata_dev:/var/lib/postgresql/data
+    ports:
+      - "5433:5432"
+    restart: always
+
+  postgres_prod:
+    image: pgvector/pgvector:pg16
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER}
+      POSTGRES_PASSWORD: ${POSTGRES_DB_PROD_PASSWORD}
+      POSTGRES_DB: ${POSTGRES_DB_PROD}
+    volumes:
+      - pgdata_prod:/var/lib/postgresql/data
+    ports:
+      - "5432:5432"
+    restart: always
+
+  minio:
+    image: docker.io/bitnami/minio:2022  # NOTE(review): stale tag — consider pinning a current release
+    ports:
+      - '9000:9000'
+      - '9001:9001'
+    volumes:
+      - minio_data:/data
+    environment:
+      - MINIO_ROOT_USER=${MINIO_ROOT_USER}
+      - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
+      - MINIO_DEFAULT_BUCKETS=${MINIO_DEFAULT_BUCKETS}
+    restart: always
+
+volumes:
+  pgdata_dev:
+    external: true
+  pgdata_prod:
+    external: true
+  minio_data:
+    external: true