64 changes: 64 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,64 @@
name: CI for Similar-Design-Finder

on:
  workflow_dispatch:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: true
      matrix:
        python-version: ["3.11.2"]

    steps:
      - name: Check out mlsd-infra
        uses: actions/checkout@v2

      - name: Check out Similar-Design-Finder
        uses: actions/checkout@v2
        with:
          repository: SimulatorML/Similar-Design-Finder
          path: repoB
          token: ${{ secrets.REPO_B_ACCESS_TOKEN }}

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Poetry
        run: |
          curl -sSL https://install.python-poetry.org | python3 -
          echo "$HOME/.local/bin" >> $GITHUB_PATH

      - name: Install dependencies
        working-directory: repoB
        run: |
          poetry install

      - name: Lint and autofix with ruff (flake8 replacement)
        working-directory: repoB
        run: |
          poetry run ruff check . --fix

      - name: Sort and clean imports with isort
        working-directory: repoB
        run: |
          poetry run isort .

      - name: Set PYTHONPATH
        run: echo "PYTHONPATH=$GITHUB_WORKSPACE/repoB" >> $GITHUB_ENV

      - name: Test with pytest
        working-directory: repoB
        run: |
          poetry run pytest

      - name: Run pre-commit checks
        working-directory: repoB
        run: |
          poetry run pre-commit run --all-files
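
Because the workflow also declares a `workflow_dispatch` trigger, it can be started by hand. A minimal sketch using the GitHub CLI, assuming the file keeps the name `ci.yml` and you have authenticated with `gh auth login`:

```bash
# Kick off the workflow manually against main
gh workflow run ci.yml --ref main

# List recent runs of this workflow to check the result
gh run list --workflow=ci.yml
```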
116 changes: 116 additions & 0 deletions .gitignore
@@ -0,0 +1,116 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# DotEnv configuration
.env

# Database
*.db
*.rdb

# Pycharm
.idea

# VS Code
.vscode/

# Spyder
.spyproject/

# Jupyter NB Checkpoints
.ipynb_checkpoints/

# exclude data from source control by default
# /data/

# Mac OS-specific storage files
.DS_Store

# vim
*.swp
*.swo

# Mypy cache
.mypy_cache/

# pyenv
.python-version

# Venv environment
/venv/

# Model folder
/models/
/data/
# data/*
# !data/processed/
# data/processed/*
# !data/processed/predictions.csv

# MLFlow
/mlruns/
10 changes: 9 additions & 1 deletion README.md
@@ -1 +1,9 @@
# mlsd-infra
# MLSD-Infra
This repository contains the infrastructure code for the MLSD project.

## Requirements
- Two PostgreSQL databases: one for development and one for production
- One S3-compatible object store (MinIO)
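
To sanity-check both instances from the host, here is a sketch assuming the port mappings from `docker-compose.yaml`; the database names are hypothetical placeholders, the real ones come from your `.env`:

```bash
# Dev is published on host port 5433, prod on 5432
psql -h localhost -p 5433 -U admin -d similar_design_finder_dev
psql -h localhost -p 5432 -U admin -d similar_design_finder_prod
```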


Backups run once a day at 3 AM via a cron job: `backup/backup_and_upload.sh` dumps both PostgreSQL databases and the MinIO data, then uploads everything to Google Drive with rclone, as sketched below.
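
A minimal sketch of the matching crontab entry, assuming the repository is checked out at the hypothetical path `/opt/mlsd-infra` and an rclone remote named `gdrive` has already been set up with `rclone config`:

```bash
# m h dom mon dow: run the backup script daily at 03:00
0 3 * * * /opt/mlsd-infra/backup/backup_and_upload.sh
```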
83 changes: 83 additions & 0 deletions backup/backup_and_upload.sh
@@ -0,0 +1,83 @@
#!/bin/bash

# Date and time for the filename
timestamp=$(date +'%Y%m%d_%H%M%S')

# Path to the directory where the script is located
backup_dir="$(dirname "$0")"

# Path to the log file
log_file="$backup_dir/backup_and_upload.log"

# Function to write messages to the log file
log_message() {
    echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" >> "$log_file"
}

# Create the log file if it doesn't exist
touch "$log_file"

# Log the start of the backup process
log_message "Starting backup process."

# Create a backup of the Similar_Design_Finder_Dev database
log_message "Creating backup of Similar_Design_Finder_Dev database."
docker exec -t $(docker-compose -f "$backup_dir/../docker-compose.yaml" ps -q postgres_dev) pg_dumpall -U admin > "$backup_dir/pg_cluster_backup_dev_$timestamp.sql"
if [ $? -eq 0 ]; then
    log_message "Successfully created backup of Similar_Design_Finder_Dev database."
else
    log_message "Error creating backup of Similar_Design_Finder_Dev database."
fi

# Create a backup of the Similar_Design_Finder_Prod database
log_message "Creating backup of Similar_Design_Finder_Prod database."
docker exec -t $(docker-compose -f "$backup_dir/../docker-compose.yaml" ps -q postgres_prod) pg_dumpall -U admin > "$backup_dir/pg_cluster_backup_prod_$timestamp.sql"
if [ $? -eq 0 ]; then
    log_message "Successfully created backup of Similar_Design_Finder_Prod database."
else
    log_message "Error creating backup of Similar_Design_Finder_Prod database."
fi

# Create a backup of the MinIO data
log_message "Creating backup of MinIO data."
docker run --rm --volumes-from $(docker-compose -f "$backup_dir/../docker-compose.yaml" ps -q minio) -v "$backup_dir":/backup busybox tar czvf /backup/minio_backup_$timestamp.tar.gz /data
if [ $? -eq 0 ]; then
    log_message "Successfully created backup of MinIO data."
else
    log_message "Error creating backup of MinIO data."
fi

# Upload backups to Google Drive
log_message "Uploading Similar_Design_Finder_Dev database backup to Google Drive."
rclone copy "$backup_dir/pg_cluster_backup_dev_$timestamp.sql" gdrive:
if [ $? -eq 0 ]; then
    log_message "Successfully uploaded Similar_Design_Finder_Dev database backup to Google Drive."
else
    log_message "Error uploading Similar_Design_Finder_Dev database backup to Google Drive."
fi

log_message "Uploading Similar_Design_Finder_Prod database backup to Google Drive."
rclone copy "$backup_dir/pg_cluster_backup_prod_$timestamp.sql" gdrive:
if [ $? -eq 0 ]; then
    log_message "Successfully uploaded Similar_Design_Finder_Prod database backup to Google Drive."
else
    log_message "Error uploading Similar_Design_Finder_Prod database backup to Google Drive."
fi

log_message "Uploading MinIO data backup to Google Drive."
rclone copy "$backup_dir/minio_backup_$timestamp.tar.gz" gdrive:
if [ $? -eq 0 ]; then
    log_message "Successfully uploaded MinIO data backup to Google Drive."
else
    log_message "Error uploading MinIO data backup to Google Drive."
fi

# Delete local backups after uploading
log_message "Deleting local backups."
rm "$backup_dir/pg_cluster_backup_dev_$timestamp.sql"
rm "$backup_dir/pg_cluster_backup_prod_$timestamp.sql"
rm "$backup_dir/minio_backup_$timestamp.tar.gz"

# Log the completion of the backup process
log_message "Backup process completed."

46 changes: 46 additions & 0 deletions docker-compose.yaml
@@ -0,0 +1,46 @@
version: '3.8'
services:
  postgres_dev:
    image: pgvector/pgvector:pg16
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_DB_DEV_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB_DEV}
    volumes:
      - pgdata_dev:/var/lib/postgresql/data
    ports:
      - "5433:5432"
    restart: always

  postgres_prod:
    image: pgvector/pgvector:pg16
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_DB_PROD_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB_PROD}
    volumes:
      - pgdata_prod:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    restart: always

  minio:
    image: docker.io/bitnami/minio:2022
    ports:
      - '9000:9000'
      - '9001:9001'
    volumes:
      - minio_data:/data
    environment:
      - MINIO_ROOT_USER=${MINIO_ROOT_USER}
      - MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}
      - MINIO_DEFAULT_BUCKETS=${MINIO_DEFAULT_BUCKETS}
    restart: always

volumes:
  pgdata_dev:
    external: true
  pgdata_prod:
    external: true
  minio_data:
    external: true
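
All three volumes are declared `external: true`, so Compose will not create them; they must exist before the first `docker-compose up`. A first-run sketch with a hypothetical `.env` (only `POSTGRES_USER=admin` is grounded in the backup script's `pg_dumpall -U admin`; every other value is a placeholder):

```bash
# Create the external volumes once, up front
docker volume create pgdata_dev
docker volume create pgdata_prod
docker volume create minio_data

# Hypothetical .env with the variables the compose file expects
cat > .env <<'EOF'
POSTGRES_USER=admin
POSTGRES_DB_DEV_PASSWORD=change-me-dev
POSTGRES_DB_DEV=similar_design_finder_dev
POSTGRES_DB_PROD_PASSWORD=change-me-prod
POSTGRES_DB_PROD=similar_design_finder_prod
MINIO_ROOT_USER=minio-admin
MINIO_ROOT_PASSWORD=change-me-minio
MINIO_DEFAULT_BUCKETS=designs
EOF

docker-compose up -d

# The pgvector image ships the extension, but it must be created per database
docker-compose exec postgres_dev \
    psql -U admin -d similar_design_finder_dev \
    -c 'CREATE EXTENSION IF NOT EXISTS vector;'
```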