Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion labelit/labelit/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,16 @@
LSPROXY_CONNECTION_PER_POOL = 20

# Remote storage configs
LABELIT_REMOTE_STORAGE_CONFIG = {
# Settings used when downloading a project's dataset from remote storage.
# mainapp/jobs.py passes ['gs']['project'] to GoogleStorageHandler(project=...)
# and ['s3']['region'] to S3StorageHandler(region=...).
# NOTE(review): None presumably lets the underlying client library fall back
# to its own default project/region - confirm against the storage handlers.
LABELIT_REMOTE_STORAGE_DOWNLOAD_CONFIG = {
's3': {
'region': None,
},
'gs': {
'project': None,
},
}

LABELIT_REMOTE_STORAGE_UPLOAD_CONFIG = {
's3': {
'region': None,
},
Expand Down
6 changes: 3 additions & 3 deletions labelit/mainapp/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@ class ProjectCreateForm(forms.ModelForm):
class Meta:
model = Project
fields = (
'name', 'dataset_format', 'dataset_path', 'config', 'export_format'
'name', 'dataset_format', 'dataset_path', 'config', 'export_format', "remote_export"
)


class ProjectEditForm(forms.ModelForm):
editable_fields = ('status', 'export_format',)
editable_fields = ('status', 'export_format')
status_choice_values = [Project.Status.ACTIVE.value, Project.Status.DISABLED.value]
class Meta:
model = Project
fields = (
'name', 'dataset_format', 'dataset_path', 'config', 'status', 'export_format'
'name', 'dataset_format', 'dataset_path', 'config', 'status', 'export_format', 'remote_export'
)

def __init__(self, *args, **kwargs):
Expand Down
27 changes: 24 additions & 3 deletions labelit/mainapp/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from apscheduler.schedulers.background import BackgroundScheduler

from django.core.cache import cache
from labelit.settings import LABELIT_DIRS, LABELIT_REMOTE_STORAGE_CONFIG
from labelit.settings import LABELIT_DIRS, LABELIT_REMOTE_STORAGE_DOWNLOAD_CONFIG, LABELIT_REMOTE_STORAGE_UPLOAD_CONFIG
from .models import Project, ProjectAnnotators
from .utils import get_random_port, save_config_file, get_label_studio_cmd, start_tool_server
from .storage.utils import get_storage_type
Expand Down Expand Up @@ -67,12 +67,12 @@ def manage_project_servers(projects=None):
try:
if project_storage_path_type == 'gs':
from .storage.gs import GoogleStorageHandler
gs_project = LABELIT_REMOTE_STORAGE_CONFIG['gs']['project']
gs_project = LABELIT_REMOTE_STORAGE_DOWNLOAD_CONFIG['gs']['project']
storage_obj = GoogleStorageHandler(project=gs_project)
storage_obj.download(project.dataset_path, project_local_storage)
elif project_storage_path_type == 's3':
from .storage.s3 import S3StorageHandler
s3_region = LABELIT_REMOTE_STORAGE_CONFIG['s3']['region']
s3_region = LABELIT_REMOTE_STORAGE_DOWNLOAD_CONFIG['s3']['region']
storage_obj = S3StorageHandler(region=s3_region)
storage_obj.download(project.dataset_path, project_local_storage)

Expand Down Expand Up @@ -111,6 +111,19 @@ def export_projects():
# Get all projects
projects = Project.objects.all()
for project in projects:
storage_type = None
if not project.remote_export=="None":
# Declare a storage object which will be used to upload files later
storage_type = get_storage_type(project.remote_export)
if storage_type == "gs":
from .storage.gs import GoogleStorageHandler
gs_project = LABELIT_REMOTE_STORAGE_UPLOAD_CONFIG['gs']['project']
storage_obj = GoogleStorageHandler(project = gs_project)
elif storage_type == "s3":
from .storage.s3 import S3StorageHandler
s3_region = LABELIT_REMOTE_STORAGE_UPLOAD_CONFIG['s3']['region']
storage_obj = S3StorageHandler(region=s3_region)

if project.status == Project.Status.ACTIVE and project.export_format != Project.ExportFormat.NONE:
logger.info(f"Exporting project {project.name}")
output_paths = []
Expand All @@ -136,6 +149,14 @@ def export_projects():
logger.debug(f"Export format {project.export_format} not supported for project {project.name}")
continue
output_paths.append(output_path)
# The GS and S3 storage handlers expose the same `upload` method, which takes the
# same set of parameters to upload a file to the bucket.
# The `storage_obj` declared above is used to perform the upload.
if storage_type:
file_to_upload = list(x for x in output_path.iterdir() if x.is_file())[0]
storage_path = f"{project.name}/{annotator.username}/{str(file_to_upload).split('/')[-1]}"
storage_obj.upload(project.remote_export, storage_path, file_to_upload)


def stop_project_servers(projects=None):
"""Stops running Label studio servers"""
Expand Down
19 changes: 19 additions & 0 deletions labelit/mainapp/migrations/0003_project_remote_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 3.0.5 on 2020-06-27 10:17

from django.db import migrations, models
import mainapp.validators


class Migration(migrations.Migration):
    """Add the `remote_export` field to the `project` model."""

    # Must run after the previous mainapp migration.
    dependencies = [('mainapp', '0002_auto_20200613_0820')]

    operations = [
        migrations.AddField(
            model_name='project',
            name='remote_export',
            field=models.CharField(
                default='None',
                help_text='Remote export path (GS or S3)',
                max_length=500,
                validators=[mainapp.validators.validate_remote_path],
                verbose_name='Remote Storage',
            ),
        ),
    ]
4 changes: 3 additions & 1 deletion labelit/mainapp/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from django.contrib.auth.models import AbstractUser
from django.contrib.auth import get_user_model
from django.utils.translation import gettext_lazy as _
from mainapp.validators import validate_dataset_path, validate_label_config
from mainapp.validators import validate_dataset_path, validate_label_config, validate_remote_path

class User(AbstractUser):
class StaffRole(models.IntegerChoices):
Expand Down Expand Up @@ -65,6 +65,8 @@ class Status(models.IntegerChoices):
status = models.IntegerField(choices=Status.choices, default=Status.INITIALIZED, help_text="Status of project")
# Export format
export_format = models.IntegerField(choices=ExportFormat.choices, default=ExportFormat.NONE, help_text="Export format for labelled data", verbose_name="Export Format")
# Remote Export Path
remote_export = models.CharField(validators=[validate_remote_path], default="None", max_length=500, help_text="Remote export path (GS or S3)", verbose_name="Remote Storage")


class ProjectAnnotators(models.Model):
Expand Down
12 changes: 12 additions & 0 deletions labelit/mainapp/storage/gs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
from .base import StorageHandler
from .utils import split_bucket_path
from labelit.settings import LABELIT_DIRS
from pathlib import Path

try:
from google.cloud import storage
Expand All @@ -26,3 +28,13 @@ def download(self, storage_path, download_path):
cleaned_blob_name = blob.name.replace('/', '_')
destination_file_name = os.path.join(download_path, cleaned_blob_name)
blob.download_to_filename(destination_file_name)

def upload(self, bucket_address, bucket_path_of_file, file_to_upload):
    """Upload a local file to the bucket named in `bucket_address`.

    `bucket_address` is split by `split_bucket_path` into a bucket name and
    a path prefix. A non-empty prefix is placed in front of
    `bucket_path_of_file`, separated by exactly one "/", to form the blob
    name; otherwise `bucket_path_of_file` is used unchanged.

    Args:
        bucket_address: Remote address containing the bucket name and an
            optional path inside the bucket.
        bucket_path_of_file: Blob path of the file, relative to the prefix.
        file_to_upload: Local file handed to `upload_from_filename`.
    """
    bucket_name, prefix = split_bucket_path(bucket_address)
    if prefix:
        # Make sure the prefix and the relative path are joined by a slash.
        separator = "" if prefix.endswith("/") else "/"
        bucket_path_of_file = prefix + separator + bucket_path_of_file
    target_blob = self.storage_client.bucket(bucket_name).blob(bucket_path_of_file)
    target_blob.upload_from_filename(file_to_upload)
8 changes: 8 additions & 0 deletions labelit/mainapp/storage/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,11 @@ def download(self, storage_path, download_path):
cleaned_object_name = object['Key'].replace('/', '_')
destination_file_name = os.path.join(download_path, cleaned_object_name)
self.storage_client.download_file(bucket_name, object['Key'], destination_file_name)

def upload(self, bucket_address, bucket_path_of_file, file_to_upload):
    """Upload a local file to the bucket named in `bucket_address`.

    `split_bucket_path` breaks `bucket_address` into a bucket name and a
    key prefix. When the prefix is non-empty it is prepended to
    `bucket_path_of_file` (adding a trailing "/" to the prefix if it lacks
    one) to build the object key; otherwise the key is
    `bucket_path_of_file` as given.

    Args:
        bucket_address: Remote address containing the bucket name and an
            optional path inside the bucket.
        bucket_path_of_file: Object key of the file, relative to the prefix.
        file_to_upload: Local file passed to the client's `upload_file`.
    """
    bucket_name, key_prefix = split_bucket_path(bucket_address)
    object_key = bucket_path_of_file
    if key_prefix:
        if not key_prefix.endswith("/"):
            key_prefix = key_prefix + "/"
        object_key = key_prefix + bucket_path_of_file
    self.storage_client.upload_file(file_to_upload, bucket_name, object_key)
2 changes: 1 addition & 1 deletion labelit/mainapp/storage/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .config import storage_prefex_config
from .exceptions import StorageNotSupported
from .exceptions import StorageNotSupported, InvalidStoragePath

def get_storage_type(storage_path):
"""Get storage type for a storage path (local and supported remote storages)"""
Expand Down
11 changes: 11 additions & 0 deletions labelit/mainapp/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,14 @@ def validate_label_config(config):
_('Invalid Label Studio config'),
code='invalid'
)

def validate_remote_path(value):
    """Validate a remote export path for a project.

    The value is accepted when it is the "None" sentinel (no remote export
    configured) or when `get_storage_type` classifies it as something other
    than 'local'.

    Args:
        value: The path string entered for the remote export field.

    Raises:
        ValidationError: If `get_storage_type` fails for `value` or reports
            it as a 'local' path.
    """
    # The remote_export field defaults to the string "None", and the export
    # job treats that value as "no remote export". Rejecting it here would
    # make the field's own default fail validation.
    if value == "None":
        return

    try:
        storage_type = get_storage_type(value)
    except Exception as exc:
        # Any failure to classify the path is reported as an invalid value.
        # Unlike a bare `except:`, this lets KeyboardInterrupt and SystemExit
        # propagate.
        raise ValidationError(
            _('Enter a valid storage path!'),
            code='invalid'
        ) from exc

    # Only remote storage paths are acceptable as an export target.
    if storage_type == 'local':
        raise ValidationError(
            _('Enter a valid storage path!'),
            code='invalid'
        )