diff --git a/config.py.template b/config.py.template index 1580b4d..d8f219c 100644 --- a/config.py.template +++ b/config.py.template @@ -3,10 +3,11 @@ config = { "db": "/Users/naren/Developer/gpu-cluster/gpu_cluster_instances.db", "gpuless": False, "redis": "redis://localhost:6379/0", - "price_per_hour": -1 + "price_per_hour": -1, + "domain_name": "vault.acm.illinois.edu" } #ACM Specific acm_config = { "groot_authorization": None -} \ No newline at end of file +} diff --git a/frontend b/frontend index ff6caf5..ac4cb5e 160000 --- a/frontend +++ b/frontend @@ -1 +1 @@ -Subproject commit ff6caf5ef3e5b96540d1373f81907efb78c81845 +Subproject commit ac4cb5e79f029f128b156c80f4789038a51c3f18 diff --git a/gpu_cluster/controllers/container_controller.py b/gpu_cluster/controllers/container_controller.py index 24e1b4f..1189765 100644 --- a/gpu_cluster/controllers/container_controller.py +++ b/gpu_cluster/controllers/container_controller.py @@ -13,6 +13,7 @@ class ContainerController(abc.ABC): ''' def __init__(self, config): super().__init__() + self.config = config self.hourly_rate = config["price_per_hour"] ''' @@ -37,7 +38,6 @@ def launch_container(self, c_id): ''' STATUS QUERIERS ''' - def verify_launch(self, c_id): instance = db_session.query(Instance).filter_by(cid = c_id).first() return True if instance.launched == True else False diff --git a/gpu_cluster/controllers/cpu_container_controller.py b/gpu_cluster/controllers/cpu_container_controller.py index 320e9b9..7129b71 100644 --- a/gpu_cluster/controllers/cpu_container_controller.py +++ b/gpu_cluster/controllers/cpu_container_controller.py @@ -5,43 +5,62 @@ from .container_controller import ContainerController import docker + class CPUContainerController(ContainerController): def __init__(self, config): super().__init__(config) self.client = docker.from_env(version='auto') - + def create_container(self, image, user="", token_required=False, budget=-1): - uport = self.get_port() - mport = self.get_port() + uport = super().get_port() + mport = super().get_port() while uport == mport: - mport = self.get_port() + mport = super().get_port() - ports = {'8888/tcp':uport, - '6006/tcp':mport} + ports = {'8888/tcp': uport, + '6006/tcp': mport} print(image) - c_id = self.client.containers.run(image, "", auto_remove=True, detach=True, ports=ports).id + container_list = self.client.containers.list(filters={'name': image}) + if container_list: + c_id = self.client.containers.run(image, "", auto_remove=True, detach=True, ports=ports).id + + else: + # Add a client.images.search to check if the path to the container exists on docker hub. If not, error out + has_result = self.client.images.search(image) + if not has_result: + print('No image in DockerHub') + return 'No image in DockerHub' , '', '' + + image_tag = image.split(':') + docker_image = self.client.images.pull(image_tag[0], image_tag[1]) + + # If pull returns more than one image, get the first one in the list + if hasattr(docker_image, '__len__'): + docker_image = docker_image[0] + + # Do you have to build the image after you pull it from Docker Hub? + c_id = self.client.containers.run(docker_image, '', auto_remove=True, detach=True, ports=ports).id print(c_id) uurl = "" murl = "" + base_url = "http://{}".format(self.config["domain_name"]) if token_required: c = self.client.containers.get(c_id) token = c.exec_run('python3 /opt/cluster-container/jupyter_get.py') - uurl = "http://localhost:{}/?token={}".format(uport, token.decode("utf-8") ) - murl = "http://localhost:" + str(mport) + uurl = "{}:{}/?token={}".format(base_url, uport, token.decode("utf-8") ) + murl = base_url + str(mport) else: - uurl = "http://localhost:" + str(uport) - murl = "http://localhost:" + str(mport) + uurl = base_url + str(uport) + murl = base_url + str(mport) print(image) - - #TODO insert budget - db_session.add(Instance(c_id, uport, mport, uurl, murl, user, budget, token)) + # TODO insert budget + db_session.add(Instance(c_id, uport, mport, uurl, murl, user, budget, token)) db_session.commit() return c_id, uurl, murl def kill_container(self, c_id): c = self.client.containers.get(c_id) - c.stop() - \ No newline at end of file + c.stop() \ No newline at end of file diff --git a/gpu_cluster/controllers/gpu_container_controller.py b/gpu_cluster/controllers/gpu_container_controller.py index 4b269d2..5ee73a8 100644 --- a/gpu_cluster/controllers/gpu_container_controller.py +++ b/gpu_cluster/controllers/gpu_container_controller.py @@ -3,32 +3,31 @@ from .container_controller import ContainerController from nvdocker import NVDockerClient + class GPUContainerController(ContainerController): def __init__(self, config): super().__init__(config) self.docker_client = NVDockerClient() - - def create_container(image, user="", token_required=False, budget=-1, num_gpus=1): + + def create_container(self, image, user="", token_required=False, budget=-1, num_gpus=1): # Get 2 open ports for UI and Monitor - uport = self.get_port() - mport = self.get_port() + uport = super().get_port() + mport = super().get_port() while uport == mport: - mport = self.get_port() + mport = super().get_port() # Get select a gpu(s) that are least in use - num_available_gpus = len(docker_client.list_gpus()) + num_available_gpus = len(NVDockerClient.gpu_info()) if num_gpus > num_available_gpus: num_gpus = num_available_gpus gpus = [] - memory_usage = docker_client.gpu_memory_usage() - for g in num_gpus: - for gpu, used in memory_usage.items(): - if used < memory_usage[gpu[-1]]: - gpus.append(gpu) + for g in range(num_gpus): + if NVDockerClient.gpu_memory_usage(g)["free_mb"] > 0: + gpus.append(g) - # Assemble config for container + # Assemble config for container container_config = { "ports": { '8888/tcp': uport, @@ -40,20 +39,42 @@ def create_container(image, user="", token_required=False, budget=-1, num_gpus=1 "auto_remove": True } - #create container - c_id = docker_client.create_container(image, **container_config).id + # create container + container_list = self.docker_client.docker_image_list(filters={'name': image}) + print(image) + if container_list: + c_id = self.docker_client.create_container(image, **container_config).id + + else: + # Add a client.images.search to check if the path to the container exists on docker hub. If not, error out + has_result = self.docker_client.docker_image_search(image) + if not has_result: + print('No image in DockerHub') + return 'No image in DockerHub' , '', '' + + image_tag = image.split(':') + docker_image = self.docker_client.docker_image_pull(image_tag[0], image_tag[1]) + + # If pull returns more than one image, get the first one in the list + if hasattr(docker_image, '__len__'): + docker_image = docker_image[0] + print(docker_image) + + # Do you have to build the image after you pull it from Docker Hub? + c_id = self.docker_client.create_container(docker_image, **container_config).id - #assemble endpoints for UI, monitor and get the access token if needed + # assemble endpoints for UI, monitor and get the access token if needed uurl = "" murl = "" token = "" + base_url = "http://{}".format(self.config["domain_name"]) if token_required: - token = docker_client.exec_run(c_id, 'python3 /opt/cluster-container/jupyter_get.py') - uurl = "http://vault.acm.illinois.edu:{}/?token={}".format(uport, token.decode("utf-8") ) - murl = "http://vault.acm.illinois.edu:" + str(mport) + token = self.docker_client.exec_run(c_id, 'python3 /opt/cluster-container/jupyter_get.py') + uurl = "{}:{}/?token={}".format(base_url, uport, token.decode("utf-8") ) + murl = base_url + str(mport) else: - uurl = "http://vault.acm.illinois.edu:" + str(uport) - murl = "http://vault.acm.illinois.edu:" + str(mport) + uurl = base_url + str(uport) + murl = base_url + str(mport) #TODO insert budget budget = -1 @@ -62,4 +83,4 @@ def create_container(image, user="", token_required=False, budget=-1, num_gpus=1 return c_id, uurl, murl def kill_container(self, c_id): - self.docker_client.stop_container(c_id) \ No newline at end of file + self.docker_client.stop_container(c_id) diff --git a/gpu_cluster/routes/cluster_api.py b/gpu_cluster/routes/cluster_api.py index 8028c53..1a27ad8 100644 --- a/gpu_cluster/routes/cluster_api.py +++ b/gpu_cluster/routes/cluster_api.py @@ -4,6 +4,8 @@ from ..database import db_session from ..models import Instance from flask import Flask, jsonify, request, abort +from nvdocker import NVDockerClient +import socket class ClusterAPI(): def __init__(self, controller): @@ -19,6 +21,9 @@ def create_container(self): abort(400) cid, ui_url, murl = self.controller.create_container(request.json['image'], token_required=request.json['token_required'])#, user=request.json['user'], budget=request.json['budget'] ) + if ui_url == '' or murl == '': + abort(400) + return jsonify({'cid': cid, 'ui_url' : ui_url, 'monitor_url': murl}) def confirm_launch(self): @@ -29,13 +34,22 @@ def confirm_launch(self): if launched == False: return jsonify({"error" : "non-existant container"}) - return jsonify({"verified" : "confirmed"}) - + return jsonify({"verified" : "confirmed"}) + def kill_container(self): pass + def status(self): + hostname = socket.gethostname() + available_gpu = NVDockerClient.least_used_gpu() + response = { + "hostname" : hostname, + "gpu" : available_gpu + } + return jsonify(response) + def register_routes(self, app): app.add_url_rule('/create_container', 'create_container', self.create_container, methods=['POST']) app.add_url_rule('/confirm', 'confirm', self.confirm_launch, methods=['POST']) app.add_url_rule('/kill_container', 'kill_container', self.kill_container, methods=['POST']) - + app.add_url_rule('/status', 'status', self.status, methods=['GET']) diff --git a/requirements.txt b/requirements.txt index 62bf4f3..977e4fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ docker==2.5.1 flask-cors==3.0.3 PyYaml==3.12 celery==4.1.0 -nvdocker==0.0.2a3 \ No newline at end of file +nvdocker==0.0.2a5