From 89f31ac56955fab01eaa1ad58c50baec70b51cdb Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Wed, 29 Jun 2022 15:33:14 +0200
Subject: [PATCH 01/13] MVP dataproxy
---
README.md | 29 ++++++++
ebrains_drive/__init__.py | 2 +-
ebrains_drive/bucket.py | 69 ++++++++++++++++++++
ebrains_drive/buckets.py | 19 ++++++
ebrains_drive/client.py | 127 +++++++++++++++++++++++++-----------
ebrains_drive/exceptions.py | 9 +++
ebrains_drive/files.py | 17 +++++
ebrains_drive/utils.py | 22 ++++++-
8 files changed, 254 insertions(+), 40 deletions(-)
create mode 100644 ebrains_drive/bucket.py
create mode 100644 ebrains_drive/buckets.py
diff --git a/README.md b/README.md
index 9b1dc4719d..077e2f3e0b 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,35 @@ Example usage (refer to docs for more):
print(file_content)
```
+## Experimental support for data-proxy
+
+Original implementation source from Bjorn Kindler & Jan Fousek.
+
+Example Usage:
+
+```python
+from ebrains_drive import BucketApiClient
+client = BucketApiClient(token="ey...")
+
+# access existing bucket
+bucket = client.buckets.get_bucket("existing_bucket_name")
+
+# or create a new collab + bucket
+bucket = client.create_new("new_bucket_name")
+
+# upload new file
+bucket.upload("/home/jovyan/test.txt", "foobar.txt")
+
+# it seems newly uplaoded file will **NOT** be available immediately. Sleep for x seconds?
+from time import sleep
+sleep(1)
+
+# get the uploaded file
+bucket.get_file("foobar.txt")
+
+# delete a bucket (n.b. this will **NOT** delete the collab!)
+client.delete("new_bucket_name")
+```

diff --git a/ebrains_drive/__init__.py b/ebrains_drive/__init__.py
index 20ca2e167b..9325a9dd74 100644
--- a/ebrains_drive/__init__.py
+++ b/ebrains_drive/__init__.py
@@ -7,7 +7,7 @@
"""
-from ebrains_drive.client import DriveApiClient
+from ebrains_drive.client import DriveApiClient, BucketApiClient
def connect(username=None, password=None, token=None, env=""):
client = DriveApiClient(username, password, token, env)
diff --git a/ebrains_drive/bucket.py b/ebrains_drive/bucket.py
new file mode 100644
index 0000000000..367bcf5205
--- /dev/null
+++ b/ebrains_drive/bucket.py
@@ -0,0 +1,69 @@
+from typing import Iterable
+import requests
+from ebrains_drive.files import DataproxyFile
+from ebrains_drive.utils import on_401_raise_unauthorized
+
+class Bucket(object):
+
+ LIMIT = 10
+
+ """
+ A dataproxy bucket
+ """
+ def __init__(self, client, name: str, objects_count: int, bytes: int, last_modified: str, is_public: bool, role: str) -> None:
+ self.client = client
+ # Would have been a lot easier to use dataclass, but keep dependency to a minimum
+ self.name = name
+ self.objects_count = objects_count
+ self.bytes = bytes
+ self.last_modified = last_modified
+ self.is_public = is_public
+ self.role = role
+
+ @classmethod
+ def from_json(cls, client, bucket_json) -> 'Bucket':
+ return cls(client, **bucket_json)
+
+ def __str__(self):
+ return "(name='{}')".format(self.name)
+
+ def __repr__(self):
+ return "ebrains_drive.bucket.Bucket(name='{}')".format(self.name)
+
+ @on_401_raise_unauthorized
+ def ls(self) -> Iterable[DataproxyFile]:
+ marker = None
+ visited_hash = set()
+ while True:
+ resp = self.client.get(f"/v1/buckets/{self.name}", params={
+ 'limit': self.LIMIT,
+ 'marker': marker
+ })
+ objects = resp.json().get("objects", [])
+ if len(objects) == 0:
+ break
+
+ for obj in objects:
+
+ yield DataproxyFile.from_json(self.client, obj)
+ marker = obj.get("hash")
+
+ if marker in visited_hash:
+ raise RuntimeError(f"Bucket.ls error: hash {marker} has already been visited.")
+ visited_hash.add(marker)
+ return
+
+ @on_401_raise_unauthorized("Unauthorized")
+ def get_file(self, name: str):
+ name = name.lstrip("/")
+ return self.client.get(f"/v1/buckets/{self.name}/{name}").content
+
+ @on_401_raise_unauthorized("Unauthorized")
+ def upload(self, fileobj: str, filename: str):
+ filename = filename.lstrip("/")
+ resp = self.client.put(f"/v1/buckets/{self.name}/{filename}")
+ upload_url = resp.json().get("url")
+ if upload_url is None:
+ raise RuntimeError(f"Bucket.upload did not get upload url.")
+ resp = requests.request("PUT", upload_url, data=open(fileobj, 'rb'))
+ resp.raise_for_status()
diff --git a/ebrains_drive/buckets.py b/ebrains_drive/buckets.py
new file mode 100644
index 0000000000..95120fce02
--- /dev/null
+++ b/ebrains_drive/buckets.py
@@ -0,0 +1,19 @@
+from ebrains_drive.exceptions import ClientHttpError
+from ebrains_drive.utils import on_401_raise_unauthorized
+from ebrains_drive.bucket import Bucket
+
+class Buckets(object):
+
+ def __init__(self, client):
+ self.client = client
+
+ @on_401_raise_unauthorized('401 response. Check you/your token have access right and/or the bucket name has been spelt correctly.')
+ def get_bucket(self, bucket_name: str, *, forced=False) -> Bucket:
+ """Get the specified bucket according name. If forced flag is set to True, will attempt to create the collab, if necessary.
+ """
+ try:
+ resp = self.client.get(f"/v1/buckets/{bucket_name}/stat")
+ return Bucket.from_json(self.client, resp.json())
+ except ClientHttpError as e:
+ # if forced is True, create new and return
+ pass
diff --git a/ebrains_drive/client.py b/ebrains_drive/client.py
index efa825abe7..c3a568ca42 100644
--- a/ebrains_drive/client.py
+++ b/ebrains_drive/client.py
@@ -1,28 +1,20 @@
import re
from getpass import getpass
import requests
-from ebrains_drive.utils import urljoin
+from abc import ABC
+from ebrains_drive.utils import urljoin, on_401_raise_unauthorized
from ebrains_drive.exceptions import ClientHttpError
from ebrains_drive.repos import Repos
+from ebrains_drive.buckets import Buckets
from ebrains_drive.file import File
-
-class DriveApiClient(object):
- """Wraps seafile web api"""
- def __init__(self, username=None, password=None, token=None, env=""):
- """Wraps various basic operations to interact with seahub http api.
- """
- self._set_env(env)
-
- self.server = self.drive_url
+class ClientBase(ABC):
+ def __init__(self, username=None, password=None, token=None, env="") -> None:
self.username = username
self.password = password
self._token = token
-
- self.repos = Repos(self)
- self.groups = Groups(self)
- self.file = File(self)
+ self.server = None
if token is None:
if self.username is None:
@@ -36,6 +28,7 @@ def __init__(self, username=None, password=None, token=None, env=""):
print("Error: Invalid user credentials!")
raise
+
def _set_env(self, env=''):
self.suffix = ""
@@ -45,19 +38,9 @@ def _set_env(self, env=''):
self.suffix = "-int"
# else we keep empty suffix for production
- self.drive_url = "https://drive" + self.suffix + ".ebrains.eu"
self.iam_host = "iam" + self.suffix + ".ebrains.eu"
self.iam_url = "https://" + self.iam_host
-
- def get_drive_url(self):
- return self.drive_url
-
- def get_iam_host(self):
- return self.iam_host
-
- def get_iam_url(self):
- return self.iam_url
-
+
def _get_token(self):
response = requests.post(
self.iam_url+'/auth/realms/hbp/protocol/openid-connect/token',
@@ -67,29 +50,26 @@ def _get_token(self):
'username':self.username,
'password':self.password
})
-
self._token = response.json()['access_token']
-
- def __str__(self):
- return 'DriveApiClient[server=%s, user=%s]' % (self.server, self.username)
-
- __repr__ = __str__
-
+
def get(self, *args, **kwargs):
- return self._send_request('GET', *args, **kwargs)
+ return self.send_request('GET', *args, **kwargs)
def post(self, *args, **kwargs):
- return self._send_request('POST', *args, **kwargs)
+ return self.send_request('POST', *args, **kwargs)
def put(self, *args, **kwargs):
- return self._send_request('PUT', *args, **kwargs)
+ return self.send_request('PUT', *args, **kwargs)
def delete(self, *args, **kwargs):
- return self._send_request('delete', *args, **kwargs)
+ return self.send_request('delete', *args, **kwargs)
- def _send_request(self, method, url, *args, **kwargs):
+ def send_request(self, method: str, url: str, *args, **kwargs):
if not url.startswith('http'):
- url = urljoin(self.server, url)
+ # sanity checks.
+ # - accounts for if server was provided with trailing slashes
+ # - accounts for if url was provided with leading slashes
+ url = self.server.rstrip('/') + '/' + url.lstrip('/')
headers = kwargs.get('headers', {})
headers.setdefault('Authorization', 'Bearer ' + self._token)
@@ -106,6 +86,77 @@ def _send_request(self, method, url, *args, **kwargs):
return resp
+class DriveApiClient(ClientBase):
+ """Wraps seafile web api"""
+ def __init__(self, username=None, password=None, token=None, env=""):
+ """Wraps various basic operations to interact with seahub http api.
+ """
+ super().__init__(self, username, password, token, env)
+ self._set_env(env)
+
+ self.server = self.drive_url
+
+ self.repos = Repos(self)
+ self.groups = Groups(self)
+ self.file = File(self)
+
+ def _set_env(self, env=''):
+ super()._set_env(env)
+ self.drive_url = "https://drive" + self.suffix + ".ebrains.eu"
+
+ def get_drive_url(self):
+ return self.drive_url
+
+ def get_iam_host(self):
+ return self.iam_host
+
+ def get_iam_url(self):
+ return self.iam_url
+
+ def __str__(self):
+ return 'DriveApiClient[server=%s, user=%s]' % (self.server, self.username)
+
+ __repr__ = __str__
+
+ def send_request(self, method: str, url: str, *args, **kwargs):
+ if not url.startswith('http'):
+ url = urljoin(self.server, url)
+ return super().send_request(method, url, *args, **kwargs)
+
+
+class BucketApiClient(ClientBase):
+
+ def __init__(self, username=None, password=None, token=None, env="") -> None:
+ super().__init__(username, password, token, env)
+ if env != "":
+ raise NotImplementedError("non prod environment for dataproxy access has not yet been implemented.")
+
+ self._set_env(env)
+ self.server = "https://data-proxy.ebrains.eu/api"
+
+ self.buckets = Buckets(self)
+
+ @on_401_raise_unauthorized("Failed. Note: BucketApiClient.create_new needs to have clb.drive:write as a part of scope.")
+ def create_new(self, bucket_name: str, title=None, description="Created by ebrains_drive"):
+ # attempt to create new collab
+ self.send_request("POST", "https://wiki.ebrains.eu/rest/v1/collabs", json={
+ "name": bucket_name,
+ "title": title or bucket_name,
+ "description": description,
+ "drive": True,
+ "chat": True,
+ "public": False
+ }, expected=201)
+
+ # activate the bucket for the said collab
+ self.send_request("POST", "/v1/buckets", json={
+ "bucket_name": bucket_name
+ }, expected=201)
+
+ @on_401_raise_unauthorized("Failed. Note: BucketApiClient.create_new needs to have clb.drive:write as a part of scope.")
+ def delete(self, bucket_name: str):
+ self.send_request("DELETE", f"/v1/buckets/{bucket_name}")
+
class Groups(object):
def __init__(self, client):
diff --git a/ebrains_drive/exceptions.py b/ebrains_drive/exceptions.py
index b11498d776..8803c2fc9c 100644
--- a/ebrains_drive/exceptions.py
+++ b/ebrains_drive/exceptions.py
@@ -23,3 +23,12 @@ def __init__(self, msg):
def __str__(self):
return 'DoesNotExist: %s' % self.msg
+
+class Unauthorized(Exception):
+
+ def __init__(self, msg):
+ super().__init__()
+ self.msg = msg
+
+ def __str__(self):
+ return 'Unauthorized. This could be a result of either incorrect path or insufficient privilege. %s' % self.msg
diff --git a/ebrains_drive/files.py b/ebrains_drive/files.py
index 34c6d2399a..1ca9f92686 100644
--- a/ebrains_drive/files.py
+++ b/ebrains_drive/files.py
@@ -3,6 +3,7 @@
import posixpath
import re
import time
+from typing import Any, Dict
from ebrains_drive.utils import querystr
# Note: only files and dirs with contents is assigned an ID; else their ID is set to all zeros
@@ -338,3 +339,19 @@ def get_content(self):
"""Get the content of the file"""
url = self._get_download_link()
return self.client.get(url).content
+
+class DataproxyFile:
+ def __init__(self, client, hash: str, last_modified: str, bytes: int, name: str, content_type: str) -> None:
+ self.client = client
+
+ self.hash = hash
+ self.last_modified = last_modified
+ self.bytes = bytes
+ self.name = name
+ self.content_type = content_type
+
+
+ @classmethod
+ def from_json(cls, client, file_json: Dict[str, Any]):
+ return cls(client, **file_json)
+
diff --git a/ebrains_drive/utils.py b/ebrains_drive/utils.py
index 9e0d684739..669ca2156b 100644
--- a/ebrains_drive/utils.py
+++ b/ebrains_drive/utils.py
@@ -1,8 +1,9 @@
import string
import random
from functools import wraps
+from typing import Type
from urllib.parse import urlencode
-from ebrains_drive.exceptions import ClientHttpError, DoesNotExist
+from ebrains_drive.exceptions import ClientHttpError, DoesNotExist, Unauthorized
def randstring(length=0):
if length == 0:
@@ -20,6 +21,24 @@ def urljoin(base, *args):
url = url[:-1]
return url
+def _raise_on(http_code: int, Ex: Type[Exception]):
+ def raise_on(msg: str):
+ def decorator(func):
+ @wraps(func)
+ def wrapped(*args, **kwargs):
+ try:
+ return func(*args, **kwargs)
+ except ClientHttpError as e:
+ if e.code == http_code:
+ raise Ex(msg)
+ else:
+ raise e
+ return wrapped
+ return decorator
+ return raise_on
+
+on_401_raise_unauthorized = _raise_on(401, Unauthorized)
+
def raise_does_not_exist(msg):
"""Decorator to turn a function that get a http 404 response to a
:exc:`DoesNotExist` exception."""
@@ -44,6 +63,7 @@ def to_utf8(obj):
def querystr(**kwargs):
return '?' + urlencode(kwargs)
+# not used?
def utf8lize(obj):
if isinstance(obj, dict):
return {k: to_utf8(v) for k, v in obj.items()}
From 5c368a5d1483df539bac51bc1a8c16449cd102e3 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Thu, 30 Jun 2022 06:49:25 +0200
Subject: [PATCH 02/13] feat: cleaned up and unified file handle
---
README.md | 3 ++-
ebrains_drive/bucket.py | 15 ++++++++++-----
ebrains_drive/files.py | 22 +++++++++++++++++++---
3 files changed, 31 insertions(+), 9 deletions(-)
diff --git a/README.md b/README.md
index 077e2f3e0b..f61b294a9c 100644
--- a/README.md
+++ b/README.md
@@ -82,7 +82,8 @@ from time import sleep
sleep(1)
# get the uploaded file
-bucket.get_file("foobar.txt")
+file_handle = bucket.get_file("foobar.txt")
+file_content = file_handle.get_content()
# delete a bucket (n.b. this will **NOT** delete the collab!)
client.delete("new_bucket_name")
diff --git a/ebrains_drive/bucket.py b/ebrains_drive/bucket.py
index 367bcf5205..d8e06fa97c 100644
--- a/ebrains_drive/bucket.py
+++ b/ebrains_drive/bucket.py
@@ -1,5 +1,6 @@
from typing import Iterable
import requests
+from ebrains_drive.exceptions import DoesNotExist
from ebrains_drive.files import DataproxyFile
from ebrains_drive.utils import on_401_raise_unauthorized
@@ -31,13 +32,14 @@ def __repr__(self):
return "ebrains_drive.bucket.Bucket(name='{}')".format(self.name)
@on_401_raise_unauthorized
- def ls(self) -> Iterable[DataproxyFile]:
+ def ls(self, prefix: str) -> Iterable[DataproxyFile]:
marker = None
visited_hash = set()
while True:
resp = self.client.get(f"/v1/buckets/{self.name}", params={
'limit': self.LIMIT,
- 'marker': marker
+ 'marker': marker,
+ 'prefix': prefix
})
objects = resp.json().get("objects", [])
if len(objects) == 0:
@@ -45,7 +47,7 @@ def ls(self) -> Iterable[DataproxyFile]:
for obj in objects:
- yield DataproxyFile.from_json(self.client, obj)
+ yield DataproxyFile.from_json(self.client, self, obj)
marker = obj.get("hash")
if marker in visited_hash:
@@ -54,9 +56,12 @@ def ls(self) -> Iterable[DataproxyFile]:
return
@on_401_raise_unauthorized("Unauthorized")
- def get_file(self, name: str):
+ def get_file(self, name: str) -> DataproxyFile:
name = name.lstrip("/")
- return self.client.get(f"/v1/buckets/{self.name}/{name}").content
+ for file in self.ls(prefix=name):
+ if file.name == name:
+ return file
+ raise DoesNotExist(f"Cannot find {name}.")
@on_401_raise_unauthorized("Unauthorized")
def upload(self, fileobj: str, filename: str):
diff --git a/ebrains_drive/files.py b/ebrains_drive/files.py
index 1ca9f92686..caaf350d66 100644
--- a/ebrains_drive/files.py
+++ b/ebrains_drive/files.py
@@ -341,8 +341,9 @@ def get_content(self):
return self.client.get(url).content
class DataproxyFile:
- def __init__(self, client, hash: str, last_modified: str, bytes: int, name: str, content_type: str) -> None:
+ def __init__(self, client, bucket, hash: str, last_modified: str, bytes: int, name: str, content_type: str) -> None:
self.client = client
+ self.bucket = bucket
self.hash = hash
self.last_modified = last_modified
@@ -350,8 +351,23 @@ def __init__(self, client, hash: str, last_modified: str, bytes: int, name: str,
self.name = name
self.content_type = content_type
+ def __str__(self):
+ return 'DataproxyFile[bucket=%s, path=%s, size=%s]' % \
+ (self.bucket.name, self.name, self.bytes)
+
+ __repr__ = __str__
+
+ def get_download_link(self):
+ resp = self.client.get(f"/v1/buckets/{self.bucket.name}/{self.name}", params={
+ "redirect": False
+ })
+ return resp.json().get("url")
+
+ def get_content(self):
+ url = self.get_download_link()
+ return self.client.get(url).content
@classmethod
- def from_json(cls, client, file_json: Dict[str, Any]):
- return cls(client, **file_json)
+ def from_json(cls, client, bucket, file_json: Dict[str, Any]):
+ return cls(client, bucket, **file_json)
From e91f233e22284a1e1ef69db34b4f2582499f8961 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Thu, 30 Jun 2022 06:59:47 +0200
Subject: [PATCH 03/13] bugfix: use name as marker (found by brute force)
bugfix: unauth decorator
---
ebrains_drive/bucket.py | 12 ++++++------
ebrains_drive/buckets.py | 10 +++-------
2 files changed, 9 insertions(+), 13 deletions(-)
diff --git a/ebrains_drive/bucket.py b/ebrains_drive/bucket.py
index d8e06fa97c..1e30247954 100644
--- a/ebrains_drive/bucket.py
+++ b/ebrains_drive/bucket.py
@@ -31,10 +31,10 @@ def __str__(self):
def __repr__(self):
return "ebrains_drive.bucket.Bucket(name='{}')".format(self.name)
- @on_401_raise_unauthorized
- def ls(self, prefix: str) -> Iterable[DataproxyFile]:
+ @on_401_raise_unauthorized("Unauthorized.")
+ def ls(self, prefix: str=None) -> Iterable[DataproxyFile]:
marker = None
- visited_hash = set()
+ visited_name = set()
while True:
resp = self.client.get(f"/v1/buckets/{self.name}", params={
'limit': self.LIMIT,
@@ -48,11 +48,11 @@ def ls(self, prefix: str) -> Iterable[DataproxyFile]:
for obj in objects:
yield DataproxyFile.from_json(self.client, self, obj)
- marker = obj.get("hash")
+ marker = obj.get("name")
- if marker in visited_hash:
+ if marker in visited_name:
raise RuntimeError(f"Bucket.ls error: hash {marker} has already been visited.")
- visited_hash.add(marker)
+ visited_name.add(marker)
return
@on_401_raise_unauthorized("Unauthorized")
diff --git a/ebrains_drive/buckets.py b/ebrains_drive/buckets.py
index 95120fce02..dc130f4c67 100644
--- a/ebrains_drive/buckets.py
+++ b/ebrains_drive/buckets.py
@@ -8,12 +8,8 @@ def __init__(self, client):
self.client = client
@on_401_raise_unauthorized('401 response. Check you/your token have access right and/or the bucket name has been spelt correctly.')
- def get_bucket(self, bucket_name: str, *, forced=False) -> Bucket:
+ def get_bucket(self, bucket_name: str) -> Bucket:
"""Get the specified bucket according name. If forced flag is set to True, will attempt to create the collab, if necessary.
"""
- try:
- resp = self.client.get(f"/v1/buckets/{bucket_name}/stat")
- return Bucket.from_json(self.client, resp.json())
- except ClientHttpError as e:
- # if forced is True, create new and return
- pass
+ resp = self.client.get(f"/v1/buckets/{bucket_name}/stat")
+ return Bucket.from_json(self.client, resp.json())
From 4c7f353f54a7c3466144e7b65873ca66bc80e38c Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Thu, 30 Jun 2022 10:26:44 +0200
Subject: [PATCH 04/13] bugfix: access file content
---
ebrains_drive/files.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/ebrains_drive/files.py b/ebrains_drive/files.py
index caaf350d66..6021cf216b 100644
--- a/ebrains_drive/files.py
+++ b/ebrains_drive/files.py
@@ -364,8 +364,7 @@ def get_download_link(self):
return resp.json().get("url")
def get_content(self):
- url = self.get_download_link()
- return self.client.get(url).content
+ return self.client.get(f"/v1/buckets/{self.bucket.name}/{self.name}").content
@classmethod
def from_json(cls, client, bucket, file_json: Dict[str, Any]):
From c163a1ea9a9c2107620a6a92bfeb97f70b604c82 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Mon, 11 Jul 2022 16:36:00 +0200
Subject: [PATCH 05/13] add some test coverage
---
README.md | 34 +++++++++----------
ebrains_drive/buckets.py | 1 -
ebrains_drive/files.py | 7 +++-
ebrains_drive/utils.py | 7 +++-
tests/test_bucket.py | 73 ++++++++++++++++++++++++++++++++++++++++
5 files changed, 102 insertions(+), 20 deletions(-)
create mode 100644 tests/test_bucket.py
diff --git a/README.md b/README.md
index f61b294a9c..34923cb22b 100644
--- a/README.md
+++ b/README.md
@@ -60,33 +60,33 @@ Example usage (refer to docs for more):
## Experimental support for data-proxy
-Original implementation source from Bjorn Kindler & Jan Fousek.
+Original implementation from Bjorn Kindler & Jan Fousek.
Example Usage:
```python
-from ebrains_drive import BucketApiClient
-client = BucketApiClient(token="ey...")
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
-# access existing bucket
-bucket = client.buckets.get_bucket("existing_bucket_name")
+ # access existing bucket
+ bucket = client.buckets.get_bucket("existing_bucket_name")
-# or create a new collab + bucket
-bucket = client.create_new("new_bucket_name")
+ # or create a new collab + bucket
+ bucket = client.create_new("new_bucket_name")
-# upload new file
-bucket.upload("/home/jovyan/test.txt", "foobar.txt")
+ # upload new file
+ bucket.upload("/home/jovyan/test.txt", "foobar.txt")
-# it seems newly uplaoded file will **NOT** be available immediately. Sleep for x seconds?
-from time import sleep
-sleep(1)
+ # it seems newly uplaoded file will **NOT** be available immediately. Sleep for x seconds?
+ from time import sleep
+ sleep(1)
-# get the uploaded file
-file_handle = bucket.get_file("foobar.txt")
-file_content = file_handle.get_content()
+ # get the uploaded file
+ file_handle = bucket.get_file("foobar.txt")
+ file_content = file_handle.get_content()
-# delete a bucket (n.b. this will **NOT** delete the collab!)
-client.delete("new_bucket_name")
+ # delete a bucket (n.b. this will **NOT** delete the collab!)
+ client.delete("new_bucket_name")
```

diff --git a/ebrains_drive/buckets.py b/ebrains_drive/buckets.py
index dc130f4c67..f39bdc2f1d 100644
--- a/ebrains_drive/buckets.py
+++ b/ebrains_drive/buckets.py
@@ -1,4 +1,3 @@
-from ebrains_drive.exceptions import ClientHttpError
from ebrains_drive.utils import on_401_raise_unauthorized
from ebrains_drive.bucket import Bucket
diff --git a/ebrains_drive/files.py b/ebrains_drive/files.py
index 6021cf216b..5696407336 100644
--- a/ebrains_drive/files.py
+++ b/ebrains_drive/files.py
@@ -4,6 +4,7 @@
import re
import time
from typing import Any, Dict
+import requests
from ebrains_drive.utils import querystr
# Note: only files and dirs with contents is assigned an ID; else their ID is set to all zeros
@@ -358,13 +359,17 @@ def __str__(self):
__repr__ = __str__
def get_download_link(self):
+ """n.b. this download link expires in the order of seconds
+ """
resp = self.client.get(f"/v1/buckets/{self.bucket.name}/{self.name}", params={
"redirect": False
})
return resp.json().get("url")
def get_content(self):
- return self.client.get(f"/v1/buckets/{self.bucket.name}/{self.name}").content
+ url = self.get_download_link()
+ # Auth header must **NOT** be attached to the download link obtained, or we will get 401
+ return requests.get(url).content
@classmethod
def from_json(cls, client, bucket, file_json: Dict[str, Any]):
diff --git a/ebrains_drive/utils.py b/ebrains_drive/utils.py
index 669ca2156b..5f9a533f5b 100644
--- a/ebrains_drive/utils.py
+++ b/ebrains_drive/utils.py
@@ -1,5 +1,6 @@
import string
import random
+import inspect
from functools import wraps
from typing import Type
from urllib.parse import urlencode
@@ -27,7 +28,11 @@ def decorator(func):
@wraps(func)
def wrapped(*args, **kwargs):
try:
- return func(*args, **kwargs)
+ if inspect.isgeneratorfunction(func):
+ for v in func(*args, **kwargs):
+ yield v
+ else:
+ return func(*args, **kwargs)
except ClientHttpError as e:
if e.code == http_code:
raise Ex(msg)
diff --git a/tests/test_bucket.py b/tests/test_bucket.py
new file mode 100644
index 0000000000..27ad0096a8
--- /dev/null
+++ b/tests/test_bucket.py
@@ -0,0 +1,73 @@
+import pytest
+from unittest.mock import MagicMock
+from ebrains_drive.bucket import Bucket
+from ebrains_drive.exceptions import ClientHttpError, Unauthorized
+
+class MockClient:
+ def get(self, *args, **kwargs):
+ raise NotImplementedError
+ def put(self, *args, **kwargs):
+ raise NotImplementedError
+
+class MockHttpResp:
+ def __init__(self, resp):
+ self.resp = resp
+ def json(self):
+ return self.resp
+
+bucket_json={
+ 'name': 'foo',
+ 'objects_count': 12,
+ 'bytes': 112233,
+ 'last_modified': 'foo-bar',
+ 'is_public': False,
+ 'role': 'admin',
+}
+
+file_json1={
+ 'name': 'foo',
+ 'hash': 'hash-foo',
+ 'last_modified': 'last-modified',
+ 'bytes': 123,
+ 'content_type': 'json'
+}
+
+
+
+def test_from_json():
+ client = MockClient()
+ bucket = Bucket.from_json(client, bucket_json)
+ assert isinstance(bucket, Bucket)
+
+def test_ls_when_raise_client_error():
+
+ client = MockClient()
+ client.get = MagicMock()
+ client.get.side_effect = [
+ ClientHttpError(401, "foo-bar")
+ ]
+
+ bucket = Bucket.from_json(client, bucket_json)
+
+ try:
+ fs = [f for f in bucket.ls()]
+ raise Exception("did not raise")
+ except Exception as e:
+ assert isinstance(e, Unauthorized), f"Expect raise Unauthorized: {e}"
+
+def test_ls_when_repeats():
+
+ client = MockClient()
+ client.get = MagicMock()
+ client.get.side_effect = [
+ MockHttpResp({
+ 'objects': [file_json1, file_json1]
+ })
+ ]
+ bucket = Bucket.from_json(client, bucket_json)
+
+ try:
+ fs = [f for f in bucket.ls()]
+ raise Exception("did not raise")
+ except Exception as e:
+ assert isinstance(e, RuntimeError), f"Expect raise RuntimeError: {e}"
From c0a023e33c772fa1081478c39bed71d8cdbb57e5 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Tue, 12 Jul 2022 09:20:51 +0200
Subject: [PATCH 06/13] fix decorator & add test for decorator factory
---
ebrains_drive/utils.py | 38 +++++++++++++++++++++++++-------------
tests/test_utils.py | 16 ++++++++++++++++
2 files changed, 41 insertions(+), 13 deletions(-)
create mode 100644 tests/test_utils.py
diff --git a/ebrains_drive/utils.py b/ebrains_drive/utils.py
index 5f9a533f5b..fd18675699 100644
--- a/ebrains_drive/utils.py
+++ b/ebrains_drive/utils.py
@@ -23,22 +23,34 @@ def urljoin(base, *args):
return url
def _raise_on(http_code: int, Ex: Type[Exception]):
+ """Decorator factory function to turn a function that gets an HTTP `http_code` response
+ into one that raises an `Ex` exception."""
def raise_on(msg: str):
def decorator(func):
- @wraps(func)
- def wrapped(*args, **kwargs):
- try:
- if inspect.isgeneratorfunction(func):
- for v in func(*args, **kwargs):
- yield v
- else:
+
+ if inspect.isgeneratorfunction(func):
+ @wraps(func)
+ def wrapped(*args, **kwargs):
+ try:
+ yield from func(*args, *kwargs)
+ except ClientHttpError as e:
+ if e.code == http_code:
+ raise Ex(msg)
+ else:
+ raise e
+ return wrapped
+
+ else:
+ @wraps(func)
+ def wrapped(*args, **kwargs):
+ try:
return func(*args, **kwargs)
- except ClientHttpError as e:
- if e.code == http_code:
- raise Ex(msg)
- else:
- raise e
- return wrapped
+ except ClientHttpError as e:
+ if e.code == http_code:
+ raise Ex(msg)
+ else:
+ raise e
+ return wrapped
return decorator
return raise_on
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000000..960867e1f6
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,16 @@
+import inspect
+import pytest
+from ebrains_drive.utils import on_401_raise_unauthorized
+
+def generator_fn():
+ yield 1
+
+test_401_parameters = [
+ (generator_fn, True),
+ (lambda: 1, False)
+]
+
+@pytest.mark.parametrize('func,is_generator', test_401_parameters)
+def test_on_401_wrap(func,is_generator):
+ wrapped_fn = on_401_raise_unauthorized('oh noes')(func)
+ assert inspect.isgeneratorfunction(wrapped_fn) == is_generator
From 29b631531fee5931ed9ebf651d21f6d410b2f5d4 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Tue, 12 Jul 2022 09:49:08 +0200
Subject: [PATCH 07/13] fix typo
---
ebrains_drive/utils.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ebrains_drive/utils.py b/ebrains_drive/utils.py
index fd18675699..88d76dd11f 100644
--- a/ebrains_drive/utils.py
+++ b/ebrains_drive/utils.py
@@ -32,7 +32,7 @@ def decorator(func):
@wraps(func)
def wrapped(*args, **kwargs):
try:
- yield from func(*args, *kwargs)
+ yield from func(*args, **kwargs)
except ClientHttpError as e:
if e.code == http_code:
raise Ex(msg)
From ec4d8fa65b6ad8124939f60ee9e5249a50149719 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Wed, 13 Jul 2022 08:18:31 +0200
Subject: [PATCH 08/13] MVP support for dataset/hdg
---
README.md | 26 +++++++++++++++++++++++++-
ebrains_drive/bucket.py | 27 +++++++++++++++++++--------
ebrains_drive/buckets.py | 27 +++++++++++++++++++++++++--
ebrains_drive/exceptions.py | 2 ++
ebrains_drive/files.py | 2 +-
5 files changed, 72 insertions(+), 12 deletions(-)
diff --git a/README.md b/README.md
index 34923cb22b..5a168d993a 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,8 @@ Original implementation from Bjorn Kindler & Jan Fousek.
Example Usage:
+### Access collab bucket
+
```python
from ebrains_drive import BucketApiClient
client = BucketApiClient(token="ey...")
@@ -75,12 +77,15 @@ Example Usage:
bucket = client.create_new("new_bucket_name")
# upload new file
- bucket.upload("/home/jovyan/test.txt", "foobar.txt")
+ bucket.upload("/home/jovyan/test.txt", "test/foobar.txt")
# it seems newly uplaoded file will **NOT** be available immediately. Sleep for x seconds?
from time import sleep
sleep(1)
+ # list the contents
+ files = [f for f in bucket.ls(prefix="test")]
+
# get the uploaded file
file_handle = bucket.get_file("foobar.txt")
file_content = file_handle.get_content()
@@ -89,6 +94,25 @@ Example Usage:
client.delete("new_bucket_name")
```
+### Access datasets (e.g. HDG datasets)
+
+```python
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+
+ # access dataset bucket
+ # setting request_access=True will start the relevant access-request-flow when accessing HDG datasets
+ bucket = client.buckets.get_dataset("existing_dataset_id", request_access=True)
+
+ # list the contents
+ files = [f for f in bucket.ls(prefix="path/to/somewhere/foo")]
+
+ # get a file content
+ file_handle = bucket.get_file("path/to/somewhere/foobar.txt")
+ file_content = file_handle.get_content()
+
+```
+
### ACKNOWLEDGEMENTS
diff --git a/ebrains_drive/bucket.py b/ebrains_drive/bucket.py
index 1e30247954..2282e0a5a0 100644
--- a/ebrains_drive/bucket.py
+++ b/ebrains_drive/bucket.py
@@ -1,19 +1,27 @@
from typing import Iterable
import requests
-from ebrains_drive.exceptions import DoesNotExist
+from ebrains_drive.exceptions import DoesNotExist, InvalidParameter
from ebrains_drive.files import DataproxyFile
from ebrains_drive.utils import on_401_raise_unauthorized
class Bucket(object):
- LIMIT = 10
+ LIMIT = 100
"""
A dataproxy bucket
+ n.b. for a dataset bucket, role & is_public may be None
"""
- def __init__(self, client, name: str, objects_count: int, bytes: int, last_modified: str, is_public: bool, role: str) -> None:
+ def __init__(self, client, name: str, objects_count: int, bytes: int, last_modified: str, is_public: bool = None, role: str = None, *, public: bool= False, target: str='buckets', dataset_id: str=None) -> None:
+ if target != 'buckets' and target != 'datasets':
+ raise InvalidParameter(f'Init Buckets exception: target can be left unset, but if set, must either be buckets or datasets')
+ if public:
+ raise NotImplementedError(f"Access to public datasets/buckets NYI.")
+ self.public = public
+ self.target = target
+
self.client = client
- # Would have been a lot easier to use dataclass, but keep dependency to a minimum
+
self.name = name
self.objects_count = objects_count
self.bytes = bytes
@@ -21,9 +29,12 @@ def __init__(self, client, name: str, objects_count: int, bytes: int, last_modif
self.is_public = is_public
self.role = role
+ # n.b. for dataset bucket, dataset_id needs to be used for dataproxy_entity_name, but for collab bucket, name is used
+ self.dataproxy_entity_name = dataset_id or name
+
@classmethod
- def from_json(cls, client, bucket_json) -> 'Bucket':
- return cls(client, **bucket_json)
+ def from_json(cls, client, bucket_json, *, public:bool = False, target: str='buckets', dataset_id=None) -> 'Bucket':
+ return cls(client, **bucket_json, public=public, target=target, dataset_id=dataset_id)
def __str__(self):
return "(name='{}')".format(self.name)
@@ -36,7 +47,7 @@ def ls(self, prefix: str=None) -> Iterable[DataproxyFile]:
marker = None
visited_name = set()
while True:
- resp = self.client.get(f"/v1/buckets/{self.name}", params={
+ resp = self.client.get(f"/v1/{self.target}/{self.dataproxy_entity_name}", params={
'limit': self.LIMIT,
'marker': marker,
'prefix': prefix
@@ -66,7 +77,7 @@ def get_file(self, name: str) -> DataproxyFile:
@on_401_raise_unauthorized("Unauthorized")
def upload(self, fileobj: str, filename: str):
filename = filename.lstrip("/")
- resp = self.client.put(f"/v1/buckets/{self.name}/{filename}")
+ resp = self.client.put(f"/v1/{self.target}/{self.dataproxy_entity_name}/{filename}")
upload_url = resp.json().get("url")
if upload_url is None:
raise RuntimeError(f"Bucket.upload did not get upload url.")
diff --git a/ebrains_drive/buckets.py b/ebrains_drive/buckets.py
index f39bdc2f1d..f5986e0c72 100644
--- a/ebrains_drive/buckets.py
+++ b/ebrains_drive/buckets.py
@@ -1,5 +1,7 @@
+from ebrains_drive.exceptions import ClientHttpError, Unauthorized
from ebrains_drive.utils import on_401_raise_unauthorized
from ebrains_drive.bucket import Bucket
+from time import sleep
class Buckets(object):
@@ -7,8 +9,29 @@ def __init__(self, client):
self.client = client
@on_401_raise_unauthorized('401 response. Check you/your token have access right and/or the bucket name has been spelt correctly.')
- def get_bucket(self, bucket_name: str) -> Bucket:
+ def get_bucket(self, bucket_name: str, *, public: bool=False) -> Bucket:
"""Get the specified bucket according name. If forced flag is set to True, will attempt to create the collab, if necessary.
"""
resp = self.client.get(f"/v1/buckets/{bucket_name}/stat")
- return Bucket.from_json(self.client, resp.json())
+ return Bucket.from_json(self.client, resp.json(), public=public, target='buckets')
+
+ def get_dataset(self, dataset_id: str, *, public: bool=False, request_access: bool=False):
+ request_sent = False
+ attempt_no = 0
+ while True:
+ try:
+ resp = self.client.get(f"/v1/datasets/{dataset_id}/stat")
+ return Bucket.from_json(self.client, resp.json(), public=public, target="datasets", dataset_id=dataset_id)
+ except ClientHttpError as e:
+ if e.code != 401:
+ raise e
+
+ if not request_access:
+ raise Unauthorized(f"You do not have access to this dataset. If this is a private dataset, try to set request_access flag to true. We can start the procedure of requesting access for you.")
+ if not request_sent:
+ self.client.post(f"/v1/datasets/{dataset_id}", expected=(200, 201))
+ request_sent = True
+ print("Request sent. Please check the mail box associated with the token.")
+ sleep(5)
+ attempt_no = attempt_no + 1
+ print(f"Checking permission, attempt {attempt_no}")
diff --git a/ebrains_drive/exceptions.py b/ebrains_drive/exceptions.py
index 8803c2fc9c..cfbf9b16ec 100644
--- a/ebrains_drive/exceptions.py
+++ b/ebrains_drive/exceptions.py
@@ -32,3 +32,5 @@ def __init__(self, msg):
def __str__(self):
return 'Unauthorized. This could be a result of either incorrect path or insufficient privilege. %s' % self.msg
+
+class InvalidParameter(Exception): pass
diff --git a/ebrains_drive/files.py b/ebrains_drive/files.py
index 5696407336..e723246dbf 100644
--- a/ebrains_drive/files.py
+++ b/ebrains_drive/files.py
@@ -361,7 +361,7 @@ def __str__(self):
def get_download_link(self):
"""n.b. this download link expires in the order of seconds
"""
- resp = self.client.get(f"/v1/buckets/{self.bucket.name}/{self.name}", params={
+ resp = self.client.get(f"/v1/{self.bucket.target}/{self.bucket.dataproxy_entity_name}/{self.name}", params={
"redirect": False
})
return resp.json().get("url")
From a970964aa0dbe2997108613edc907331be2e26a9 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Fri, 22 Jul 2022 12:02:22 +0200
Subject: [PATCH 09/13] feat: add doc.md
---
README.md | 4 +-
doc.md | 269 ++++++++++++++++++++++++++++++++++++++++-
ebrains_drive/files.py | 9 +-
3 files changed, 277 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 5a168d993a..e570d5e786 100644
--- a/README.md
+++ b/README.md
@@ -71,10 +71,10 @@ Example Usage:
client = BucketApiClient(token="ey...")
# access existing bucket
- bucket = client.buckets.get_bucket("existing_bucket_name")
+ bucket = client.buckets.get_bucket("existing_collab_name")
# or create a new collab + bucket
- bucket = client.create_new("new_bucket_name")
+ bucket = client.create_new("new_collab_name")
# upload new file
bucket.upload("/home/jovyan/test.txt", "test/foobar.txt")
diff --git a/doc.md b/doc.md
index 0e1749760a..6d6f323022 100644
--- a/doc.md
+++ b/doc.md
@@ -1,6 +1,8 @@
-# Python Seafile
+# Ebrains Drive
-# Python Seafile
+# Drive (Seafile)
## Get Client ##
@@ -459,3 +484,243 @@ None
**Return Type**
A Response Instance
+
+
+
+# Bucket
+
+## Get Client
+**Request Parameters**
+
+* token
+
+**Sample Case**
+
+```python
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+```
+
+
+**Return Type**
+
+A Client Object
+
+## Bucket ##
+### Get Bucket ###
+**Request Parameters**
+
+* existing_collab_name
+
+**Sample Case**
+
+```python
+
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+ bucket = client.buckets.get_bucket("existing_collab_name")
+```
+
+**Return Type**
+
+A Bucket Object
+
+**Exceptions**
+
+* Bucket does not exist, or the user is not authorized to use the specified bucket
+
+### Create Bucket ###
+**Request Parameters**
+
+* new_collab_name
+
+**Sample Case**
+
+```python
+
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+ bucket = client.create_new("new_collab_name")
+```
+
+**Return Type**
+
+A Bucket Object
+
+**Exceptions**
+
+* Unauthorized to create new collab or bucket
+
+### List Bucket Entries ###
+**Request Parameters**
+
+* prefix (optional)
+
+**Sample Case**
+
+```python
+
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+ bucket = client.buckets.get_bucket("existing_collab_name")
+
+ # shows all files
+ all_files = [f for f in bucket.ls()]
+
+ # shows all files that begin with path/to/my/files
+ my_files = [f for f in bucket.ls(prefix="path/to/my/files")]
+```
+
+**Return Type**
+
+An Iterator of File Objects
+
+**Exceptions**
+
+* Unauthorized
+
+## Dataset ##
+### Get Dataset ###
+
+Note, if _request_access_ is set to `True`, this method may require user interaction.
+
+**Request Parameters**
+
+* dataset_id
+* request_access (optional, default `False`)
+
+**Sample Case**
+
+```python
+
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+ bucket = client.buckets.get_dataset("dataset_id")
+
+```
+
+**Return Type**
+A Bucket Object
+
+**Exceptions**
+
+* Unauthorized (if _request_access_ is not set)
+
+## File ##
+
+Files in buckets are not typically organised in directories. Users may use `/` in filenames to construct a directory-like structure.
+
+
+### Get File ###
+**Request Parameters**
+
+* filename
+
+**Sample Case**
+
+```python
+
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+
+ bucket = client.buckets.get_bucket("existing_collab_name")
+ # OR
+ bucket = client.buckets.get_dataset("dataset_id")
+
+ file_handle = bucket.get_file("filename")
+
+```
+
+**Return Type**
+
+A File Object
+
+**Exceptions**
+
+* Unauthorized
+* DoesNotExist
+
+### Get File Content ###
+**Request Parameters**
+
+* filename
+
+**Sample Case**
+
+```python
+
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+
+ bucket = client.buckets.get_bucket("existing_collab_name")
+ # OR
+ bucket = client.buckets.get_dataset("dataset_id")
+
+ file_handle = bucket.get_file("filename")
+ file_content = file_handle.get_content()
+
+```
+
+**Return Type**
+
+bytes
+
+**Exceptions**
+
+* Unauthorized
+* DoesNotExist
+
+
+### Upload File ###
+**Request Parameters**
+
+* path_to_file
+* dest_filename
+
+**Sample Case**
+
+```python
+
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+ bucket = client.buckets.get_bucket("existing_collab_name")
+
+ bucket.upload("path_to_file", "dest_filename")
+
+```
+
+**Return Type**
+
+None
+
+**Exceptions**
+
+* Unauthorized
+
+### Delete File ###
+**Request Parameters**
+
+* filename
+
+**Sample Case**
+
+```python
+
+ from ebrains_drive import BucketApiClient
+ client = BucketApiClient(token="ey...")
+ bucket = client.buckets.get_bucket("existing_collab_name")
+
+ file_handle = bucket.get_file("filename")
+ file_handle.delete()
+
+```
+
+**Return Type**
+
+None
+
+**Exceptions**
+
+* Unauthorized
+* DoesNotExist
+* AssertionError
diff --git a/ebrains_drive/files.py b/ebrains_drive/files.py
index e723246dbf..c6fef27f38 100644
--- a/ebrains_drive/files.py
+++ b/ebrains_drive/files.py
@@ -5,7 +5,7 @@
import time
from typing import Any, Dict
import requests
-from ebrains_drive.utils import querystr
+from ebrains_drive.utils import querystr, on_401_raise_unauthorized
# Note: only files and dirs with contents is assigned an ID; else their ID is set to all zeros
ZERO_OBJ_ID = '0000000000000000000000000000000000000000'
@@ -375,3 +375,10 @@ def get_content(self):
def from_json(cls, client, bucket, file_json: Dict[str, Any]):
return cls(client, bucket, **file_json)
+
+ @on_401_raise_unauthorized("Unauthorized")
+ def delete(self):
+ resp = self.client.delete(f"/v1/{self.bucket.target}/{self.bucket.dataproxy_entity_name}/{self.name}")
+ json_resp = resp.json()
+ assert "failures" in json_resp
+ assert len(json_resp.get("failures")) == 0
From e1c7925f3246e09ee28e8a58f8a3f78e4fc821f8 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Fri, 22 Jul 2022 12:28:10 +0200
Subject: [PATCH 10/13] bugfix: delete file
---
README.md | 2 +-
ebrains_drive/client.py | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index e570d5e786..94cbf53c64 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ Example Usage:
file_content = file_handle.get_content()
# delete a bucket (n.b. this will **NOT** delete the collab!)
- client.delete("new_bucket_name")
+ client.delete_bucket("new_bucket_name")
```
### Access datasets (e.g. HDG datasets)
diff --git a/ebrains_drive/client.py b/ebrains_drive/client.py
index c3a568ca42..d8ac377b88 100644
--- a/ebrains_drive/client.py
+++ b/ebrains_drive/client.py
@@ -62,7 +62,7 @@ def put(self, *args, **kwargs):
return self.send_request('PUT', *args, **kwargs)
def delete(self, *args, **kwargs):
- return self.send_request('delete', *args, **kwargs)
+ return self.send_request('DELETE', *args, **kwargs)
def send_request(self, method: str, url: str, *args, **kwargs):
if not url.startswith('http'):
@@ -154,7 +154,7 @@ def create_new(self, bucket_name: str, title=None, description="Created by ebrai
}, expected=201)
@on_401_raise_unauthorized("Failed. Note: BucketApiClient.create_new needs to have clb.drive:write as a part of scope.")
- def delete(self, bucket_name: str):
+ def delete_bucket(self, bucket_name: str):
self.send_request("DELETE", f"/v1/buckets/{bucket_name}")
From 081fa515b1e31713b17cf666cbd15d706aebd4ca Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Mon, 25 Jul 2022 16:52:00 +0200
Subject: [PATCH 11/13] add token expiration exception
---
ebrains_drive/client.py | 19 +++++++++++++++++--
ebrains_drive/exceptions.py | 2 ++
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/ebrains_drive/client.py b/ebrains_drive/client.py
index d8ac377b88..418e278acb 100644
--- a/ebrains_drive/client.py
+++ b/ebrains_drive/client.py
@@ -1,9 +1,11 @@
-import re
from getpass import getpass
import requests
from abc import ABC
+import base64
+import json
+import time
from ebrains_drive.utils import urljoin, on_401_raise_unauthorized
-from ebrains_drive.exceptions import ClientHttpError
+from ebrains_drive.exceptions import ClientHttpError, TokenExpired
from ebrains_drive.repos import Repos
from ebrains_drive.buckets import Buckets
from ebrains_drive.file import File
@@ -156,6 +158,19 @@ def create_new(self, bucket_name: str, title=None, description="Created by ebrai
@on_401_raise_unauthorized("Failed. Note: BucketApiClient.create_new needs to have clb.drive:write as a part of scope.")
def delete_bucket(self, bucket_name: str):
self.send_request("DELETE", f"/v1/buckets/{bucket_name}")
+
+ def send_request(self, method: str, url: str, *args, **kwargs):
+ hdr, info, sig = self._token.split('.')
+ info_json = base64.b64decode(info + '==').decode('utf-8')
+
+ # https://www.rfc-editor.org/rfc/rfc7519#section-2
+ exp_utc_seconds = json.loads(info_json).get('exp')
+ now_tc_seconds = time.time()
+
+ if now_tc_seconds > exp_utc_seconds:
+ raise TokenExpired
+
+ return super().send_request(method, url, *args, **kwargs)
class Groups(object):
diff --git a/ebrains_drive/exceptions.py b/ebrains_drive/exceptions.py
index cfbf9b16ec..a75331b4dc 100644
--- a/ebrains_drive/exceptions.py
+++ b/ebrains_drive/exceptions.py
@@ -34,3 +34,5 @@ def __str__(self):
return 'Unauthorized. This could be a result of either incorrect path or insufficient privilege. %s' % self.msg
class InvalidParameter(Exception): pass
+
+class TokenExpired(Exception): pass
From 5282c5f46e270a5b3b3ec618bf10cb2f35a3e081 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Tue, 26 Jul 2022 18:09:57 +0200
Subject: [PATCH 12/13] fix driveclient init
---
ebrains_drive/client.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/ebrains_drive/client.py b/ebrains_drive/client.py
index 418e278acb..ec8130d866 100644
--- a/ebrains_drive/client.py
+++ b/ebrains_drive/client.py
@@ -93,7 +93,7 @@ class DriveApiClient(ClientBase):
def __init__(self, username=None, password=None, token=None, env=""):
"""Wraps various basic operations to interact with seahub http api.
"""
- super().__init__(self, username, password, token, env)
+ super().__init__(username, password, token, env)
self._set_env(env)
self.server = self.drive_url
@@ -125,7 +125,6 @@ def send_request(self, method: str, url: str, *args, **kwargs):
url = urljoin(self.server, url)
return super().send_request(method, url, *args, **kwargs)
-
class BucketApiClient(ClientBase):
def __init__(self, username=None, password=None, token=None, env="") -> None:
From 395aa7c54306a580693317892b4c3c763c7525c8 Mon Sep 17 00:00:00 2001
From: Xiao Gui
Date: Thu, 28 Jul 2022 16:21:50 +0200
Subject: [PATCH 13/13] bugfix: set env prior to get token
---
ebrains_drive/client.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/ebrains_drive/client.py b/ebrains_drive/client.py
index ec8130d866..55659fe2a9 100644
--- a/ebrains_drive/client.py
+++ b/ebrains_drive/client.py
@@ -93,8 +93,8 @@ class DriveApiClient(ClientBase):
def __init__(self, username=None, password=None, token=None, env=""):
"""Wraps various basic operations to interact with seahub http api.
"""
- super().__init__(username, password, token, env)
self._set_env(env)
+ super().__init__(username, password, token, env)
self.server = self.drive_url
@@ -128,11 +128,12 @@ def send_request(self, method: str, url: str, *args, **kwargs):
class BucketApiClient(ClientBase):
def __init__(self, username=None, password=None, token=None, env="") -> None:
- super().__init__(username, password, token, env)
if env != "":
raise NotImplementedError("non prod environment for dataproxy access has not yet been implemented.")
-
self._set_env(env)
+
+ super().__init__(username, password, token, env)
+
self.server = "https://data-proxy.ebrains.eu/api"
self.buckets = Buckets(self)