From 40fbc978cf9a9ecdd8262bd1ce24e5698d29636e Mon Sep 17 00:00:00 2001 From: Melvin Koh Date: Wed, 5 Feb 2025 15:31:35 +0800 Subject: [PATCH 1/4] Add additional checks on project owner, because listing projects in CDSW includes other users, hence the API can return multiple projects with same name but different owners/creators. --- cmlutils/projects.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmlutils/projects.py b/cmlutils/projects.py index 11d83c1..ca9417a 100644 --- a/cmlutils/projects.py +++ b/cmlutils/projects.py @@ -358,7 +358,10 @@ def get_creator_username(self): if project_list: for project in project_list: - if project["name"] == self.project_name: + # If exporting from CDSW, it is possible that project lists can contain other users' projects, + # so there could be projects that has the same name but belong to other users. To ensure that + # we identify the correct project, we need to compare the project owner's name too. + if project["name"] == self.project_name and project["owner"]["username"] == self.username: if project["owner"]["type"] == constants.ORGANIZATION_TYPE: return ( project["owner"]["username"], From dd2b30c1d4a1f57bdf3e7412d8d7c9975067bd3e Mon Sep 17 00:00:00 2001 From: Melvin Koh Date: Fri, 7 Feb 2025 00:31:24 +0800 Subject: [PATCH 2/4] Add a variable to store the generated apiv2 key for repeated use, instead of generating a new key everything the apiv2 key is used --- cmlutils/base.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cmlutils/base.py b/cmlutils/base.py index 9d52973..f8cb8e0 100644 --- a/cmlutils/base.py +++ b/cmlutils/base.py @@ -9,6 +9,9 @@ class BaseWorkspaceInteractor(object): + # use a variable to store the apiv2 key for repeated use instead + _apiv2_key = None + def __init__( self, host: str, @@ -27,6 +30,9 @@ def __init__( @property def apiv2_key(self) -> str: + if self._apiv2_key: + return self._apiv2_key + endpoint = Template(ApiV1Endpoints.API_KEY.value).substitute( username=self.username ) @@ -44,8 +50,8 @@ def apiv2_key(self) -> str: ca_path=self.ca_path, ) response_dict = response.json() - _apiv2_key = response_dict["apiKey"] - return _apiv2_key + self._apiv2_key = response_dict["apiKey"] + return self._apiv2_key def remove_cdswctl_dir(self, file_path: str): if os.path.exists(file_path): From 7ebf3607b4ebb25c6c85cf780a6f28d36b105c5e Mon Sep 17 00:00:00 2001 From: Melvin Koh Date: Fri, 7 Feb 2025 09:32:11 +0800 Subject: [PATCH 3/4] DSE-37305: Fixed projects import where cmlutil was trying to generate apiv2 key using a team's username which result in 500 server error. --- cmlutils/project_entrypoint.py | 25 ++++++++++++++----------- cmlutils/projects.py | 16 +++++++++++++--- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/cmlutils/project_entrypoint.py b/cmlutils/project_entrypoint.py index a5e0ce6..4dedb4e 100644 --- a/cmlutils/project_entrypoint.py +++ b/cmlutils/project_entrypoint.py @@ -274,7 +274,12 @@ def project_import_cmd(project_name, verify): uses_engine = True project_metadata.pop("default_project_engine_type", None) - project_id = p.check_project_exist(project_metadata["name"]) + # check if the project to be imported is a team's project + if "team_name" in project_metadata: + project_id = p.check_project_exist(project_metadata["name"], project_metadata["team_name"]) + else: + project_id = p.check_project_exist(project_metadata["name"]) + if project_id == None: logging.info( "Creating project %s to migrate files and metadata.", project_name @@ -288,22 +293,20 @@ def project_import_cmd(project_name, verify): if "team_name" in project_metadata: username = project_metadata["team_name"] creator_username, project_slug = p.get_creator_username() - pimport = ProjectImporter( - host=url, - username=username, - project_name=project_name, - api_key=apiv1_key, - top_level_dir=local_directory, - ca_path=ca_path, - project_slug=project_slug, - ) + + # reuse the ProjectImporter obj since it already generated the apiv2 key + # this fixed the bug of team projects import where cmlutil was trying to + # generate apiv2 key using the team's username + pimport = p + pimport.username = username + pimport.project_slug = project_slug + start_time = time.time() if verify: import_diff_file_list=pimport.transfer_project(log_filedir=log_filedir, verify=True) else: pimport.transfer_project(log_filedir=log_filedir) - if uses_engine: proj_patch_metadata = {"default_project_engine_type": "legacy_engine"} pimport.convert_project_to_engine_based( diff --git a/cmlutils/projects.py b/cmlutils/projects.py index ca9417a..ecf98a1 100644 --- a/cmlutils/projects.py +++ b/cmlutils/projects.py @@ -358,7 +358,7 @@ def get_creator_username(self): if project_list: for project in project_list: - # If exporting from CDSW, it is possible that project lists can contain other users' projects, + # It is possible that project lists can contain other users' public projects, or team's projects # so there could be projects that has the same name but belong to other users. To ensure that # we identify the correct project, we need to compare the project owner's name too. if project["name"] == self.project_name and project["owner"]["username"] == self.username: @@ -1246,7 +1246,7 @@ def get_all_runtimes_v2(self, page_token=""): return result_list return None - def check_project_exist(self, project_name: str) -> str: + def check_project_exist(self, project_name: str, team_name: str = None) -> str: try: search_option = {"name": project_name} encoded_option = urllib.parse.quote( @@ -1263,9 +1263,19 @@ def check_project_exist(self, project_name: str) -> str: ca_path=self.ca_path, ) project_list = response.json()["projects"] + + # If the project is a team's project, then the owner of the project is the team + if team_name: + owner = team_name + else: + owner = self.username + + # It is possible that project lists can contain other users' public projects, or team's projects + # so there could be projects that has the same name but belong to other users. To ensure that + # we identify the correct project, we need to compare the project owner's name too. if project_list: for project in project_list: - if project["name"] == project_name: + if project["name"] == project_name and project["owner"]["username"] == owner: return project["id"] return None except KeyError as e: From c5156eb24e35816d94cb1df2960d0ee0fef589f5 Mon Sep 17 00:00:00 2001 From: Melvin Koh Date: Tue, 11 Feb 2025 00:25:16 +0800 Subject: [PATCH 4/4] Fixed issue with project export/import if the project name has back slashes --- cmlutils/projects.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cmlutils/projects.py b/cmlutils/projects.py index ecf98a1..bb81767 100644 --- a/cmlutils/projects.py +++ b/cmlutils/projects.py @@ -324,9 +324,11 @@ def get_creator_username(self): # Handle Pagination if exists while next_page_exists: # Note - projectName param makes LIKE query not the exact match + # If projectname has back slashes, it need to be escaped before passing to the API + escaped_project_name = self.project_name.replace("\\", "\\\\") endpoint = Template(ApiV1Endpoints.PROJECTS_SUMMARY.value).substitute( username=self.username, - projectName=self.project_name, + projectName=escaped_project_name, limit=constants.MAX_API_PAGE_LENGTH, offset=offset * constants.MAX_API_PAGE_LENGTH, ) @@ -930,9 +932,11 @@ def get_creator_username(self): # Handle Pagination if exists while next_page_exists: # Note - projectName param makes LIKE query not the exact match + # If projectname has back slashes, it need to be escaped before passing to the API + escaped_project_name = self.project_name.replace("\\", "\\\\") endpoint = Template(ApiV1Endpoints.PROJECTS_SUMMARY.value).substitute( username=self.username, - projectName=self.project_name, + projectName=escaped_project_name, limit=constants.MAX_API_PAGE_LENGTH, offset=offset * constants.MAX_API_PAGE_LENGTH, )