@classmethod
def from_list(cls, entry_list: list[list[str]]):
    """
    Build a log object from rows that are already in memory.

    :param entry_list:
        Rows of string cells. They are stored on the new object as
        ``_entry_list`` and handed to ``_entries_from_entry_list``, which
        reads the first row as the header.
    :return:
        A new instance of ``cls`` with ``_entry_list`` and ``entries`` set.
    """
    # Construct without arguments, then fill in the state by hand.
    log = cls()
    log._entry_list = entry_list

    parsed: dict[str, LogEntry] = log._entries_from_entry_list(log._entry_list)
    log.entries = parsed
    return log
+ changes_were_made = True + if self.output_log_file.exists(): + warnings.warn( + "Repo version has missing project repo branch_name field." + "Updating log.tsv." + ) + self.output_repo._add_branch_name_to_log() if SimpleSpec("<0.1.7").match(current_version): changes_were_made = True if self.output_repo.output_log.n_entries > 0: @@ -884,26 +903,21 @@ def _update_version(self, metadata, cadetrdm_version): "Repo version has outdated options hashes. " "Updating option hashes in output log.tsv." ) - self.output_repo.update_log_hashes() - + self.output_repo._update_log_hashes() if changes_were_made: - print(f"Repo version {metadata['cadet_rdm_version']} was outdated. " - f"Current CADET-RDM version is {cadetrdm.__version__}.\n Repo has been updated") + print( + f"Repo version {metadata['cadet_rdm_version']} was outdated. " + f"Current CADET-RDM version is {cadetrdm.__version__}.\n" + "Repo has been updated." + ) metadata["cadet_rdm_version"] = cadetrdm_version with open(self.data_json_path, "w", encoding="utf-8") as f: json.dump(metadata, f, indent=2) self.add(self.data_json_path) - self.commit("update cadetrdm version", add_all=False) - - def fix_gitattributes_log_tsv(self): - file = self.output_path / ".gitattributes" - with open(file, encoding="utf-8") as handle: - lines = handle.readlines() - lines = [line.replace("rdm-log.tsv", "log.tsv") for line in lines] - with open(file, "w", encoding="utf-8") as handle: - handle.writelines(lines) - self.output_repo.add(".gitattributes") - self.output_repo.commit("Update gitattributes") + self.commit( + f"Update CADET-RDM version to {cadetrdm_version}", + add_all=False + ) def _clone_output_repo(self, multi_options: List[str] = None): metadata = self.load_metadata() @@ -1031,57 +1045,6 @@ def output_log_file(self): def output_log(self): return self.output_repo.output_log - def _expand_tsv_header(self): - if not self.output_log_file.exists(): - return - - with open(self.output_log_file, "r", encoding="utf-8") as f: - lines = 
f.readlines() - - new_header = [ - "Output repo commit message", - "Output repo branch", - "Output repo commit hash", - "Project repo commit hash", - "Project repo folder name", - "Project repo remotes", - "Python sys args", - "Tags", - "Options hash", ] - with open(self.output_log_file, "w", encoding="utf-8") as f: - f.writelines(["\t".join(new_header) + "\n"]) - f.writelines(lines[1:]) - - self.output_repo.add(self.output_log_file) - self.output_repo.commit("Update tsv header", add_all=False) - - def _convert_csv_to_tsv_if_necessary(self): - """ - If not tsv log is found AND a csv log is found, convert the csv to tsv. - - :return: - """ - - if self.output_log_file.exists(): - return - - csv_filepath = self.path / self._output_folder / "log.csv" - if not csv_filepath.exists(): - # We have just initialized the repo and neither tsv nor csv exist. - return - - with open(csv_filepath, encoding="utf-8") as csv_handle: - csv_lines = csv_handle.readlines() - - tsv_lines = [line.replace(",", "\t") for line in csv_lines] - - with open(self.output_log_file, "w", encoding="utf-8") as f: - f.writelines(tsv_lines) - - write_lines_to_file(path=self.path / ".gitattributes", - lines=["rdm-log.tsv merge=union"], - open_type="a") - def update_output_main_logs(self, output_dict: dict = None): """ Dumps all the metadata information about the project repositories state and @@ -1110,6 +1073,7 @@ def update_output_main_logs(self, output_dict: dict = None): output_repo_commit_message=output_commit_message, output_repo_branch=output_branch_name, output_repo_commit_hash=output_repo_hash, + project_repo_branch=str(self.active_branch), project_repo_commit_hash=str(self.head.commit), project_repo_folder_name=self.path.name, project_repo_remotes=self.remote_urls, @@ -1520,7 +1484,112 @@ def output_log(self): self.checkout(self.main_branch) return OutputLog(filepath=self.output_log_file_path) - def update_log_hashes(self): + def print_output_log(self): + self.checkout(self.main_branch) + + 
def _add_branch_name_to_log(self) -> None:
    """
    Add a ``project_repo_branch`` column to the log TSV file.

    The main branch is checked out first. For every existing row the new
    value is the third underscore-separated token of that row's
    ``output_repo_branch`` field, or an empty string when the field has
    fewer than three tokens. The column is inserted at index 3 of the
    header, the file is rewritten, ``log.tsv`` is added and the change is
    committed.

    Nothing is written or committed when the log file does not exist, has
    no header line, or already contains the column.

    :raises KeyError:
        If the file has data rows but no ``output_repo_branch`` column.
    :return:
    """
    # Imported locally so the method does not depend on a module-level
    # `import csv`, which is not visible from this part of the file.
    import csv

    new_column = "project_repo_branch"

    self.checkout(self.main_branch)

    # Same guard as the other header migrations: no log, nothing to migrate.
    if not self.output_log_file_path.exists():
        return

    # newline="" lets the csv module handle line endings itself.
    with open(self.output_log_file_path, "r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")
        # Take the header from the reader rather than from the first data
        # row, so a log that only has a header line is handled too.
        fieldnames = list(reader.fieldnames or [])
        rows = list(reader)

    if not fieldnames or new_column in fieldnames:
        return

    for row in rows:
        # Short rows yield None for missing cells; treat that as empty.
        name_parts = (row["output_repo_branch"] or "").split("_")
        # NOTE(review): a project branch whose own name contains "_" is cut
        # at its first underscore here - confirm against the naming scheme
        # used for output branches.
        row[new_column] = name_parts[2] if len(name_parts) > 2 else ""

    # Insert the new column at position 3
    fieldnames.insert(3, new_column)

    # Write updated data back to file
    with open(self.output_log_file_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter="\t")
        writer.writeheader()
        writer.writerows(rows)

    self.add("log.tsv")
    self.commit(message="Add project_repo_branch_name to log.tsv")