Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions cadetrdm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
__version__ = "1.0.1"

__version__ = "1.1.0"

from cadetrdm.conda_env_utils import prepare_conda_env
from cadetrdm.options import Options
Expand Down
29 changes: 21 additions & 8 deletions cadetrdm/logging.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@
import csv
import os
from pathlib import Path
from typing import Dict, List

from tabulate import tabulate

from cadetrdm.environment import Environment


class LogEntry:
def __init__(self, output_repo_commit_message, output_repo_branch, output_repo_commit_hash,
project_repo_commit_hash, project_repo_folder_name, project_repo_remotes, python_sys_args, tags,
options_hash, filepath, **kwargs):
def __init__(
self,
output_repo_commit_message: str,
output_repo_branch: str,
output_repo_commit_hash: str,
project_repo_branch: str,
project_repo_commit_hash: str,
project_repo_folder_name: str,
project_repo_remotes: str,
python_sys_args: str,
tags: str,
options_hash: str,
filepath: os.PathLike,
**kwargs
):
self.output_repo_commit_message = output_repo_commit_message
self.output_repo_branch = output_repo_branch
self.output_repo_commit_hash = output_repo_commit_hash
self.project_repo_branch = project_repo_branch
self.project_repo_commit_hash = project_repo_commit_hash
self.project_repo_folder_name = project_repo_folder_name
self.project_repo_remotes = project_repo_remotes
Expand Down Expand Up @@ -117,21 +130,21 @@ def __init__(self, filepath=None):
return

self._entry_list = self._read_file(filepath)
self.entries: Dict[str, LogEntry] = self._entries_from_entry_list(self._entry_list)
self.entries: dict[str, LogEntry] = self._entries_from_entry_list(self._entry_list)

@property
def n_entries(self) -> int:
    """int: Number of log entries currently held in ``self.entries``."""
    entry_count = len(self.entries)
    return entry_count

@classmethod
def from_list(cls, entry_list: List[List[str]]):
def from_list(cls, entry_list: list[list[str]]):
instance = cls()
instance._entry_list = entry_list
instance.entries: Dict[str, LogEntry] = instance._entries_from_entry_list(instance._entry_list)
instance.entries: dict[str, LogEntry] = instance._entries_from_entry_list(instance._entry_list)
return instance

def _entries_from_entry_list(self, entry_list) -> Dict[str, LogEntry]:
def _entries_from_entry_list(self, entry_list) -> dict[str, LogEntry]:
header = self._convert_header(entry_list[0])
if len(header) < 9:
header.append("options_hash")
Expand Down
254 changes: 168 additions & 86 deletions cadetrdm/repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,44 +866,58 @@ def _update_version(self, metadata, cadetrdm_version):

if SimpleSpec("<0.0.9").match(current_version):
changes_were_made = True
self._convert_csv_to_tsv_if_necessary()
self.output_repo._convert_csv_to_tsv_if_necessary()
self._add_jupytext_file(self.path)
if SimpleSpec("<0.0.24").match(current_version):
changes_were_made = True
self._expand_tsv_header()
self.output_repo._expand_tsv_header()
output_remotes_path = self.path / "output_remotes.json"
delete_path(output_remotes_path)
self.add(output_remotes_path)
if SimpleSpec("<=0.0.34").match(current_version):
changes_were_made = True
if self.output_log_file.exists():
warnings.warn(
"Repo version has outdated headers."
"Updating log.tsv."
)
self.output_repo._update_headers()
if SimpleSpec("<0.0.34").match(current_version):
changes_were_made = True
self.fix_gitattributes_log_tsv()
self.output_repo._fix_gitattributes_log_tsv()
if SimpleSpec("<1.1.0").match(current_version):
# Note: this needs to be performed before updating the hashes; otherwise,
# instantiating an `OutputLog` will crash because the
# `project_repo_branch` attribute is missing.
changes_were_made = True
if self.output_log_file.exists():
warnings.warn(
"Repo version has missing project repo branch_name field."
"Updating log.tsv."
)
self.output_repo._add_branch_name_to_log()
if SimpleSpec("<0.1.7").match(current_version):
changes_were_made = True
if self.output_repo.output_log.n_entries > 0:
warnings.warn(
"Repo version has outdated options hashes. "
"Updating option hashes in output log.tsv."
)
self.output_repo.update_log_hashes()

self.output_repo._update_log_hashes()
if changes_were_made:
print(f"Repo version {metadata['cadet_rdm_version']} was outdated. "
f"Current CADET-RDM version is {cadetrdm.__version__}.\n Repo has been updated")
print(
f"Repo version {metadata['cadet_rdm_version']} was outdated. "
f"Current CADET-RDM version is {cadetrdm.__version__}.\n"
"Repo has been updated."
)
metadata["cadet_rdm_version"] = cadetrdm_version
with open(self.data_json_path, "w", encoding="utf-8") as f:
json.dump(metadata, f, indent=2)
self.add(self.data_json_path)
self.commit("update cadetrdm version", add_all=False)

def fix_gitattributes_log_tsv(self):
file = self.output_path / ".gitattributes"
with open(file, encoding="utf-8") as handle:
lines = handle.readlines()
lines = [line.replace("rdm-log.tsv", "log.tsv") for line in lines]
with open(file, "w", encoding="utf-8") as handle:
handle.writelines(lines)
self.output_repo.add(".gitattributes")
self.output_repo.commit("Update gitattributes")
self.commit(
f"Update CADET-RDM version to {cadetrdm_version}",
add_all=False
)

def _clone_output_repo(self, multi_options: List[str] = None):
metadata = self.load_metadata()
Expand Down Expand Up @@ -1031,57 +1045,6 @@ def output_log_file(self):
def output_log(self):
return self.output_repo.output_log

def _expand_tsv_header(self):
if not self.output_log_file.exists():
return

with open(self.output_log_file, "r", encoding="utf-8") as f:
lines = f.readlines()

new_header = [
"Output repo commit message",
"Output repo branch",
"Output repo commit hash",
"Project repo commit hash",
"Project repo folder name",
"Project repo remotes",
"Python sys args",
"Tags",
"Options hash", ]
with open(self.output_log_file, "w", encoding="utf-8") as f:
f.writelines(["\t".join(new_header) + "\n"])
f.writelines(lines[1:])

self.output_repo.add(self.output_log_file)
self.output_repo.commit("Update tsv header", add_all=False)

def _convert_csv_to_tsv_if_necessary(self):
"""
If not tsv log is found AND a csv log is found, convert the csv to tsv.

:return:
"""

if self.output_log_file.exists():
return

csv_filepath = self.path / self._output_folder / "log.csv"
if not csv_filepath.exists():
# We have just initialized the repo and neither tsv nor csv exist.
return

with open(csv_filepath, encoding="utf-8") as csv_handle:
csv_lines = csv_handle.readlines()

tsv_lines = [line.replace(",", "\t") for line in csv_lines]

with open(self.output_log_file, "w", encoding="utf-8") as f:
f.writelines(tsv_lines)

write_lines_to_file(path=self.path / ".gitattributes",
lines=["rdm-log.tsv merge=union"],
open_type="a")

def update_output_main_logs(self, output_dict: dict = None):
"""
Dumps all the metadata information about the project repositories state and
Expand Down Expand Up @@ -1110,6 +1073,7 @@ def update_output_main_logs(self, output_dict: dict = None):
output_repo_commit_message=output_commit_message,
output_repo_branch=output_branch_name,
output_repo_commit_hash=output_repo_hash,
project_repo_branch=str(self.active_branch),
project_repo_commit_hash=str(self.head.commit),
project_repo_folder_name=self.path.name,
project_repo_remotes=self.remote_urls,
Expand Down Expand Up @@ -1520,7 +1484,112 @@ def output_log(self):
self.checkout(self.main_branch)
return OutputLog(filepath=self.output_log_file_path)

def update_log_hashes(self):
def print_output_log(self):
    """Print the output log and switch back to the most recent branch.

    ``self.main_branch`` is checked out before ``self.output_log`` is read
    and printed; afterwards ``self._most_recent_branch`` is checked out.
    """
    self.checkout(self.main_branch)
    print(self.output_log)
    self.checkout(self._most_recent_branch)

def add_filetype_to_lfs(self, file_type):
    """
    Register ``file_type`` for GIT-LFS tracking and commit the change.

    :param file_type:
        Wildcard formatted string. Examples: "*.png" or "*.xlsx"
    :return:
    """
    lfs_patterns = [file_type]
    init_lfs(lfs_filetypes=lfs_patterns, path=self.path)
    self.add_all_files()
    commit_message = f"Add {file_type} to lfs"
    self.commit(commit_message)

def _convert_csv_to_tsv_if_necessary(self) -> None:
    """Create the tsv log from a legacy ``log.csv`` if only the csv exists.

    Nothing happens when the tsv log is already present or when there is no
    ``log.csv`` in ``self.path``. Otherwise every comma of the csv content
    is replaced by a tab, the result is written to
    ``self.output_log_file_path``, and the line
    ``rdm-log.tsv merge=union`` is passed to ``write_lines_to_file`` for
    ``.gitattributes`` with ``open_type="a"``.
    """
    tsv_path = self.output_log_file_path
    if tsv_path.exists():
        return

    legacy_csv = self.path / "log.csv"
    if not legacy_csv.exists():
        return

    with open(legacy_csv, encoding="utf-8") as source:
        converted = source.read().replace(",", "\t")

    with open(tsv_path, "w", encoding="utf-8") as target:
        target.write(converted)

    gitattributes_path = self.path / ".gitattributes"
    write_lines_to_file(
        path=gitattributes_path,
        lines=["rdm-log.tsv merge=union"],
        open_type="a"
    )

def _expand_tsv_header(self):
    """Replace the first line of the tsv log with the nine-column header.

    All lines after the first are kept unchanged. The rewritten file is
    added and committed with ``add_all=False``. Returns immediately when
    the log file does not exist.
    """
    log_path = self.output_log_file_path
    if not log_path.exists():
        return

    columns = (
        "Output repo commit message",
        "Output repo branch",
        "Output repo commit hash",
        "Project repo commit hash",
        "Project repo folder name",
        "Project repo remotes",
        "Python sys args",
        "Tags",
        "Options hash",
    )

    with open(log_path, "r", encoding="utf-8") as source:
        source.readline()  # drop the previous header line
        remaining = source.read()

    with open(log_path, "w", encoding="utf-8") as target:
        target.write("\t".join(columns) + "\n")
        target.write(remaining)

    self.add(log_path)
    self.commit("Update tsv header", add_all=False)

def _update_headers(self):
    """Rewrite the tsv log header using snake_case column names.

    Only the first line of the file is replaced; every following line is
    written back unchanged. The file is then added and committed with
    ``add_all=False``. Returns immediately when the log file does not exist.
    """
    log_file = self.output_log_file_path
    if not log_file.exists():
        return

    snake_case_columns = (
        "output_repo_commit_message",
        "output_repo_branch",
        "output_repo_commit_hash",
        "project_repo_commit_hash",
        "project_repo_folder_name",
        "project_repo_remotes",
        "python_sys_args",
        "tags",
        "options_hash",
    )
    header_line = "\t".join(snake_case_columns) + "\n"

    with open(log_file, "r", encoding="utf-8") as reader:
        entries = reader.readlines()[1:]

    with open(log_file, "w", encoding="utf-8") as writer:
        writer.writelines([header_line, *entries])

    self.add(log_file)
    self.commit("Update tsv header", add_all=False)

def _fix_gitattributes_log_tsv(self):
    """Rename ``rdm-log.tsv`` to ``log.tsv`` inside ``.gitattributes``.

    Every occurrence of the old logfile name in ``self.path / ".gitattributes"``
    is replaced, the file is written back, and the change is added and
    committed with ``add_all=False``.
    """
    gitattributes_path = self.path / ".gitattributes"

    with open(gitattributes_path, encoding="utf-8") as source:
        content = source.read()

    # The pattern contains no newline, so replacing on the whole text is the
    # same as replacing line by line.
    updated = content.replace("rdm-log.tsv", "log.tsv")

    with open(gitattributes_path, "w", encoding="utf-8") as target:
        target.write(updated)

    self.add(".gitattributes")
    self.commit("Update .gitattributes", add_all=False)

def _update_log_hashes(self):
if self.has_uncomitted_changes:
self._reset_hard_to_head(force_entry=True)
if not self.active_branch == self.main_branch:
Expand All @@ -1543,25 +1612,38 @@ def update_log_hashes(self):
log.write()
self.commit(message="Updated log hashes", add_all=True)

def print_output_log(self):
self.checkout(self.main_branch)

output_log = self.output_log
print(output_log)

self.checkout(self._most_recent_branch)

def add_filetype_to_lfs(self, file_type):
def _add_branch_name_to_log(self) -> None:
    """
    Update the TSV file by adding a 'project_repo_branch' column.

    The log is read after checking out ``self.main_branch``. For every row,
    the branch name is taken from the third underscore-separated part of the
    'output_repo_branch' field; rows whose field is missing or has fewer
    than three parts get an empty string. The new column is inserted at
    index 3 of the header, then the file is rewritten, added and committed.

    Nothing is written or committed if the log file does not exist, has no
    header line, or already contains the 'project_repo_branch' column.
    """
    column = "project_repo_branch"
    self.checkout(self.main_branch)

    log_path = self.output_log_file_path
    # Same guard as the other log migrations in this class; without it a
    # missing log.tsv raises FileNotFoundError.
    if not log_path.exists():
        return

    # newline="" leaves line-ending handling to the csv module.
    with open(log_path, "r", encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")
        # Read the header from the reader rather than from the first row, so
        # a log with a header but no entries does not raise IndexError.
        fieldnames = list(reader.fieldnames or [])
        rows = list(reader)

    if not fieldnames or column in fieldnames:
        return

    for row in rows:
        # NOTE(review): a project branch name that itself contains "_" is
        # truncated at its first underscore here - confirm against the
        # output branch naming scheme.
        parts = (row.get("output_repo_branch") or "").split("_")
        row[column] = parts[2] if len(parts) > 2 else ""

    # Insert the new column at position 3
    fieldnames.insert(3, column)

    # Write updated data back to file
    with open(log_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter="\t")
        writer.writeheader()
        writer.writerows(rows)

    self.add("log.tsv")
    self.commit(message="Add project_repo_branch_name to log.tsv")


class JupyterInterfaceRepo(ProjectRepo):
Expand Down