Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
014b885
rebase packaging functions to shared module
nkrabben Apr 22, 2024
fb7ae66
remove now-shared functions
nkrabben Apr 22, 2024
0de76bd
genericize file moving, TODO tests
nkrabben Apr 22, 2024
e294b3e
minimal version of script
nkrabben Apr 22, 2024
f4e148f
refactor move functions to be cleaner
nkrabben Apr 22, 2024
51c30e4
add acq_dir creation
nkrabben Apr 22, 2024
df99b73
minimal working version to run on acquisition
nkrabben Apr 22, 2024
65d5344
formatting
nkrabben May 28, 2024
18177b5
tests and code to validate filesets
nkrabben May 28, 2024
6913a27
add more bagging functions, untested
nkrabben May 28, 2024
8c117d3
initial functioning bag and move, untested
nkrabben May 28, 2024
3afa237
formatting
nkrabben May 28, 2024
de7076a
add test files for package images
nkrabben May 28, 2024
8bbb8f0
clean up moving methods
nkrabben May 28, 2024
f523be2
make bag creation more generic
nkrabben May 29, 2024
1e8ae61
expand bag validation
nkrabben May 29, 2024
6192a15
adjust argument requirement
nkrabben May 29, 2024
37d30c7
fix validation to report result
nkrabben May 29, 2024
3378972
fix bug from refactor
nkrabben May 29, 2024
b4a9e8d
move more functions into base
nkrabben May 29, 2024
4dc3ec5
clean up
nkrabben May 29, 2024
76ead5e
fix acqid carrierid confusion
nkrabben May 29, 2024
ea29daa
first running package ft
nkrabben May 29, 2024
2e50139
add new tools to entrypoints
nkrabben May 30, 2024
850b7c7
improve rsync log handling
nkrabben May 31, 2024
22c697b
add new validation
nkrabben May 31, 2024
78b7582
fix typo
nkrabben May 31, 2024
878ea29
fix a bug from rsync hash choice
nkrabben May 31, 2024
4b7ea96
cleanup empty folders post packaging
nkrabben May 31, 2024
075f5ba
add rsync to package script
nkrabben May 31, 2024
7adbe71
move to single source folder for packaging
nkrabben Jun 20, 2024
4023841
formatting
nkrabben Jun 20, 2024
83b3729
fix bug with manifest new lines
nkrabben Jun 20, 2024
34b8137
catch when streams contains folders
nkrabben Jun 20, 2024
8293743
initial commit on report transfers
nkrabben Aug 12, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@ lxml = "^4.9.3"
bagit = "^1.8.1"

[tool.poetry.scripts]
report_ftk_extents = 'digarch_scripts.report_ftk_extents:main'
report_hdd_extents = 'digarch_scripts.report_hdd_extents:main'
package_cloud = 'ipres_package_cloud.package_cloud:main'
report_ftk_extents = 'digarch_scripts.report.report_ftk_extents:main'
report_hdd_extents = 'digarch_scripts.report.report_hdd_extents:main'
package_cloud = 'digarch_scripts.package.package_cloud:main'
package_images = 'digarch_scripts.package.package_images:main'
package_filetransfer = 'digarch_scripts.package.package_filetransfer:main'
transfer_rsync = 'digarch_scripts.transfer.transfer_rsync:main'

[tool.poetry.group.dev.dependencies]
nox = "^2023.4.22"
Expand Down
3 changes: 3 additions & 0 deletions rsync.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
2024/05/29 15:07:45 [46235] building file list
2024/05/29 15:07:45 [46235] , tests/fixtures/rsync/rsync_files/file.01, 3072, f075a8d6d4df7509d39a3140bbae9fcd
2024/05/29 15:07:45 [46235] sent 3206 bytes received 41 bytes total size 3072
80 changes: 47 additions & 33 deletions src/digarch_scripts/lint/lint_ft.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

LOGGER = logging.getLogger(__name__)


def _configure_logging(log_folder: Path):
log_fn = datetime.now().strftime("lint_%Y_%m_%d_%H_%M.log")
log_fpath = log_folder / log_fn
Expand All @@ -21,15 +22,14 @@ def _configure_logging(log_folder: Path):
encoding="utf-8",
)


def parse_args() -> argparse.Namespace:
"""Validate and return command-line args"""

def extant_dir(p):
path = Path(p)
if not path.is_dir():
raise argparse.ArgumentTypeError(
f'{path} does not exist'
)
raise argparse.ArgumentTypeError(f"{path} does not exist")
return path

def list_of_paths(p):
Expand All @@ -43,28 +43,21 @@ def list_of_paths(p):
parser = argparse.ArgumentParser()

parser.add_argument(
'--package',
type=extant_dir,
nargs='+',
dest='packages',
action='extend'
"--package", type=extant_dir, nargs="+", dest="packages", action="extend"
)
parser.add_argument(
'--directory',
type=list_of_paths,
dest='packages',
action='extend'
"--directory", type=list_of_paths, dest="packages", action="extend"
)
parser.add_argument(
'--log_folder',
help='''Optional. Designate where to save the log file,
or it will be saved in current directory''',
default='.'
"--log_folder",
help="""Optional. Designate where to save the log file,
or it will be saved in current directory""",
default=".",
)


return parser.parse_args()


def package_has_valid_name(package: Path) -> bool:
"""Top level folder name has to conform to ACQ_####_######"""
folder_name = package.name
Expand All @@ -76,15 +69,17 @@ def package_has_valid_name(package: Path) -> bool:
LOGGER.error(f"{folder_name} does not conform to ACQ_####_######")
return False


def package_has_two_subfolders(package: Path) -> bool:
    """Check that the package contains exactly two immediate subfolders.

    Only direct children of ``package`` that are directories are counted;
    files sitting at the top level are ignored by this check.

    Args:
        package: Path to the package folder being linted.

    Returns:
        True when exactly two subfolders are present. Otherwise an error is
        logged and False is returned.
    """
    subfolders = [entry for entry in package.iterdir() if entry.is_dir()]
    if len(subfolders) != 2:
        LOGGER.error(f"{package} does not have exactly two subfolders")
        return False
    return True


def package_has_valid_subfolder_names(package: Path) -> bool:
"""Second level folders must be objects and metadata folder"""
expected = set(["objects", "metadata"])
Expand All @@ -98,6 +93,7 @@ def package_has_valid_subfolder_names(package: Path) -> bool:
)
return False


def package_has_no_hidden_file(package: Path) -> bool:
"""The package should not have any hidden file"""
hidden_ls = [
Expand All @@ -111,17 +107,19 @@ def package_has_no_hidden_file(package: Path) -> bool:
else:
return True


def package_has_no_zero_bytes_file(package: Path) -> bool:
    """Check that no file anywhere inside the package is zero bytes long.

    The package is walked recursively; directories are skipped and only
    regular files are inspected.

    Args:
        package: Path to the package folder being linted.

    Returns:
        True when every file has a non-zero size. Otherwise a warning listing
        the empty files is logged and False is returned.
    """
    # One filtered pass: collect only the files whose size is zero.
    zero_bytes_ls = [
        f for f in package.rglob("*") if f.is_file() and f.stat().st_size == 0
    ]

    if zero_bytes_ls:
        LOGGER.warning(f"{package.name} has zero bytes file {zero_bytes_ls}")
        return False
    return True


def metadata_folder_is_flat(package: Path) -> bool:
"""The metadata folder should not have folder structure"""
metadata_path = package / "metadata"
Expand All @@ -132,40 +130,49 @@ def metadata_folder_is_flat(package: Path) -> bool:
else:
return True


def metadata_folder_has_files(package: Path) -> bool:
    """Check that the package's ``metadata`` folder holds at least one file.

    The ``metadata`` folder is searched recursively, so a file nested in a
    subfolder of ``metadata`` also satisfies this check.

    Args:
        package: Path to the package folder being linted.

    Returns:
        True when at least one file is found. Otherwise a warning is logged
        and False is returned.
    """
    metadata_path = package / "metadata"
    # any() stops at the first file instead of listing the whole tree.
    if any(entry.is_file() for entry in metadata_path.rglob("*")):
        return True

    LOGGER.warning(f"{package.name} metadata folder does not have any files")
    return False


def metadata_has_correct_naming_convention(package: Path) -> bool:
    """Check that every file under ``metadata`` has an accepted file name.

    The ``metadata`` folder is searched recursively and each file's name
    (not its full path) is compared against the accepted list, which
    currently contains only ``rclone.log``.

    Args:
        package: Path to the package folder being linted.

    Returns:
        True when all metadata files have accepted names (including when
        there are no files at all). Otherwise an error listing the
        nonconforming files is logged and False is returned.
    """
    metadata_path = package / "metadata"
    accepted_fn = ["rclone.log"]

    nonconforming = [
        file
        for file in metadata_path.rglob("*")
        if file.is_file() and file.name not in accepted_fn
    ]

    if nonconforming:
        LOGGER.error(
            f"{package.name} has nonconforming metadata file(s):\n"
            f"{nonconforming}"
        )
        return False
    return True


def objects_folder_correct_structure(package: Path) -> bool:
"""objects folder should have a data folder, which includes four files:
bag-info.txt, bagit.txt, manifest-md5.txt and tagmanifest-md5.txt"""
expected_paths = []
expected_files = ["bag-info.txt", "bagit.txt",
"manifest-md5.txt", "tagmanifest-md5.txt"]
expected_files = [
"bag-info.txt",
"bagit.txt",
"manifest-md5.txt",
"tagmanifest-md5.txt",
]
missing = []

data_folder = package / "objects" / "data"
Expand All @@ -180,16 +187,19 @@ def objects_folder_correct_structure(package: Path) -> bool:
missing.append(fp.name)

if missing:
LOGGER.error(f"""{package.name} has incorrect structure.
missing {missing}""")
LOGGER.error(
f"""{package.name} has incorrect structure.
missing {missing}"""
)
return False
else:
return True


def objects_folder_has_no_empty_folder(package: Path) -> bool:
"""The objects folder should not have any empty folders"""
objects_path = package / "objects"
folder_in_obj = [ x for x in objects_path.rglob("*") if x.is_dir() ]
folder_in_obj = [x for x in objects_path.rglob("*") if x.is_dir()]
empty = []

for folder in folder_in_obj:
Expand All @@ -202,14 +212,15 @@ def objects_folder_has_no_empty_folder(package: Path) -> bool:
else:
return True


def lint_package(package: Path) -> Literal["valide", "invalide", "needs review"]:
"""Run all linting tests against a package"""
result = "valid"

less_strict_tests = [
package_has_no_hidden_file,
package_has_no_zero_bytes_file,
metadata_folder_has_files
metadata_folder_has_files,
]

for test in less_strict_tests:
Expand All @@ -223,7 +234,7 @@ def lint_package(package: Path) -> Literal["valide", "invalide", "needs review"]
metadata_folder_is_flat,
metadata_has_correct_naming_convention,
objects_folder_correct_structure,
objects_folder_has_no_empty_folder
objects_folder_has_no_empty_folder,
]

for test in strict_tests:
Expand All @@ -232,6 +243,7 @@ def lint_package(package: Path) -> Literal["valide", "invalide", "needs review"]

return result


def main():
args = parse_args()
_configure_logging(args.log_folder)
Expand Down Expand Up @@ -266,7 +278,9 @@ def main():
print(
f"""
The following {len(needs_review)} packages need review.
They may be passed without change after review: {needs_review}""")
They may be passed without change after review: {needs_review}"""
)


# Run the linter only when this module is executed as a script; a single
# call keeps it from running twice or firing on import.
if __name__ == "__main__":
    main()
Loading