From 3a11c32cfd3c0f81cb92b30fcacaeeb3801847fc Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 27 Jan 2026 16:56:53 +0100 Subject: [PATCH 01/12] Start developing check-samples: implement 'stats' subcommand --- scripts/check-samples.py | 117 +++++++++++++++++++++++++++++++++++++++ setup.py | 1 + 2 files changed, 118 insertions(+) create mode 100644 scripts/check-samples.py diff --git a/scripts/check-samples.py b/scripts/check-samples.py new file mode 100644 index 0000000..1905027 --- /dev/null +++ b/scripts/check-samples.py @@ -0,0 +1,117 @@ +#! python +"""Check Samples and help upgrading icat.server to 7.0 + +This script is supposed to run various checks and maintenance tasks on +Samples in an ICAT server. It is mostly aimed at assisting an upgrade +of icat.server to version 7.0 which requires the Sample.pid attribute +to be populated with unique non-null values. +""" + +import logging +import re +import icat +import icat.config +from icat.query import Query + +logging.NOTICE = logging.INFO + 5 +logging.addLevelName(logging.NOTICE, "NOTICE") +logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") +logger = logging.getLogger(__name__) + +# ============================= helper =============================== + +def get_pid_prefixes(client): + prefixes = set() + query = Query(client, "Sample", conditions={ + "pid": "LIKE '%:%'" + }, attributes=["pid"], aggregate="DISTINCT") + for pid in client.searchChunked(query): + p, _ = pid.split(':', maxsplit=1) + prefixes.add(p) + return prefixes + +def find_potential_upgrade_conflicts(client, prefix): + auto_pid_re = re.compile("%s:\d+" % prefix) + query = Query(client, "Sample", conditions={ + "pid": "LIKE '%s:%%'" % prefix + }) + for sample in client.searchChunked(query): + pid = str(sample.pid) + if not auto_pid_re.fullmatch(pid): + continue + p, i = pid.split(':') + if int(i) != sample.id: + yield sample.id, pid + +# ============================= stats ================================ +# The stats 
subcommand: provide some statistics and predict whether +# there are any obstacles for the schema upgrade. + +def cmd_stats(client, conf): + have_warning = False + + query = Query(client, "Sample", aggregate="COUNT") + num_samples = client.assertedSearch(query)[0] + logger.info("number of samples: %d", num_samples) + + query = Query(client, "Sample", conditions={ + "pid": "IS NOT NULL" + }, aggregate="COUNT") + num_samples_pid = client.assertedSearch(query)[0] + logger.info("number of samples having pid set: %d", num_samples_pid) + assert num_samples_pid <= num_samples + + query = Query(client, "Sample", conditions={ + "pid": "IS NOT NULL" + }, attributes=["pid"], aggregate="COUNT:DISTINCT") + num_pid_values = client.assertedSearch(query)[0] + logger.info("number of distinct pid values: %d", num_pid_values) + assert num_pid_values <= num_samples_pid + if num_pid_values < num_samples_pid: + logger.warning("there are duplicate pid values") + have_warning = True + + query = Query(client, "Sample", conditions={ + "pid": "IS NULL" + }, aggregate="COUNT") + num_samples_nopid = client.assertedSearch(query)[0] + logger.info("number of samples having no pid set: %d", num_samples_nopid) + assert num_samples_nopid <= num_samples + assert num_samples_pid + num_samples_nopid == num_samples + + prefixes = get_pid_prefixes(client) + if prefixes: + prefix_list = ",".join(("'%s'" % p for p in sorted(prefixes))) + logger.info("prefixes in use in sample pids: %s", prefix_list) + if num_samples_nopid > 0 and '_local' in prefixes: + for id, pid in find_potential_upgrade_conflicts(client, '_local'): + logger.warning("potentially conflicting pid value '%s' " + "in Sample %d", pid, id) + have_warning = True + else: + logger.info("no prefixes in use in sample pids") + + if have_warning: + logger.warning("there were warnings that need to be fixed " + "before upgrading to icat.server 7.0!") + else: + logger.info("no warnings, upgrading to icat.server 7.0 should succeed") + +def 
cfg_stats(subcmd): + help_string = "provide statistics and predict obstacles for schema upgrade" + sub_cfg = subcmd.add_subconfig("stats", + dict(help=help_string), + func=cmd_stats) + +# ============================== main ================================ + +if __name__ == '__main__': + logger.log(logging.NOTICE, + "this scripts needs to be run by a user " + "having read access to all samples!") + config = icat.config.Config(ids=False) + subcmd = config.add_subcommands() + cfg_stats(subcmd) + client, conf = config.getconfig() + client.login(conf.auth, conf.credentials) + conf.subcmd.func(client, conf) diff --git a/setup.py b/setup.py index 7ac3f96..aaa70fc 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ url = "https://github.com/icatproject-contrib/scripts", license = "Apache-2.0", scripts = [ + "scripts/check-samples.py", "scripts/check-sizes.py", "scripts/test-schema-sizes-triggers.py", "scripts/panet.py", From be64a9bc33a33da4b2be9084ac35f7c5913b6579 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 27 Jan 2026 18:33:00 +0100 Subject: [PATCH 02/12] Add 'duplicates' subcommand to check-samples script --- scripts/check-samples.py | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index 1905027..3ad7aba 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -43,6 +43,28 @@ def find_potential_upgrade_conflicts(client, prefix): if int(i) != sample.id: yield sample.id, pid +def find_duplicate_pids(client): + pid_query = Query(client, "Sample", conditions={ + "pid": "IS NOT NULL" + }, attributes=["pid"], order=["pid"], aggregate="DISTINCT") + for pid in client.searchChunked(pid_query): + count_query = Query(client, "Sample", conditions={ + "pid": "= '%s'" % pid + }, aggregate="COUNT") + if client.assertedSearch(count_query)[0] == 1: + continue + yield pid + +def sample_attr_string(sample): + attrs = [] + attrs.append("id:%d" % sample.id) + 
attrs.append("name:'%s'" % sample.name) + attrs.append("investigation.name:'%s'" % sample.investigation.name) + attrs.append("investigation.visitId:'%s'" % sample.investigation.visitId) + if sample.type: + attrs.append("type.name:'%s'" % sample.type.name) + return ", ".join(attrs) + # ============================= stats ================================ # The stats subcommand: provide some statistics and predict whether # there are any obstacles for the schema upgrade. @@ -103,6 +125,32 @@ def cfg_stats(subcmd): dict(help=help_string), func=cmd_stats) +# =========================== duplicates ============================= +# The duplicates subcommand: find duplicates, e.g. different samples +# having the same pid value. + +def cmd_dup(client, conf): + num_dup_pid = 0 + for pid in find_duplicate_pids(client): + num_dup_pid += 1 + query = Query(client, "Sample", conditions={ + "pid": "= '%s'" % pid + }, order=["id"], includes=["investigation", "type"]) + dup_list = "" + for sample in client.searchChunked(query): + dup_list += "\n\t%s" % sample_attr_string(sample) + logger.warning("duplicate pid '%s': %s", pid, dup_list) + if num_dup_pid: + logger.warning("%d duplicate pids found", num_dup_pid) + else: + logger.info("no duplicate pids found") + +def cfg_dup(subcmd): + help_string = "find duplicates, e.g. 
samples having the same pid attributes" + sub_cfg = subcmd.add_subconfig("duplicates", + dict(help=help_string), + func=cmd_dup) + # ============================== main ================================ if __name__ == '__main__': @@ -112,6 +160,7 @@ def cfg_stats(subcmd): config = icat.config.Config(ids=False) subcmd = config.add_subcommands() cfg_stats(subcmd) + cfg_dup(subcmd) client, conf = config.getconfig() client.login(conf.auth, conf.credentials) conf.subcmd.func(client, conf) From 1a15e000b4908dd1a4ba2335339334b4947e8465 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 27 Jan 2026 22:05:06 +0100 Subject: [PATCH 03/12] Rename the duplicates subcommand to lsdup --- scripts/check-samples.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index 3ad7aba..64bdab8 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -125,11 +125,11 @@ def cfg_stats(subcmd): dict(help=help_string), func=cmd_stats) -# =========================== duplicates ============================= -# The duplicates subcommand: find duplicates, e.g. different samples -# having the same pid value. +# ============================= lsdup ================================ +# The lsdup subcommand: show duplicates, e.g. different samples having +# the same pid value. -def cmd_dup(client, conf): +def cmd_lsdup(client, conf): num_dup_pid = 0 for pid in find_duplicate_pids(client): num_dup_pid += 1 @@ -145,11 +145,11 @@ def cmd_dup(client, conf): else: logger.info("no duplicate pids found") -def cfg_dup(subcmd): - help_string = "find duplicates, e.g. samples having the same pid attributes" - sub_cfg = subcmd.add_subconfig("duplicates", +def cfg_lsdup(subcmd): + help_string = "show duplicates, e.g. 
samples having the same pid attributes" + sub_cfg = subcmd.add_subconfig("lsdup", dict(help=help_string), - func=cmd_dup) + func=cmd_lsdup) # ============================== main ================================ @@ -160,7 +160,7 @@ def cfg_dup(subcmd): config = icat.config.Config(ids=False) subcmd = config.add_subcommands() cfg_stats(subcmd) - cfg_dup(subcmd) + cfg_lsdup(subcmd) client, conf = config.getconfig() client.login(conf.auth, conf.credentials) conf.subcmd.func(client, conf) From 303b0aafa0901a8bc3fcb1cdda3050dd5ddabafd Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 28 Jan 2026 09:56:21 +0100 Subject: [PATCH 04/12] Add helper function get_samples_by_pid() --- scripts/check-samples.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index 64bdab8..5390bac 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -30,6 +30,12 @@ def get_pid_prefixes(client): prefixes.add(p) return prefixes +def get_samples_by_pid(client, pid): + query = Query(client, "Sample", conditions={ + "pid": "= '%s'" % pid + }, order=["id"], includes="1") + return client.searchChunked(query) + def find_potential_upgrade_conflicts(client, prefix): auto_pid_re = re.compile("%s:\d+" % prefix) query = Query(client, "Sample", conditions={ @@ -133,11 +139,8 @@ def cmd_lsdup(client, conf): num_dup_pid = 0 for pid in find_duplicate_pids(client): num_dup_pid += 1 - query = Query(client, "Sample", conditions={ - "pid": "= '%s'" % pid - }, order=["id"], includes=["investigation", "type"]) dup_list = "" - for sample in client.searchChunked(query): + for sample in get_samples_by_pid(client, pid): dup_list += "\n\t%s" % sample_attr_string(sample) logger.warning("duplicate pid '%s': %s", pid, dup_list) if num_dup_pid: From 9de6446464d29be4109271fccefecbe45336bb5c Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 28 Jan 2026 11:52:15 +0100 Subject: [PATCH 05/12] Add 'setpids' subcommand to 
check-samples script --- scripts/check-samples.py | 61 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index 5390bac..0eb69f7 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -8,6 +8,7 @@ """ import logging +import math import re import icat import icat.config @@ -36,6 +37,17 @@ def get_samples_by_pid(client, pid): }, order=["id"], includes="1") return client.searchChunked(query) +def get_max_sample_id(client): + query = Query(client, "Sample", + attributes=["id"], order=[("id", "DESC")], limit=(0, 1)) + try: + return client.assertedSearch(query)[0] + except icat.SearchAssertionError as exc: + if exc.num == 0: + return 1 + else: + raise + def find_potential_upgrade_conflicts(client, prefix): auto_pid_re = re.compile("%s:\d+" % prefix) query = Query(client, "Sample", conditions={ @@ -154,6 +166,54 @@ def cfg_lsdup(subcmd): dict(help=help_string), func=cmd_lsdup) +# ============================ setpids =============================== +# The setpids subcommand: populate the pid attribute for all samples +# having it not set. + +def cmd_setpids(client, conf): + have_warning = False + for id, pid in find_potential_upgrade_conflicts(client, conf.prefix): + logger.warning("potentially conflicting pid value '%s' " + "in Sample %d", pid, id) + have_warning = True + if have_warning: + if conf.force: + logger.warning("potential conflicts detected, " + "proceeding anyway with force") + else: + logger.warning("potential conflicts detected, " + "won't proceed without force") + return + num_digits = math.ceil(math.log10(get_max_sample_id(client)))+1 + # Note: we can't use client.searchChunked() here, because we are + # changing the result set in the body of the loop. 
+ query = Query(client, "Sample", conditions={ + "pid": "IS NULL" + }, includes="1", limit=(0, 100)) + count = 0 + while True: + samples = client.search(query) + for sample in samples: + sample.pid = "%s:%0*d" % (conf.prefix, num_digits, sample.id) + sample.update() + count += 1 + if len(samples) < 100: + break + logger.info("%d pid attributes set", count) + +def cfg_setpids(subcmd): + help_string = "populate the pid attribute for all samples having it not set" + sub_cfg = subcmd.add_subconfig("setpids", + dict(help=help_string), + func=cmd_setpids) + sub_cfg.add_variable('prefix', ("--prefix",), + dict(help="prefix to use in the dummy pid values"), + default="_local") + sub_cfg.add_variable('force', ("--force",), + dict(help="do it even if there is the risk of " + "creating new conflicts"), + default=False, type=icat.config.flag) + # ============================== main ================================ if __name__ == '__main__': @@ -164,6 +224,7 @@ def cfg_lsdup(subcmd): subcmd = config.add_subcommands() cfg_stats(subcmd) cfg_lsdup(subcmd) + cfg_setpids(subcmd) client, conf = config.getconfig() client.login(conf.auth, conf.credentials) conf.subcmd.func(client, conf) From d730a93c21a8388ae3dd1afb9f78cf2924e4d7ea Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 28 Jan 2026 12:21:21 +0100 Subject: [PATCH 06/12] Fixup 3a11c32: should use a raw string literal for a regexp pattern --- scripts/check-samples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index 0eb69f7..b49a3b7 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -49,7 +49,7 @@ def get_max_sample_id(client): raise def find_potential_upgrade_conflicts(client, prefix): - auto_pid_re = re.compile("%s:\d+" % prefix) + auto_pid_re = re.compile(r"%s:\d+" % prefix) query = Query(client, "Sample", conditions={ "pid": "LIKE '%s:%%'" % prefix }) From 59a8ca67eeefe8c73a8718a7ab659705893000af Mon Sep 17 00:00:00 2001 From: 
Rolf Krahl Date: Wed, 28 Jan 2026 13:21:47 +0100 Subject: [PATCH 07/12] Add a helper function searchChunkedNoSkip() --- scripts/check-samples.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index b49a3b7..e2d07b3 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -21,6 +21,22 @@ # ============================= helper =============================== +def searchChunkedNoSkip(client, query, chunksize=100): + """A variant of client.searchChunked() that does not skip. + + To be used in cases where the body of the loop modifies the result + set in a way that the treated objects do not match the search + criterion any more. + """ + query = query.copy() + query.setLimit((0, chunksize)) + while True: + items = client.search(query) + for item in items: + yield item + if len(items) < chunksize: + break + def get_pid_prefixes(client): prefixes = set() query = Query(client, "Sample", conditions={ @@ -185,20 +201,14 @@ def cmd_setpids(client, conf): "won't proceed without force") return num_digits = math.ceil(math.log10(get_max_sample_id(client)))+1 - # Note: we can't use client.searchChunked() here, because we are - # changing the result set in the body of the loop. 
query = Query(client, "Sample", conditions={ "pid": "IS NULL" - }, includes="1", limit=(0, 100)) + }, includes="1") count = 0 - while True: - samples = client.search(query) - for sample in samples: - sample.pid = "%s:%0*d" % (conf.prefix, num_digits, sample.id) - sample.update() - count += 1 - if len(samples) < 100: - break + for sample in searchChunkedNoSkip(client, query): + sample.pid = "%s:%0*d" % (conf.prefix, num_digits, sample.id) + sample.update() + count += 1 logger.info("%d pid attributes set", count) def cfg_setpids(subcmd): From ffdf0d8605ebd36b6c96961f990dd7c4bbde6e41 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 28 Jan 2026 14:01:51 +0100 Subject: [PATCH 08/12] Add 'dedup' subcommand to check-samples script --- scripts/check-samples.py | 53 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index e2d07b3..a4bb51d 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -77,6 +77,16 @@ def find_potential_upgrade_conflicts(client, prefix): if int(i) != sample.id: yield sample.id, pid +def find_potential_dedup_conflicts(client, pid): + auto_pid_re = re.compile(r"%s/dedup-\d+" % pid) + query = Query(client, "Sample", conditions={ + "pid": "LIKE '%s/%%'" % pid + }) + for sample in client.searchChunked(query): + if not auto_pid_re.fullmatch(str(sample.pid)): + continue + yield sample.id, sample.pid + def find_duplicate_pids(client): pid_query = Query(client, "Sample", conditions={ "pid": "IS NOT NULL" @@ -224,6 +234,48 @@ def cfg_setpids(subcmd): "creating new conflicts"), default=False, type=icat.config.flag) +# ============================= dedup ================================ +# The dedup subcommand: deduplicate pid values. 
+ +def cmd_dedup(client, conf): + num_dedup_pid = 0 + for pid in list(find_duplicate_pids(client)): + have_warning = False + for id, pid2 in find_potential_dedup_conflicts(client, pid): + logger.warning("potentially conflicting pid value '%s' " + "in Sample %d", pid2, id) + have_warning = True + if have_warning: + if conf.force: + logger.warning("potential conflicts detected, " + "proceeding with dedup '%s' anyway with force", + pid) + else: + logger.warning("potential conflicts detected, " + "won't proceed with dedup '%s' without force", + pid) + continue + count = 0 + query = Query(client, "Sample", conditions={ + "pid": "= '%s'" % pid + }, order=["id"], includes="1") + for sample in searchChunkedNoSkip(client, query): + sample.pid = "%s/dedup-%03d" % (pid, count) + sample.update() + count += 1 + num_dedup_pid += 1 + logger.info("%d pid values deduplicated", num_dedup_pid) + +def cfg_dedup(subcmd): + help_string = "deduplicate pid values" + sub_cfg = subcmd.add_subconfig("dedup", + dict(help=help_string), + func=cmd_dedup) + sub_cfg.add_variable('force', ("--force",), + dict(help="do it even if there is the risk of " + "creating new conflicts"), + default=False, type=icat.config.flag) + # ============================== main ================================ if __name__ == '__main__': @@ -235,6 +287,7 @@ def cfg_setpids(subcmd): cfg_stats(subcmd) cfg_lsdup(subcmd) cfg_setpids(subcmd) + cfg_dedup(subcmd) client, conf = config.getconfig() client.login(conf.auth, conf.credentials) conf.subcmd.func(client, conf) From 01d83696423f883dd4a821a5f48092e87317e63b Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 28 Jan 2026 15:43:00 +0100 Subject: [PATCH 09/12] Document the check-samples in the docstring --- scripts/check-samples.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index a4bb51d..843b163 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -5,6 +5,35 @@ 
Samples in an ICAT server. It is mostly aimed at assisting an upgrade of icat.server to version 7.0 which requires the Sample.pid attribute to be populated with unique non-null values. + +The script implements the following subcommands: + +stats + Display some statistics and provide an indication whether there + are any obstacles for the upgrade to icat.server 7.0. + +lsdup + List all non-unique Sample.pid values along with the corresponding + samples. + +setpids + Populate the pid attribute for all samples having it not set. The + values are of the form "<prefix>:<id>" which is guaranteed to be + unique unless there are any existing samples using the same + prefix. These values are considered to be placeholders that may + be replaced by something more sensible later on. The default + prefix is "_local", but this can be changed on the command line. + +dedup + Deduplicate existing pid values in samples, e.g. change them to + make the unique. This is done by appending a suffix: the value + "<pid>" will be changed to "<pid>/dedup-<n>" with some + incermental number <n>. + +For the subcommands that set new pid values (setpids and dedup), the +script checks whether there are any existing pid values that could +potentially conflict with the new values to be set before making any +changes. In this case, the change will not be applied unless forced. """ import logging From b00f1483f76e83c65375be6741379cfabc9fb089 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 28 Jan 2026 15:53:57 +0100 Subject: [PATCH 10/12] Fix docstring typo --- scripts/check-samples.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index 843b163..f8794c9 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -25,15 +25,16 @@ prefix is "_local", but this can be changed on the command line. dedup - Deduplicate existing pid values in samples, e.g. change them to - make the unique. 
This is done by appending a suffix: the value - "<pid>" will be changed to "<pid>/dedup-<n>" with some - incermental number <n>. + Deduplicate existing pid values in samples, e.g. change them to be + unique. This is done by appending a suffix: the value "<pid>" + will be changed to "<pid>/dedup-<n>" with some incermental + number <n>. For the subcommands that set new pid values (setpids and dedup), the script checks whether there are any existing pid values that could potentially conflict with the new values to be set before making any changes. In this case, the change will not be applied unless forced. + """ import logging From d2e65f9d0552699c28d444e596a719d19666fff7 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 29 Jan 2026 11:31:02 +0100 Subject: [PATCH 11/12] Add a note on requires permissions --- scripts/check-samples.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index f8794c9..3339c3e 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -27,7 +27,7 @@ dedup Deduplicate existing pid values in samples, e.g. change them to be unique. This is done by appending a suffix: the value "<pid>" - will be changed to "<pid>/dedup-<n>" with some incermental + will be changed to "<pid>/dedup-<n>" with some incremental number <n>. For the subcommands that set new pid values (setpids and dedup), the @@ -35,6 +35,15 @@ potentially conflict with the new values to be set before making any changes. In this case, the change will not be applied unless forced. +The script needs to be run by a user having read access to all +samples. While this sounds like stating the obvious, it is important +to mention here, because obviously, the script can not point on issues +if it is not allowed to see them. And the script has no way to detect +whether there are more samples than it is allowed to see. So you +won't get any sort of a warning if the script can't see all the +samples. 
Furthermore, for the setpids and dedup subcommands, the user +running the scripts needs the corresponding update permissions. + """ import logging From 06af4435510246d2f2e2a645b5c5a067876ae3d2 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 29 Jan 2026 11:48:40 +0100 Subject: [PATCH 12/12] Use at least three digits in the pids in the setpids subcommand --- scripts/check-samples.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check-samples.py b/scripts/check-samples.py index 3339c3e..1ed0eb6 100644 --- a/scripts/check-samples.py +++ b/scripts/check-samples.py @@ -249,7 +249,7 @@ def cmd_setpids(client, conf): logger.warning("potential conflicts detected, " "won't proceed without force") return - num_digits = math.ceil(math.log10(get_max_sample_id(client)))+1 + num_digits = max(math.ceil(math.log10(get_max_sample_id(client)))+1, 3) query = Query(client, "Sample", conditions={ "pid": "IS NULL" }, includes="1")