From 2c76263d1c87deee8dc1709df57adb9d3753d1a8 Mon Sep 17 00:00:00 2001 From: William Swanson Date: Thu, 5 Jun 2025 10:52:02 -0500 Subject: [PATCH 01/13] WIP commit for COmanage mass CO Person creation / modification script. --- .gitignore | 1 + comanage_person_schema_utils.py | 189 ++++++++++++++++++++++ comanage_utils.py | 30 ++++ mass_person_create_modify.py | 278 ++++++++++++++++++++++++++++++++ 4 files changed, 498 insertions(+) create mode 100644 comanage_person_schema_utils.py create mode 100644 mass_person_create_modify.py diff --git a/.gitignore b/.gitignore index ba698c7..8129ef9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .vscode/launch.json +__pycache__/* \ No newline at end of file diff --git a/comanage_person_schema_utils.py b/comanage_person_schema_utils.py new file mode 100644 index 0000000..e2558fa --- /dev/null +++ b/comanage_person_schema_utils.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python3 + +CO_PERSON = {"co_id": None, "timezone": None, "dateofbirth": None, "status": None} + +IDENTIFIER = { + "identifier": None, + "type": None, + "login": False, + "status": None, +} + +NAME = { + "honorific": None, + "given": "None", + "middle": None, + "family": "None", + "suffix": None, + "type": "official", + "language": None, + "primary_name": True, +} + + +GROUP = { + "co_group_id": None, + "member": True, + "owner": False, +} + + +COU = { + "co_id": None, + "name": None, + "description": None, + "parent_id": None, +} + + +ORG_IDENTITY = { + "co_id": None, + "title": None, + # Organization for OrgID + "o": None, + # Department for OrgID + "ou": None, + "valid_from": None, + "valid_through": None, + "status": "", + "affiliation": None, + "date_of_birth": None, + "Address": [], + "AdHocAttribute": [], + "EmailAddress": [], + "Identifier": [], + "Name": [], + "TelephoneNumber": [], + "Url": [], +} + + +EMAIL_ADDRESS = { + "mail": None, + "type": None, + "verified": None, +} + + +UNIX_CLUSTER_ACCOUNT = { + "sync_mode": "F", + "status": None, + "username": None, + "uid": None, + "gecos": None, + "login_shell": "/bin/bash", + "home_directory": None, + "primary_co_group_id": None, + "valid_from": None, + "valid_through": None, + "unix_cluster_id": None, +} + + +SSHKEY = { + "type": None, + "skey": None, + "comment": "", + "ssh_key_authenticator_id": None, +} + + +def co_person_schema(co_id, timezone=None, dob=None, status="Active"): + person_data = CO_PERSON.copy() + person_data["co_id"] = co_id + person_data["timezone"] = timezone + person_data["dateofbirth"] = dob + person_data["status"] = status + return person_data + + +def co_person_identifier(identifier, type, login=False, status="Active"): + identifier_data = IDENTIFIER.copy() + identifier_data["identifier"] = identifier + identifier_data["type"] = type + identifier_data["login"] = login + identifier_data["status"] = status + return identifier_data + + +def co_person_name(given, family=None, middle=None, type="official", primary=False): + name_data = NAME.copy() + name_data["given"] = given + name_data["family"] = family + name_data["middle"] = middle + name_data["type"] = type + name_data["primary_name"] = primary + return name_data + + +def name_split(whole_name): + name_sections = str(whole_name).split() + parts_count = len(name_sections) + if parts_count == 1 or parts_count > 3: + return co_person_name(whole_name) + elif parts_count == 2: + return co_person_name(name_sections[0], name_sections[1]) + else: + return co_person_name(co_person_name(name_sections[0], name_sections[2], name_sections[1])) + + +def co_person_group_member(group_id, member=True, owner=False): + group_member = GROUP.copy() + group_member["co_group_id"] = group_id + group_member["member"] = member + group_member["owner"] = owner + return group_member + + +def co_person_org_id( + osg_co_id, name, organization="", department="", title="", affiliation="member", id_list=[] +): + #org_id = {"co_id" : osg_co_id} + org_id = ORG_IDENTITY.copy() + org_id["co_id"] = osg_co_id + org_id["title"] = title + org_id["o"] = organization + org_id["ou"] = department + org_id["affiliation"] = affiliation + org_id["Identifier"] = id_list + org_id["Name"] = name + return org_id + + +def co_person_email_address(mail, type="delivery", verified=False): + email = EMAIL_ADDRESS.copy() + email["mail"] = mail + email["type"] = type + email["verified"] = verified + return email + + +def co_person_unix_cluster_acc(unix_cluster_id, username, uid, name, group_id, status="A"): + uca = UNIX_CLUSTER_ACCOUNT.copy() + uca["unix_cluster_id"] = unix_cluster_id + uca["username"] = username + uca["uid"] = uid + uca["status"] = status + uca["gecos"] = name + uca["home_directory"] = f"/home/{username}" + uca["primary_co_group_id"] = group_id + return uca + + +def co_person_sshkey(type, skey, comment, auth_id): + sshkey_data = SSHKEY.copy() + sshkey_data["type"] = type + sshkey_data["skey"] = skey + sshkey_data["comment"] = comment + sshkey_data["ssh_key_authenticator_id"] = auth_id + return sshkey_data + + +# def merge_schema(base_data, new_data, type): +# temp = base_data +# for field in type.keys(): +# for entry in new_data[field]: +# if "meta" in entry: +# +# +# return temp diff --git a/comanage_utils.py b/comanage_utils.py index 238b0ec..8b14d2a 100644 --- a/comanage_utils.py +++ b/comanage_utils.py @@ -99,6 +99,7 @@ def call_api3(method, target, data, endpoint, authstr, **kw): resp = urllib.request.urlopen(req, timeout=current_timeout) # exception catching, mainly for request timeouts, "Service Temporarily Unavailable" (Rate limiting), and DNS failures. except urllib.error.URLError as exception: + print(exception) req_attempts += 1 if req_attempts >= MAX_ATTEMPTS: raise URLRequestError( @@ -140,6 +141,18 @@ def get_co_group(gid, endpoint, authstr): return grouplist[0] +def core_api_co_person_read(identifier, coid, endpoint, authstr): + return call_api(f"api/co/{coid}/core/v1/people/{identifier}", endpoint, authstr) + + +def core_api_co_person_create(data, coid, endpoint, authstr): + return call_api3(POST, f"api/co/{coid}/core/v1/people/", data, endpoint, authstr) + + +def core_api_co_person_update(identifier, coid, data, endpoint, authstr): + return call_api3(PUT, f"api/co/{coid}/core/v1/people/{identifier}", data, endpoint, authstr) + + def get_identifier(id_, endpoint, authstr): resp_data = call_api("identifiers/%s.json" % id_, endpoint, authstr) idfs = get_datalist(resp_data, "Identifiers") @@ -190,6 +203,23 @@ def identifier_matches(id_list, id_type, regex_string): return (value is not None) and (pattern.match(value) is not None) +def create_co_group(groupname, description, coId, endpoint, authstr, open=False,): + group_info = { + "Version" : "1.0", + "CoId" : coId, + "Name" : groupname, + "Description" : description, + "Open" : open, + "Status" : "Active", + } + data = { + "CoGroups" : [group_info], + "RequestType" : "CoGroups", + "Version" : "1.0" + } + return call_api3(POST, "co_groups/.json", data, endpoint, authstr) + + def rename_co_group(gid, group, newname, endpoint, authstr): # minimal edit CoGroup Request includes Name+CoId+Status+Version new_group_info = { diff --git a/mass_person_create_modify.py b/mass_person_create_modify.py new file mode 100644 index 0000000..b195ce7 --- /dev/null +++ b/mass_person_create_modify.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 + +import os +import re +import sys +import json +import getopt +import collections +import comanage_utils as utils +import comanage_person_schema_utils as schema_utils + +SCRIPT = os.path.basename(__file__) +ENDPOINT = "https://registry.cilogon.org/registry/" +OSG_CO_ID = 7 +CMS_GROUP_ID = 4622 + +_usage = f"""\ +usage: [PASS=...] {SCRIPT} [OPTIONS] +""" + +def usage(msg=None): + if msg: + print(msg + "\n", file=sys.stderr) + + print(_usage, file=sys.stderr) + sys.exit() + +class Options: + endpoint = ENDPOINT + user = "co_7.project_script" + osg_co_id = OSG_CO_ID + authstr = None + input_file = None + mapping_file = None + ssh_key_authenticator = 5 + unix_cluster_id = 5 + + +options = Options() + +def parse_options(args): + try: + ops, args = getopt.getopt(args, 'u:c:d:f:e:i:m:h') + except getopt.GetoptError: + usage() + + if args: + usage("Extra arguments: %s" % repr(args)) + + passfd = None + passfile = None + + for op, arg in ops: + if op == '-h': usage() + if op == '-u': options.user = arg + if op == '-c': options.osg_co_id = int(arg) + if op == '-d': passfd = int(arg) + if op == '-f': passfile = arg + if op == '-e': options.endpoint = arg + if op == '-i': options.input_file = arg + if op == '-m': options.mapping_file = arg + + try: + user, passwd = utils.getpw(options.user, passfd, passfile) + options.authstr = utils.mkauthstr(user, passwd) + except PermissionError: + usage("PASS required") + + +def read_data_dump(): + data_json = [] + with open(options.input_file, 'r', encoding='utf-8') as input_file: + data_json = json.load(input_file) + for entry in range(len(data_json)): + for key_index in range(len(data_json[entry]["public_keys"])): + key = data_json[entry]["public_keys"][key_index] + key_sections = str(key).split() + if len(key_sections) >= 2: + data_json[entry]["public_keys"][key_index] = {"type" : key_sections[0], "pkey" : key_sections[1]} + if len(key_sections) >= 3: + data_json[entry]["public_keys"][key_index].update({"authenticator" : key_sections[2]}) + with open(options.mapping_file, 'r', encoding='utf-8') as mapping_file: + mapping_json = json.load(mapping_file) + return data_json + + +def build_co_person_record(entry): + record = {} + record.update({"CoPerson" : schema_utils.co_person_schema(options.osg_co_id, status="A")}) + + names = [] + names.append(schema_utils.name_split(entry["name"])) + record.update({"Name" : names}) + + identifiers = [] + + # CMS Username + identifiers.append(schema_utils.co_person_identifier(entry["username"], "cmsuser", status="A")) + # CMS UID + identifiers.append(schema_utils.co_person_identifier(entry["uid"], "cmsuid", status="A")) + #globus id + identifiers.append(schema_utils.co_person_identifier(entry["globus_id"], "cmsglobusid", status="A")) + #cilogon id + identifiers.append(schema_utils.co_person_identifier(entry["cilogon_id"], "oidcsub", status="A")) + + record.update({"Identifier" : identifiers }) + + group_memberships = [] + group_memberships.append(schema_utils.co_person_group_member(CMS_GROUP_ID)) + + # Group Memberships + record.update({"CoGroupMember" : group_memberships }) + + emails = [] + + emails.append(schema_utils.co_person_email_address(entry["email"])) + record.update({"EmailAddress" : emails}) + + org_ids = [] + + org_ids.append(schema_utils.co_person_org_id(options.osg_co_id, names, entry["institution"], id_list=identifiers)) + record.update({"OrgIdentity" : org_ids}) + + ssh_keys = [] + + for ssh_key in entry["public_keys"]: + key_type = ssh_key["type"] + public_key = ssh_key["pkey"] + comment = "" + if "authenticator" in dict(ssh_key): + comment = f"SSH Key for {ssh_key['authenticator']}" + auth_id = options.ssh_key_authenticator + ssh_keys.append(schema_utils.co_person_sshkey(type=key_type, skey=public_key, comment=comment, auth_id=auth_id)) + record.update({"SshKey" : ssh_keys}) + + return record + + +def add_unix_cluster_group(co_person_record): + identifiers_list = co_person_record["Identifier"] + username = next((item for item in identifiers_list if item["type"] == "osguser")) + uid = next((item for item in identifiers_list if item["type"] == "uid")) + description = f"Unix Cluster Group for {username}" + result = utils.create_co_group(username, description, options.osg_co_id, options.endpoint, options.authstr) + ucg = None + if not (result is None) and ("ResponseType" in result) and (result["ResponseType"] == "NewObject"): + group_id = result["Id"] + utils.add_identifier_to_group(group_id, "osggid", uid, options.endpoint, options.authstr) + utils.add_identifier_to_group(group_id, "osggroup", username, options.endpoint, options.authstr) + ucg = utils.add_unix_cluster_group(group_id, options.unix_cluster_id, options.endpoint, options.authstr) + #TODO throw catch on new group creation + if not (ucg is None) and ("ResponseType" in ucg) and (ucg["ResponseType"] == "NewObject"): + return ucg["Id"] + else: + raise ValueError(f"Failed to create CO Group for Unix Cluster Group, results were: {result} and {ucg}") + + +def add_unix_cluster_account(co_person_record): + identifiers_list = co_person_record["Identifier"] + names_list = co_person_record["Name"] + groups_list =co_person_record["CoGroupMember"] + username = next((item for item in identifiers_list if item["type"] == "osguser")) + uid = next((item for item in identifiers_list if item["type"] == "uid")) + name = next((item for item in names_list if item["primary_name"] == True)) + default_group_id = -1 + for membership in groups_list: + gid = membership["co_group_id"] + co_group_info = utils.get_co_group(gid, options.endpoint, options.authstr) + group_name = co_group_info["Name"] + if group_name == username: + default_group_id = gid + break + if default_group_id != -1: + uca = schema_utils.co_person_unix_cluster_acc(options.unix_cluster_id, username, uid, name, default_group_id) + print(uca) + if "UnixClusterAccount" in co_person_record: + co_person_record.update({"UnixClusterAccount" : co_person_record["UnixClusterAccount"].append(uca)}) + else: + co_person_record.update({"UnixClusterAccount" : [uca]}) + return co_person_record + + +def main(args): + parse_options(args) + + co_person_records = [] + + data_dump_json = read_data_dump() + for entry in data_dump_json: + co_person_records.append(build_co_person_record(entry)) + + print(co_person_records[0]) + print(len(co_person_records)) + + results = utils.core_api_co_person_create(data=co_person_records[0], coid=options.osg_co_id, endpoint=options.endpoint, authstr=options.authstr) + + print(results) + + co_person_read = utils.core_api_co_person_read("abbashassani", options.osg_co_id, options.endpoint, options.authstr) + + print(co_person_read) + + print(add_unix_cluster_account(co_person_read)) + # Read in dump to build / update users from + # select which field of the dump co-responds to the identifier we'll use to index the corresponding CO Person + # mapping file from dump attributes to COmanage object types, so we know what each dump attribute should become + + # Loop over entries in dump + # try a CO Person Read based on specified field + # if Read succeeds, we're doing an update + # else if we fail due to the record not existing, we're doing a create + # else, something went wrong and we should log about it + + # Needs: + # Create base CO Person record with at least enough information to read them at a later date + # Read CO Person via index identifier + # Write CO Person with specified full schema + # "matching identifier/key/name already exists in CO Person entry?" function + # + # Data -> Write schema functions, with argument for existing field to merge data in from, for + # Base CO Person + # Identifiers + # Email + # Name + # SSHKey + # Org ID + # CO Person Role + # UNIX Cluster Account + # GroupMembership (?) + + # Schema: + # CoPerson + # { + # 'meta': {'id': , 'created': , 'modified': , 'co_person_id': , 'revision': , 'deleted': , 'actor_identifier': }, + # 'co_id': <8 for test / 7 for prod>, + # 'status': 'A', + # 'timezone': None, + # 'date_of_birth': None + # } + # Identifier + # { + # + # } + # CoGroupMember + # Array of { + # 'meta': {'id': 9119, 'source_org_identity_id': None, 'created': '2025-05-14 21:47:15', 'modified': '2025-05-14 21:47:15', 'co_group_member_id': None, 'revision': 0, 'deleted': False, 'actor_identifier': 'co_8.william_test'}, + # 'co_group_id': 101, + # 'member': True, + # 'owner': False, + # 'valid_from': None, + # 'valid_through': None, + # 'co_group_nesting_id': None + # } + # Name + # Array of { + # 'honorific': None, + # 'given': 'TEST_PERSON', + # 'middle': None, + # 'family': 'Wanson', + # 'suffix': None, + # 'type': 'official', + # 'language': None, + # 'primary_name': True + # } + # SSHKEY + # Array of { + # 'type': '', + # 'skey': '', + # 'ssh_key_authenticator_id': 5 + # } + # NOTE: the "5" is from https://registry-test.cilogon.org/registry/ssh_key_authenticator/ssh_key_authenticators/edit/5 + + +if __name__ == "__main__": + try: + main(sys.argv[1:]) + except Exception as e: + sys.exit(e) \ No newline at end of file From e25048b34d6c1b387da1ad16456e94ab3fad9254 Mon Sep 17 00:00:00 2001 From: William Swanson Date: Thu, 12 Jun 2025 16:33:56 -0500 Subject: [PATCH 02/13] Add fail-fast api exception check for 403, 404, 405, 500 HTTP codes --- comanage_utils.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/comanage_utils.py b/comanage_utils.py index 8b14d2a..865ba8a 100644 --- a/comanage_utils.py +++ b/comanage_utils.py @@ -30,6 +30,8 @@ TIMEOUT_BASE = 5 MAX_ATTEMPTS = 5 +# HTTP return codes we shouldn't attempt to retry +HTTP_NO_RETRY_CODES = {403, 404, 405, 500} GET = "GET" PUT = "PUT" @@ -47,6 +49,16 @@ class URLRequestError(Error): pass +class HTTPRequestError(URLRequestError): + """Class for exceptions due to not being able to fulfill a HTTPRequest""" + def __init__(self, message, code): + self.message = message + self.code = code + + def __str__(self): + return self.message + + def getpw(user, passfd, passfile): if ":" in user: user, pw = user.split(":", 1) @@ -99,8 +111,15 @@ def call_api3(method, target, data, endpoint, authstr, **kw): resp = urllib.request.urlopen(req, timeout=current_timeout) # exception catching, mainly for request timeouts, "Service Temporarily Unavailable" (Rate limiting), and DNS failures. except urllib.error.URLError as exception: - print(exception) req_attempts += 1 + # If the exception was an HTTPError, with a code like 404 that won't change on a retry, fail fast + if issubclass(exception.__class__, urllib.error.HTTPError) and exception.code in HTTP_NO_RETRY_CODES: + raise HTTPRequestError( + "Exception raised due to api call error status" + + f"Exception reason: {exception}.\n Request: {req.full_url}", + code=exception.code + ) + # Since we think the expection *might* be transient, continue with retry logic if req_attempts >= MAX_ATTEMPTS: raise URLRequestError( "Exception raised after maximum number of retries reached after total backoff of " + From bca35a7722b23431cb81cc6a1ddf6ab6aa1f916c Mon Sep 17 00:00:00 2001 From: William Swanson Date: Thu, 12 Jun 2025 16:38:14 -0500 Subject: [PATCH 03/13] Fix name_split to handle >=3 sections in name, add name_unsplit --- comanage_person_schema_utils.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/comanage_person_schema_utils.py b/comanage_person_schema_utils.py index e2558fa..c26477d 100644 --- a/comanage_person_schema_utils.py +++ b/comanage_person_schema_utils.py @@ -119,12 +119,24 @@ def co_person_name(given, family=None, middle=None, type="official", primary=Fal def name_split(whole_name): name_sections = str(whole_name).split() parts_count = len(name_sections) - if parts_count == 1 or parts_count > 3: - return co_person_name(whole_name) + if parts_count == 1: + return co_person_name(given=whole_name) elif parts_count == 2: - return co_person_name(name_sections[0], name_sections[1]) + return co_person_name(given=name_sections[0], family=name_sections[1]) else: - return co_person_name(co_person_name(name_sections[0], name_sections[2], name_sections[1])) + return co_person_name(given=name_sections[0], family=name_sections[parts_count-1], middle=" ".join(name_sections[1:parts_count-1])) + + +def name_unsplit(name_id): + if name_id["given"] is None: + return "" + elif name_id["family"] is None: + return name_id["given"] + elif name_id["middle"] is None: + return f'{name_id["given"]} {name_id["family"]}' + else: + return f'{name_id["given"]} {name_id["middle"]} {name_id["family"]}' + def co_person_group_member(group_id, member=True, owner=False): From 1f080dfa94b387a052349710bbefb6f1d9212a60 Mon Sep 17 00:00:00 2001 From: William Swanson Date: Thu, 12 Jun 2025 16:39:11 -0500 Subject: [PATCH 04/13] Add schema info and builder for CO Roles --- comanage_person_schema_utils.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/comanage_person_schema_utils.py b/comanage_person_schema_utils.py index c26477d..a64469f 100644 --- a/comanage_person_schema_utils.py +++ b/comanage_person_schema_utils.py @@ -58,6 +58,20 @@ } +ROLE = { + "cou_id": None, + "title": "", + "o": "", + "ou": "", + "valid_from": None, + "valid_through": None, + "status": "A", + "sponsor_co_person_id": None, + "affiliation": "member", + "ordr": 1, +} + + EMAIL_ADDRESS = { "mail": None, "type": None, @@ -162,6 +176,14 @@ def co_person_org_id( return org_id +def co_person_role(cou, title, affiliation, order): + role = ROLE.copy() + role["cou_id"] = cou + role["title"] = title + role["affiliation"] = affiliation + role["ordr"] = order + return role + def co_person_email_address(mail, type="delivery", verified=False): email = EMAIL_ADDRESS.copy() email["mail"] = mail From 5ef73c17e7ce69d43ec065c17746d1743cfd1d0a Mon Sep 17 00:00:00 2001 From: William Swanson Date: Thu, 12 Jun 2025 16:39:32 -0500 Subject: [PATCH 05/13] Fix CO_Person schema def formatting --- comanage_person_schema_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/comanage_person_schema_utils.py b/comanage_person_schema_utils.py index a64469f..d7fba5e 100644 --- a/comanage_person_schema_utils.py +++ b/comanage_person_schema_utils.py @@ -1,6 +1,11 @@ #!/usr/bin/env python3 -CO_PERSON = {"co_id": None, "timezone": None, "dateofbirth": None, "status": None} +CO_PERSON = { + "co_id": None, + "timezone": None, + "dateofbirth": None, + "status": None + } IDENTIFIER = { "identifier": None, From 2115261027b34a593a5b7d1f0b7adebc84fbd046 Mon Sep 17 00:00:00 2001 From: William Swanson Date: Thu, 12 Jun 2025 16:51:12 -0500 Subject: [PATCH 06/13] WIP commit 2 for COmanage mass CO Person creation / modification script. --- mass_person_create_modify.py | 97 +++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 35 deletions(-) diff --git a/mass_person_create_modify.py b/mass_person_create_modify.py index b195ce7..333378a 100644 --- a/mass_person_create_modify.py +++ b/mass_person_create_modify.py @@ -1,11 +1,9 @@ #!/usr/bin/env python3 import os -import re import sys import json import getopt -import collections import comanage_utils as utils import comanage_person_schema_utils as schema_utils @@ -13,6 +11,8 @@ ENDPOINT = "https://registry.cilogon.org/registry/" OSG_CO_ID = 7 CMS_GROUP_ID = 4622 +CMS_COU_ID = 1785 +LDAP_TARGET_ID = 9 _usage = f"""\ usage: [PASS=...] {SCRIPT} [OPTIONS] @@ -33,7 +33,8 @@ class Options: input_file = None mapping_file = None ssh_key_authenticator = 5 - unix_cluster_id = 5 + unix_cluster_id = 10 + provisioning_target = LDAP_TARGET_ID options = Options() @@ -101,7 +102,10 @@ def build_co_person_record(entry): #globus id identifiers.append(schema_utils.co_person_identifier(entry["globus_id"], "cmsglobusid", status="A")) #cilogon id - identifiers.append(schema_utils.co_person_identifier(entry["cilogon_id"], "oidcsub", status="A")) + if not entry["cilogon_id"] is None: + identifiers.append(schema_utils.co_person_identifier(entry["cilogon_id"], "oidcsub", status="A")) + else: + print(f"Warning: user {entry['username']} lacks a cilogon id.") record.update({"Identifier" : identifiers }) @@ -111,6 +115,11 @@ def build_co_person_record(entry): # Group Memberships record.update({"CoGroupMember" : group_memberships }) + roles = [] + + roles.append(schema_utils.co_person_role(CMS_COU_ID, "CMS User", "member", 1)) + record.update({"CoPersonRole" : roles }) + emails = [] emails.append(schema_utils.co_person_email_address(entry["email"])) @@ -136,10 +145,10 @@ def build_co_person_record(entry): return record -def add_unix_cluster_group(co_person_record): +def create_unix_cluster_group(co_person_record): identifiers_list = co_person_record["Identifier"] - username = next((item for item in identifiers_list if item["type"] == "osguser")) - uid = next((item for item in identifiers_list if item["type"] == "uid")) + username = next((item["identifier"] for item in identifiers_list if item["type"] == "osguser")) + uid = next((item["identifier"] for item in identifiers_list if item["type"] == "uid")) description = f"Unix Cluster Group for {username}" result = utils.create_co_group(username, description, options.osg_co_id, options.endpoint, options.authstr) ucg = None @@ -148,9 +157,10 @@ def add_unix_cluster_group(co_person_record): utils.add_identifier_to_group(group_id, "osggid", uid, options.endpoint, options.authstr) utils.add_identifier_to_group(group_id, "osggroup", username, options.endpoint, options.authstr) ucg = utils.add_unix_cluster_group(group_id, options.unix_cluster_id, options.endpoint, options.authstr) + utils.provision_group(group_id, options.provisioning_target, options.endpoint, options.authstr) #TODO throw catch on new group creation if not (ucg is None) and ("ResponseType" in ucg) and (ucg["ResponseType"] == "NewObject"): - return ucg["Id"] + return(result["Id"]) else: raise ValueError(f"Failed to create CO Group for Unix Cluster Group, results were: {result} and {ucg}") @@ -158,23 +168,21 @@ def add_unix_cluster_group(co_person_record): def add_unix_cluster_account(co_person_record): identifiers_list = co_person_record["Identifier"] names_list = co_person_record["Name"] - groups_list =co_person_record["CoGroupMember"] - username = next((item for item in identifiers_list if item["type"] == "osguser")) - uid = next((item for item in identifiers_list if item["type"] == "uid")) - name = next((item for item in names_list if item["primary_name"] == True)) + username = next((item["identifier"] for item in identifiers_list if item["type"] == "osguser")) + uid = next((item["identifier"] for item in identifiers_list if item["type"] == "uid")) + name_id = next((item for item in names_list if item["primary_name"] == True)) + name = schema_utils.name_unsplit(name_id) default_group_id = -1 - for membership in groups_list: - gid = membership["co_group_id"] - co_group_info = utils.get_co_group(gid, options.endpoint, options.authstr) - group_name = co_group_info["Name"] - if group_name == username: - default_group_id = gid - break + default_group_id = create_unix_cluster_group(co_person_record) + ucg_membership = schema_utils.co_person_group_member(default_group_id) + if "CoGroupMember" in co_person_record: + co_person_record["CoGroupMember"].append(ucg_membership) + else: + co_person_record.update({"CoGroupMember" : [ucg_membership]}) if default_group_id != -1: uca = schema_utils.co_person_unix_cluster_acc(options.unix_cluster_id, username, uid, name, default_group_id) - print(uca) if "UnixClusterAccount" in co_person_record: - co_person_record.update({"UnixClusterAccount" : co_person_record["UnixClusterAccount"].append(uca)}) + co_person_record["UnixClusterAccount"].append(uca) else: co_person_record.update({"UnixClusterAccount" : [uca]}) return co_person_record @@ -183,24 +191,43 @@ def add_unix_cluster_account(co_person_record): def main(args): parse_options(args) - co_person_records = [] + co_person_records = dict() data_dump_json = read_data_dump() for entry in data_dump_json: - co_person_records.append(build_co_person_record(entry)) - - print(co_person_records[0]) - print(len(co_person_records)) - - results = utils.core_api_co_person_create(data=co_person_records[0], coid=options.osg_co_id, endpoint=options.endpoint, authstr=options.authstr) - - print(results) - - co_person_read = utils.core_api_co_person_read("abbashassani", options.osg_co_id, options.endpoint, options.authstr) - - print(co_person_read) + co_person_records.update({entry["username"] : build_co_person_record(entry)}) + + usernames = list(co_person_records.keys()) + + for user in usernames: + + try: + try: + #If the CO Person record exists, stop creating/modifying (TODO: switch to modifying existing user rather than trying to create) + if utils.core_api_co_person_read(user, options.osg_co_id, options.endpoint, options.authstr): + continue + except utils.HTTPRequestError as e: + # If the record *doesn't* exist, pass and make it. Else, some other error happened on our read, like 403 or 500 and we'll try again on another run. + if e.code == 404: + pass + else: + break + print(f"CREATING RECORDS FOR USER: {user}") + results_create = utils.core_api_co_person_create(data=co_person_records[user], coid=options.osg_co_id, endpoint=options.endpoint, authstr=options.authstr) + + co_person_data = utils.core_api_co_person_read(user, options.osg_co_id, options.endpoint, options.authstr) + + co_person_data = add_unix_cluster_account(co_person_data) + + utils.core_api_co_person_update(user, options.osg_co_id, co_person_data, options.endpoint, options.authstr) + except Exception as e: + print(f"\tException for user {user}.") + print(f"\t{e}") + if results_create: + print(f"\t{results_create}") + if co_person_data: + print(f"\t{co_person_data}") - print(add_unix_cluster_account(co_person_read)) # Read in dump to build / update users from # select which field of the dump co-responds to the identifier we'll use to index the corresponding CO Person # mapping file from dump attributes to COmanage object types, so we know what each dump attribute should become From 4afbd4c2326e9b5c86fcd85595b3ba5e0e197fa3 Mon Sep 17 00:00:00 2001 From: William Swanson Date: Fri, 13 Jun 2025 16:55:41 -0500 Subject: [PATCH 07/13] Add Unix cluster group provisioning after adding the user to the group --- mass_person_create_modify.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mass_person_create_modify.py b/mass_person_create_modify.py index 333378a..f636a8c 100644 --- a/mass_person_create_modify.py +++ b/mass_person_create_modify.py @@ -185,7 +185,7 @@ def add_unix_cluster_account(co_person_record): co_person_record["UnixClusterAccount"].append(uca) else: co_person_record.update({"UnixClusterAccount" : [uca]}) - return co_person_record + return co_person_record, default_group_id def main(args): @@ -217,9 +217,11 @@ def main(args): co_person_data = utils.core_api_co_person_read(user, options.osg_co_id, options.endpoint, options.authstr) - co_person_data = add_unix_cluster_account(co_person_data) + co_person_data, gid = add_unix_cluster_account(co_person_data) utils.core_api_co_person_update(user, options.osg_co_id, co_person_data, options.endpoint, options.authstr) + + utils.provision_group(gid, options.provisioning_target, options.endpoint, options.authstr) except Exception as e: print(f"\tException for user {user}.") print(f"\t{e}") From 67f985a1ad95ad7661093ca2ec404575b90941c7 Mon Sep 17 00:00:00 2001 From: William Swanson Date: Fri, 13 Jun 2025 16:56:16 -0500 Subject: [PATCH 08/13] Fix exception messaging for early failures --- mass_person_create_modify.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mass_person_create_modify.py b/mass_person_create_modify.py index f636a8c..4ea7fd5 100644 --- a/mass_person_create_modify.py +++ b/mass_person_create_modify.py @@ -200,6 +200,8 @@ def main(args): usernames = list(co_person_records.keys()) for user in usernames: + co_person_data = None + results_create = None try: try: From 81301461d030c8591af30a223e457a6092fa2c3c Mon Sep 17 00:00:00 2001 From: William Swanson Date: Fri, 13 Jun 2025 16:58:20 -0500 Subject: [PATCH 09/13] Retry on 403 Forbidden for rate limiting, typo in exception note --- comanage_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/comanage_utils.py b/comanage_utils.py index 865ba8a..a2684a4 100644 --- a/comanage_utils.py +++ b/comanage_utils.py @@ -31,7 +31,8 @@ MAX_ATTEMPTS = 5 # HTTP return codes we shouldn't attempt to retry -HTTP_NO_RETRY_CODES = {403, 404, 405, 500} +#HTTP_NO_RETRY_CODES = {403, 404, 405, 500} +HTTP_NO_RETRY_CODES = {404, 405, 500} GET = "GET" PUT = "PUT" @@ -119,7 +120,7 @@ def call_api3(method, target, data, endpoint, authstr, **kw): + f"Exception reason: {exception}.\n Request: {req.full_url}", code=exception.code ) - # Since we think the expection *might* be transient, continue with retry logic + # Since we think the exception *might* be transient, continue with retry logic if req_attempts >= MAX_ATTEMPTS: raise URLRequestError( "Exception raised after maximum number of retries reached after total backoff of " + From dfd61e0d74a18881d85bfe1d4044935c58511763 Mon Sep 17 00:00:00 2001 From: William Swanson Date: Thu, 13 Nov 2025 16:54:41 -0600 Subject: [PATCH 10/13] Mass import script: fixup for U-Chicago dev ingest --- comanage_utils.py | 35 +++++++++++++++++++++ mass_person_create_modify.py | 61 +++++++++++++++++++++++++----------- 2 files changed, 78 insertions(+), 18 deletions(-) diff --git a/comanage_utils.py b/comanage_utils.py index a2684a4..a53fbc0 100644 --- a/comanage_utils.py +++ b/comanage_utils.py @@ -190,10 +190,37 @@ def get_unix_cluster_groups_ids(ucid, endpoint, authstr): return set(group["CoGroupId"] for group in unix_cluster_groups["UnixClusterGroups"]) +def update_co_person_identifier(id_, type, identifier, person_id, endpoint, authstr, provisioning_target): + id_data = { + "RequestType":"Identifiers", + "Version":"1.0", + "Identifiers": + [ + { + "Version":"1.0", + "Type":type, + "Identifier":identifier, + "Login":False, + "Person":{"Type":"CO","Id":person_id}, + "CoProvisioningTargetId":provisioning_target, + "Status":"Active" + } + ] + } + return call_api3(PUT, "/api/v2/identifiers" % id_, id_data, endpoint, authstr, ) + #return call_api3(PUT, "identifiers/%s.json" % id_, id_data, endpoint, authstr) + + def delete_identifier(id_, endpoint, authstr): return call_api2(DELETE, "identifiers/%s.json" % id_, endpoint, authstr) +def get_co_group_members_pids(gid, endpoint, authstr): + resp_data = get_co_group_members(gid, endpoint, authstr) + data = get_datalist(resp_data, "CoGroupMembers") + return [m["Person"]["Id"] for m in data] + + def get_datalist(data, listname): return data[listname] if data else [] @@ -216,6 +243,14 @@ def identifier_from_list(id_list, id_type): except ValueError: return None +def full_identifier_from_list(id_list, id_type): + id_type_list = [id["Type"] for id in id_list] + try: + id_index = id_type_list.index(id_type) + return id_list[id_index] + except ValueError: + return None + def identifier_matches(id_list, id_type, regex_string): pattern = re.compile(regex_string) diff --git a/mass_person_create_modify.py b/mass_person_create_modify.py index 4ea7fd5..e7a8929 100644 --- a/mass_person_create_modify.py +++ b/mass_person_create_modify.py @@ -10,8 +10,6 @@ SCRIPT = os.path.basename(__file__) ENDPOINT = "https://registry.cilogon.org/registry/" OSG_CO_ID = 7 -CMS_GROUP_ID = 4622 -CMS_COU_ID = 1785 LDAP_TARGET_ID = 9 _usage = f"""\ @@ -32,16 +30,18 @@ class Options: authstr = None input_file = None mapping_file = None - ssh_key_authenticator = 5 - unix_cluster_id = 10 + ssh_key_authenticator = 5 # 1 + unix_cluster_id = 10 # 1 provisioning_target = LDAP_TARGET_ID + import_group_id = None + import_cou_id = None options = Options() def parse_options(args): try: - ops, args = getopt.getopt(args, 'u:c:d:f:e:i:m:h') + ops, args = getopt.getopt(args, 'u:c:d:f:e:i:m:g:o:h') except getopt.GetoptError: usage() @@ -60,6 +60,8 @@ def parse_options(args): if op == '-e': options.endpoint = arg if op == '-i': options.input_file = arg if op == '-m': options.mapping_file = arg + if op == '-g': options.import_group_id = arg + if op == '-o': options.import_cou_id = arg try: user, passwd = utils.getpw(options.user, passfd, passfile) @@ -82,10 +84,10 @@ def read_data_dump(): data_json[entry]["public_keys"][key_index].update({"authenticator" : key_sections[2]}) with open(options.mapping_file, 'r', encoding='utf-8') as mapping_file: mapping_json = json.load(mapping_file) - return data_json + return data_json, mapping_json -def build_co_person_record(entry): +def build_co_person_record(entry, mapping_json : dict): record = {} record.update({"CoPerson" : schema_utils.co_person_schema(options.osg_co_id, status="A")}) @@ -95,29 +97,37 @@ def build_co_person_record(entry): identifiers = [] - # CMS Username - identifiers.append(schema_utils.co_person_identifier(entry["username"], "cmsuser", status="A")) - # CMS UID - identifiers.append(schema_utils.co_person_identifier(entry["uid"], "cmsuid", status="A")) + # UC Connect Username + identifiers.append(schema_utils.co_person_identifier(entry["username"], "ucconnectuser", status="A")) + # UC Connect UID + identifiers.append(schema_utils.co_person_identifier(entry["uid"], "ucconnectuid", status="A")) #globus id - identifiers.append(schema_utils.co_person_identifier(entry["globus_id"], "cmsglobusid", status="A")) + identifiers.append(schema_utils.co_person_identifier(entry["globus_id"], "ucconnectglobusid", status="A")) #cilogon id - if not entry["cilogon_id"] is None: - identifiers.append(schema_utils.co_person_identifier(entry["cilogon_id"], "oidcsub", status="A")) + if not (entry["cilogon_oidc_sub"] is None or entry["cilogon_oidc_sub"] == ""): + identifiers.append(schema_utils.co_person_identifier(entry["cilogon_oidc_sub"], "oidcsub", status="A")) else: print(f"Warning: user {entry['username']} lacks a cilogon id.") + # With our current LDAP Provisioner configuration, the CO Person still needs an oidcsub identifier, even if it's junk + identifiers.append(schema_utils.co_person_identifier(f"dummy-text-for-provisioning-{entry['username']}", "oidcsub", status="A", login=False)) record.update({"Identifier" : identifiers }) group_memberships = [] - group_memberships.append(schema_utils.co_person_group_member(CMS_GROUP_ID)) + group_memberships.append(schema_utils.co_person_group_member(options.import_group_id)) + + for user_membership in entry["groups"]: + if not mapping_json.get(user_membership) is None: + group_memberships.append(schema_utils.co_person_group_member(mapping_json.get(user_membership))) + else: + print(f"Warning: could not find group id for group {user_membership}, user {entry['username']}.") # Group Memberships record.update({"CoGroupMember" : group_memberships }) roles = [] - roles.append(schema_utils.co_person_role(CMS_COU_ID, "CMS User", "member", 1)) + roles.append(schema_utils.co_person_role(options.import_cou_id, "UC Connect User", "member", 1)) record.update({"CoPersonRole" : roles }) emails = [] @@ -144,6 +154,15 @@ def build_co_person_record(entry): return record +def fix_username(co_person_record, new_username): + record = co_person_record + + for identifier in co_person_record["Identifier"]: + if identifier["type"] == "osguser": + identifier["identifier"] = new_username + + return record + def create_unix_cluster_group(co_person_record): identifiers_list = co_person_record["Identifier"] @@ -193,9 +212,12 @@ def main(args): co_person_records = dict() - data_dump_json = read_data_dump() + data_dump_json, mapping_json = read_data_dump() + + #data_dump_json = [data_dump_json[0], data_dump_json[1], data_dump_json[2], data_dump_json[3], data_dump_json[8], data_dump_json[9]] + for entry in data_dump_json: - co_person_records.update({entry["username"] : build_co_person_record(entry)}) + co_person_records.update({entry["username"] : build_co_person_record(entry, mapping_json)}) usernames = list(co_person_records.keys()) @@ -219,6 +241,9 @@ def main(args): co_person_data = utils.core_api_co_person_read(user, options.osg_co_id, options.endpoint, options.authstr) + co_person_data = fix_username(co_person_data, user) + utils.core_api_co_person_update(user, options.osg_co_id, co_person_data, options.endpoint, options.authstr) + co_person_data, gid = add_unix_cluster_account(co_person_data) utils.core_api_co_person_update(user, options.osg_co_id, co_person_data, options.endpoint, options.authstr) From 1a68819d007038e84c06adb7606a47ed567da8f3 Mon Sep 17 00:00:00 2001 From: William Swanson Date: Fri, 30 Jan 2026 13:41:59 -0600 Subject: [PATCH 11/13] COmanage Mass import script: use production values for target, plugin ids --- mass_person_create_modify.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mass_person_create_modify.py b/mass_person_create_modify.py index e7a8929..544fdba 100644 --- a/mass_person_create_modify.py +++ b/mass_person_create_modify.py @@ -10,7 +10,7 @@ SCRIPT = os.path.basename(__file__) ENDPOINT = "https://registry.cilogon.org/registry/" OSG_CO_ID = 7 -LDAP_TARGET_ID = 9 +LDAP_TARGET_ID = 6 _usage = f"""\ usage: [PASS=...] {SCRIPT} [OPTIONS] @@ -30,8 +30,8 @@ class Options: authstr = None input_file = None mapping_file = None - ssh_key_authenticator = 5 # 1 - unix_cluster_id = 10 # 1 + ssh_key_authenticator = 1 + unix_cluster_id = 1 provisioning_target = LDAP_TARGET_ID import_group_id = None import_cou_id = None From 8f77623ae150d1947b3a3a7bf36175574f171d2b Mon Sep 17 00:00:00 2001 From: William Swanson Date: Fri, 30 Jan 2026 13:43:05 -0600 Subject: [PATCH 12/13] COmanage utils: add 401 to error codes to not retry Also remove old commented out version --- comanage_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/comanage_utils.py b/comanage_utils.py index a53fbc0..9d5e5b1 100644 --- a/comanage_utils.py +++ b/comanage_utils.py @@ -31,8 +31,7 @@ MAX_ATTEMPTS = 5 # HTTP return codes we shouldn't attempt to retry -#HTTP_NO_RETRY_CODES = {403, 404, 405, 500} -HTTP_NO_RETRY_CODES = {404, 405, 500} +HTTP_NO_RETRY_CODES = {401, 404, 405, 500} GET = "GET" PUT = "PUT" From 2edda0b229ef7ee51f900ba70da4def1dee8574d Mon Sep 17 00:00:00 2001 From: William Swanson Date: Fri, 30 Jan 2026 13:44:24 -0600 Subject: [PATCH 13/13] COmanage mass import script: add additional comments for where to find COmanage schema add EOF newline --- mass_person_create_modify.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mass_person_create_modify.py b/mass_person_create_modify.py index 544fdba..fe71ebd 100644 --- a/mass_person_create_modify.py +++ b/mass_person_create_modify.py @@ -284,7 +284,9 @@ def main(args): # UNIX Cluster Account # GroupMembership (?) - # Schema: + # Some Schema Info: + # See comanage_person_schema_utils.py and COmanage docs on co person schema and associated record types + # https://spaces.at.internet2.edu/spaces/COmanage/pages/25859280/cm_co_people # CoPerson # { # 'meta': {'id': , 'created': , 'modified': , 'co_person_id': , 'revision': , 'deleted': , 'actor_identifier': }, @@ -295,7 +297,10 @@ def main(args): # } # Identifier # { - # + # 'meta': {'id': , 'created': , 'modified': , 'co_person_id': , 'revision': , 'deleted': , 'actor_identifier': }, + #"identifier": None, + #"type": None, + #"login": False, # } # CoGroupMember # Array of { @@ -331,4 +336,4 @@ def main(args): try: main(sys.argv[1:]) except Exception as e: - sys.exit(e) \ No newline at end of file + sys.exit(e)