diff --git a/get_collection_storagesize.py b/get_collection_storagesize.py index cd0b6bc..610cf40 100755 --- a/get_collection_storagesize.py +++ b/get_collection_storagesize.py @@ -10,6 +10,7 @@ parser.add_argument('-c', '--collection', help='Sum the size of datasets in the given Dataverse collection.') parser.add_argument('-a', '--all_collections', help='Report the sizes of all collections in the given Dataverse.', action='store_true') parser.add_argument('-t', '--api_token', help='API token of an admin user.') +parser.add_argument('-r', '--root_collection', default='root', help='Specify the name of the root collection.') args = parser.parse_args() if (args.collection is None) and (args.all_collections is False): @@ -25,12 +26,13 @@ collection = args.collection all = args.all_collections token = args.api_token +root_coll = args.root_collection def get_size(dataverse,collection,token): # throws I/O errors in TRSA case #dvurl = dataverse + '/api/dataverses/' + collection + '/storagesize?includeCached=true&key=' + token dvurl = dataverse + '/api/dataverses/' + collection + '/storagesize?key=' + token - r = requests.get(dvurl) + r = requests.get(dvurl) j = r.json() # strip out "size of this ... bytes" error = "Couldn't get storagesize for collection: " + collection @@ -77,15 +79,16 @@ def format_size(byte_size): readablesize = format_size(size) dvfilecount = get_filecount(dataverse,collection,token) print(collection + ': ' + str(size) + ' bytes' + ' (' + readablesize + '), ' + str(dvfilecount) + ' files.') - + else: - # start with the root dataverse - collection = 'root' - size = get_size(dataverse,collection,token) - dvfilecount = get_filecount(dataverse,collection,token) - print(collection + ': ' + str(size) + ' bytes, ' + str(dvfilecount) + ' files.') - # now iterate through sub-collections - instanceurl = dataverse + '/api/dataverses/root/contents' + # don't process root for performance reasons + #collection = 'root' + #size = get_size(dataverse,collection,token) + #dvfilecount = get_filecount(dataverse,collection,token) + #print(collection + ': ' + str(size) + ' bytes, ' + str(dvfilecount) + ' files.') + + # iterate through sub-collections + instanceurl = dataverse + f"/api/dataverses/{root_coll}/contents" r = requests.get(instanceurl) j = r.json() for i in range(len(j["data"])): @@ -97,6 +100,8 @@ def format_size(byte_size): ar = requests.get(aliasurl) aj = ar.json() collection = aj["data"]["alias"] + # get size size = get_size(dataverse,collection,token) + readablesize = format_size(size) dvfilecount = get_filecount(dataverse,collection,token) print(collection + ': ' + str(size) + ' bytes' + ' (' + readablesize + '), ' + str(dvfilecount) + ' files.')