From c24e7061d23356dea9ef5aa8c68db9b187779fba Mon Sep 17 00:00:00 2001
From: Alexander Rogovskiy <arogovsk@lxplus772.cern.ch>
Date: Mon, 5 Jun 2023 16:54:54 +0200
Subject: [PATCH] feat: added script for searching corrupted (stub) files

The check is performed in 3 steps: initial search, verification and
dark object search.

In the first step a sorted list of objects is read; for every file
we get its size, the size of its last object, and also calculate the
number of objects that constitute the file.
If <file_size> != <object_size>*(<object_count> -1) + <last_obj_size>,
the file is considered to be corrupted.

In the second step the list of files obtained in the first step is read
and a "full" live check is performed for every file, i.e. every object of
the file is statted and the sizes are compared. This is necessary because
some files may be only partially transferred during the dump collection.

The final step is collection of orphaned objects. This is done since
these objects can not be removed using rados striper.
---
 dataconsistency/search_stub.py | 327 +++++++++++++++++++++++++++++++++
 1 file changed, 327 insertions(+)
 create mode 100644 dataconsistency/search_stub.py

diff --git a/dataconsistency/search_stub.py b/dataconsistency/search_stub.py
new file mode 100644
index 0000000..9f14694
--- /dev/null
+++ b/dataconsistency/search_stub.py
@@ -0,0 +1,327 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import rados
+import argparse
+
+from multiprocessing.pool import ThreadPool
+from subprocess import run, PIPE
+from tempfile import mkstemp
+
+DEF_NTHREADS = 1
+DEF_NPROCS = 1
+DEF_TMPDIR = '/tmp'
+
+FLUSH_STEP = 1000
+
+def filename2object(filename, obj_num):
+    """Build the object name: '<filename>.' plus obj_num as lowercase hex, zero-padded to 16 digits."""
+    return '{}.{}'.format(filename, format(obj_num, '0>16x'))
+
+
+def simple_check_file(ctx, file_name, obj_count, object_size=None):
+    """
+    Quick stub check: compare the size stored in the file's metadata with the size implied by its objects.
+    Only the last object is statted; every preceding object is assumed to hold exactly object_size bytes.
+
+    @param ctx:         rados context
+    @param file_name:   name of the file to check
+    @param obj_count:   number of ceph objects that store file's data
+    @param object_size: maximum size of a single ceph object; if None, it is read from the
+                        'striper.layout.object_size' xattr of the file's first object
+    @return:            True if the sizes agree; False on a mismatch or if the first object, the
+                        last object or a required xattr is missing
+    """
+    first_obj = filename2object(file_name, 0)
+    try:
+        size = int(ctx.get_xattr(first_obj, 'striper.size'))
+        if object_size is None:
+            object_size = int(ctx.get_xattr(first_obj, 'striper.layout.object_size'))
+    except (rados.NoData, rados.ObjectNotFound):
+        return False
+
+    try:
+        # Element [0] of the stat() result is used as the object's size.
+        last_obj_size = ctx.stat(filename2object(file_name, obj_count-1))[0]
+    except rados.ObjectNotFound:
+        return False
+    return object_size * (obj_count-1) + last_obj_size == size
+
+
+def fully_check_file(ctx, file_name):
+    """
+    Check whether file is 'stub' or not. Stub means its size according to metadata differs from its real size.
+    The check is full -- all file's objects are statted.
+    The number of objects is derived from the 'striper.size' and 'striper.layout.object_size' xattrs.
+
+    @param ctx:         rados context
+    @param file_name:   name of the file to check
+    @return: 0 if the file is consistent
+             1 if file's attributes are missing
+             2 if file's first object is missing
+             3 if some objects except the first one are missing
+             4 if sizes mismatch
+    """
+    first_obj = filename2object(file_name, 0)
+    try:
+        file_size = int(ctx.get_xattr(first_obj, 'striper.size'))
+        obj_size = int(ctx.get_xattr(first_obj, 'striper.layout.object_size'))
+    except rados.NoData:
+        return 1
+    except rados.ObjectNotFound:
+        return 2
+
+    # Expected number of objects is ceil(file_size / obj_size); an empty file still owns its first object.
+    if file_size > 0:
+        obj_count = (file_size + obj_size - 1) // obj_size
+    else:
+        obj_count = 1
+
+    real_size = 0
+    for obj_idx in range(obj_count):
+        try:
+            real_size += ctx.stat(filename2object(file_name, obj_idx))[0]
+        except rados.ObjectNotFound:
+            # A missing object is reported as such and must not be masked by the size comparison below.
+            return 3
+    return 4 if real_size != file_size else 0
+
+
+def sort_file(filename, tmpdir, ncpus=1):
+    """
+    Sort given file bytewise (C locale) with the external 'sort' utility.
+
+    @param filename: name of the file to sort
+    @param tmpdir:   directory where sorted file will be stored. It also will be used by 'sort' utility
+    @param ncpus:    number of sorts to run in parallel ('--parallel' option), used only if greater than 1
+    @return:         path of the sorted file, or None if inputs are missing or 'sort' failed
+    """
+    if not (os.path.exists(filename) and os.path.isdir(tmpdir)):
+        return None
+    cmd = ['sort', '-T', tmpdir]  # -T: keep sort's own temporary files in tmpdir, as documented
+    if ncpus > 1:
+        cmd.append('--parallel={0}'.format(ncpus))  # argv entries must be strings, not ints
+    tfd, sorted_path = mkstemp(dir=tmpdir)
+    with os.fdopen(tfd, 'w') as fd:
+        # Keep the caller's environment (PATH etc.) and force the C locale for bytewise ordering.
+        out = run(cmd + [filename], stdout=fd, stderr=PIPE, env=dict(os.environ, LC_ALL='C'))
+    if out.returncode != 0:
+        print("Failed to sort file {0}:\n{1}".format(filename, out.stderr.decode(errors='replace')), file=sys.stderr)
+        os.unlink(sorted_path)
+        sorted_path = None
+    return sorted_path
+
+
+def process_results(async_results):
+    """
+    Wait for pending checks and print to stdout every file that turned out to be stub.
+    A file whose check raised an exception is printed as well.
+
+    @param async_results: array [(<file_name>, <async_result>), ...], where <async_result> is the
+                          handle returned by the asynchronous application of a check to
+                          <file_name>; a falsy result marks the file as stub
+    """
+    for name, pending in async_results:
+        pending.wait()
+        # Short-circuit keeps get() from re-raising the exception of a failed check.
+        if not (pending.successful() and pending.get()):
+            print(name)
+
+
+def find_stub(dump, ceph_pool, object_size=None, nprocs=1, conffile='/etc/ceph/ceph.conf'):
+    """
+    Find stub files and print them to stdout. File is considered to be stub if its size differs
+    from the 'size' value written in its metadata.
+
+    The dump must be sorted so that all objects of a file are adjacent: the objects of every file
+    are counted and simple_check_file() is then applied to the file. Progress goes to stderr.
+
+    @param dump:        a file with the list of all ceph objects in the pool, separated by newlines
+    @param ceph_pool:   ceph pool name
+    @param object_size: maximum object size (defined by libradosstriper)
+    @param nprocs:      number of threads to use; checks run in the calling thread unless it is > 1
+    @param conffile:    ceph config file
+    """
+    cluster = rados.Rados(conffile=conffile)
+    cluster.connect()
+    ctx = cluster.open_ioctx(ceph_pool)
+    thread_pool = ThreadPool(nprocs) if nprocs > 1 else None
+    async_results = []
+
+    def check(name, count):
+        "Check one complete file: queue the check if a pool is used, otherwise run and report it now."
+        fargs = (ctx, name, count, object_size)
+        if thread_pool:
+            async_results.append((name, thread_pool.apply_async(simple_check_file, fargs)))
+        elif not simple_check_file(*fargs):
+            print(name)
+
+    last_filename, obj_count = None, 0
+    try:
+        with open(dump) as fd:
+            for idx, line in enumerate(fd, 1):
+                if idx % 1000 == 0:
+                    print("processing line ", idx, file=sys.stderr)
+                line = line.rstrip()
+                if not line:
+                    continue  # a blank line (sorts first in the C locale) must not end the scan
+                filename = line[:-17]  # strip the '.<16 hex digits>' object suffix
+                if last_filename is not None and filename != last_filename:
+                    check(last_filename, obj_count)
+                    obj_count = 0
+                obj_count += 1
+                last_filename = filename
+                if len(async_results) > FLUSH_STEP:
+                    process_results(async_results)
+                    async_results.clear()
+        if last_filename is not None:
+            check(last_filename, obj_count)  # the last file has no successor to trigger its check
+        process_results(async_results)
+    finally:
+        if thread_pool:
+            thread_pool.terminate()
+        ctx.close()
+        cluster.shutdown()
+
+
+def verify_stub(file_list, ceph_pool, conffile='/etc/ceph/ceph.conf'):
+    """
+    Run the full (live) check on every file of the list and print '<file_name> <code>' to
+    stdout for each file whose check returns a positive code.
+
+    @param file_list: path of a text file with one file name per line
+    @param ceph_pool: ceph pool where files are supposed to be stored
+    @param conffile:  ceph config file
+    """
+    rados_cluster = rados.Rados(conffile=conffile)
+    rados_cluster.connect()
+    ioctx = rados_cluster.open_ioctx(ceph_pool)
+
+    with open(file_list) as names:
+        for name in map(str.rstrip, names):
+            code = fully_check_file(ioctx, name)
+            if code > 0:
+                print(name, code)
+    ioctx.close()
+
+
+def find_dark_objects(file_list, object_dump, conffile='/etc/ceph/ceph.conf'):
+    """
+    Given the list of files with "dark" objects (i.e. first object is missing),
+    print all their objects.
+
+    An object belongs to a file if its name without the trailing '.<16 hex digits>' suffix
+    (17 characters) equals the file's name. The file names are loaded into a set and the dump
+    is streamed once, so objects are printed in dump order and neither input has to be
+    sorted. A sorted merge is deliberately not used: sorted objects are not always ordered by
+    their file names ('f.-x.<n>' sorts before 'f.<n>' although 'f' < 'f.-x').
+
+    @param file_list: list of files to check
+    @param object_dump: list of all objects stored in the pool
+    @param conffile:  ceph config file (unused, no connection to the cluster is made)
+    """
+    with open(file_list) as file_fd:
+        # Blank lines are dropped so that they cannot match objects with too short names.
+        dark_files = {name for name in map(str.strip, file_fd) if name}
+    if not dark_files:
+        return
+
+    with open(object_dump) as obj_fd:
+        for line in obj_fd:
+            obj = line.strip()
+            # Drop the 17-character object suffix to get the name of the owning file.
+            if obj[:-17] in dark_files:
+                print(obj)
+
+
+def parse_args():
+    """Parse the command line; return the argparse namespace holding 'subcommand' and its options."""
+    parser = argparse.ArgumentParser(epilog="""
+The script is used to identify potentially 'stub' files on the ceph-based storage element. A file is
+considered to be stub if its size differs from the 'size' value written in file's metadata
+
+The check is done in three steps: search for potentially stub files, verification of these files, and dark object search.
+
+To perform the initial search a dump of all objects from a given rados pool is needed. The script will then
+count the sizes of all objects (taken from object dump) corresponding to given file and compare this value with the one
+stored in file's metadata. If there is a mismatch, file is stub.
+
+Please note that the files that were partially transferred at the time when the dump was collected
+also will be reported as stub. That means that additional verification of all found files is absolutely
+necessary.
+
+To verify files provide file list to the script in verification mode. It will stat every individual object
+of the file (in "live" mode, not from object dump) and (again) compare sizes.
+
+Note that if a file is "very dark", i.e. its first object is missing, you will not be able to delete it using
+rados striper, only rados commands on objects will work. To identify those objects, a third step is necessary:
+search for dark objects. To do this first collect a fresh dump of all objects (it should be newer then the
+one used for initial search, to avoid false-positives from incomplete deletions), and then run script in
+search_dark_objects mode.
+
+Usage example:
+/* search for potentially stub files, save results */
+$ search_stub.py search_stub -p lhcb -o "$((64*1024*1024))" -s ./lhcb_dump_sorted | awk '{print $1}' | tee potentially_stub
+/* verify that previously found files are indeed stub */
+$ search_stub.py verify_stub -p lhcb ./potentially_stub | tee really_stub
+/* collect fresh objects dump, sort it (optionall), then search for dark objects */
+$ search_stub.py search_dark_objects -s -d <( grep ' 2$' really_stub | awk '{print $1}' ) ./new_lhcb_dump_sorted | tee dark_objects
+
+As a result you get list of stub files (really stub, where second column is 1, 3 or 4) and dark objects (dark_objects file).
+""", formatter_class=argparse.RawTextHelpFormatter)
+    subparsers = parser.add_subparsers(dest='subcommand')
+    # Without a subcommand the script would silently do nothing, so argparse must insist on one.
+    subparsers.required = True
+    p1 = subparsers.add_parser("search_stub", help="Search for potentially stub files")
+    p1.add_argument('-p', '--pool', help="Rados pool to use", required=True)
+    p1.add_argument('-n', '--nthreads', help="Number of threads to use. Default is {0}.".format(DEF_NTHREADS), default=DEF_NTHREADS, type=int)
+    p1.add_argument('-N', '--Nprocs', help="Number of processes to use for sort. Default is {0}.".format(DEF_NPROCS), default=DEF_NPROCS, type=int)
+    p1.add_argument('-c', '--cleanup', help="Remove temporary files after exit.", action='store_true')
+    p1.add_argument('-o', '--object_size', type=int, default=None,
+            help="Object size. If omitted, it will be requested from each object. "
+            "Such an approach has lower performance, but allow one to handle files with different object sizes")
+    gr = p1.add_mutually_exclusive_group()
+    gr.add_argument('-s', '--sorted', help="Indicates that the file with object names is already sorted.", action='store_true')
+    gr.add_argument('-t', '--tmpdir', help="Temporary directory to store sorted object dump. Default is {0}".format(DEF_TMPDIR), default=DEF_TMPDIR)
+    p1.add_argument('obj_dump', help="File with all object names from given pool.")
+
+    p2 = subparsers.add_parser("verify_stub", help="Verify that files are indeed stub")
+    p2.add_argument('-p', '--pool', help="Rados pool to use", required=True)
+    p2.add_argument('stub_list', help="List of stub files.")
+
+    p3 = subparsers.add_parser("search_dark_objects", help="Print objects of 'very dark' files identified earlier")
+    p3.add_argument('-d', '--dark_list', help="List of 'dark' files.", required=True)
+    gr = p3.add_mutually_exclusive_group()
+    gr.add_argument('-s', '--sorted', help="Indicates that the file with object names is already sorted.", action='store_true')
+    gr.add_argument('-t', '--tmpdir', help="Temporary directory to store sorted object dump. Default is {0}".format(DEF_TMPDIR), default=DEF_TMPDIR)
+    p3.add_argument('obj_dump', help="List of all objects in the pool.")
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    if args.subcommand == 'search_stub':
+        # NOTE(review): args.Nprocs is parsed but is not forwarded to sort_file().
+        dump = args.obj_dump if args.sorted else sort_file(args.obj_dump, args.tmpdir)
+        if dump is None:
+            sys.exit("Can not sort {0}, nothing was checked".format(args.obj_dump))
+        try:
+            find_stub(dump, args.pool, args.object_size, args.nthreads)
+        finally:
+            if args.cleanup and not args.sorted:
+                os.unlink(dump)
+            elif args.cleanup:
+                print("Will not delete file {0} that was not created by me".format(dump), file=sys.stderr)
+    elif args.subcommand == 'verify_stub':
+        verify_stub(args.stub_list, args.pool)
+    elif args.subcommand == 'search_dark_objects':
+        if args.dark_list is None:
+            sys.exit("List of 'dark' files is required (-d/--dark_list)")
+        if args.sorted:
+            obj_dump, file_dump = args.obj_dump, args.dark_list
+        else:
+            obj_dump, file_dump = (sort_file(path, args.tmpdir) for path in (args.obj_dump, args.dark_list))
+        if obj_dump is None or file_dump is None:
+            sys.exit("Can not sort input files, nothing was searched")
+        find_dark_objects(file_dump, obj_dump)