From b12454caa832a347574571e79d127ce9649dd9ec Mon Sep 17 00:00:00 2001
From: dsbarinov1
Date: Fri, 21 Jul 2023 12:18:53 +0300
Subject: [PATCH 1/3] Update evaluate.py to be able to profile and correctly validate the outputs on VM

---
 evaluate.py | 1488 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1488 insertions(+)
 create mode 100644 evaluate.py

diff --git a/evaluate.py b/evaluate.py
new file mode 100644
index 000000000000..a482f8aef2de
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,1488 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import numpy as np
+
+import tvm
+from tvm import relay
+from tvm.relay import testing
+from tvm import autotvm
+from tvm.contrib import utils, ndk
+from tvm.topi import testing
+from tvm.runtime import profiler_vm
+
+from tvm.relay.op import register_mixed_precision_conversion
+
+# TODO(amalyshe): the current solution with a global variable is ugly and
+# should be replaced with something more convenient
+conv2d_acc = "float32"
+
+# Pick a priority > 10 to overwrite defaults, higher priorities take precedence
+@register_mixed_precision_conversion("nn.conv2d", level=11)
+def conv2d_mixed_precision_rule(call_node: "relay.Call", mixed_precision_type: str):
+    global conv2d_acc
+    return [
+        # always do main calculation in mixed_precision_type
+        relay.transform.mixed_precision.MIXED_PRECISION_ALWAYS,
+        # the dtype for the accumulator
+        conv2d_acc,
+        # the output dtype for the operation (usually fp16)
+        mixed_precision_type,
+    ]
+
+@register_mixed_precision_conversion("nn.dense", level=11)
+def dense_mixed_precision_rule(call_node: "relay.Call", mixed_precision_type: str):
+    global conv2d_acc
+    return [
+        # always do main calculation in mixed_precision_type
+        relay.transform.mixed_precision.MIXED_PRECISION_ALWAYS,
+        # the dtype for the accumulator
+        conv2d_acc,
+        # the output dtype for the operation (usually fp16)
+        mixed_precision_type,
+    ]
+
+
+class ModelImporter(object):
+    def available_models(self):
+        import inspect
+        models = []
+        for method in inspect.getmembers(type(self)):
+            if method[0].startswith("import_"):
+                models.append(method[0].split("import_")[1])
+        return models
+
+    def __call__(self, model, *args, **kwargs):
+        import inspect
+
+        for method in inspect.getmembers(type(self)):
+            if "import_" + model == method[0]:
+                return method[1](self, *args, **kwargs)
+        raise ValueError("import_" + model + " not found.")
+
+
+    def get_onnx_from_tf1(self, model_url, filename, input_names, output_names, shape_override = None):
+        tf_model_file = os.path.abspath(
+            os.path.dirname(os.path.realpath(__file__))
+            + "/models/{}.pb".format(filename)
+        )
+
+        from tvm.contrib import download
+        download.download(model_url, tf_model_file)
+        # converted using command line:
+        # python -m tf2onnx.convert --graphdef mace_resnet-v2-50.pb 
--output mace_resnet-v2-50.onnx --inputs input:0[1,224,224,3] --outputs resnet_v2_50/predictions/Reshape_1:0 + onnx_model_file = os.path.abspath( + os.path.dirname(os.path.realpath(__file__)) + + "/models/{}.onnx".format(filename)) + if os.path.exists(onnx_model_file) == False: + import tf2onnx + import tensorflow as tf + try: + tf_compat_v1 = tf.compat.v1 + except ImportError: + tf_compat_v1 = tf + # Tensorflow utility functions + import tvm.relay.testing.tf as tf_testing + + with tf_compat_v1.gfile.GFile(tf_model_file, "rb") as f: + graph_def = tf_compat_v1.GraphDef() + graph_def.ParseFromString(f.read()) + #graph = tf.import_graph_def(graph_def, name="") + # Call the utility to import the graph definition into default graph. + graph_def = tf_testing.ProcessGraphDefParam(graph_def) + + model_proto, external_tensor_storage = tf2onnx.convert.from_graph_def(graph_def, + name=filename, input_names=input_names, output_names=output_names, + shape_override = shape_override, + output_path=onnx_model_file) + + return onnx_model_file + + + def get_graphdef_from_tf1(self, model_url, filename): + graph_def = None + tf_model_file = os.path.abspath( + os.path.dirname(os.path.realpath(__file__)) + + "/models/{}.pb".format(filename) + ) + + from tvm.contrib import download + download.download(model_url, tf_model_file) + # converted using command line: + # python -m tf2onnx.convert --graphdef mace_resnet-v2-50.pb --output mace_resnet-v2-50.onnx --inputs input:0[1,224,224,3] --outputs resnet_v2_50/predictions/Reshape_1:0 + onnx_model_file = os.path.abspath( + os.path.dirname(os.path.realpath(__file__)) + + "/../models/{}.onnx".format(filename)) + import tensorflow as tf + try: + tf_compat_v1 = tf.compat.v1 + except ImportError: + tf_compat_v1 = tf + # Tensorflow utility functions + import tvm.relay.testing.tf as tf_testing + + with tf_compat_v1.gfile.GFile(tf_model_file, "rb") as f: + graph_def = tf_compat_v1.GraphDef() + graph_def.ParseFromString(f.read()) + graph_def = tf_testing.ProcessGraphDefParam(graph_def) + return graph_def + + def import_mace_mobilenetv1_nhwc(self, target="llvm", dtype="float32"): + model_url = "https://cnbj1.fds.api.xiaomi.com/mace/miai-models/mobilenet-v1/mobilenet-v1-1.0.pb" + filename = "mace_mobilenet-v1-1.0" + graph_def = self.get_graphdef_from_tf1(model_url, filename) + shape_dict = {"input": (1, 224, 224, 3)} + mod, params = relay.frontend.from_tensorflow(graph_def, shape=shape_dict, + outputs=["MobilenetV1/Predictions/Reshape_1"]) + + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + dtype = "float32" if dtype == "float32" else "float16" + + return (mod, params, shape_dict, dtype, target, ImageNetValidator(shape_dict, "NHWC", preproc="keras_mobilenetv1")) + + def import_mace_mobilenetv1_nchw(self, target="llvm", dtype="float32"): + model_url = "https://cnbj1.fds.api.xiaomi.com/mace/miai-models/mobilenet-v1/mobilenet-v1-1.0.pb" + filename = "mace_mobilenet-v1-1.0" + input_names = ["input:0"] + output_names = ["MobilenetV1/Predictions/Reshape_1:0"] + onnx_model_file = self.get_onnx_from_tf1(model_url, filename, input_names, output_names) + import onnx + model = onnx.load(onnx_model_file) + shape_dict = {'input:0': [1, 224, 224, 3]} + mod, params = relay.frontend.from_onnx(model, shape_dict, freeze_params=True) + + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + dtype = "float32" if dtype == "float32" else "float16" + + return (mod, params, shape_dict, dtype, target, ImageNetValidator(shape_dict, "NHWC", preproc="keras_mobilenetv1")) 
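+
+    # NOTE(editor): illustrative sketch, not part of the original patch.
+    # Every import_* method above follows the same recipe: build or load a
+    # model, optionally downcast it via convert_to_dtype(), and return the
+    # (mod, params, shape_dict, dtype, target[, validator]) tuple consumed
+    # by Executor._schedule_jobs(). A minimal self-contained example on a
+    # synthetic dense layer (hypothetical model, not from the patch):
+    def import_example_dense(self, target="llvm", dtype="float32"):
+        dtype_init = "float32"
+        input_shape = (1, 64)
+        weight_shape = (32, 64)
+        data = relay.var("data", shape=input_shape, dtype=dtype_init)
+        weight = relay.var("weight", shape=weight_shape, dtype=dtype_init)
+        dense = relay.nn.dense(data, weight, out_dtype=dtype_init)
+        mod = relay.Function([data, weight], dense)
+        np.random.seed(0)
+        weight_data = np.random.uniform(-1, 1, size=weight_shape).astype(dtype_init)
+        params = {"weight": tvm.nd.array(weight_data)}
+
+        # downcast to float16 -- the same tail as in the importers above
+        mod = convert_to_dtype(mod, dtype)
+        dtype = "float32" if dtype == "float32" else "float16"
+
+        return (mod, params, {"data": input_shape}, dtype, target)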
+ + def import_conv2d_deeplabv3(self, target="llvm", dtype="float32"): + dtype_init="float32" + input_shape = (1, 513, 513, 3) + filter_shape = (3, 3, 3, 32) + bias_shape = (1, 1, 1, 32) + A = relay.var("data", shape=input_shape, dtype=dtype_init) + B = relay.var("weight", shape=filter_shape, dtype=dtype_init) + bias = relay.var("bias", shape=bias_shape, dtype=dtype_init) + + #C = relay.nn.relu(A) + conv = relay.nn.conv2d(A, B, data_layout="NHWC", kernel_layout="HWIO", + padding=[1,1,1,1],strides=[2,2], + out_dtype=dtype_init, channels=32, kernel_size=(3,3)) + D = relay.op.add(conv, bias) + D = relay.op.nn.relu(D) + + mod = relay.Function([A, B, bias], D) + np.random.seed(0) + initializer = relay.testing.init.Xavier() + filter_data = np.zeros(filter_shape).astype(dtype_init) + bias_data = np.zeros(bias_shape).astype(dtype_init) + initializer("weight", filter_data) + initializer("bias", bias_data) + params = { + "weight": tvm.nd.array(filter_data), + "bias" : tvm.nd.array(bias_data), + } + + # downcast to float16 + mod = convert_to_dtype(mod, dtype) + dtype = "float32" if dtype == "float32" else "float16" + + return (mod, params, {"data": input_shape}, dtype, target) + + + def import_mace_resnet50_v2(self, target="llvm", dtype="float32"): + model_url = "https://cnbj1.fds.api.xiaomi.com/mace/miai-models/resnet-v2-50/resnet-v2-50.pb" + filename = "mace_resnet-v2-50" + input_names = ["input:0"] + shape_override = {"input:0": [1, 299, 299, 3]} + output_names = ["resnet_v2_50/predictions/Reshape_1:0"] + onnx_model_file = self.get_onnx_from_tf1(model_url, filename, input_names, output_names, shape_override) + import onnx + model = onnx.load(onnx_model_file) + mod, params = relay.frontend.from_onnx(model, shape_override, freeze_params=True) + + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + dtype = "float32" if dtype == "float32" else "float16" + + return (mod, params, shape_override, dtype, target, \ + ImageNetValidator(shape_override, "NHWC", preproc="keras")) + + + def import_ac_resnet50_tf(self, target="llvm", dtype="float32"): + model_url = "https://download.01.org/opencv/public_models/012020/resnet-50-tf/resnet_v1-50.pb" + filename = "resnet_v1-50" + input_names = ["map/TensorArrayStack/TensorArrayGatherV3:0"] + shape_override = {"map/TensorArrayStack/TensorArrayGatherV3:0": [1, 224, 224, 3]} + output_names = ["softmax_tensor:0"] + onnx_model_file = self.get_onnx_from_tf1(model_url, filename, input_names, output_names, shape_override) + import onnx + model = onnx.load(onnx_model_file) + shape_dict = {'map/TensorArrayStack/TensorArrayGatherV3:0': [1, 224, 224, 3]} + mod, params = relay.frontend.from_onnx(model, shape_dict, freeze_params=True) + + mod = relay.quantize.prerequisite_optimize(mod, params) + + return (mod, params, shape_dict, dtype, target, ImageNetValidator(shape_dict, "NHWC", preproc="keras_mobilenetv1")) + + + def import_mace_inceptionv3(self, target="llvm", dtype="float32"): + model_url = "https://cnbj1.fds.api.xiaomi.com/mace/miai-models/inception-v3/inception-v3.pb" + filename = "mace_inception-v3" + input_names = ["input:0"] + output_names = ["InceptionV3/Predictions/Reshape_1:0"] + onnx_model_file = self.get_onnx_from_tf1(model_url, filename, input_names, output_names) + import onnx + model = onnx.load(onnx_model_file) + shape_dict = {'input:0': [1, 299, 299, 3]} + mod, params = relay.frontend.from_onnx(model, shape_dict, freeze_params=True) + + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + dtype = "float32" if dtype == 
"float32" else "float16" + + return (mod, params, shape_dict, dtype, target, ImageNetValidator(shape_dict, "NHWC", preproc="keras")) + + def import_mxnet_vgg16(self, target="llvm", dtype="float32"): + model, input_shape = gluon_model("vgg16", batch_size=1) + shape_dict = {"data": input_shape} + mod, params = relay.frontend.from_mxnet(model, shape_dict) + + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + dtype = "float32" if dtype == "float32" else "float16" + + return (mod, params, shape_dict, dtype, target, ImageNetValidator(shape_dict, preproc="mxnet")) + + def import_mace_deeplabv3(self, target="llvm", dtype="float32"): + model_url = "https://cnbj1.fds.api.xiaomi.com/mace/miai-models/deeplab-v3-plus/deeplab-v3-plus-mobilenet-v2.pb" + filename = "mace_deeplab-v3-plus-mobilenet-v2" + graph_def = self.get_graphdef_from_tf1(model_url, filename) + shape_dict = {"sub_7": (1, 513, 513, 3)} + mod, params = relay.frontend.from_tensorflow(graph_def, shape=shape_dict, + outputs=["ResizeBilinear_2"]) + + # hack for insufficient pattern support in FlattenAtrousConv + # if it is called after convert to fp16 with mixed precision, it will not be able + # to catch cast. + # TODO(amalyshe) We need to extend FlattenAtrousConv but for now we are calling it + # explicitly + mod = tvm.relay.transform.FlattenAtrousConv()(mod) + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + dtype = "float32" if dtype == "float32" else "float16" + + return (mod, params, shape_dict, dtype, target, Deeplabv3Validator(shape_dict, dtype)) + + + def import_mace_yolov3(self, target="llvm", dtype="float32"): + model_url = "http://cnbj1.fds.api.xiaomi.com/mace/miai-models/yolo-v3/yolo-v3.pb" + filename = "mace_yolo-v3" + graph_def = self.get_graphdef_from_tf1(model_url, filename) + shape_dict = {"input_1": (1, 416, 416, 3)} + mod, params = relay.frontend.from_tensorflow(graph_def, shape=shape_dict, + outputs=["conv2d_59/BiasAdd","conv2d_67/BiasAdd","conv2d_75/BiasAdd"]) + + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + dtype = "float32" if dtype == "float32" else "float16" + + return (mod, params, shape_dict, dtype, target, Yolov3Validator(shape_dict)) + + + def import_onnx_ssd_resnet34(self, target="llvm", dtype="float32"): + archive_url = "https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/ssd/model/ssd-12.tar.gz" + filename = "ssd-12.tar.gz" + from tvm.contrib import download + import onnx + import tarfile + download.download(archive_url, filename) + archive = tarfile.open(filename) + directory = "ssd_resnet34" + archive.extractall(directory) + archive.close() + directory = os.path.join(directory, "ssd-12") + model_file = os.path.join(directory, "ssd-12.onnx") + onnx_model = onnx.load(model_file) + shape_dict = {"image": (1, 3, 1200, 1200)} + mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, freeze_params=True) + test_files_dir = os.path.join(directory, "test_data_set_0") + + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + dtype = "float32" if dtype == "float32" else "float16" + + return (mod, params, shape_dict, dtype, target, ONNXTestSamplesValidator(test_files_dir, input_names=list(shape_dict.keys()))) + + + def import_onnx_yolo_v3(self, target="llvm", dtype="float32"): + archive_url = "https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/yolov3/model/yolov3-12.tar.gz" + filename = "yolov3-12.tar.gz" + from tvm.contrib import download + import onnx + import tarfile + 
download.download(archive_url, filename) + archive = tarfile.open(filename) + directory = "onnx_yolov3" + archive.extractall(directory) + archive.close() + directory = os.path.join(directory, "yolov3-12") + model_file = os.path.join(directory, "yolov3-12.onnx") + onnx_model = onnx.load(model_file) + shape_dict = { + "input_1": (1, 3, 416, 416), + "image_shape": (1, 2), + } + mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, freeze_params=True) + test_files_dir = os.path.join(directory, "test_data_set_0") + + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + dtype = "float32" if dtype == "float32" else "float16" + print("=" * 10) + print(mod) + print("=" * 10) + + return (mod, params, shape_dict, dtype, target, ONNXTestSamplesValidator(test_files_dir, input_names=list(shape_dict.keys()))) + + + def import_onnx_faster_rcnn(self, target="llvm", dtype="float32"): + archive_url = "https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12.onnx" + filename = "FasterRCNN-12" + from tvm.contrib import download + import onnx + download.download(archive_url, filename) + onnx_model = onnx.load(filename) + shape_dict = { + "image": (3, 800, 800), + } + mod_file = "onnx_faster_rcnn_mod.json" + params_file = "onnx_faster_rcnn_params.json" + if not os.path.exists(mod_file): + mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, freeze_params=True) + + # downcast to float16 + mod = convert_to_dtype(mod["main"], dtype) + with open(mod_file, "w") as file: + file.write(tvm.ir.save_json(mod)) + + with open(params_file, "wb") as file: + file.write(relay.save_param_dict(params)) + else: + with open(mod_file, "r") as file: + mod = tvm.ir.load_json(file.read()) + + with open(params_file, "rb") as file: + params = relay.load_param_dict(file.read()) + dtype = "float32" if dtype == "float32" else "float16" + print("=" * 10) + print(mod) + print("=" * 10) + + return (mod, params, shape_dict, dtype, target) + + +def get_args(): + import argparse + + parser = argparse.ArgumentParser( + description="Tune and/or evaluate a curated set of models" + ) + models = ModelImporter().available_models() + + parser.add_argument( + "-m", + "--model", + type=str, + default=None, + required=True, + help="Model to tune and/or evaluate", + choices=models, + ) + parser.add_argument( + "-t", + "--type", + type=str, + default="float16", + choices=["float32", "float16", "float16_acc32"], + help="Specify whether the model should be run with single or half precision floating point values", + ) + parser.add_argument( + "-l", "--log", type=str, default=None, help="AutoTVM tuning logfile name" + ) + parser.add_argument( + "-k", "--rpc_key", type=str, default="android", help="RPC key to use" + ) + parser.add_argument( + "-r", + "--rpc_tracker_host", + type=str, + default=os.environ["TVM_TRACKER_HOST"], + help="RPC tracker host IP address", + ) + parser.add_argument( + "-p", + "--rpc_tracker_port", + type=str, + default=os.environ["TVM_TRACKER_PORT"], + help="RPC tracker host port", + ) + parser.add_argument( + "-T", + "--target", + type=str, + default="opencl --device=mali", + help="Compilation target", + ) + parser.add_argument( + "--tune", action="store_true", help="Whether or not to run autotuning" + ) + parser.add_argument( + "--debug", + action="store_true", + help="Use graph runtime debugger to output per layer perf. 
data and other statistics",
+    )
+    parser.add_argument(
+        "--VM",
+        action="store_true",
+        help="Use VM compiling and benchmarking",
+    )
+
+    args = parser.parse_args()
+    if args.rpc_tracker_port is not None:
+        args.rpc_tracker_port = int(args.rpc_tracker_port)
+    args.tuning_options = {
+        "log_filename": args.log,
+        "early_stopping": None,
+        "measure_option": autotvm.measure_option(
+            builder=autotvm.LocalBuilder(build_func=ndk.create_shared, timeout=15),
+            runner=autotvm.RPCRunner(
+                args.rpc_key,
+                host=args.rpc_tracker_host,
+                port=args.rpc_tracker_port,
+                number=50,
+                timeout=15,
+                #min_repeat_ms=150,
+                #cooldown_interval=150
+            ),
+        ),
+    }
+    return args
+
+
+args = get_args()
+
+
+def main():
+    if "opencl" in args.target:
+        executor = Executor(use_tracker="android")
+    else:
+        executor = Executor()
+    executor.schedule(args.model, target=args.target, dtype=args.type)
+    if args.tune:
+        executor.tune_pending_benchmarks()
+    else:
+        executor.tune_pending_benchmarks(apply_previous_tune=True)
+    executor.run_pending_benchmarks()
+
+
+def convert_to_dtype(mod, dtype):
+    # downcast to float16
+    if dtype == "float16" or dtype == "float16_acc32":
+        global conv2d_acc
+        conv2d_acc = "float16" if dtype == "float16" else "float32"
+        from tvm.ir import IRModule
+        mod = IRModule.from_expr(mod)
+        seq = tvm.transform.Sequential(
+            [
+                relay.transform.InferType(),
+                relay.transform.ToMixedPrecision()
+            ]
+        )
+        with tvm.transform.PassContext(
+            config={"relay.ToMixedPrecision.keep_orig_output_dtype": True},
+            opt_level=3):
+            mod = seq(mod)
+    return mod
+
+def downcast_fp16(func, module):
+    from tvm.relay.expr_functor import ExprMutator
+    from tvm.relay.expr import Call, Var, Constant, TupleGetItem
+    from tvm.relay import transform as _transform
+    from tvm.relay import cast
+    from tvm.ir import IRModule
+    from tvm.relay import function as _function
+
+    """Downcast to fp16 mutator
+
+    Parameters
+    ----------
+    func: Function
+        The original graph.
+    module: IRModule
+        The module that contains the graph.
+
+    Returns
+    -------
+    The graph after downcasting to half-precision floating-point.
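+
+    Example (editor's illustrative sketch, not part of the original patch)::
+
+        from tvm.relay.testing import mlp
+        mod, params = mlp.get_workload(batch_size=1)
+        fp16_mod = downcast_fp16(mod["main"], None)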
+ """ + filter_list = ["vision.get_valid_counts", "vision.non_max_suppression"] + + class DowncastMutator(ExprMutator): + """Downcast to fp16 mutator""" + + def visit_call(self, call): + dtype = "float32" if call.op.name in filter_list else "float16" + new_fn = self.visit(call.op) + # Collect the original dtypes + type_list = [] + if call.op.name in filter_list: + # For NMS + for arg in call.args: + if isinstance(arg, TupleGetItem) and isinstance( + arg.tuple_value, Call + ): + tuple_types = arg.tuple_value.checked_type.fields + type_list.append(tuple_types[arg.index].dtype) + if call.op.name == "vision.get_valid_counts": + tuple_types = call.checked_type.fields + for cur_type in tuple_types: + type_list.append(cur_type.dtype) + + args = [self.visit(arg) for arg in call.args] + new_args = list() + arg_idx = 0 + for arg in args: + if isinstance(arg, (Var, Constant)): + new_args.append(cast(arg, dtype=dtype)) + else: + if call.op.name in filter_list: + if ( + isinstance(arg, TupleGetItem) + and type_list[arg_idx] == "int32" + ): + new_args.append(arg) + else: + new_args.append(cast(arg, dtype=dtype)) + else: + new_args.append(arg) + arg_idx += 1 + if ( + call.op.name in filter_list + and call.op.name != "vision.get_valid_counts" + ): + return cast(Call(new_fn, new_args, call.attrs), dtype="float16") + return Call(new_fn, new_args, call.attrs) + + class UpcastMutator(ExprMutator): + """upcast output back to fp32 mutator""" + + def visit_call(self, call): + return cast(call, dtype="float32") + + def infer_type(node, mod=None): + """A method to infer the type of an intermediate node in the relay graph.""" + if isinstance(mod, IRModule): + mod["main"] = _function.Function(tvm.relay.analysis.free_vars(node), node) + mod = _transform.InferType()(mod) + entry = mod["main"] + ret = entry.body + else: + new_mod = IRModule.from_expr(node) + if mod is not None: + new_mod.update(mod) + new_mod = _transform.InferType()(new_mod) + entry = new_mod["main"] + ret = entry if isinstance(node, _function.Function) else entry.body + + return ret + + func = infer_type(func, module) + downcast_pass = DowncastMutator() + func = downcast_pass.visit(func) + upcast_pass = UpcastMutator() + func = upcast_pass.visit(func) + func = infer_type(func, module) + new_mod = IRModule.from_expr(func) + # new_mod.update(module) + return new_mod + + +def get_input_data_shape_dict(graph_def, input_shape): + if isinstance(input_shape, list): + input_names = {} + shape_dict = {} + for i in range(len(input_shape)): + input_names[i] = graph_def.graph.input[i].name + shape_dict[input_names[i]] = input_shape[i] + else: + input_names = graph_def.graph.input[0].name + shape_dict = {input_names: input_shape} + + return input_names, shape_dict + + +def gluon_model(name, batch_size=None): + if "resnet50_v1" in name or "mobilenet1.0" in name or "resnet50_v2" in name or "vgg16" in name: + model = gluon.model_zoo.vision.get_model(name, pretrained=True) + data_shape = (batch_size, 3, 224, 224) + elif "inceptionv3" in name: + model = gluon.model_zoo.vision.inception_v3(pretrained=True) + data_shape = (batch_size, 3, 299, 299) + else: + raise ValueError("Input shape unknown for gluon model: " + name) + return model, data_shape + + +def gluoncv_model(name, batch_size=None): + from gluoncv import model_zoo + if "yolo3" in name: + model = model_zoo.get_model(name, pretrained=True) + data_shape = (batch_size, 3, 416, 416) + return model, data_shape + +class Validator(object): + def __init__(self, inputs): + if isinstance(inputs, dict): + self.inputs = 
inputs + else: + assert len(inputs) == 1 + self.inputs = {"data" : inputs[0]} + def GetReference(self): + return [] + def Validate(self): + return None + def GetInputDictionary(self): + return self.inputs + +class ImageNetValidator(Validator): + def __init__(self, shape_dict, layout="NCHW", preproc=None): + assert layout in ("NCHW", "NHWC"), "Requested layout is not currently supported: " + layout + assert len(shape_dict) == 1 + from PIL import Image + from tvm.contrib import download + from os.path import join, isfile + from matplotlib import pyplot as plt + + name = list(shape_dict.keys())[0] + + # Download ImageNet categories + categ_url = "https://github.com/uwsampl/web-data/raw/main/vta/models/" + categ_fn = "synset.txt" + download.download(join(categ_url, categ_fn), categ_fn) + self.synset = eval(open(categ_fn).read()) + + # Download test image + image_url = "https://homes.cs.washington.edu/~moreau/media/vta/cat.jpg" + image_fn = "cat.png" + download.download(image_url, image_fn) + + # Prepare test image for inference + #import ipdb; ipdb.set_trace() + image = Image.open(image_fn) + if layout == "NHWC": + image = image.resize(shape_dict[name][1:-1]) + elif layout == "NCHW": + image = image.resize(shape_dict[name][2:]) + + #image = self.preprocess(np.array(image)) + if "mxnet" in preproc: + image = np.array(image) - np.array([123.0, 117.0, 104.0]) + image /= np.array([58.395, 57.12, 57.375]) + image = image.transpose((2, 0, 1)) + image = image[np.newaxis, :] + elif "keras" in preproc: + image = np.array(image)[np.newaxis, :].astype("float32") + from tensorflow.keras.applications.inception_v3 import preprocess_input + image = preprocess_input(image) + elif "keras_mobilenetv1" in preproc: + image = np.array(image)[np.newaxis, :].astype("float32") + from tensorflow.keras.applications.mobilenet import preprocess_input + image = preprocess_input(image) + + self.inputs = {name : image} + + def Validate(self, m, ref_outputs=[], data=[]): + if isinstance(m, tvm.runtime.vm.VirtualMachine) or isinstance(m, tvm.runtime.profiler_vm.VirtualMachineProfiler): + tvm_output = m.invoke("main", **data) + else: + tvm_output = m.get_output(0) + #import ipdb; ipdb.set_trace() + top_categories = np.argsort(tvm_output.asnumpy()[0]) # TODO: top_categories = np.argsort(tvm_output.asnumpy()[0]) AttributeError: 'NoneType' object has no attribute 'asnumpy' + # Report top-5 classification results + print("\nTop5 predictions: \n") + top5 = np.flip(top_categories, axis=0)[:5] + # print("\t#1:", self.synset[top_categories[-1]]) + # print("\t#2:", self.synset[top_categories[-2]]) + # print("\t#3:", self.synset[top_categories[-3]]) + # print("\t#4:", self.synset[top_categories[-4]]) + # print("\t#5:", self.synset[top_categories[-5]]) + print("\t#1:", self.synset[top5[1-1]]) + print("\t#2:", self.synset[top5[2-1]]) + print("\t#3:", self.synset[top5[3-1]]) + print("\t#4:", self.synset[top5[4-1]]) + print("\t#5:", self.synset[top5[5-1]]) + print("\t", top5) + ImageNetClassifier = False + for k in top_categories[-5:]: + if "cat" in self.synset[k]: + ImageNetClassifier = True + assert ImageNetClassifier, "Failed ImageNet classifier validation check" + + +class VOCValidator(Validator): + # this function is from yolo3.utils.letterbox_image + def letterbox_image(self, image, size): + '''resize image with unchanged aspect ratio using padding''' + iw, ih = image.size + w, h = size + scale = min(w/iw, h/ih) + nw = int(iw*scale) + nh = int(ih*scale) + + from PIL import Image + image = image.resize((nw,nh), Image.BICUBIC) + 
new_image = Image.new('RGB', size, (128,128,128)) + new_image.paste(image, ((w-nw)//2, (h-nh)//2)) + return new_image + + def preprocess(self, img): + model_image_size = (416, 416) + boxed_image = self.letterbox_image(img, tuple(reversed(model_image_size))) + image_data = np.array(boxed_image, dtype='float32') + image_data /= 255. + image_data = np.transpose(image_data, [2, 0, 1]) + image_data = np.expand_dims(image_data, 0) + return image_data + + def __init__(self, shape_dict, layout="NCHW", preproc=None): + assert layout in ("NCHW", "NHWC"), "Requested layout is not currently supported: " + layout + assert len(shape_dict) == 1 + from PIL import Image + from tvm.contrib import download + from os.path import join, isfile + from matplotlib import pyplot as plt + + name = list(shape_dict.keys())[0] + + # Download test image + image_url = "https://raw.githubusercontent.com/zhreshold/mxnet-ssd/master/data/demo/dog.jpg" + image_fn = "dog.png" + download.download(image_url, image_fn) + + # Prepare test image for inference + #import ipdb; ipdb.set_trace() + image = Image.open(image_fn) + image_data = self.preprocess(image) + + self.inputs = {name : image_data} + + def Validate(self, m, ref_outputs=[]): + # class_IDs, scores, bounding_boxs + classid = m.get_output(0) + scores = m.get_output(1) + bounding_boxs = m.get_output(2) + for a in classid: + print(a) + +class Deeplabv3Validator(Validator): + def __init__(self, input_shape, dtype): + from os.path import join + from tvm.contrib import download + assert isinstance(input_shape, dict) + assert dtype in ["float16", "float32"] + np.random.seed(1) + self.dtype = dtype + self.inputs = {} + for key in input_shape: + self.inputs[key] = np.random.normal(size=input_shape[key]).astype("float32") + + categ_url = "https://github.com/Deelvin/qualcomm/raw/avoronov/rebase_master_v2/" + categ_fn = "deeplabv3_reference_output_{}".format(dtype) + download.download(join(categ_url, categ_fn), categ_fn) + # genered by target="llvm -keys=cpu" at np.random.seed(1) + self.ref_outputs = eval(open(categ_fn).read()) + + def GetReference(self): + return self.ref_outputs + + def Validate(self, m, ref_outputs=[]): + if self.dtype == "float16": + rtol=1e-1 + atol=1e-1 + if self.dtype == "float32": + rtol=1e-3 + atol=1e-3 + if isinstance(m, tvm.runtime.vm.VirtualMachine) or isinstance(m, tvm.runtime.profiler_vm.VirtualMachineProfiler): + outputs = m.get_outputs() + for i in range(len(outputs)): + tvm_output = outputs[i] + np.testing.assert_allclose(tvm_output.asnumpy(), ref_outputs[i], rtol=rtol, atol=atol) + print("Deeplabv3Validator pass:", "rtol", rtol, "atol",atol) + else: + for i in range(m.get_num_outputs()): + tvm_output = m.get_output(i) + np.testing.assert_allclose(tvm_output.asnumpy(), ref_outputs[i], rtol=rtol, atol=atol) + print("Deeplabv3Validator pass:", "rtol", rtol, "atol",atol) + +class Yolov3Validator(Validator): + class BoundBox: + def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None): + self.xmin = xmin + self.ymin = ymin + self.xmax = xmax + self.ymax = ymax + self.objness = objness + self.classes = classes + self.label = -1 + self.score = -1 + + def get_label(self): + if self.label == -1: + self.label = np.argmax(self.classes) + + return self.label + + def get_score(self): + if self.score == -1: + self.score = self.classes[self.get_label()] + + return self.score + + def decode_netout(netout, anchors, obj_thresh, net_h, net_w): + grid_h, grid_w = netout.shape[:2] + nb_box = 3 + netout = netout.reshape((grid_h, grid_w, nb_box, -1)) + 
nb_class = netout.shape[-1] - 5 + boxes = [] + + def _sigmoid(x): + return 1. / (1. + np.exp(-x)) + + netout[..., :2] = _sigmoid(netout[..., :2]) + netout[..., 4:] = _sigmoid(netout[..., 4:]) + netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:] + netout[..., 5:] *= netout[..., 5:] > obj_thresh + + for i in range(grid_h*grid_w): + row = i / grid_w + col = i % grid_w + for b in range(nb_box): + # 4th element is objectness score + objectness = netout[int(row)][int(col)][b][4] + if(objectness.all() <= obj_thresh): continue + # first 4 elements are x, y, w, and h + x, y, w, h = netout[int(row)][int(col)][b][:4] + x = (col + x) / grid_w # center position, unit: image width + y = (row + y) / grid_h # center position, unit: image height + w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width + h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height + # last elements are class probabilities + classes = netout[int(row)][col][b][5:] + box = Yolov3Validator.BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes) + boxes.append(box) + return boxes + + def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w): + new_w, new_h = net_w, net_h + for i in range(len(boxes)): + x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w + y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h + boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w) + boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w) + boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h) + boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h) + + def bbox_iou(box1, box2): + def _interval_overlap(interval_a, interval_b): + x1, x2 = interval_a + x3, x4 = interval_b + if x3 < x1: + if x4 < x1: + return 0 + else: + return min(x2,x4) - x1 + else: + if x2 < x3: + return 0 + else: + return min(x2,x4) - x3 + intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) + intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) + intersect = intersect_w * intersect_h + w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin + w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin + union = w1*h1 + w2*h2 - intersect + return float(intersect) / union + + def do_nms(boxes, nms_thresh): + if len(boxes) > 0: + nb_class = len(boxes[0].classes) + else: + return + for c in range(nb_class): + sorted_indices = np.argsort([-box.classes[c] for box in boxes]) + for i in range(len(sorted_indices)): + index_i = sorted_indices[i] + if boxes[index_i].classes[c] == 0: continue + for j in range(i+1, len(sorted_indices)): + index_j = sorted_indices[j] + if Yolov3Validator.bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: + boxes[index_j].classes[c] = 0 + + # load and prepare an image + @staticmethod + def load_image_pixels(filename, shape): + try: + from keras.preprocessing.image import load_img + from keras.preprocessing.image import img_to_array + except: + from tensorflow.keras.utils import load_img + from tensorflow.keras.utils import img_to_array + # load the image to get its shape + image = load_img(filename) + width, height = image.size + # load the image with the required size + image = load_img(filename, target_size=shape) + # convert to numpy array + image = img_to_array(image) + # scale pixel values to [0, 1] + image = image.astype('float32') + image /= 255.0 + # add a dimension so that we have one sample + image = np.expand_dims(image, 0) + return image, width, height + + # get all of the results above a 
threshold + @staticmethod + def get_boxes(boxes, labels, thresh): + v_boxes, v_labels, v_scores = list(), list(), list() + # enumerate all boxes + for box in boxes: + # enumerate all possible labels + for i in range(len(labels)): + # check if the threshold for this label is high enough + if box.classes[i] > thresh: + v_boxes.append(box) + v_labels.append(labels[i]) + v_scores.append(box.classes[i]*100) + # don't break, many labels may trigger for one box + return v_boxes, v_labels, v_scores + + # draw all results + @staticmethod + def draw_boxes(filename, v_boxes, v_labels, v_scores): + from matplotlib import pyplot + from matplotlib.patches import Rectangle + # load the image + from PIL import Image + if ".png" not in filename: + name, extension = filename.rsplit('.', 1) + im1 = Image.open(filename) + filename = "{}.png".format(name) + im1.save(filename) + + data = pyplot.imread(filename) + # plot the image + pyplot.imshow(data) + # get the context for drawing boxes + ax = pyplot.gca() + # plot each box + for i in range(len(v_boxes)): + box = v_boxes[i] + # get coordinates + y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax + # calculate width and height of the box + width, height = x2 - x1, y2 - y1 + # create the shape + rect = Rectangle((x1, y1), width, height, fill=False, color='white') + # draw the box + ax.add_patch(rect) + # draw text and score in top left corner + label = "%s (%.3f)" % (v_labels[i], v_scores[i]) + pyplot.text(x1, y1, label, color='white') + # show the plot + pyplot.show() + + def __init__(self, input_shape, dtype="float32"): + from tvm.contrib import download + from os.path import join + n, h, w, c = list(input_shape.values())[0] + self.input_w, self.input_h = h, w + + # Download Coco names + names_url = "https://github.com/pjreddie/darknet/raw/master/data/" + names_fn = "coco.names" + download.download(join(names_url, names_fn), names_fn, overwrite=True) + self.labels = [line.rstrip() for line in open(names_fn).readlines()] + + # Download test image + image_url = "https://raw.githubusercontent.com/pjreddie/darknet/master/data/dog.jpg" + self.image_fn = "dog.jpg" + download.download(image_url, self.image_fn) + + # # load and prepare image + image, image_w, image_h = Yolov3Validator.load_image_pixels(self.image_fn, (self.input_w, self.input_h)) + self.image_w = image_w + self.image_h = image_h + self.image = image + self.inputs = { list(input_shape.keys())[0]: image } + +class ONNXTestSamplesValidator(Validator): + def __init__(self, test_data_dir, input_names, dtype="float32"): + import onnx + import glob + from onnx import numpy_helper + + self.test_data_dir = test_data_dir + inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb'))) + self.inputs = {} + for i in range(inputs_num): + input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i)) + tensor = onnx.TensorProto() + with open(input_file, 'rb') as f: + tensor.ParseFromString(f.read()) + inp = numpy_helper.to_array(tensor) + self.inputs[input_names[i]] = inp + + def Validate(self, m, ref_outputs=[], show=False): + import onnx + import glob + from onnx import numpy_helper + # output + if isinstance(m, tvm.runtime.vm.VirtualMachine) or isinstance(m, tvm.runtime.profiler_vm.VirtualMachineProfiler): + outputs = [] + tmp = m.get_outputs() + for i in range(len(tmp)): + tvm_output = tmp[i] + outputs.append(tvm_output.asnumpy()) + else: + num_outputs = m.get_num_outputs() + outputs = [] + for i in range(num_outputs): + tvm_output = m.get_output(i) + outputs.append(tvm_output.asnumpy()) + refs 
= []
+        outputs_num = len(glob.glob(os.path.join(self.test_data_dir, 'output_*.pb')))
+        for i in range(outputs_num):
+            input_file = os.path.join(self.test_data_dir, 'output_{}.pb'.format(i))
+            tensor = onnx.TensorProto()
+            with open(input_file, 'rb') as f:
+                tensor.ParseFromString(f.read())
+            refs.append(numpy_helper.to_array(tensor))
+
+        for i in range(len(outputs)):
+            np.testing.assert_allclose(outputs[i], refs[i], rtol=1e-2, atol=1e-2)
+
+
+class Executor(object):
+    def __init__(self, use_tracker=False):
+        self.benchmarks = []
+        self.tuning_jobs = []
+        self.tracker = None
+        self.remote = None
+        self.host_target = "llvm"
+        self.use_tracker = use_tracker
+        if use_tracker == "android":
+            self.host_target = "llvm -mtriple=arm64-linux-android"
+        elif use_tracker != False:
+
+            class BackendNotImplementedForRPCBenchmarking(Exception):
+                pass
+
+            raise BackendNotImplementedForRPCBenchmarking
+
+    def schedule(self, model, *args, **kwargs):
+        importer = ModelImporter()
+        self._schedule_jobs(*importer(model, *args, **kwargs))
+
+    def run_pending_benchmarks(self):
+        for bench in self.benchmarks:
+            bench()
+
+    def tune_pending_benchmarks(
+        self, apply_previous_tune=False, opt=args.tuning_options
+    ):
+        for tune in self.tuning_jobs:
+            tune(apply_previous_tune, options=opt)
+
+    def _connect_tracker(self):
+        from tvm import rpc
+
+        print(
+            "Tracker attempting connection on {}:{}".format(
+                args.rpc_tracker_host, args.rpc_tracker_port
+            )
+        )
+        self.tracker = rpc.connect_tracker(args.rpc_tracker_host, args.rpc_tracker_port)
+        self.remote = self.tracker.request(
+            args.rpc_key, priority=0, session_timeout=6000
+        )
+        print("Tracker connected to remote RPC server")
+
+    def _disconnect_tracker(self):
+        self.remote = None
+        self.tracker = None
+
+    def advanced_time_evaluator(self, m, func_name, ctx, number=1, repeat=1, min_repeat_ms=0, time_to_work_ms=0, cooldown_interval_ms=0, f_preproc="", mod_func_name=None):
+        import inspect
+        import math
+        def ms_to_s(ms):
+            return ms / 1000
+        if mod_func_name is None:
+            one_run_time = m.module.time_evaluator(func_name, ctx, number=1,repeat=1,min_repeat_ms=0)().results[0]
+        else:
+            one_run_time = m.module.time_evaluator(func_name, ctx, number=1,repeat=1,min_repeat_ms=0)(mod_func_name).results[0]
+        repeats_to_cooldown = max(round(ms_to_s(time_to_work_ms)/one_run_time), 1)
+
+        def _time_evaluator(func_name, m, ctx, number=1, repeat=1, min_repeat_ms=0, cooldown_interval_ms=0, repeats_to_cooldown=1, f_preproc=""):
+            def evaluator(mod_func_name):
+                import time
+                from tvm.runtime.module import BenchmarkResult
+                results = []
+                for _ in range(math.ceil(repeat / repeats_to_cooldown)):
+                    time_f = m.module.time_evaluator(func_name, ctx, number=number, repeat=repeats_to_cooldown, min_repeat_ms=min_repeat_ms, f_preproc=f_preproc)
+                    if mod_func_name is None:
+                        results.append(time_f().results)
+                    else:
+                        results.append(time_f(mod_func_name).results)
+                    time.sleep(ms_to_s(cooldown_interval_ms))
+                return BenchmarkResult([np.mean(r) for r in results])
+            return evaluator
+
+        if inspect.signature(m.module.time_evaluator).parameters.get("cooldown_interval_ms"):
+            time_f = m.module.time_evaluator(func_name, ctx, number=number, repeat=repeat, min_repeat_ms=min_repeat_ms, cooldown_interval_ms=cooldown_interval_ms, repeats_to_cooldown=repeats_to_cooldown, f_preproc=f_preproc)
+        else:
+            time_f = _time_evaluator(func_name, m, ctx, number=number, repeat=repeat, min_repeat_ms=min_repeat_ms, cooldown_interval_ms=cooldown_interval_ms, 
repeats_to_cooldown=repeats_to_cooldown, f_preproc=f_preproc) + + return time_f + + def check_distribution(self, y, tolerance=0.05, show_plot=False): + import warnings + from sklearn.linear_model import LinearRegression + + num_samples = len(y) + x = np.array(list(range(num_samples))).reshape((-1, 1)) + + model = LinearRegression(fit_intercept=True, copy_X=True) + model.fit(x, y) + print("intercept (b0):", model.intercept_) + print("slope (b1):", model.coef_) + print("coefficient of determination:", model.score(x, y)) + if (model.score(x, y) >= tolerance): + warnings.warn("The coefficient of determination is higher than the acceptable coefficient of determination, use cooling of the device.", UserWarning) + show_plot = True + + if show_plot: + import matplotlib.pyplot as plt + plt.plot(y) + plt.plot(model.predict(x)) + plt.show() + + def _benchmark( + self, + tvm_mod, + params, + input_shape, + target="llvm", + target_host="llvm", + dtype="float32", + validator=None + ): + if args.debug: + from tvm.contrib.debugger import debug_runtime as graph_executor + else: + from tvm.contrib import graph_executor + + print("Benchmark GraphExecutor") + if self.use_tracker and self.remote == None: + self._connect_tracker() + + with relay.build_config(opt_level=3): + # lib2 = relay.build(tvm_mod, target=target, target_host=target_host, params=params) + # lib2.export_library("_model.so", ndk.create_shared) + graph, lib, params = relay.build( + tvm_mod, target_host=target_host, target=target, params=params + ) + # print("JSON:\n", graph) + + if self.remote: + print("Using Android OpenCL runtime over RPC") + temp = utils.tempdir() + dso_binary = "dev_lib_cl.so" + dso_binary_path = temp.relpath(dso_binary) + if "opencl" in target: + ctx = self.remote.cl(0) + else: + ctx = self.remote.cpu(0) + lib.export_library(dso_binary_path, ndk.create_shared) + remote_path = "/data/local/tmp/" + dso_binary + self.remote.upload(dso_binary_path) + print("Uploading binary...") + rlib = self.remote.load_module(dso_binary) + m = graph_executor.create(graph, rlib, ctx) + else: + print("Using local runtime") + ctx = tvm.device(target, 0) + m = graph_executor.create(graph, lib, ctx) + + m.set_input(**params) + inputs = [] + if isinstance(validator, Validator): + inputs = validator.GetInputDictionary() + for key, data in inputs.items(): + m.set_input(key, data) + elif isinstance(input_shape, dict): + for key in input_shape: + inputs.append(np.random.normal(size=input_shape[key]).astype(dtype)) + m.set_input(key, inputs[-1]) + else: + inputs.append(np.random.normal(size=input_shape).astype(dtype)) + m.set_input("data", inputs[-1]) + + print("Evaluating...", flush=True) + number = 1 + repeat = 100 + min_repeat_ms = 0 + time_to_work_ms = 1000 + cooldown_interval_ms=1000 + if args.debug: + m.run() + time_f = self.advanced_time_evaluator(m, "run", ctx, number, repeat, min_repeat_ms, time_to_work_ms, cooldown_interval_ms) + else: + time_f = self.advanced_time_evaluator(m, "run", ctx, number, repeat, min_repeat_ms, time_to_work_ms, cooldown_interval_ms) + + benchmarkResult = time_f() + cost = benchmarkResult.mean + print("%g secs/iteration\n" % cost) + results = benchmarkResult.results + self.check_distribution(results) + print(benchmarkResult) + + if validator: + if isinstance(validator, Validator): + ref_outputs = validator.GetReference() + validator.Validate(m, ref_outputs) + else: + ref_outputs = validator(inputs) + for i, ref_output in enumerate(ref_outputs): + tvm_output = m.get_output(i) + output = tvm_output.asnumpy() + 
np.testing.assert_allclose(output, ref_output, rtol=1e-3, atol=1e-3) + print("Validation done") + + + def _benchmark_vm( + self, + tvm_mod, + params, + input_shape, + target="llvm", + target_host="llvm", + dtype="float32", + validator=None + ): + from tvm.runtime.vm import VirtualMachine + print("Benchmark GraphExecutor") + if self.use_tracker and self.remote == None: + self._connect_tracker() + + if isinstance(tvm_mod, tvm.IRModule): + mod = tvm_mod + else: + mod = tvm.IRModule() + mod["main"] = tvm_mod + + with tvm.transform.PassContext(opt_level=3): + vmc = relay.vm.compile(mod, target_host=target_host, target=target, params=params) + + if self.remote: + print("Using Android OpenCL runtime over RPC") + temp = utils.tempdir() + dso_binary = "dev_lib_cl.so" + dso_binary_path = temp.relpath(dso_binary) + if "opencl" in target: + ctx = self.remote.cl(0) + else: + ctx = self.remote.cpu(0) + vmc.mod.export_library(dso_binary_path, ndk.create_shared) + self.remote.upload(dso_binary_path) + print("Uploading binary...") + rlib = self.remote.load_module(dso_binary) + if args.debug: + vm = tvm.runtime.profiler_vm.VirtualMachineProfiler(rlib, ctx, "naive") + else: + vm = VirtualMachine(rlib, ctx, "naive") + else: + print("Using local runtime") + ctx = tvm.device(target, 0) + if args.debug: + vm = tvm.runtime.profiler_vm.VirtualMachineProfiler(vmc, ctx, "naive") + else: + vm = VirtualMachine(vmc, ctx, "naive") + inputs = [] + if isinstance(validator, Validator): + inputs = validator.GetInputDictionary() + data = {} + for k, v in inputs.items(): + data[k] = tvm.nd.array(v, ctx) + vm.set_input("main", **data) + elif isinstance(input_shape, dict): + data = {} + for key in input_shape: + data[key] = tvm.nd.array(np.random.normal(size=input_shape[key]).astype("float32"), ctx) + vm.set_input("main", **data) + else: + data = tvm.nd.array(np.random.normal(size=input_shape).astype("float32"), ctx) + vm.set_input("main", data) + + print("Evaluating...", flush=True) + if args.debug: + res = vm.profile(**data, func_name="main") + print(res) + + number = 1 + repeat = 100 + min_repeat_ms = 0 + time_to_work_ms = 1000 + cooldown_interval_ms=1000 + time_f = self.advanced_time_evaluator(vm, "invoke_stateful", ctx, number, repeat, min_repeat_ms, time_to_work_ms, cooldown_interval_ms, mod_func_name="main") + + benchmarkResult = time_f("main") + cost = benchmarkResult.mean + print("%g secs/iteration\n" % cost) + print(benchmarkResult) + + if validator: + if isinstance(validator, Validator): + ref_outputs = validator.GetReference() + validator.Validate(vm, ref_outputs, data) + else: + ref_outputs = validator(inputs) + for i, ref_output in enumerate(ref_outputs): + tvm_output = vm.get_outputs(i) + output = tvm_output.asnumpy() + np.testing.assert_allclose(output, ref_output, rtol=1e-3, atol=1e-3) + print("Validation done") + + + def _schedule_jobs(self, mod, params, input_shape, dtype, target, validator=None): + if args.VM: + def bench(): + self._benchmark_vm( + mod, + params, + input_shape, + target=target, + target_host=self.host_target, + dtype=dtype, + validator=validator + ) + else: + def bench(): + self._benchmark( + mod, + params, + input_shape, + target=target, + target_host=self.host_target, + dtype=dtype, + validator=validator + ) + + + benchmark_index = len(self.benchmarks) + self.benchmarks.append(bench) + + def tune(apply_previous_tune=False, options=args.tuning_options): + print("Extracting tasks") + tasks = autotvm.task.extract_from_program( + mod, target=target, target_host=self.host_target, params=params 
+            )
+            if apply_previous_tune == False:
+                print("Tuning kernels")
+                Executor.tune_tasks(tasks, **options)
+
+            def tuned_benchmark():
+                print("Apply best performing tuning profiles:")
+
+                if (options["log_filename"]):
+                    with autotvm.apply_history_best(options["log_filename"]):
+                        bench()
+                else:
+                    bench()
+
+            self.benchmarks.pop(benchmark_index)
+            self.benchmarks.append(tuned_benchmark)
+
+        self.tuning_jobs.append(tune)
+
+    @staticmethod
+    def tune_tasks(
+        tasks,
+        measure_option,
+        tuner="xgb",
+        n_trial=333,
+        early_stopping=None,
+        log_filename="tuning.log",
+        use_transfer_learning=False,
+    ):
+        from tvm.autotvm.tuner import XGBTuner
+        from tvm.autotvm.tuner import GATuner
+        from tvm.autotvm.tuner import RandomTuner
+        from tvm.autotvm.tuner import GridSearchTuner
+
+        tmp_log_file = log_filename + ".tmp"
+        #if os.path.exists(tmp_log_file) and use_transfer_learning == False:
+        #    os.remove(tmp_log_file)
+
+        for i, tsk in enumerate(reversed(tasks)):
+            print("Task: ", tsk)
+            prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
+            if tuner == "xgb" or tuner == "xgb-rank":
+                tuner_obj = XGBTuner(tsk, loss_type="rank")
+            elif tuner == "xgb_knob":
+                tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
+            elif tuner == "ga":
+                tuner_obj = GATuner(tsk, pop_size=50)
+            elif tuner == "random":
+                tuner_obj = RandomTuner(tsk)
+            elif tuner == "gridsearch":
+                tuner_obj = GridSearchTuner(tsk)
+            else:
+                raise ValueError("Invalid tuner: " + tuner)
+
+            if use_transfer_learning:
+                if os.path.isfile(tmp_log_file):
+                    tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
+
+            tsk_trial = min(n_trial, len(tsk.config_space))
+            tuner_obj.tune(
+                n_trial=tsk_trial,
+                early_stopping=early_stopping,
+                measure_option=measure_option,
+                callbacks=[
+                    autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
+                    autotvm.callback.log_to_file(tmp_log_file),
+                ],
+            )
+
+        autotvm.record.pick_best(tmp_log_file, log_filename)
+        # os.remove(tmp_log_file)
+
+if __name__ == "__main__":
+    main()

From 75807cfaffab33e3b9ccde1d708aa3abc2880975 Mon Sep 17 00:00:00 2001
From: dsbarinov1
Date: Fri, 21 Jul 2023 12:32:25 +0300
Subject: [PATCH 2/3] Resolve conflicts

---
 evaluate.py | 318 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 280 insertions(+), 38 deletions(-)

diff --git a/evaluate.py b/evaluate.py
index a482f8aef2de..8701585ee718 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -330,7 +330,7 @@ def import_onnx_ssd_resnet34(self, target="llvm", dtype="float32"):
         mod = convert_to_dtype(mod["main"], dtype)
         dtype = "float32" if dtype == "float32" else "float16"
 
-        return (mod, params, shape_dict, dtype, target, ONNXTestSamplesValidator(test_files_dir, input_names=list(shape_dict.keys())))
+        return (mod, params, shape_dict, dtype, target, SSDResnetValidator())
 
 
@@ -361,10 +361,27 @@ def import_onnx_yolo_v3(self, target="llvm", dtype="float32"):
         print(mod)
         print("=" * 10)
 
-        return (mod, params, shape_dict, dtype, target, ONNXTestSamplesValidator(test_files_dir, input_names=list(shape_dict.keys())))
+        return (mod, params, shape_dict, dtype, target, ONNXYolov3Validator())
+        #return (mod, params, shape_dict, dtype, target, ONNXTestSamplesValidator(test_files_dir, input_names=list(shape_dict.keys())))
 
 
     def import_onnx_faster_rcnn(self, target="llvm", dtype="float32"):
+        min_shape = 800.0
+        def _get_shape():
+            from PIL import Image
+            from tvm.contrib import download
+            # Download test image
+            image_url = 
"https://raw.githubusercontent.com/onnx/models/main/vision/object_detection_segmentation/faster-rcnn/dependencies/demo.jpg" + image_fn = "demo.png" + download.download(image_url, image_fn) + + # Prepare test image for inference + #import ipdb; ipdb.set_trace() + image = Image.open(image_fn) + print(image.size) + ratio = min_shape / min(image.size[0], image.size[1]) + #return (3, int(ratio * image.size[1]), int(ratio * image.size[0])) # [c, h, w] + return (3, int(min_shape), int(min_shape)) # [c, h, w] archive_url = "https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12.onnx" filename = "FasterRCNN-12" from tvm.contrib import download @@ -372,10 +391,13 @@ def import_onnx_faster_rcnn(self, target="llvm", dtype="float32"): download.download(archive_url, filename) onnx_model = onnx.load(filename) shape_dict = { - "image": (3, 800, 800), + "image": _get_shape(), } - mod_file = "onnx_faster_rcnn_mod.json" - params_file = "onnx_faster_rcnn_params.json" + prefix = "/home/echuraev/Workspace/OctoML/tvm/" + mod_file = prefix + f"onnx_faster_rcnn_mod_{dtype}.json" + params_file = prefix + f"onnx_faster_rcnn_params_{dtype}.json" + mod_file = prefix + f"onnx_faster_rcnn_mod.json" + params_file = prefix + f"onnx_faster_rcnn_params.json" if not os.path.exists(mod_file): mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, freeze_params=True) @@ -397,7 +419,7 @@ def import_onnx_faster_rcnn(self, target="llvm", dtype="float32"): print(mod) print("=" * 10) - return (mod, params, shape_dict, dtype, target) + return (mod, params, shape_dict, dtype, target, FasterRCNNValidator(min_shape)) def get_args(): @@ -717,13 +739,13 @@ def __init__(self, shape_dict, layout="NCHW", preproc=None): self.inputs = {name : image} - def Validate(self, m, ref_outputs=[], data=[]): + def Validate(self, m, ref_outputs=[]): if isinstance(m, tvm.runtime.vm.VirtualMachine) or isinstance(m, tvm.runtime.profiler_vm.VirtualMachineProfiler): tvm_output = m.invoke("main", **data) else: tvm_output = m.get_output(0) #import ipdb; ipdb.set_trace() - top_categories = np.argsort(tvm_output.asnumpy()[0]) # TODO: top_categories = np.argsort(tvm_output.asnumpy()[0]) AttributeError: 'NoneType' object has no attribute 'asnumpy' + top_categories = np.argsort(tvm_output.asnumpy()[0]) # Report top-5 classification results print("\nTop5 predictions: \n") top5 = np.flip(top_categories, axis=0)[:5] @@ -1086,11 +1108,243 @@ def Validate(self, m, ref_outputs=[], show=False): with open(input_file, 'rb') as f: tensor.ParseFromString(f.read()) refs.append(numpy_helper.to_array(tensor)) - + labels = [] + scores_list = [] + boxes_list = [] + from tvm.contrib import download + classes_url = "https://raw.githubusercontent.com/qqwweee/keras-yolo3/master/model_data/coco_classes.txt" + classes_fn = "coco_classes.txt" + download.download(classes_url, classes_fn) + classes = [line.rstrip('\n') for line in open(classes_fn)] + for idx_ in outputs[2]: + class_idx = idx_[1] + score = outputs[1][tuple(idx_)] + idx_1 = (idx_[0], idx_[2]) + box = outputs[0][idx_1] + labels.append(classes[class_idx]) + scores_list.append(score) + boxes_list.append(box) + print("bigger: label: {}, score: {}, box: {}".format(classes[class_idx], score, box)) + + print(outputs[0].shape) + print(outputs[1].shape) + print(outputs[2].shape) for i in range(len(outputs)): np.testing.assert_allclose(outputs[i], refs[i], rtol=1e-2, atol=1e-2) +class FasterRCNNValidator(Validator): + def preprocess(self, image): + from PIL import Image + # 
Resize + ratio = self.min_shape / min(image.size[0], image.size[1]) + #image = image.resize((int(ratio * image.size[0]), int(ratio * image.size[1])), Image.BILINEAR) + image = image.resize((int(self.min_shape), int(self.min_shape)), Image.BILINEAR) + + # Convert to BGR + image = np.array(image)[:, :, [2, 1, 0]].astype('float32') + + # HWC -> CHW + image = np.transpose(image, [2, 0, 1]) + + # Normalize + mean_vec = np.array([102.9801, 115.9465, 122.7717]) + for i in range(image.shape[0]): + image[i, :, :] = image[i, :, :] - mean_vec[i] + + # Pad to be divisible of 32 + import math + padded_h = int(math.ceil(image.shape[1] / 32) * 32) + padded_w = int(math.ceil(image.shape[2] / 32) * 32) + + padded_image = np.zeros((3, padded_h, padded_w), dtype=np.float32) + padded_image[:, :image.shape[1], :image.shape[2]] = image + image = padded_image + + return image + + def __init__(self, min_shape, preproc=None): + from PIL import Image + from tvm.contrib import download + from os.path import join, isfile + from matplotlib import pyplot as plt + self.min_shape = min_shape + + # Download test image + image_url = "https://raw.githubusercontent.com/zhreshold/mxnet-ssd/master/data/demo/dog.jpg" + image_fn = "dog.png" + download.download(image_url, image_fn) + + # Prepare test image for inference + #import ipdb; ipdb.set_trace() + self.image = Image.open(image_fn) + image_data = self.preprocess(self.image) + + self.inputs = {"image" : image_data} + + def Validate(self, m, ref_outputs=[]): + from tvm.contrib import download + classes_url = "https://raw.githubusercontent.com/onnx/models/main/vision/object_detection_segmentation/faster-rcnn/dependencies/coco_classes.txt" + classes_fn = "coco_classes_faster.txt" + download.download(classes_url, classes_fn) + classes = [line.rstrip('\n') for line in open(classes_fn)] + + # class_IDs, scores, bounding_boxs + if isinstance(m, tvm.runtime.vm.VirtualMachine) or isinstance(m, tvm.runtime.profiler_vm.VirtualMachineProfiler): + tvm_output = m.get_outputs() + boxes = tvm_output[0].asnumpy() + labels = tvm_output[1].asnumpy() + scores = tvm_output[2].asnumpy() + else: + boxes = m.get_output(0).asnumpy() + labels = m.get_output(1).asnumpy() + scores = m.get_output(2).asnumpy() + score_threshold = 0.7 + assert boxes.shape[0] == labels.shape[0] and labels.shape[0] == scores.shape[0] + #for box, label, score in zip(boxes, labels, scores): + for i in range(len(boxes)): + if scores[i] > score_threshold: + print("label: {}, score: {}, box: {}".format(classes[labels[i]], scores[i], boxes[i])) + #assert classes[labels[i]] == 'dog' or classes[labels[i]] == 'bicycle' + + +class SSDResnetValidator(Validator): + def preprocess(self, image): + from PIL import Image + img = image.resize((1200, 1200), Image.BILINEAR) + img_data = np.array(img) + img_data = np.transpose(img_data, [2, 0, 1]) + img_data = np.expand_dims(img_data, 0) + mean_vec = np.array([0.485, 0.456, 0.406]) + stddev_vec = np.array([0.229, 0.224, 0.225]) + norm_img_data = np.zeros(img_data.shape).astype('float32') + for i in range(img_data.shape[1]): + norm_img_data[:,i,:,:] = (img_data[:,i,:,:]/255 - mean_vec[i]) / stddev_vec[i] + return norm_img_data + + def __init__(self, preproc=None): + from PIL import Image + from tvm.contrib import download + from os.path import join, isfile + from matplotlib import pyplot as plt + + # Download test image + image_url = "https://raw.githubusercontent.com/zhreshold/mxnet-ssd/master/data/demo/dog.jpg" + image_fn = "dog.png" + download.download(image_url, image_fn) + + # Prepare test 
+        # Prepare test image for inference
+        self.image = Image.open(image_fn)
+        image_data = self.preprocess(self.image)
+
+        self.inputs = {"image" : image_data}
+
+    def Validate(self, m, ref_outputs=[]):
+        from tvm.contrib import download
+        classes_url = "https://raw.githubusercontent.com/qqwweee/keras-yolo3/master/model_data/coco_classes.txt"
+        classes_fn = "coco_classes.txt"
+        download.download(classes_url, classes_fn)
+        with open(classes_fn) as f:
+            classes = [line.rstrip('\n') for line in f]
+
+        # Outputs: boxes, labels, scores, each with a leading batch dimension
+        if isinstance(m, tvm.runtime.vm.VirtualMachine) or isinstance(m, tvm.runtime.profiler_vm.VirtualMachineProfiler):
+            tvm_output = m.get_outputs()
+            boxes = tvm_output[0].asnumpy()
+            labels = tvm_output[1].asnumpy()
+            scores = tvm_output[2].asnumpy()
+        else:
+            boxes = m.get_output(0).asnumpy()
+            labels = m.get_output(1).asnumpy()
+            scores = m.get_output(2).asnumpy()
+        score_threshold = 0.7
+        # Iterate over the detections, not over the batch dimension
+        for i in range(len(boxes[0])):
+            if scores[0][i] > score_threshold:
+                print("label: {}, score: {}, box: {}".format(classes[labels[0][i]], scores[0][i], boxes[0][i]))
+                assert classes[labels[0][i]] == 'dog' or classes[labels[0][i]] == 'bicycle'
+
+
+class ONNXYolov3Validator(Validator):
+    def preprocess(self, img):
+        def _letterbox_image(image, size):
+            '''Resize image with unchanged aspect ratio using padding.'''
+            from PIL import Image
+            iw, ih = image.size
+            w, h = size
+            scale = min(w / iw, h / ih)
+            nw = int(iw * scale)
+            nh = int(ih * scale)
+
+            image = image.resize((nw, nh), Image.BICUBIC)
+            new_image = Image.new('RGB', size, (128, 128, 128))
+            new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
+            return new_image
+
+        model_image_size = (416, 416)
+        boxed_image = _letterbox_image(img, tuple(reversed(model_image_size)))
+        image_data = np.array(boxed_image, dtype='float32')
+        image_data /= 255.
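+        # Scale to [0, 1]; the model also wants NCHW with a batch dimension,
+        # which the two lines below take care of.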
+        image_data = np.transpose(image_data, [2, 0, 1])
+        image_data = np.expand_dims(image_data, 0)
+        return image_data
+
+    def __init__(self, preproc=None):
+        from PIL import Image
+        from tvm.contrib import download
+
+        # Download test image
+        image_url = "https://raw.githubusercontent.com/zhreshold/mxnet-ssd/master/data/demo/dog.jpg"
+        image_fn = "dog.jpg"
+        download.download(image_url, image_fn)
+
+        # Prepare test image for inference
+        self.image = Image.open(image_fn)
+        image_data = self.preprocess(self.image)
+        # Original image size as (height, width), used to rescale boxes
+        image_size = np.array([self.image.size[1], self.image.size[0]], dtype="float32").reshape(1, 2)
+
+        self.inputs = {
+            "input_1" : image_data,
+            "image_shape" : image_size,
+        }
+
+    def Validate(self, m, ref_outputs=[]):
+        from tvm.contrib import download
+        classes_url = "https://raw.githubusercontent.com/qqwweee/keras-yolo3/master/model_data/coco_classes.txt"
+        classes_fn = "coco_classes.txt"
+        download.download(classes_url, classes_fn)
+        with open(classes_fn) as f:
+            classes = [line.rstrip('\n') for line in f]
+
+        # Outputs: boxes, scores, and indices; each index is a
+        # (batch, class, box) triple pointing into scores and boxes
+        if isinstance(m, tvm.runtime.vm.VirtualMachine) or isinstance(m, tvm.runtime.profiler_vm.VirtualMachineProfiler):
+            tvm_output = m.get_outputs()
+            boxes = tvm_output[0].asnumpy()
+            scores = tvm_output[1].asnumpy()
+            indices = tvm_output[2].asnumpy()
+        else:
+            boxes = m.get_output(0).asnumpy()
+            scores = m.get_output(1).asnumpy()
+            indices = m.get_output(2).asnumpy()
+        score_threshold = 0.7
+        possible_objs = ['dog', 'bicycle', 'truck']
+        for idx_ in indices:
+            class_idx = idx_[1]
+            score = scores[tuple(idx_)]
+            box = boxes[(idx_[0], idx_[2])]
+            if score > score_threshold:
+                print("label: {}, score: {}, box: {}".format(classes[class_idx], score, box))
+                assert classes[class_idx] in possible_objs
+
+
 class Executor(object):
     def __init__(self, use_tracker=False):
         self.benchmarks = []
@@ -1299,7 +1553,7 @@ def _benchmark_vm(
         validator=None
     ):
         from tvm.runtime.vm import VirtualMachine
-        print("Benchmark GraphExecutor")
+        print("Benchmark Virtual Machine")
 
         if self.use_tracker and self.remote == None:
             self._connect_tracker()
@@ -1383,39 +1637,27 @@ def _schedule_jobs(self, mod, params, input_shape, dtype, target,
                        validator=None):
-        if args.VM:
-            def bench():
-                self._benchmark_vm(
-                    mod,
-                    params,
-                    input_shape,
-                    target=target,
-                    target_host=self.host_target,
-                    dtype=dtype,
-                    validator=validator
-                )
-        else:
-            def bench():
-                self._benchmark(
-                    mod,
-                    params,
-                    input_shape,
-                    target=target,
-                    target_host=self.host_target,
-                    dtype=dtype,
-                    validator=validator
-                )
-
+        def bench():
+            # GraphExecutor benchmarking was dropped; always go through the VM
+            self._benchmark_vm(
+                mod,
+                params,
+                input_shape,
+                target=target,
+                target_host=self.host_target,
+                dtype=dtype,
+                validator=validator
+            )
         benchmark_index = len(self.benchmarks)
         self.benchmarks.append(bench)
 
         def tune(apply_previous_tune=False, options=args.tuning_options):
-            print("Extracting tasks")
-            tasks = autotvm.task.extract_from_program(
-                mod, target=target, target_host=self.host_target, params=params
-            )
             if apply_previous_tune == False:
+                print("Extracting tasks")
+                tasks = autotvm.task.extract_from_program(
+                    mod, target=target, target_host=self.host_target, params=params
+                )
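+                # Task extraction is now done lazily: applying a previously
+                # recorded tuning log skips it entirely.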
print("Tuning kernels") Executor.tune_tasks(tasks, **options) @@ -1485,4 +1727,4 @@ def tune_tasks( # os.remove(tmp_log_file) if __name__ == "__main__": - main() + main() \ No newline at end of file From e9d2468b8f6a0e3352a3b884babbe74fe7e9678f Mon Sep 17 00:00:00 2001 From: dsbarinov1 Date: Fri, 21 Jul 2023 13:10:24 +0300 Subject: [PATCH 3/3] Resolve reviewer notes --- evaluate.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/evaluate.py b/evaluate.py index 8701585ee718..049991a558bc 100644 --- a/evaluate.py +++ b/evaluate.py @@ -24,7 +24,6 @@ from tvm import autotvm from tvm.contrib import utils, ndk from tvm.topi import testing -from tvm.runtime import profiler_vm from tvm.relay.op import register_mixed_precision_conversion @@ -739,7 +738,7 @@ def __init__(self, shape_dict, layout="NCHW", preproc=None): self.inputs = {name : image} - def Validate(self, m, ref_outputs=[]): + def Validate(self, m, ref_outputs=[], data={}): if isinstance(m, tvm.runtime.vm.VirtualMachine) or isinstance(m, tvm.runtime.profiler_vm.VirtualMachineProfiler): tvm_output = m.invoke("main", **data) else: @@ -1464,7 +1463,6 @@ def _benchmark( else: from tvm.contrib import graph_executor - print("Benchmark GraphExecutor") if self.use_tracker and self.remote == None: self._connect_tracker() @@ -1553,7 +1551,7 @@ def _benchmark_vm( validator=None ): from tvm.runtime.vm import VirtualMachine - print("Benchmark Virtual Machine") + from tvm.runtime import profiler_vm if self.use_tracker and self.remote == None: self._connect_tracker()