13 changes: 13 additions & 0 deletions CMakeLists.txt
@@ -293,6 +293,19 @@ tvm_file_glob(GLOB CODEGEN_SRCS

list(APPEND COMPILER_SRCS ${CODEGEN_SRCS})

file(GLOB DMLC_SRC
3rdparty/dmlc-core/src/*.cc
3rdparty/dmlc-core/src/io/filesys.cc
3rdparty/dmlc-core/src/io/indexed_recordio_split.cc
3rdparty/dmlc-core/src/io/input_split_base.cc
3rdparty/dmlc-core/src/io/line_split.cc
3rdparty/dmlc-core/src/io/local_filesys.cc
3rdparty/dmlc-core/src/io/recordio_split.cc
)
Review comment: Could you please point out which function requires compiling this set of files?

list(APPEND COMPILER_SRCS ${DMLC_SRC})



tvm_file_glob(GLOB_RECURSE RELAY_OP_SRCS
src/relay/op/*.cc
)
14 changes: 14 additions & 0 deletions include/tvm/auto_scheduler/measure.h
@@ -38,6 +38,8 @@
#ifndef TVM_AUTO_SCHEDULER_MEASURE_H_
#define TVM_AUTO_SCHEDULER_MEASURE_H_

#include <tvm/runtime/ndarray.h>

#include <tvm/auto_scheduler/loop_state.h>
#include <tvm/auto_scheduler/search_task.h>

@@ -322,6 +324,10 @@ class ProgramRunnerNode : public Object {
virtual Array<MeasureResult> Run(const Array<MeasureInput>& inputs,
const Array<BuildResult>& build_results, int verbose) = 0;


virtual Array<tvm::runtime::NDArray> GetOutput(const Array<MeasureInput>& inputs,
const Array<BuildResult>& build_results, int verbose) = 0;
Review comment: A documentation string is required. GetXXX sounds like a getter, but this appears to be one more version of the Run method that also reports outputs.


static constexpr const char* _type_key = "auto_scheduler.ProgramRunner";
TVM_DECLARE_BASE_OBJECT_INFO(ProgramRunnerNode, Object);
};
@@ -373,6 +379,10 @@ class LocalRunnerNode : public ProgramRunnerNode {
Array<MeasureResult> Run(const Array<MeasureInput>& inputs,
const Array<BuildResult>& build_results, int verbose) final;

Array<tvm::runtime::NDArray> GetOutput(const Array<MeasureInput>& inputs,
const Array<BuildResult>& build_results, int verbose) final;


static constexpr const char* _type_key = "auto_scheduler.LocalRunner";
TVM_DECLARE_FINAL_OBJECT_INFO(LocalRunnerNode, ProgramRunnerNode);
};
@@ -422,6 +432,10 @@ class RPCRunnerNode : public ProgramRunnerNode {
Array<MeasureResult> Run(const Array<MeasureInput>& inputs,
const Array<BuildResult>& build_results, int verbose) final;

virtual Array<tvm::runtime::NDArray> GetOutput(const Array<MeasureInput>& inputs,
const Array<BuildResult>& build_results, int verbose) final;


static constexpr const char* _type_key = "auto_scheduler.RPCRunner";
TVM_DECLARE_FINAL_OBJECT_INFO(RPCRunnerNode, ProgramRunnerNode);
};
17 changes: 16 additions & 1 deletion include/tvm/auto_scheduler/search_task.h
@@ -125,6 +125,12 @@ class SearchTaskNode : public Object {
LayoutRewriteOption layout_rewrite_option;
/*! \brief Names of some user defined input data used in program measuring. */
Array<String> task_input_names;
/*! \brief keeping custom seed to reproduce randomly generated input values */
Review comment: Comments should start with a capital letter.

int custom_seed;
Review comment: I'm still wondering about random-generator behaviour across platforms. Different standard-library implementations may produce different random arrays from the same seed, so relying on a seed alone may not be acceptable here.
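For illustration, a platform-stable alternative is to fill inputs host-side with NumPy, whose default PCG64 bit generator yields the same stream for a given seed on any platform (a sketch under that assumption; make_input is a hypothetical helper and rng.random covers floating-point dtypes only):

import numpy as np

# Sketch: reproducible input generation independent of the C++ std library RNG.
# In this PR the shape and dtype would come from build_res.args.
def make_input(shape, dtype, seed=42):
    rng = np.random.default_rng(seed)  # seed-deterministic PCG64 stream
    return rng.random(size=shape).astype(dtype)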


/*! \brief ref value for output values */
Array<tvm::runtime::NDArray> ref_output_tensors;


void VisitAttrs(tvm::AttrVisitor* v) {
v->Visit("compute_dag", &compute_dag);
@@ -135,8 +141,14 @@
v->Visit("hardware_params", &hardware_params);
v->Visit("layout_rewrite_option", &layout_rewrite_option);
v->Visit("task_input_names", &task_input_names);
v->Visit("custom_seed",&custom_seed);
v->Visit("ref_output_tensors",&ref_output_tensors);
}

void SetReferenceTensors(Array<tvm::runtime::NDArray> arr);

void SetTarget(Target target, Target target_host);
Review comment: Why do we need these setters? All these fields are public.


static constexpr const char* _type_key = "auto_scheduler.SearchTask";
TVM_DECLARE_FINAL_OBJECT_INFO(SearchTaskNode, Object);
};
@@ -160,7 +172,10 @@ class SearchTask : public ObjectRef {
*/
SearchTask(ComputeDAG compute_dag, String workload_key, Target target, Target target_host,
Optional<HardwareParams> hardware_params, LayoutRewriteOption layout_rewrite_option,
Array<String> task_input_names, String desc = "");
Array<String> task_input_names,
String desc = "",
Array<tvm::runtime::NDArray> ref_output_tensors = Array<tvm::runtime::NDArray>(),
int custom_seed = 42);
Review comment: Please update the method's documentation.
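For illustration, Python-side usage reflecting the extended signature might look like the sketch below (the "matmul" workload name and the precomputed reference array are hypothetical):

import numpy as np
import tvm
from tvm import auto_scheduler

expected_output = np.load("matmul_ref.npy")  # hypothetical reference result
task = auto_scheduler.SearchTask(
    func="matmul",  # hypothetical registered workload
    args=(128, 128, 128, "float32"),
    target="llvm",
    ref_output_tensors=[tvm.nd.array(expected_output)],
    custom_seed=42,
)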


TVM_DEFINE_OBJECT_REF_METHODS(SearchTask, ObjectRef, SearchTaskNode);
};
159 changes: 156 additions & 3 deletions python/tvm/auto_scheduler/measure.py
@@ -46,6 +46,7 @@
from tvm.ir import transform
from tvm.runtime import Object, module, ndarray
from tvm.target import Target
import numpy as np

from . import _ffi_api
from .loop_state import StateObject
@@ -230,6 +231,7 @@ def recover_measure_input(inp, rebuild_state=False):
hardware_params=task.hardware_params,
layout_rewrite_option=task.layout_rewrite_option,
task_inputs=list(task.task_input_names),
ref_output_tensors=task.ref_output_tensors,
)

if rebuild_state:
@@ -283,6 +285,10 @@ def run(self, measure_inputs, build_results, verbose=1):
"""
return _ffi_api.ProgramRunnerRun(self, measure_inputs, build_results, verbose)

def get_output(self, measure_inputs, build_results, verbose=1):
    """Run the measured programs and return their output tensors
    (a sketch of the missing docstring; parameters mirror run())."""
    return _ffi_api.ProgramRunnerGetOutput(self, measure_inputs, build_results, verbose)
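
For context, a hypothetical call through a LocalRunner might look like this, assuming measure_inputs and build_results come from the usual measurement pipeline:

runner = LocalRunner(timeout=10)
outputs = runner.get_output(measure_inputs, build_results, verbose=1)
# outputs holds one tvm.nd.NDArray per measured program (its last argument)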


@tvm._ffi.register_object("auto_scheduler.ProgramMeasurer")
class ProgramMeasurer(Object):
@@ -630,7 +636,7 @@ def _local_build_worker(inp_serialized, build_func, verbose):

try:
with transform.PassContext().current():
func = build_module.build(sch, args, target=task.target)
func = build_module.build(sch, args, target=task.target, target_host=task.target_host)
func.export_library(filename, build_func)
# pylint: disable=broad-except
except Exception:
@@ -920,7 +926,7 @@ def _timed_eval_func(
empty_array = ndarray.empty(
get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
)
random_fill(empty_array)
random_fill(empty_array, inp.task.custom_seed)
loc_args.append(empty_array)
else:
loc_args.append(ndarray.array(args[idx], dev))
@@ -944,6 +950,90 @@
return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc


def _get_output_func(
inp_serialized,
build_res,
args,
number,
repeat,
min_repeat_ms,
cooldown_interval,
Review comment: cooldown_interval doesn't seem necessary here. It might make more sense to pass (cooldown_interval_ms, limit_zero_time_iterations, repeats_to_cooldown) to func.time_evaluator.
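A sketch of that suggestion, assuming the three values become parameters of this function and that the targeted TVM version exposes the corresponding time_evaluator keyword arguments:

time_f = func.time_evaluator(
    func.entry_name,
    dev,
    number=number,
    repeat=repeat,
    min_repeat_ms=min_repeat_ms,
    limit_zero_time_iterations=limit_zero_time_iterations,  # assumed parameter
    cooldown_interval_ms=cooldown_interval_ms,  # assumed parameter
    repeats_to_cooldown=repeats_to_cooldown,  # assumed parameter
    f_preproc=f_prepare,
)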

enable_cpu_cache_flush,
verbose,
):

inp = MeasureInput.deserialize(inp_serialized)
tic = time.time()
error_no = 0
error_msg = None
try:
func = module.load_module(build_res.filename)
dev = ndarray.device(str(inp.task.target), 0)

f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
time_f = func.time_evaluator(
func.entry_name,
dev,
number=number,
repeat=repeat,
min_repeat_ms=min_repeat_ms,
f_preproc=f_prepare,
)


# pylint: disable=broad-except
except Exception:
costs = (MAX_FLOAT,)
error_no = MeasureErrorNo.COMPILE_DEVICE
error_msg = make_traceback_info()

result = []
if error_no == 0:
try:
random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
assert random_fill, "Please make sure USE_RANDOM is ON in the config.cmake"
assert len(args) == len(build_res.args)

loc_args = []

# pylint: disable=consider-using-enumerate
for idx in range(len(args)):
Review comment: for idx, arg in enumerate(args): would simplify the code a little.
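The loop above, restructured per that suggestion (random_fill taking a seed argument is this PR's modified signature):

for idx, arg in enumerate(args):
    if arg is None:
        build_res_arg = build_res.args[idx]
        empty_array = ndarray.empty(
            get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
        )
        random_fill(empty_array, inp.task.custom_seed)
        loc_args.append(empty_array)
    else:
        loc_args.append(ndarray.array(arg, dev))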

if args[idx] is None:
build_res_arg = build_res.args[idx]
empty_array = ndarray.empty(
get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
)
random_fill(empty_array, inp.task.custom_seed)
loc_args.append(empty_array)
else:
loc_args.append(ndarray.array(args[idx], dev))
dev.sync()
costs = time_f(*loc_args).results

idx = len(loc_args) - 1
Review comment: idx is unused.

arr = ndarray.array(loc_args[len(loc_args) - 1], dev)
result.append(arr.numpy())


# pylint: disable=broad-except
except Exception:
costs = (MAX_FLOAT,)
error_no = MeasureErrorNo.RUNTIME_DEVICE
error_msg = make_traceback_info()

shutil.rmtree(os.path.dirname(build_res.filename))
toc = time.time()
Review comment: toc is unused.

time.sleep(cooldown_interval)
Review comment: There is no loop here that would justify this cooldown.


if verbose >= 1:
if error_no == MeasureErrorNo.NO_ERROR:
print("*", end="", flush=True)
else:
print("*E", end="", flush=True) # Run error

return result


@tvm._ffi.register_func("auto_scheduler.local_runner.run")
def local_run(
inputs,
@@ -1063,6 +1153,58 @@ def local_run(

return measure_results

@tvm._ffi.register_func("auto_scheduler.local_runner.get_output")
def local_get_output(
inputs,
build_results,
timeout=10,
number=3,
repeat=1,
min_repeat_ms=0,
cooldown_interval=0,
enable_cpu_cache_flush=False,
verbose=1,
):

measure_results = []
assert len(inputs) == len(build_results), "Measure input size should be equal to build results"
worker = PopenWorker()
for inp, build_res in zip(inputs, build_results):
if build_res.error_no != 0:
res = None
if verbose >= 1:
print("*B", end="", flush=True) # Build error
else:
args = prepare_runner_args(inp, build_res)
res = call_func_with_timeout(
worker,
timeout,
_get_output_func,
args=(
inp.serialize(),
build_res,
args,
number,
repeat,
min_repeat_ms,
cooldown_interval,
enable_cpu_cache_flush,
verbose,
),
)
if isinstance(res, TimeoutError):
if verbose >= 1:
print("*T", end="", flush=True) # Run timeout
elif isinstance(res, Exception):
if verbose >= 1:
print("*E", end="", flush=True) # Run error

if res is not None and not isinstance(res, Exception):
    measure_results.append(*res)  # res is the single-element result list

if verbose >= 1:
print("", flush=True)

return [tvm.nd.array(x) for x in measure_results]

def _rpc_run(
inp_serialized,
@@ -1129,7 +1271,7 @@ def _rpc_run(
empty_array = ndarray.empty(
get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
)
random_fill(empty_array)
random_fill(empty_array, inp.task.custom_seed)
loc_args.append(empty_array)
else:
loc_args.append(ndarray.array(args[idx], dev))
Expand All @@ -1139,6 +1281,17 @@ def _rpc_run(
func.entry_func(*loc_args)
dev.sync()

# check vs ref values
arr = ndarray.array(loc_args[len(loc_args) - 1], dev).numpy()
ref = inp.task.ref_output_tensors[0].numpy()
diff = np.abs(arr - ref)
if (diff <= 1e-3).all() == False:
Review comment: It looks like you are trying to reimplement np.allclose; suggest just reusing it.

Why use only an absolute threshold, and why 1e-3? It looks like a magic number; should we expose it as one more tuning argument?

print(f'\nAccuracy verification: FAILED\nmaximum element difference: {np.amax(diff)}, l2 diff: {np.linalg.norm(diff)}')
Review comment: Redundant print. Please use logger.info or logger.warning instead.

raise ValueError("Accuracy verification: FAILED ")
Review comment: Raising an exception here will lead to status MeasureErrorNo.RUNTIME_DEVICE; MeasureErrorNo.WRONG_ANSWER seems more correct.
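A sketch combining the three suggestions above (np.allclose, a logger, and a WRONG_ANSWER status), assuming measure.py's module-level logger is available and treating the tolerance values as placeholders:

if not np.allclose(arr, ref, rtol=1e-5, atol=1e-3):  # placeholder tolerances
    logger.warning(
        "Accuracy verification FAILED: max element diff %s, l2 diff %s",
        np.amax(diff),
        np.linalg.norm(diff),
    )
    # Record the status instead of raising, so the measurement result
    # carries the dedicated error code:
    error_no = MeasureErrorNo.WRONG_ANSWER
    error_msg = "output does not match the reference tensor"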

else:
print(f'\nAccuracy verification: PASSED\nmaximum element difference: {np.amax(diff)}, l2 diff: {np.linalg.norm(diff)}')


costs = time_f(*loc_args).results

# clean up remote files
6 changes: 6 additions & 0 deletions python/tvm/auto_scheduler/relay_integration.py
@@ -144,6 +144,11 @@ def extract_tasks(
# create search tasks
tasks = []
weights = []

#faked ref to extract task
#TODO: without it, initial serialization crashes... how to solve it more elegantly?
ref = [tvm.nd.empty((1, 1))]
Review comment: Very suspicious comment. It looks like the SearchTask constructor with default arguments produces an invalid object; this should be clarified and fixed. A None value in a field of type Array<NDArray> seems to lead to wrong serialization.
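One possible fix, sketched under the assumption that an empty list crosses the FFI boundary as a valid empty Array<NDArray>:

# In SearchTask.__init__, normalize the default so the FFI layer never
# receives None for the Array<NDArray> field:
if ref_output_tensors is None:
    ref_output_tensors = []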


for wkl_key, (weight, func_names) in env.wkl_key_to_weight.items():
tasks.append(
SearchTask(
Expand All @@ -160,6 +165,7 @@ def extract_tasks(
),
task_inputs_save_to_file=True,
desc=",".join(func_names),
ref_output_tensors=ref,
)
)
weights.append(int(weight))
6 changes: 6 additions & 0 deletions python/tvm/auto_scheduler/search_task.py
@@ -436,6 +436,8 @@ def __init__(
task_inputs_overwrite=False,
task_inputs_save_to_file=False,
desc="",
ref_output_tensors=None,
custom_seed=42,
):
assert (
func is not None or workload_key is not None
@@ -479,6 +481,8 @@ def __init__(
layout_rewrite_option,
task_input_names,
desc,
ref_output_tensors,
custom_seed,
)

def tune(self, tuning_options, search_policy=None, adaptive_training=False):
@@ -598,6 +602,8 @@ def __setstate__(self, state):
state["layout_rewrite_option"],
state["task_input_names"],
state["desc"],
state["ref_ouput_tensors"],
state["custom_seed"],
)


Expand Down