From 96b6f3411a36b2f990b1fb161092b592ed702ac8 Mon Sep 17 00:00:00 2001 From: Jackson Woodruff Date: Wed, 11 May 2022 16:48:18 -0700 Subject: [PATCH 1/2] Add initial cgra compiler --- compiler_gym/datasets/datasets.py | 1 + compiler_gym/envs/BUILD | 1 + compiler_gym/envs/CMakeLists.txt | 1 + compiler_gym/envs/__init__.py | 2 + compiler_gym/envs/cgra/.gitignore | 4 + compiler_gym/envs/cgra/BUILD | 26 + compiler_gym/envs/cgra/CMakeLists.txt | 25 + compiler_gym/envs/cgra/DFG.py | 206 +++++ compiler_gym/envs/cgra/Driver.py | 98 +++ compiler_gym/envs/cgra/Model.py | 2 + compiler_gym/envs/cgra/Operations.py | 54 ++ compiler_gym/envs/cgra/__init__.py | 34 + compiler_gym/envs/cgra/cgra_rewards.py | 66 ++ compiler_gym/envs/cgra/compiler_gym_test.py | 30 + compiler_gym/envs/cgra/datasets/BUILD | 15 + compiler_gym/envs/cgra/datasets/__init__.py | 45 + compiler_gym/envs/cgra/datasets/dfg_bench.py | 84 ++ compiler_gym/envs/cgra/service/BUILD | 17 + compiler_gym/envs/cgra/service/CMakeLists.txt | 14 + compiler_gym/envs/cgra/service/cgra_env.py | 53 ++ .../envs/cgra/service/cgra_service.py | 814 ++++++++++++++++++ .../cgra/service/compiler_gym-cgra-service | 15 + ...mpiler_gym-relative-placement-cgra-service | 13 + .../envs/cgra/service/relative_cgra_env.py | 39 + .../service/relative_placement_service.py | 198 +++++ compiler_gym/envs/cgra/test/test_dfg.json | 54 ++ examples/cgra/.gitignore | 2 + examples/cgra/ga.py | 332 +++++++ examples/cgra/ga_scripts/.gitignore | 2 + .../cgra/ga_scripts/ga_score_extractor.sh | 19 + examples/cgra/ga_scripts/plot_CDFs.py | 67 ++ examples/cgra/relative_placement_model.py | 116 +++ examples/cgra/run_ga_relative_placement.sh | 6 + examples/random_walk.py | 26 +- setup.py | 5 + 35 files changed, 2477 insertions(+), 9 deletions(-) create mode 100644 compiler_gym/envs/cgra/.gitignore create mode 100644 compiler_gym/envs/cgra/BUILD create mode 100644 compiler_gym/envs/cgra/CMakeLists.txt create mode 100644 compiler_gym/envs/cgra/DFG.py create mode 
100644 compiler_gym/envs/cgra/Driver.py create mode 100644 compiler_gym/envs/cgra/Model.py create mode 100644 compiler_gym/envs/cgra/Operations.py create mode 100644 compiler_gym/envs/cgra/__init__.py create mode 100644 compiler_gym/envs/cgra/cgra_rewards.py create mode 100644 compiler_gym/envs/cgra/compiler_gym_test.py create mode 100644 compiler_gym/envs/cgra/datasets/BUILD create mode 100644 compiler_gym/envs/cgra/datasets/__init__.py create mode 100644 compiler_gym/envs/cgra/datasets/dfg_bench.py create mode 100644 compiler_gym/envs/cgra/service/BUILD create mode 100644 compiler_gym/envs/cgra/service/CMakeLists.txt create mode 100644 compiler_gym/envs/cgra/service/cgra_env.py create mode 100644 compiler_gym/envs/cgra/service/cgra_service.py create mode 100755 compiler_gym/envs/cgra/service/compiler_gym-cgra-service create mode 100755 compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service create mode 100644 compiler_gym/envs/cgra/service/relative_cgra_env.py create mode 100644 compiler_gym/envs/cgra/service/relative_placement_service.py create mode 100644 compiler_gym/envs/cgra/test/test_dfg.json create mode 100644 examples/cgra/.gitignore create mode 100644 examples/cgra/ga.py create mode 100644 examples/cgra/ga_scripts/.gitignore create mode 100755 examples/cgra/ga_scripts/ga_score_extractor.sh create mode 100644 examples/cgra/ga_scripts/plot_CDFs.py create mode 100644 examples/cgra/relative_placement_model.py create mode 100755 examples/cgra/run_ga_relative_placement.sh diff --git a/compiler_gym/datasets/datasets.py b/compiler_gym/datasets/datasets.py index 05ecf0e45..1cbb92e44 100644 --- a/compiler_gym/datasets/datasets.py +++ b/compiler_gym/datasets/datasets.py @@ -152,6 +152,7 @@ def dataset_from_parsed_uri(self, uri: BenchmarkUri) -> Dataset: key = self._dataset_key_from_uri(uri) if key not in self._datasets: + print("datasets are ", str(self._datasets)) raise LookupError(f"Dataset not found: {key}") return self._datasets[key] diff 
--git a/compiler_gym/envs/BUILD b/compiler_gym/envs/BUILD index 6d41958eb..707a36399 100644 --- a/compiler_gym/envs/BUILD +++ b/compiler_gym/envs/BUILD @@ -11,6 +11,7 @@ py_library( deps = [ ":compiler_env", "//compiler_gym:config", + "//compiler_gym/envs/cgra", "//compiler_gym/envs/gcc", "//compiler_gym/envs/llvm", "//compiler_gym/envs/loop_tool", diff --git a/compiler_gym/envs/CMakeLists.txt b/compiler_gym/envs/CMakeLists.txt index 79a115bf1..85acbb201 100644 --- a/compiler_gym/envs/CMakeLists.txt +++ b/compiler_gym/envs/CMakeLists.txt @@ -8,6 +8,7 @@ cg_add_all_subdirs() set(ENVS_DEPS ::compiler_env compiler_gym::envs::gcc::gcc + compiler_gym::envs::cgra::cgra compiler_gym::envs::loop_tool::loop_tool ) if(COMPILER_GYM_ENABLE_LLVM_ENV) diff --git a/compiler_gym/envs/__init__.py b/compiler_gym/envs/__init__.py index f8b8829df..4a85057a0 100644 --- a/compiler_gym/envs/__init__.py +++ b/compiler_gym/envs/__init__.py @@ -5,6 +5,7 @@ from compiler_gym import config from compiler_gym.envs.compiler_env import CompilerEnv from compiler_gym.envs.gcc import GccEnv +from compiler_gym.envs.cgra import CgraEnv if config.enable_llvm_env: from compiler_gym.envs.llvm.llvm_env import LlvmEnv # noqa: F401 @@ -18,6 +19,7 @@ "COMPILER_GYM_ENVS", "CompilerEnv", "GccEnv", + "CgraEnv", "LoopToolEnv", ] diff --git a/compiler_gym/envs/cgra/.gitignore b/compiler_gym/envs/cgra/.gitignore new file mode 100644 index 000000000..11e1f8d09 --- /dev/null +++ b/compiler_gym/envs/cgra/.gitignore @@ -0,0 +1,4 @@ +__pycache__ +env.sh +cart-pole.py +env diff --git a/compiler_gym/envs/cgra/BUILD b/compiler_gym/envs/cgra/BUILD new file mode 100644 index 000000000..b545e4e8a --- /dev/null +++ b/compiler_gym/envs/cgra/BUILD @@ -0,0 +1,26 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+load("@rules_python//python:defs.bzl", "py_library", "py_test") + +py_library( + name = "cgra", + srcs = [ + "__init__.py", + "cgra_rewards.py", + "Operations.py", + "DFG.py" + ], + data = [ + "//compiler_gym/envs/cgra/service", + ], + visibility = ["//visibility:public"], + deps = [ + "//compiler_gym/envs/cgra/datasets", + "//compiler_gym/errors", + "//compiler_gym/service:client_service_compiler_env", + "//compiler_gym/service/runtime", # Implicit dependency of service. + "//compiler_gym/util" + ], +) \ No newline at end of file diff --git a/compiler_gym/envs/cgra/CMakeLists.txt b/compiler_gym/envs/cgra/CMakeLists.txt new file mode 100644 index 000000000..ecac8500f --- /dev/null +++ b/compiler_gym/envs/cgra/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +cg_add_all_subdirs() + +cg_py_library( + NAME + gcc + SRCS + "__init__.py" + "gcc.py" + "gcc_env.py" + "gcc_rewards.py" + DATA + compiler_gym::envs::gcc::service::service + DEPS + compiler_gym::service::client_service_compiler_env + compiler_gym::envs::gcc::datasets::datasets + compiler_gym::errors::errors + compiler_gym::service::runtime::runtime + compiler_gym::util::util + PUBLIC +) diff --git a/compiler_gym/envs/cgra/DFG.py b/compiler_gym/envs/cgra/DFG.py new file mode 100644 index 000000000..a03208296 --- /dev/null +++ b/compiler_gym/envs/cgra/DFG.py @@ -0,0 +1,206 @@ +import json +from pathlib import Path +import random + +from importlib_metadata import entry_points +from compiler_gym.service.proto import ( +Benchmark +) +from typing import Optional, List +from compiler_gym.third_party.inst2vec import Inst2vecEncoder +import compiler_gym.third_party.llvm as llvm +from compiler_gym.envs.cgra.Operations import Operation, operation_from_name + +class Edge(object): + def __init__(self, type): + self.type = type + +class Node(object): + def 
__init__(self, name, operation): + self.name = name + self.operation = operation + + def __str__(self): + return "Node with name " + self.name + " and op " + str(self.operation) + +class DFG(object): + def __init__(self, working_directory: Optional[Path] = None, benchmark: Optional[Benchmark] = None, from_json: Optional[Path] = None, from_text: Optional[str] = None): + # Copied from here: https://github.com/facebookresearch/CompilerGym/blob/development/examples/loop_optimizations_service/service_py/loops_opt_service.py + # self.inst2vec = _INST2VEC_ENCODER + + if from_json is not None: + self.load_dfg_from_json(from_json) + elif from_text is not None: + self.load_dfg_from_text(from_text) + elif benchmark is not None: + # Only re-create the JSON file if we aren't providing an existing one. + # The existing ones are mostly a debugging functionality. + with open(self.working_directory / "benchmark.c", "wb") as f: + f.write(benchmark.program.contents) + + # We use CGRA-Mapper to produce a DFG in JSON. + run_command( + ["cgra-mapper", self.src_path, self.dfg_path] + ) + + # Now, load in the DFG. + self.load_dfg_from_json(self.dfg_path) + + def __str__(self): + res = "nodes are: " + str(self.nodes) + " and edges are " + str(self.adj) + return res + + def load_dfg_from_json(self, path): + import json + with open(path, 'r') as p: + # This isnt' text, but I think the json.loads + # that this calls just works? + self.load_dfg_from_text(p) + + def load_dfg_from_text(self, text): + import json + f = json.loads(text) + self.nodes = {} + self.node_names = [] + self.edges = [] + self.adj = {} + self.entry_points = f['entry_points'] + + # build the nodes first. 
+ for node in f['nodes']: + self.nodes[node['name']] = (Node(node['name'], operation_from_name(node['operation']))) + self.adj[node['name']] = [] + self.node_names.append(node['name']) + + for edge in f['edges']: + self.edges.append(Edge(edge['type'])) + + # Build the adj matrix: + for edge in f['edges']: + fnode = edge['from'] + tnode = edge['to'] + + self.adj[fnode].append(tnode) + + # Bit slow this one --- the adjacency matrix is backwards for it :'( + def get_preds(self, node): + preds = [] + for n in self.adj: + if node.name in self.adj[n]: + preds.append(self.nodes[n]) + + return preds + + def get_succs(self, node): + succs = [] + for n in self.adj[node.name]: + succs.append(self.nodes[n]) + return succs + + # TODO -- fix this, because for a graph with multiple entry nodes, + # this doesn't actually give the right answer :) + # (should do in most cases) + def bfs(self): + to_explore = self.entry_points[:] + print ("Doing BFS, entry points are ") + print(self.entry_points) + seen = set() + + while len(to_explore) > 0: + head = to_explore[0] + to_explore = to_explore[1:] + if head in seen: + continue + seen.add(head) + yield self.nodes[head] + + # Get the following nodes. + following_nodes = self.adj[head] + to_explore += following_nodes + +# Generate a test DFG using the operations in +# 'operations'. +def generate_DFG(operations: List[Operation], size, seed=0): + random.seed(seed) + # Start with some 0-input ops: + start_ops = random.randint(1, min(size, 3)) + + # Jump-start this --- in reality, these can be + # phi nodes coming from previous tiers of the loop, + # or variables coming from outside the loop. + start_options = [] + print("Generating DFG with ", start_ops, " starting nodes") + for op in operations: + if op.inputs == 0: + start_options.append(op) + + node_number = 0 + edge_number = 0 + + entry_points = [] + nodes = {} + node_names = [] + nodes_list = [] + edges = [] + adj = {} + + # Keep track of variables that we should probably use somewhere. 
+ unused_outputs = [] + for i in range(start_ops): + name = "node" + str(node_number) + node_names.append(name) + n = Node(name, random.choice(start_options)) + node_number += 1 + + nodes[name] = n + nodes_list.append(n) + entry_points.append(name) + unused_outputs.append(n) + adj[name] = [] + + while len(nodes) < size: + # Generate a new node. + operation = random.choice(operations) + name = "node" + str(node_number) + node_names.append(name) + node_number += 1 + + # Get inputs for this: + inputs = [] + while len(inputs) < operation.inputs: + # Select random nodes: baised towards the unused ones. + if random.randint(0, 10) > 6 and len(unused_outputs) > 0: + inputs.append(unused_outputs[0]) + unused_outputs = unused_outputs[1:] + else: + inputs.append(random.choice(nodes_list)) + # If the node has no arguments, then we should add it + # as an entry point. --- todo --- should we just skip + # this avoid creating graphs with too many constant loads? + if operation.inputs == 0: + entry_points.append(name) + + # now create the edges. + for inp in inputs: + edge = Edge('data') + # Not too sure why this doens't have the start/end points. + # Think it's a dead datafield. + edges.append(edge) + + adj[inp.name].append(name) + + this_node = Node(name, operation) + nodes[name] = this_node + nodes_list.append(this_node) + unused_outputs.append(this_node) + adj[name] = [] + + res = DFG() + res.adj = adj + res.nodes = nodes + res.entry_points = entry_points + res.edges = edges + res.node_names = node_names + print(res.nodes) + + return res \ No newline at end of file diff --git a/compiler_gym/envs/cgra/Driver.py b/compiler_gym/envs/cgra/Driver.py new file mode 100644 index 000000000..95374938c --- /dev/null +++ b/compiler_gym/envs/cgra/Driver.py @@ -0,0 +1,98 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+"""This script demonstrates how the Python example service without needing +to use the bazel build system. Usage: + + $ python example_compiler_gym_service/demo_without_bazel.py + +It is equivalent in behavior to the demo.py script in this directory. +""" +import logging +from pathlib import Path +from typing import Iterable + +import CGRA + +import gym + +from compiler_gym.datasets import Benchmark, Dataset +from compiler_gym.datasets.uri import BenchmarkUri +from compiler_gym.spaces import Reward +from compiler_gym.util.logging import init_logging +from compiler_gym.util.registration import register + +EXAMPLE_PY_SERVICE_BINARY: Path = Path( + "CompileCGRA.py" +) +assert EXAMPLE_PY_SERVICE_BINARY.is_file(), "Service script not found" + + +class ExampleDataset(Dataset): + def __init__(self, *args, **kwargs): + super().__init__( + name="benchmark://example-v0", + license="MIT", + description="An example dataset", + ) + self._benchmarks = { + "/foo": Benchmark.from_file_contents( + "benchmark://example-v0/foo", "Ir data".encode("utf-8") + ), + "/bar": Benchmark.from_file_contents( + "benchmark://example-v0/bar", "Ir data".encode("utf-8") + ), + } + + def benchmark_uris(self) -> Iterable[str]: + yield from (f"benchmark://example-v0{k}" for k in self._benchmarks.keys()) + + def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark: + if uri.path in self._benchmarks: + return self._benchmarks[uri.path] + else: + raise LookupError("Unknown program name") + + +# Register the environment for use with gym.make(...). +register( + id="example-v0", + entry_point="compiler_gym.envs:CompilerEnv", + kwargs={ + "service": EXAMPLE_PY_SERVICE_BINARY, + "rewards": [RuntimeReward()], + "datasets": [ExampleDataset()], + }, +) + +def main(): + # Use debug verbosity to print out extra logging information. + init_logging(level=logging.DEBUG) + + # Create the environment using the regular gym.make(...) interface. 
+ iteration = 0 + with gym.make("example-v0") as env: + env.reset() + done = False + while not done: + # Not 100% sure why this needs to go in an array, but it seems + # to complain about dimensionality errors due to an issue + # in compiler_gym --- maybe once there is a + action = env.action_space.sample() + print("Starting Iteration " + str(iteration)) + print ("Action is:") + print(action) + observation, reward, done, info = env.step(action, observation_spaces=["ir", "CurrentInstruction", "CurrentInstructionIndex", "II"], reward_spaces=["II"]) + print ("Got observation") + print (observation) + print ("Got reward") + print (reward) + if done: + env.reset() + print ("Overall reward is ", reward) + iteration += 1 + + +if __name__ == "__main__": + main() diff --git a/compiler_gym/envs/cgra/Model.py b/compiler_gym/envs/cgra/Model.py new file mode 100644 index 000000000..ca262325d --- /dev/null +++ b/compiler_gym/envs/cgra/Model.py @@ -0,0 +1,2 @@ +class ScheduleModel(object): + def \ No newline at end of file diff --git a/compiler_gym/envs/cgra/Operations.py b/compiler_gym/envs/cgra/Operations.py new file mode 100644 index 000000000..c4de467bd --- /dev/null +++ b/compiler_gym/envs/cgra/Operations.py @@ -0,0 +1,54 @@ + +class Operation(object): + def __init__(self, name, inputs, outputs, latency): + self.name = name + self.inputs = inputs + self.outputs = outputs + self.latency = latency + + def __str__(self): + return self.name + +Operations = [ + # TODO --- should we support more operations as heterogeneous? + # IMO most of the other things that are scheduled are + # pretty vacuous, although we could explore supporting those. + # Operation is: name, inputs, outputs, cycles. 
+ Operation("add", 2, 1, 1), + Operation("mul", 2, 1, 1), + Operation("sub", 2, 1, 1), + Operation("div", 2, 1, 1), + Operation("and", 2, 1, 1), + Operation("or", 2, 1, 1), + Operation("xor", 2, 1, 1), + Operation("fmul", 2, 1, 1), + Operation("fsub", 2, 1, 1), + Operation("fadd", 2, 1, 1), + Operation("fdiv", 2, 1, 1), + Operation("rsh", 2, 1, 1), + Operation("lsh", 2, 1, 1), + Operation("load", 1, 1, 1), + Operation("store", 1, 1, 1), + Operation("const", 0, 1, 1), + Operation("noop", 0, 0, 1), +] + +def operation_index_of(op): + ind = 0 + for e in Operations: + if e.name == op.name: + return ind + else: + print (e.name + " uneq " + str(op)) + ind += 1 + return -1 + +def operation_latency(op): + # TODO --- model latency --- or at least expost this + # to a configuration. + return op.latency + + +def operation_from_name(n): + ind = operation_index_of(n) + return Operations[ind] \ No newline at end of file diff --git a/compiler_gym/envs/cgra/__init__.py b/compiler_gym/envs/cgra/__init__.py new file mode 100644 index 000000000..79cbcba6b --- /dev/null +++ b/compiler_gym/envs/cgra/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+"""This module demonstrates how to """ +from pathlib import Path + +from compiler_gym.envs.cgra.DFG import DFG +from compiler_gym.envs.cgra.service.cgra_service import Schedule, CGRA +from compiler_gym.envs.cgra.service.cgra_env import CgraEnv +from compiler_gym.envs.cgra.service.relative_cgra_env import RelativeCgraEnv +from compiler_gym.util.registration import register +from compiler_gym.util.runfiles_path import runfiles_path + +CGRA_SERVICE_BINARY: Path = runfiles_path( + "compiler_gym/envs/cgra/service/compiler_gym-cgra-service" +) +RELATIVE_CGRA_SERVICE_BINARY: Path = runfiles_path( + "compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service" +) + +register( + id="relative-cgra-v0", + entry_point="compiler_gym.envs.cgra:RelativeCgraEnv", + kwargs={ "service": RELATIVE_CGRA_SERVICE_BINARY }, +) + +register( + id="cgra-v0", + entry_point="compiler_gym.envs.cgra:CgraEnv", + kwargs={"service": CGRA_SERVICE_BINARY}, +) + +__all__ = ["CgraEnv", "DFG", "CGRA", "Schedule", "RelativeCgraEnv"] diff --git a/compiler_gym/envs/cgra/cgra_rewards.py b/compiler_gym/envs/cgra/cgra_rewards.py new file mode 100644 index 000000000..3e3be6781 --- /dev/null +++ b/compiler_gym/envs/cgra/cgra_rewards.py @@ -0,0 +1,66 @@ +from compiler_gym.spaces import Reward + +class IntermediateIIReward(Reward): + """An example reward that uses changes in the "runtime" observation value + to compute incremental reward. + """ + + def __init__(self): + super().__init__( + name="II", + observation_spaces=["II"], + default_value=0, + default_negates_returns=True, + deterministic=True, + platform_dependent=True, + ) + pass + + def reset(self, benchmark: str, observation_view): + del benchmark # unused + + def update(self, action, observations, observation_view): + del action + del observation_view + + print("Computing Reward: got II of ", observations[0]) + if observations[0] is None: + # If we just failed to generate a valid schedule all together, + # return a punishment. 
Not 100% sure what this punishment should + # be though. + return -1.0 + # Add a constant negative reward for not figuring it out? + return -float(observations[0]) - 0.1 + + +""" +For algorithms where a 'right' answer is quick to arrive at, +the intermediate rewards are less important. +""" +class FinalIIReward(Reward): + def __init__(self): + super().__init__( + name='II', + observation_spaces=['II', 'Done'], + default_value=0, + default_negates_returns=True, + deterministic=True, + platform_dependent=True + ) + + def reset(self, benchmark: str, observation_view): + del benchmark + + def update(self, action, observations, observation_view): + del action + del observation_view + + print ("Computing Reward: get II of ", observations[0]) + print ("Got finished: ", observations[1]) + + if observations[0] is None: + return -0.1 + if observations[1]: + return -float(observations[0]) - 0.1 + else: + return -0.1 \ No newline at end of file diff --git a/compiler_gym/envs/cgra/compiler_gym_test.py b/compiler_gym/envs/cgra/compiler_gym_test.py new file mode 100644 index 000000000..0cefdd412 --- /dev/null +++ b/compiler_gym/envs/cgra/compiler_gym_test.py @@ -0,0 +1,30 @@ +import compiler_gym + +from compiler_gym.service.proto import ( + ActionSpace, + Benchmark, + DoubleRange, + Event, + Int64Box, + Int64Tensor, + NamedDiscreteSpace, + ObservationSpace, + Space, + StringSpace +) + +env.reset(benchmark="benchmark://npb-v0/50") +episode_reward = 0 + +while len(nodes_to_schedule) > 0: + + observation, reward, done, info = env.step(env.action_space.sample()) + if done: + break + + episode_reward += reward + + print(f"Ste {i}, quality={episode_reward:.2%}") + +with compiler_gym.make("llvm-autophase-ic-v0") as env: + env.reset() \ No newline at end of file diff --git a/compiler_gym/envs/cgra/datasets/BUILD b/compiler_gym/envs/cgra/datasets/BUILD new file mode 100644 index 000000000..5f355c61b --- /dev/null +++ b/compiler_gym/envs/cgra/datasets/BUILD @@ -0,0 +1,15 @@ 
+load("@rules_python//python:defs.bzl", "py_library") + +py_library( + name = "datasets", + srcs = [ + "__init__.py", + "dfg_bench.py" + ], + visibility = ["//visibility:public"], + deps = [ + "//compiler_gym/datasets", + "//compiler_gym/service/proto", + "//compiler_gym/util", + ] +) \ No newline at end of file diff --git a/compiler_gym/envs/cgra/datasets/__init__.py b/compiler_gym/envs/cgra/datasets/__init__.py new file mode 100644 index 000000000..40992c67d --- /dev/null +++ b/compiler_gym/envs/cgra/datasets/__init__.py @@ -0,0 +1,45 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +from functools import lru_cache +from pathlib import Path +from typing import Iterable, List, Optional, Union + +from compiler_gym.datasets import Dataset +from compiler_gym.envs.cgra.datasets.dfg_bench import GeneratedDFGs, GeneratedDFGs10, GeneratedDFGs15, GeneratedDFGs20, GeneratedDFGs5 +from compiler_gym.util.runfiles_path import site_data_path + + +def _get_cgra_datasets( + site_data_base: Optional[Path] = None +) -> Iterable[Dataset]: + site_data_base = site_data_base or site_data_path("cgra-v0") + + yield GeneratedDFGs5(site_data_base=site_data_base) + yield GeneratedDFGs10(site_data_base=site_data_base) + yield GeneratedDFGs15(site_data_base=site_data_base) + yield GeneratedDFGs20(site_data_base=site_data_base) + + +@lru_cache(maxsize=16) +def get_cgra_datasets( + site_data_base: Optional[Path] = None +) -> List[Dataset]: + """Instantiate the builtin cgra datasets. + + :param site_data_base: The root of the site data path. + + :return: An iterable sequence of :class:`Dataset + ` instances. 
+ """ + return list(_get_cgra_datasets(site_data_base)) + + +__all__ = [ + "GeneratedDFGs5", + "GeneratedDFGs10", + "GeneratedDFGs15", + "GeneratedDFGs20", + "get_cgra_datasets", +] diff --git a/compiler_gym/envs/cgra/datasets/dfg_bench.py b/compiler_gym/envs/cgra/datasets/dfg_bench.py new file mode 100644 index 000000000..13749f53c --- /dev/null +++ b/compiler_gym/envs/cgra/datasets/dfg_bench.py @@ -0,0 +1,84 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import enum +import io +import logging +import os +import re +import shutil +import subprocess +import sys +import tarfile +import tempfile +import numpy as np +from collections import defaultdict +from pathlib import Path +from threading import Lock +from typing import Callable, Dict, Iterable, List, NamedTuple, Optional + +import fasteners +from compiler_gym.datasets.dataset import Dataset + +from compiler_gym.datasets import Benchmark, TarDatasetWithManifest +from compiler_gym.datasets.benchmark import ValidationCallback +from compiler_gym.datasets.uri import BenchmarkUri +from compiler_gym.envs.llvm import llvm_benchmark +from compiler_gym.errors import ValidationError +from compiler_gym.service.proto import BenchmarkDynamicConfig, Command +from compiler_gym.third_party import llvm +from compiler_gym.util.commands import Popen +from compiler_gym.util.download import download +from compiler_gym.util.runfiles_path import cache_path, site_data_path +from compiler_gym.util.timer import Timer + +from compiler_gym.envs.cgra.Operations import Operations +from compiler_gym.envs.cgra.DFG import generate_DFG +import pickle + +class GeneratedDFGs(Dataset): + def __init__(self, size: int, site_data_base=None): + super().__init__( + "benchmark://dfg_" + str(size), + "A dataset of automatically generated DFGs of a particular size.", + "None", + site_data_base=site_data_base + ) + + 
self.dfg_size = size + + def benchmark_uris_without_index(self): + return "benchmark://dfg_" + str(self.dfg_size) + "/" + + def benchmark_uris(self) -> Iterable[str]: + ind = 0 + while True: + yield (self.benchmark_uris_without_index() + str(ind)) + ind += 1 + + def benchmark_from_index(self, dfg_index, uri): + dfg = generate_DFG(Operations, self.dfg_size, seed=dfg_index) + + return Benchmark.from_file_contents(uri=uri, data=pickle.dumps(dfg)) + + def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark: + dfg_index = int(uri.path[1:]) + return self.benchmark_from_index(dfg_index, uri) + + def _random_benchmark(self, random_state: np.random.Generator) -> Benchmark: + index = random_state.randomint(10000000000) + return self.benchmark_from_index(index, self.benchmark_uris_without_index() + str(index)) + +class GeneratedDFGs5(GeneratedDFGs): + def __init__(self, site_data_base=None): + super().__init__(5, site_data_base) +class GeneratedDFGs10(GeneratedDFGs): + def __init__(self, site_data_base=None): + super().__init__(10, site_data_base) +class GeneratedDFGs15(GeneratedDFGs): + def __init__(self, site_data_base=None): + super().__init__(15, site_data_base) +class GeneratedDFGs20(GeneratedDFGs): + def __init__(self, site_data_base=None): + super().__init__(20, site_data_base) \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/BUILD b/compiler_gym/envs/cgra/service/BUILD new file mode 100644 index 000000000..d7991242d --- /dev/null +++ b/compiler_gym/envs/cgra/service/BUILD @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +filegroup( + name = "service", + srcs = [ + "compiler_gym-cgra-service", + "compiler_gym-relative-placement-cgra-service", + "cgra_service.py", + "cgra_env.py", + "relative_cgra_env.py", + "relative_placement_service.py", + ], + visibility = ["//visibility:public"], +) \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/CMakeLists.txt b/compiler_gym/envs/cgra/service/CMakeLists.txt new file mode 100644 index 000000000..3537a51e3 --- /dev/null +++ b/compiler_gym/envs/cgra/service/CMakeLists.txt @@ -0,0 +1,14 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +cg_add_all_subdirs() + +cg_filegroup( + NAME "service" + FILES + "${CMAKE_CURRENT_LIST_DIR}/cgra_service.py" + "${CMAKE_CURRENT_LIST_DIR}/compiler_gym-cgra-service" + "${CMAKE_CURRENT_LIST_DIR}/compiler_gym-cgra-relative-placement-service" +) diff --git a/compiler_gym/envs/cgra/service/cgra_env.py b/compiler_gym/envs/cgra/service/cgra_env.py new file mode 100644 index 000000000..0dcf578db --- /dev/null +++ b/compiler_gym/envs/cgra/service/cgra_env.py @@ -0,0 +1,53 @@ +import os +import shutil +from pathlib import Path +from typing import Iterable, List, Optional, Union, cast +from compiler_gym.util.gym_type_hints import ObservationType, OptionalArgumentValue + +import numpy as np +from compiler_gym.util.runfiles_path import site_data_path + +from compiler_gym.datasets import Benchmark, Dataset +from compiler_gym.envs.cgra.datasets import get_cgra_datasets + +from compiler_gym.errors import BenchmarkInitError +from compiler_gym.service.client_service_compiler_env import ClientServiceCompilerEnv +from compiler_gym.spaces import Box, Commandline +from compiler_gym.spaces import Dict as DictSpace +from compiler_gym.spaces import Scalar, Sequence + +from compiler_gym.envs.cgra.cgra_rewards import IntermediateIIReward + +class CgraEnv(ClientServiceCompilerEnv): 
+ def __init__(self, *args, datasets_site_path: Optional[Path] = None, benchmark: Optional[Union[str, Benchmark]] = None, datasets_set_path: Optional[Path] = None, **kwargs): + super().__init__( + *args, + **kwargs, + benchmark = benchmark or "dfg_10/1", + datasets=get_cgra_datasets(site_data_base=datasets_site_path), + rewards=[IntermediateIIReward()] + , + derived_observation_spaces=[ + # { + # "id": "CurrentOperation", + # "base_id": + # } + ] + ) + + def reset(self, reward_space = OptionalArgumentValue.UNCHANGED, *args, **kwargs): + observation = super().reset(reward_space=reward_space, *args, **kwargs) + + return observation + + def make_benchmark( + self, inputs, copt, system_include: bool = True, timeout: int=600 + ): + # TOOD + return None + + def render(self, mode="human"): + if mode == "human": + print("human-visible schedule") + else: + return self.render(mode) \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/cgra_service.py b/compiler_gym/envs/cgra/service/cgra_service.py new file mode 100644 index 000000000..f2231ea0b --- /dev/null +++ b/compiler_gym/envs/cgra/service/cgra_service.py @@ -0,0 +1,814 @@ +import logging +from re import I +import pickle + +from typing import Optional, Tuple, List, Dict, Set, Union +from pathlib import Path +from compiler_gym.views import ObservationSpaceSpec +from compiler_gym.spaces import Reward +from compiler_gym.envs.llvm.llvm_rewards import CostFunctionReward +from compiler_gym.service.client_service_compiler_env import ClientServiceCompilerEnv +from compiler_gym.util.gym_type_hints import ObservationType, OptionalArgumentValue +from compiler_gym.service import CompilationSession +from compiler_gym.util.commands import run_command +from compiler_gym.service.proto import ( +ActionSpace, +Benchmark, +DoubleRange, +Event, +Int64Box, +Int64Range, +Int64Tensor, +NamedDiscreteSpace, +ObservationSpace, +Space, +StringSpace +) +import compiler_gym.third_party.llvm as llvm +from 
compiler_gym.third_party.inst2vec import Inst2vecEncoder +from compiler_gym.envs.cgra.DFG import DFG, Node, Edge, generate_DFG +from compiler_gym.envs.cgra.Operations import * + +from compiler_gym.service.proto.compiler_gym_service_pb2 import Int64SequenceSpace +#from compiler_gym.service.runtime import create_and_run_compiler_gym_service + +CompileSettings = { + # When this is set, the scheduler will take schedules + # that don't account for delays appropriately, and try + # to stretch them out to account for delays correctly. + # When this is false, the compiler will just reject + # such invalid schedules. + # (when set, something like x + (y * z), scheduled + # as +: PE0 on cycle 0, *: PE1 on cycle 0 is valid). + "IntroduceRequiredDelays": False +} + +def load_CGRA(self, file): + # TODO -- properly load CGRA + return CGRA(5, 5) + +def load_NOC(self, file): + # TODO -- properly load NOC. + + # Initialize to a straight-line NOC + return NOC([(x, x + 1) for x in range(5)]) + +class CGRA(object): + # Assume a rectangular matrix with neighbour + # connections. We can handle the rest later. + def __init__(self, nodes, noc): + self.nodes = nodes + self.noc = noc + self.dim = len(self.nodes) + + def __str__(self): + return "CGRA: (" + (str(self.nodes)) + " nodes)" + + def is_supported(node_index, op): + # TODO -- support heterogeneity + return True + + def cells_as_list(self): + return self.nodes[:] + + def get_neighbour(self, direction, location_from): + return self.noc.get_neighbour(direction, location_from) + +# This is just a wrapper around an actual object that +# is a schedule --- see the Schedule object for something +# that can be interacted with. +class InternalSchedule(object): + def __init__(self, cgra, dfg): + self.dfg = dfg # For tensorization. + self.cgra = cgra + self.operations = self.initialize_schedule() + + # Returns a fixed-length tensor for this schedule. + # It focuses on the last few cycles. 
# The definitions below are the methods of InternalSchedule that follow its
# __init__ (to_rlmap_tensor .. clear_operation).  The schedule itself is
# self.operations: a list indexed by time step, each entry a list with one
# slot per location (self.cgra.dim + 1 slots) holding a node or None.

def to_rlmap_tensor(self, node, time_window_size=1):
    """Return a flat, fixed-length encoding of the schedule around *node*.

    The window covers ``time_window_size`` time steps placed (roughly
    symmetrically) around the time step at which *node* is scheduled.  For
    every time step in the window and every location, a vector of
    ``cgra.dim + 1`` integers is appended:

    * all zeros when the slot is empty (or the time step lies outside the
      current schedule);
    * otherwise ``1`` at the location of each predecessor of the occupying
      node and ``2`` at the location of each successor (successors are
      written last, so they win on a clash).

    *node* must already be placed: ``get_location`` returns ``(None, None)``
    for an unplaced node and the window arithmetic below then raises
    ``TypeError``.
    """
    cells = self.cgra.dim + 1
    empty_step = [0] * (cells * cells)

    time_window, _ = self.get_location(node)
    # Aim is to be symmetric around the central time window.
    start_time = time_window - (time_window_size // 2)
    end_time = time_window + ((time_window_size - 1) // 2)

    result_tensor = []
    for t in range(start_time, end_time + 1):
        if t < 0 or t >= len(self.operations):
            # Before the start or past the end of the schedule: pad with
            # zeros so the tensor keeps a fixed length.
            result_tensor += empty_step
            continue

        for loc in range(cells):
            elem = self.operations[t][loc]
            state_vector = [0] * cells
            if elem is not None:
                marked_groups = (
                    (self.dfg.get_preds(elem), 1),
                    (self.dfg.get_succs(elem), 2),
                )
                for related_nodes, marker in marked_groups:
                    for related in related_nodes:
                        # Distinct names: the original re-bound `loc` here,
                        # shadowing the enclosing loop variable.
                        _, related_loc = self.get_location(related)
                        if related_loc is None:
                            # Not placed yet, so there is no cell to mark
                            # (indexing with None used to raise TypeError).
                            continue
                        state_vector[related_loc] = marker
            result_tensor += state_vector

    return result_tensor


def __str__(self):
    """Return a multi-line dump with one line per time step."""
    res = "Schedule is \n"
    for t, row in enumerate(self.operations):
        res += "time " + str(t) + ": "
        res += str([str(n) for n in row])
        res += "\n"

    return res


def locations(self):
    """Yield every location index, 0 .. cgra.dim inclusive."""
    yield from range(self.cgra.dim + 1)


def add_timestep(self):
    """Build and return one empty time step (a list of None, one per location).

    Despite the name this does not modify the schedule; callers append the
    returned list to ``self.operations`` themselves.
    """
    return [None] * (self.cgra.dim + 1)


def initialize_schedule(self):
    """Return a fresh schedule consisting of a single empty time step."""
    return [self.add_timestep()]


def get_node(self, optime, oploc):
    """Return the node at (optime, oploc), or None if the time is out of range.

    Negative times are treated as out of range; previously they wrapped
    around to the end of the schedule through negative list indexing.
    """
    if 0 <= optime < len(self.operations):
        return self.operations[optime][oploc]
    return None


# See how long the thing scheduled at (T, X) lasts
# for --- note that if you pass in T + N, and the op
# started at T, you'll get true_latency - N.
def get_latency(self, optime, oploc):
    """Count the consecutive time steps from *optime* holding the same node.

    Returns 0 when the slot at (optime, oploc) is empty.
    """
    op = self.get_node(optime, oploc)
    old_op = op
    t = optime

    while op is not None and op == old_op:
        t += 1
        old_op = op
        op = self.get_node(t, oploc)

    return t - optime


def slots_are_free(self, x, start_time, end_time):
    """Return True if location *x* is empty for every t in [start_time, end_time).

    Side effect: the schedule is extended with empty time steps as needed so
    that every inspected time step exists.  A negative time step is never
    free (it does not exist); previously it aliased a time step at the end of
    the schedule.
    """
    for t in range(start_time, end_time):
        if t < 0:
            return False

        # Add more timesteps to the schedule as required.
        while t >= len(self.operations):
            self.operations.append(self.add_timestep())

        # This runs inside get_free_time's polling loop, so keep the trace
        # at debug level instead of printing unconditionally.
        logging.debug(
            "Looking at time %s op location %s (oplen is %s)",
            t, x, len(self.operations[t]),
        )
        if self.operations[t][x] is not None:
            return False
    return True


def get_free_time(self, earliest_time, length, loc):
    """Return the first time >= *earliest_time* at which *loc* is free for
    *length* consecutive time steps."""
    while not self.slots_are_free(loc, earliest_time, earliest_time + length):
        earliest_time += 1
    return earliest_time


def set_operation(self, time, loc, node, latency):
    """Place *node* at *loc* for time steps [time, time + latency).

    Returns True when the node was placed, False when any of the slots was
    already occupied (or *time* is negative); nothing is written in that case.
    The schedule is grown so that at least one time step exists after the end
    of the operation.
    """
    while time + latency >= len(self.operations):
        self.operations.append(self.add_timestep())

    if not self.slots_are_free(loc, time, time + latency):
        # Not set
        return False

    for t in range(time, time + latency):
        self.operations[t][loc] = node
    return True


def clear_operation(self, time, loc, latency):
    """Blindly empty location *loc* for time steps [time, time + latency).

    Raises AssertionError when the range is empty (latency <= 0), as a sanity
    check that the call actually did something.
    """
    while time + latency >= len(self.operations):
        # Was `self.operaitons`, which raised AttributeError whenever the
        # schedule had to grow.
        self.operations.append(self.add_timestep())

    cleared = False
    for t in range(time, time + latency):
        cleared = True
        self.operations[t][loc] = None

    assert cleared  # sanity-check that we actually did something.
+ + def get_location(self, node: Node): + # TODO -- make a hash table or something more efficient if required. + for t in range(len(self.operations)): + for x in range(self.cgra.dim + 1): + if self.operations[t][x] is None: + continue + if self.operations[t][x].name == node.name: + return t, x + return None, None + + def free_times(self, x): + occupied_before = False + for t in range(len(self.operations)): + if self.operations[t][x] is not None: + occupied_before = True + else: + if occupied_before: + # This was occupired at the last timestep t, + # so it's become freed at this point. + occupied_before = False + yield t + + def has_use(self, x): + for t in range(len(self.operations)): + if self.operations[t][x] is not None: + return True + return False + + def alloc_times(self, x): + free_before = True + for t in range(len(self.operations)): + if self.operations[t][x] is not None: + # Was previously free. + if free_before: + # Now was not free before. + free_before = False + yield t + else: + free_before = True + +class NOCSchedule(object): + def __init__(self): + self.schedule = [] + + def occupy_path(self, start_cycle, path): + for hop in path: + self.occupy_connection(start_cycle, hop) + start_cycle += 1 + + def occupy_connection(self, time, connection): + while time >= len(self.schedule): + self.schedule.append(set()) + + if connection in self.schedule[time]: + # Can't occuoy an already occupied connection. + assert False + else: + self.schedule[time].add(connection) + + def is_occupied(self, time, hop): + if time >= len(self.schedule): + # Not occupied if beyond current suecule + return False + else: + return hop in self.schedule[time] + +# A class representing a NOC. +class NOC(object): + def __init__(self, nodes, neighbours: Dict[str, List[str]]): + # A list of all the nodes. + self.nodes = nodes + # A directed list of one-hop connections between nodes. + self.neighbours = neighbours + + # Returns the neighbour within a 3D space. 
I don't really + # know how best to set this up in reality ---- espc if a node + # doesn't really have e.g. an 'up' neighbour, but only a 'up and left at + # the same time neighbour'. The key constraint currently implemented + # here is that only six directions are supported (up, down, north, south, east + # west) + def get_neighbour(self, direction, location): + ns = self.neighbours[location] + index = None + # TODO --- we need a better way of storing these + # so it isn't implicit in the connection --- this implies + # that everything that has a 'south' connection must + # also have a north connection. + if direction == 'north': + index = 0 + elif direction == 'south': + index = 1 + elif direction == 'east': + index = 2 + elif direction == 'west': + index = 3 + elif direction == 'up': + index = 4 + elif direction == 'down': + index = 5 + + if index is None: + print("Unknown index ", direction) + assert False + + if index < len(ns): + print("Returning a node ", len(ns)) + return ns[index] + else: + return None + + # Work out the shortest path from from_n to to_n in + # the current NoC. + def shortest_path(self, from_n, to_n): + return self.shortest_available_path(0, from_n, to_n, None) + + def shortest_available_path(self, start_time, from_n, to_n, schedule): + # So we should obviously do this better. + # Just a hack-y BFS search. + seen = set() + # Keep track of node and path so far. + # Invariant: this is sorted by shortest + # path. + to_see = [(from_n, [])] + + while len(to_see) > 0: + n, path_to = to_see[0] + to_see = to_see[1:] + + if n == to_n: + # Found the path. By invariant, this is the shortest + # path. + return path_to + + nexts = self.neighbours[n] + for node in nexts: + if node in seen: + pass + else: + curr_time = start_time + len(path_to) + if schedule is not None: + if schedule.is_occupied(curr_time, (n, node)): + # Can't use this as a path if it's currently + # occupied. + # TODO --- Add support for buffered delays. 
+ # continue + if CompileSettings['IntroduceRequiredDelays']: + continue + else: + return None + # This is BFS, so everything must bewithin + # one hop of the current search. Therefore + # this is the longest one, and can go at the back. + to_see.append((node, path_to + [(n, node)])) + # No path between nodes. + return None + +class Schedule(object): + def __init__(self, cgra, dfg): + # Note that we don't store the DFG because this actually + # creates the schedule, but so that this can be tensorized. + self.dfg = dfg + self.cgra = cgra + + self.operations = InternalSchedule(cgra, self.dfg) + + def __str__(self): + return "CGRA:" + str(self.operations) + + def set_operation(self, time, index, node, latency): + return self.operations.set_operation(time, index, node, latency) + + def to_rlmap_tensor(self, node, time_window_size=1): + # Get the RLMap Tensor --- note that it is node dependent + # as this is a compiler that can support time-multiplexing + # of operations on nodes. + return self.operations.to_rlmap_tensor(node, time_window_size=time_window_size) + + def swap(self, origin_time, origin_index, target_time, target_index): + # This is a slightly non-trivial function since operations may have non-one + # latency. We treat swap-points as the starting-points of the operation --- + # if the target point is in the middle of another operation, we choose to + # schedule this /at the start of the other operation/ + + # First, we need to make sure that the whole target + # window is clear: + op_latency = self.operations.get_latency(origin_time, origin_index) + operation_node = self.operations.get_node(origin_time, origin_index) + # Check that the target window is clear: + # IF its' not clear, the easiest thing to do is a no-op. 
+ assert target_time is not None + assert target_index is not None + assert operation_node is not None + + target_window_is_clear = self.operations.set_operation(target_time, target_index, operation_node, operation_node.operation.latency) + # Now do the swap of operations + if target_window_is_clear: + print("Doing swap between ", origin_index, 'at', origin_time, 'to', target_index, 'at', target_time, 'with latency', op_latency) + self.operations.set_operation(target_time, target_index, operation_node, op_latency) + self.operations.clear_operation(origin_time, origin_index, op_latency) + return True + else: + return False + + + def clear_operation(self, time, index, latency): + self.operations.clear_operation(time, index, latency) + + def get_location(self, node): + return self.operations.get_location(node) + + def compute_and_reserve_communication_distance(self, cycle, n1, n2, noc_schedule): + # Compute the shortest path: + n1_t, n1_loc = self.get_location(n1) + n2_t, n2_loc = self.get_location(n2) + + # TODO -- a sanity-check that cycle is after this might be a good idea. + path = self.cgra.noc.shortest_available_path(cycle, n1_loc, n2_loc, noc_schedule) + + if path is None: + # TODO --- we should probably punish the agent a lot here + # rather than crashing? + print("Schedule has not valid path between ", n1_loc, "and", n2_loc, "at time", cycle) + return None + else: + noc_schedule.occupy_path(cycle, path) + + # I think we don't need the whole path? Not too sure though. + return len(path) + + def get_II(self, dfg): + # Compute the II of the current schedule. + + # We don't require the placement part to be actually correct --- + # do the actual schedule what we generate can differ + # from the schedule we have internally. + actual_schedule = InternalSchedule(self.cgra, dfg) + noc_schedule = NOCSchedule() # The NOC schedule is recomputed + # every time because it is dependent on the actual + # schedule. + + # What cycle does this node get executed on? 
+ cycles_start = {} + # What cycle does the result of this node become + # available on? + cycles_end = {} + + # Keep track of when resources can be re-used. + freed = {} # When we're done + used = {} # When we start + + # We keep track of whether scheduling is finished + # elsewhere --- this is just a sanity-check. + finished = True + + # Step 1 is to iterate over all the nodes + # in a BFS manner. + for node in dfg.bfs(): + # For each node, compute the latency, + # and the delay to get the arguments to + # reach it. + preds = dfg.get_preds(node) + + # Get the time that this operation has + # been scheduled for. + scheduled_time, loc = self.get_location(node) + earliest_time = scheduled_time + + if scheduled_time is None: + finished = False + # This is not a complete operation + continue + + print("Looking at node ", node) + print("Has preds ", [str(p) for p in preds]) + for pred in preds: + if pred.name not in cycles_end: + finished = False + continue + + pred_cycle = cycles_end[pred.name] + print ("Have pred that finishes at cycle", pred_cycle) + + # Compute the time to this node, and + # reserve those paths on the NoC. + distance = self.compute_and_reserve_communication_distance(pred_cycle, pred, node, noc_schedule) + + if distance is None: + # This schedule isn't possible due to conflicting memory requirements. + return None, False + + # Compute when this predecessor reaches this node: + arrival_time = distance + pred_cycle + earliest_time = max(earliest_time, arrival_time) + + # TODO --- compute a penalty based on the gap between + # operations to account for buffering. + + # Check that the PE is actually free at this time --- if it + # isn't, push the operation back. + latency = operation_latency(node.operation) + free_time = actual_schedule.get_free_time(earliest_time, latency, loc) + actual_schedule.set_operation(free_time, loc, node, latency) + if free_time != earliest_time: + # We should probably punish the agent for this. 
+ # Doesn't have any correctness issues as long as we + # assume infinite buffering (which we shouldn't do, and + # will eventually fix). + print("Place failed to place node in a sensible place: it is already in use!") + + # TODO --- do we need to punish this more? (i.e. integrate + # buffering requirements?) + + # This node should run at the earliest time available. + cycles_start[node.name] = free_time + cycles_end[node.name] = free_time + operation_latency(node.operation) + + print ("Node ", node.name, "has earliest time", earliest_time) + + # Now that we've done that, we need to go through all the nodes and + # work out the II. + # When was this computation slot last used? (i.e. when could + # we overlap the next iteration?) + min_II = 0 + for loc in actual_schedule.locations(): + # Now, we could achieve better performance + # by overlapping these in a more fine-grained + # manner --- but that seems like a lot of effort + # for probably not much gain? + # there ar probably loops where the gain + # is not-so-marginal. + if actual_schedule.has_use(loc): + # Can only do this for PEs that actually have uses! + last_free = max(actual_schedule.free_times(loc)) + first_alloc = min(actual_schedule.alloc_times(loc)) + + difference = last_free - first_alloc + print ("Diff at loc", loc, "is", difference) + min_II = max(min_II, difference) + + # TODO --- we should probably return some kind of object + # that would enable final compilation also. 
+ return min_II, finished + +# Create a dummy CGRA that is a bunch of PEs in a row with neighbor-wise communciations +nodes = [1, 2, 3, 4] +neighbours_dict = {} +for n in range(1, len(nodes)): + neighbours_dict[n] = [n + 1, n - 1] +neighbours_dict[0] = [n + 1] +neighbours_dict[len(nodes)] = [n - 1] + +compilation_session_noc = NOC(nodes, neighbours_dict) +compilation_session_cgra = CGRA(nodes, compilation_session_noc) + +action_space = [ActionSpace(name="Schedule", + space=Space( + named_discrete=NamedDiscreteSpace( + name=[str(x) for x in compilation_session_cgra.cells_as_list()] + ) + # int64_box=Int64Box( + # low=Int64Tensor(shape=[2], value=[0, 0]), + # high=Int64Tensor(shape=[2], value=[compilation_session_cgra.x_dim, compilation_session_cgra.y_dim]) + # ) + ) + ) + ] + +MAX_WINDOW_SIZE = 100 + +# This is here rather than in the RP environment because +# it's needed to define the observation space. +rlmap_time_depth = 20 +# Have an entry for each cell in the compilation_session CGRA and also +# a note of the current operation +rlmap_tensor_size = ((compilation_session_cgra.dim + 1) * (compilation_session_cgra.dim + 1)) * rlmap_time_depth + 1 +relative_placement_directions = ["no_action", "up", "down", "north", "south", "east", "west", "sooner", "later"] +observation_space = [ + # ObservationSpace( + # name="dfg", + # space=Space( + # string_value=StringSpace(length_range=(Int64Range(min=0))) + # ), + # deterministic=True, + # platform_dependent=False, + # default_observation=Event(string_value="") + # ), + ObservationSpace(name="ir", + space=Space( + # TODO -- I think this should be a window of operations + # around the current one. + int64_sequence=Int64SequenceSpace(length_range=Int64Range(min=0, max=MAX_WINDOW_SIZE), scalar_range=Int64Range(min=0, max=len(Operations))) + ) + ), + ObservationSpace(name="CurrentInstruction", + space=Space( + int64_value=Int64Range(min=0, max=len(Operations)), + # TODO -- also need to figure out how to make this + # a graph? 
+ ), + deterministic=True, + platform_dependent=False + ), + ObservationSpace(name="CurrentInstructionIndex", + space=Space( + int64_value=Int64Range(min=0, max=MAX_WINDOW_SIZE) + )), + ObservationSpace(name="II", + space=Space( + int64_value=Int64Range(min=0) + )), + ObservationSpace(name="RLMapObservations", + space=Space( + int64_box=Int64Box( + low=Int64Tensor(shape=[rlmap_tensor_size], value=([0] * rlmap_tensor_size)), + high=Int64Tensor(shape=[rlmap_tensor_size], value=([100000] * rlmap_tensor_size)) + ) + ) + ) + + # ObservationSpace( + # name="Schedule", + # space=Space( + # int64_box=Int64Box( + # low=Int64Tensor(shape=[2], value=[0, 0]), + # high=Int64Tensor(shape=[2], value=[cgra.x_dim, cgra.y_dim]) + # ) + # ) + # ) + ] + +class CGRASession(CompilationSession): + def __init__(self, working_directory: Path, action_space: ActionSpace, benchmark: Benchmark): + super().__init__(working_directory, action_space, benchmark) + logging.info("Starting a compilation session for CGRA" + str(self.cgra)) + # Load the DFG (from a test_dfg.json file): + self.dfg = pickle.loads(benchmark.program.contents) + self.schedule = Schedule(self.cgra, self.dfg) + + self.current_operation_index = 0 + self.time = 0 # Starting schedulign time --- we could do + # this another way also, by asking the agent to come up with a raw + # time rather than stepping through. + # TODO -- load this properly. 
+ self.dfg_to_ops_list() + + def reset(self, + benchmark: Optional[Union[str, Benchmark]] = None, + action_space: Optional[str] = None, + observation_space: Union[ + OptionalArgumentValue, str, ObservationSpaceSpec + ] = OptionalArgumentValue.UNCHANGED, + reward_space: Union[ + OptionalArgumentValue, str, Reward + ] = OptionalArgumentValue.UNCHANGED, + ): + print("Reset started") + if benchmark is not None: + self.dfg = pickle.loads(benchmark.program.contents) + else: + self.dfg = None + self.schedule = Schedule(self.cgra, self.dfg) + self.current_operation_index = 0 + self.time = 0 + print("Reset complete") + + def dfg_to_ops_list(self): + # Embed the DFG into an operations list that we go through --- + # it contains two things: the name of the node, and the index + # that corresponds to within the Operations list. + self.ops = [] + self.node_order = [] + for op in self.dfg.bfs(): + # Do we need to do a topo-sort here? + ind = operation_index_of(op.operation) + if ind == -1: + print("Did not find operation " + str(op.operation) + " in the set of Operations") + assert False + + self.ops.append(ind) + self.node_order.append(op) + + cgra = compilation_session_cgra + action_spaces = action_space + + observation_spaces = observation_space + # TODO --- a new observation space corresponding to previous actions + + def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool]: + # print("Action has fields {}".format(str(action.__dict__))) + print("Action is {}".format(str(action))) + + response = action.int64_value + if response == -1: + # Do a reset of the env: + self.reset() + return False, None, True + + # Update the CGRA to schedule the current operation at this space: + # Take 0 to correspond to a no-op. + had_effect = False + if response > 0: + # Schedule is set up to take the operation at the response index + # index - 1. + if self.current_operation_index >= len(self.node_order): + # We've scheduled past the end! 
+ return False, None, False + + node = self.node_order[self.current_operation_index] + latency = operation_latency(node.operation) + op_set = self.schedule.set_operation(self.time, response - 1, node, latency) + + # Check that the II still exists: + II, finished = self.schedule.get_II(self.dfg) + has_II = II is not None + if not has_II: + # Unset that operation: + print("Setting operation resulted in failed DFG mapping") + print(self.schedule) + self.schedule.clear_operation(self.time, response - 1, latency) + print("After clearning, have") + print(self.schedule) + op_set = False # Need to punish. + new_II, _ = self.schedule.get_II(self.dfg) + assert (new_II is not None) # This should not + # be non-existent after un-scheduling. + + if op_set: + had_effect = True + print("Scheduled operation", str(self.node_order[self.current_operation_index])) + print("Got an II of ", II) + self.current_operation_index += 1 + elif response == 0: + self.time += 1 + + done = False + if self.current_operation_index >= len(self.ops): + done = True + + print("At end of cycle, have schedule") + print(self.schedule) + print("Done is ", done) + + return done, None, had_effect + + def get_observation(self, observation_space: ObservationSpace) -> Event: + logging.info("Computing an observation over the space") + + if observation_space.name == "ir": + # TODO --- This should be a DFG? + return Event(int64_tensor=Int64Tensor(shape=[len(self.ops)], value=self.ops)) + elif observation_space.name == "Schedule": + # TODO -- needs to return the schedule for the past + # CGRA history also? + box_value = self.schedule.current_iteration + return Event(int64_box_value=box_value) + elif observation_space.name == "CurrentInstruction": + # Return the properties of the current instruction. + if self.current_operation_index >= len(self.ops): + # I don't get why this is ahpepning --- just make + # sure the agent doesn't yse this. I think it + # might happen on the last iteration. 
+ return Event(int64_value=-1) + else: + return Event(int64_value=self.ops[self.current_operation_index]) + elif observation_space.name == "CurrentInstructionIndex": + # Return a way to localize the instruction within the graph. + return Event(int64_value=self.current_operation_index) + elif observation_space.name == "II": + print("Computing II for schedule:") + print(self.schedule) + ii, finished = self.schedule.get_II(self.dfg) + print("Got II", ii) + print ("Finished is ", finished) + return Event(int64_value=ii) + elif observation_space.name == "RLMapObservations": + print("Getting RLMap Observations") + print("Observation space is " + str(type(observation_space))) + current_operation_index = self.current_operation_index + node = self.node_order[current_operation_index] + # TODO --- add encoding of the CGRA constraints (not required for faithful + # reimplementation of RLMap, but probably required for a fair comparison.) + schedule_encoding = self.schedule.to_rlmap_tensor(node, time_window_size=rlmap_time_depth) + + full_res = [current_operation_index] + schedule_encoding + if len(full_res) != rlmap_tensor_size: + print("Tensor sizes don't match!", len(full_res), ' and ', rlmap_tensor_size) + assert False + + return Event(int64_tensor=Int64Tensor(shape=[len(full_res)], value=full_res)) + +def make_cgra_compilation_session(): + return CGRASession \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/compiler_gym-cgra-service b/compiler_gym/envs/cgra/service/compiler_gym-cgra-service new file mode 100755 index 000000000..06db57f41 --- /dev/null +++ b/compiler_gym/envs/cgra/service/compiler_gym-cgra-service @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import os +import traceback + +from compiler_gym.envs.cgra.service.cgra_service import make_cgra_compilation_session +from compiler_gym.service.runtime import create_and_run_compiler_gym_service + +if __name__ == "__main__": + # TODO --- load a CGRA description? 
+ try: + create_and_run_compiler_gym_service(make_cgra_compilation_session()) + except: + print(traceback.format_exc()) + raise \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service b/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service new file mode 100755 index 000000000..54300e6ef --- /dev/null +++ b/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +import os +import traceback + +from compiler_gym.envs.cgra.service.relative_placement_service import make_cgra_compilation_session +from compiler_gym.service.runtime import create_and_run_compiler_gym_service + +if __name__ == "__main__": + try: + create_and_run_compiler_gym_service(make_cgra_compilation_session()) + except: + print(traceback.format_exc()) + raise \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/relative_cgra_env.py b/compiler_gym/envs/cgra/service/relative_cgra_env.py new file mode 100644 index 000000000..6387f7ed9 --- /dev/null +++ b/compiler_gym/envs/cgra/service/relative_cgra_env.py @@ -0,0 +1,39 @@ + + +from compiler_gym.util.gym_type_hints import OptionalArgumentValue +from compiler_gym.service.client_service_compiler_env import ClientServiceCompilerEnv +from pathlib import Path +from typing import Iterable, List, Optional, Union, cast +from compiler_gym.datasets import Benchmark + +from compiler_gym.envs.cgra.datasets import get_cgra_datasets +from compiler_gym.envs.cgra.cgra_rewards import IntermediateIIReward, FinalIIReward + +class RelativeCgraEnv(ClientServiceCompilerEnv): + def __init__(self, *args, punish_intermediate: bool = True, datasets_site_path: Optional[Path] = None, benchmark: Optional[Union[str, Benchmark]], **kwargs): + if punish_intermediate: + reward = IntermediateIIReward() + else: + reward = FinalIIReward() + super().__init__( + *args, + **kwargs, + benchmark = benchmark or "dfg_10/1", + 
datasets=get_cgra_datasets(site_data_base=datasets_site_path), + rewards=[reward], + derived_observation_spaces=[] + ) + + def reset(self, reward_space = OptionalArgumentValue.UNCHANGED, *args, **kwargs): + observation = super().reset(reward_space=reward_space, *args, **kwargs) + + return observation + + def make_benchmark(self, inputs, copt, system_include: bool = True, timeout: int=600): + return None + + def render(self, mode="human"): + if mode == "human": + print("Human visible schedule") + else: + return self.render(mode) \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/relative_placement_service.py b/compiler_gym/envs/cgra/service/relative_placement_service.py new file mode 100644 index 000000000..0314d16f0 --- /dev/null +++ b/compiler_gym/envs/cgra/service/relative_placement_service.py @@ -0,0 +1,198 @@ +from pathlib import Path +from typing import Tuple, Optional, Union, List +from compiler_gym.envs.cgra.service.cgra_service import CGRASession, observation_space, Schedule, CGRA, relative_placement_directions +from compiler_gym.spaces import Reward +import traceback +from compiler_gym.service import CompilationSession +from compiler_gym.util.gym_type_hints import ObservationType, OptionalArgumentValue +from compiler_gym.views import ObservationSpaceSpec +from compiler_gym.service.proto import ( +ActionSpace, +Benchmark, +DoubleRange, +Event, +Int64Box, +Int64Range, +Int64Tensor, +NamedDiscreteSpace, +ObservationSpace, +Space, +StringSpace +) +import pickle + +""" +Unlike in direct placement, in relative placement, we take an operation and schedule it +to it's nearby neighbours that support the operation. 
+ +""" + +action_space = [ + ActionSpace(name="move", + space=Space( + named_discrete=NamedDiscreteSpace( + # has a max of 9 connection dimensions (none, up, down, n, s, e, w, sooner, later) + name=relative_placement_directions + ) + )) +] + +class RelativePlacementCGRASession(CGRASession): + def __init__(self, working_directory: Path, action_space: ActionSpace, benchmark: Benchmark): + try: + print("Initailziing relplace session") + super().__init__(working_directory, action_space, benchmark) + + # For the relative placmenet CGRA, we need to come up with an initial placement strategy. + # While it may not be important for all classes of algorithm that this is consistent + # after every reset, it is important for some (e.g. genetic algorithms) + self.dfg = pickle.loads(benchmark.program.contents) + print("Loaded DFG " + str(self.dfg)) + # TODO --- support better seeds. + self.schedule = Schedule(self.cgra, self.dfg) + self.initial_placement = self.get_initial_placement(self.dfg, 0) + + # At the same time, the results of this are sensitive to the starting position, + # so, it's important that we can control the starting position. + self.current_operation_index = 0 + + # This is a constant that says how many times we should iterate over the array. 
+ self.max_iterations = 10 + + self.iteration_number = 0 + except Exception as e: + print(traceback.format_exc()) + raise e + + + observation_spaces: List[ObservationSpace] = observation_space + action_spaces = action_space + + def reset(self, + benchmark: Optional[Union[str, Benchmark]] = None, + action_space: Optional[str] = None, + observation_space: Union[ + OptionalArgumentValue, str, ObservationSpaceSpec + ] = OptionalArgumentValue.UNCHANGED, + reward_space: Union[ + OptionalArgumentValue, str, Reward + ] = OptionalArgumentValue.UNCHANGED, + ): + try: + return super().reset(benchmark, action_space, observation_space, reward_space) + except Exception as e: + print(traceback.format_exc()) + raise e + + def get_initial_placement(self, dfg, seed): + try: + # For now, just place the nodes in order on the CGRA. + # Iterate through the PEs, and then increment the clock cycle + # if we can't place. + pe_ind = 0 + time = 0 + max_pe = self.cgra.cells_as_list() + nodes = dfg.bfs() + iterating = True + was_set = True + while iterating: + # only move to next node if we properly set the operation last time. + if was_set: + n = next(nodes, None) + if n is None: + # Finished scheduling! + iterating = False + continue + was_set = self.schedule.set_operation(time, pe_ind, n, n.operation.latency) + if was_set: + print("Set initial placement for node", str(n)) + print("Position is ", self.schedule.get_location(n)) + + # TODO -- should we check that this produces a schedule with an II? + # Aim is to start with a very spread-out schedule that should just work --- + # let the SA algorithm compress it, rather than trying to make + # the SA algorithm find a valid schedule. 
+ pe_ind += 1 + time += n.operation.latency + if pe_ind >= len(max_pe): + pe_ind = 0 + except Exception as e: + print(traceback.format_exc()) + raise e + + def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool]: + try: + if self.iteration_number == self.max_iterations: + # The iteration is finished. + return True, None, False + + step = action.int64_value + print("Got step ", step) + action_to_do = relative_placement_directions[step] + print("Got step", step, "which entails moving in direction", action_to_do) + # Get the ndoe --- the dfg.nodes is a dict, so need to access through the names + # list. + current_operation_node_name = self.dfg.node_names[self.current_operation_index] + current_operation = self.dfg.nodes[current_operation_node_name] + + current_time, current_location = self.schedule.get_location(current_operation) + print("For node ", current_operation, "found location", current_location) + new_time = current_time + new_location = current_location + if action_to_do == "sooner": + new_time -= 1 + elif action_to_do == "later": + new_time += 1 + else: + if action_to_do == "no_action": + new_location = None + else: + new_location = self.cgra.get_neighbour(action_to_do, current_location) + + if new_location is not None: + print("Swapping between", current_location, 'and', new_location) + swapped = self.schedule.swap(current_time, current_location, new_time, new_location) + else: + # If the new location is none, that means that we picked a direction + # that is invalid (ie. doesn't exist for the node in question). To make + # it easier on the RL/GA algorithms, we'll just silently skiup this here. + swapped = False + + # Prepare for next iteration: + self.current_operation_index += 1 + if self.current_operation_index > len(self.dfg.nodes) - 1: + # Wrap around for another pass through the nodes. 
+ self.current_operation_index = 0 + self.iteration_number += 1 + + print("After iteration, schedule is ", self.schedule) + print("Swapped is ", swapped) + + return False, None, swapped + except Exception as e: + print(traceback.format_exc()) + raise e + + def get_observation(self, observation_space: ObservationSpace) -> Event: + try: + result = super().get_observation(observation_space=observation_space) + if observation_space.name == 'II': + ii, finished = self.schedule.get_II(self.dfg) + if not finished: + # The RLLib library can't handle nones, so + # Just return a large punishment if this fails + # to schedule. + result = Event(int64_value=-100) + else: + result = Event(int64_value=-ii) + print ("got the result.", result) + elif observation_space.name == 'RLMapObservations': + # print("Got RLMap Observations", result.int64_tensor) + pass + return result + except Exception as e: + print(traceback.format_exc()) + raise e + +def make_cgra_compilation_session(): + return RelativePlacementCGRASession \ No newline at end of file diff --git a/compiler_gym/envs/cgra/test/test_dfg.json b/compiler_gym/envs/cgra/test/test_dfg.json new file mode 100644 index 000000000..739212a55 --- /dev/null +++ b/compiler_gym/envs/cgra/test/test_dfg.json @@ -0,0 +1,54 @@ +{ + "entry_points": ["n3", "n5"], + "nodes": [{ + "operation": "add", + "name": "n1" + }, + {"operation": "mul", + "name": "n2"}, + { + "operation": "load", + "name": "n3" + }, + { + "operation": "store", + "name": "n4" + }, + { + "operation": "load", + "name": "n5" + } + ], + "edges": [ + { + "name": "e1", + "from": "n3", + "to": "n1", + "type": "data" + }, + { + "name": "e2", + "from": "n5", + "to": "n1", + "type": "data" + }, + { + "name": "e3", + "from": "n1", + "to": "n2", + "type": "data" + }, + { + "name": "e4", + "from": "n3", + "to": "n2", + "type": "data" + }, + { + "name": "e5", + "from": "n2", + "to": "n4", + "type": "data" + } + ] +} diff --git a/examples/cgra/.gitignore b/examples/cgra/.gitignore new file 
mode 100644 index 000000000..302961024 --- /dev/null +++ b/examples/cgra/.gitignore @@ -0,0 +1,2 @@ +rewards.png +ga_output diff --git a/examples/cgra/ga.py b/examples/cgra/ga.py new file mode 100644 index 000000000..1afdd997e --- /dev/null +++ b/examples/cgra/ga.py @@ -0,0 +1,332 @@ +"""Perform a GA of the action space of a CompilerGym environment. + +Use the RandomWalk python script to generate the initial candidates, then +use a GA to select the best one. + +To make this somewhat tractable, this assumes that the environemnt +takes -1 as an action, and that -1 resets the environment. +TODO -- Support environments that don't do that. +""" + +import random +import math + +import humanize +from random_walk import run_random_walk +from compiler_gym.datasets import benchmark +from absl import app, flags + +from typing import Set, List + +from compiler_gym.envs import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType +from compiler_gym.util.flags.benchmark_from_flags import benchmark_from_flags +from compiler_gym.util.flags.env_from_flags import env_from_flags +from compiler_gym.util.shell_format import emph +from compiler_gym.util.timer import Timer + +from compiler_gym.random_search import random_search + +import numpy as np + +# Dict for keeping track of various debugging/information counters. 
+ga_stats = { } + +def reset_ga_stats(): + global ga_stats + + ga_stats['valid_candidates_during_compute_score'] = 0 + ga_stats['invalid_candidates_during_compute_score'] = 0 + ga_stats['incomplete_candidates_during_compute_score'] = 0 + ga_stats['candidates_randomly_skipped'] = 0 + ga_stats['candidates_scored'] = 0 + +reset_ga_stats() +print("Initialized Counters") + +if __name__ == "__main__": + flags.DEFINE_boolean( + "variable_length_sequences", + False, + "Use a crossover algorithm that supports generation of variable length sequences" + ) + flags.DEFINE_boolean( + "print_counters", + False, + "Print Internal Compile Conters" + ) + flags.DEFINE_integer( + "iters", + 12, + "Min numbrt og iterations" + ) + flags.DEFINE_integer( + "generation_size", + 32, + "number of candidates to track" + ) + flags.DEFINE_integer( + "initialization_steps", + 100, + "number of steps to initialize the initial elements." + ) + flags.DEFINE_integer( + "max_cands", + 1000, + "max number of candidates to add in crossover." + ) + flags.DEFINE_float( + "length_preservation_factor", + 0.9, + "how much to discount different length crossovers. 
(formual is N^(length difference))" + ) + flags.DEFINE_float( + "reduction_factor", + 0.1, + "what fraction to reduce the number of generated candidates by (0.1 is 10 percent of generated candidates are carried forward to evaluation)" + ) + flags.DEFINE_boolean( + "refill", + False, + "refill the candidates list using randomly generated candidates if it is too small after each generation" + ) + flags.DEFINE_integer( + "expected_candidates", + 1000, + "How many candidates to take during crossover (in expectation) (only for fixed length --- see reduction factor for variable length)" + ) + FLAGS = flags.FLAGS + +class Candidate: + def __init__(self, actions): + self.actions = actions + self.score = None + self.failed = True + + def copy(self): + new_cand = Candidate(self.actions[:]) + new_cand.score = self.score + new_cand.failed = self.failed + + return new_cand + + def __str__(self): + return "Actions: " + str(self.actions) + ", score " + str(self.score) + " (failed: " + str(self.failed) + ")" + + def score(self): + return self.score + + def compute_score(self, env, reward_space_name): + ga_stats['candidates_scored'] += 1 + env.reset() + assert len(self.actions) > 0 + + print ("Computing score for actions " + str(self.actions)) + reward = -100000 # Largest reward is best + failed = True # Empty schedules marked as failing (Is this a good idea?) + try: + done = False + for action in self.actions: + failed = False + observation, reward, done, info = env.step(action) + # TODO -- Should we check if we finsihed early? + if done: + ga_stats['valid_candidates_during_compute_score'] += 1 + failed = False + else: + ga_stats['incomplete_candidates_during_compute_score'] += 1 + failed = True + except: + # TODO -- can we do better? 
+ failed = True + ga_stats['invalid_candidates_during_compute_score'] += 1 + + if failed: + reward = -10000000 + self.score = reward + self.failed = failed + return reward, failed + +def mutate(cands: Set[List[ActionType]]): + new_set = set() + for c in cands: + if random.randint(0, 1) == 0: + # TODO -- pick a better probability of that? + max_action = max(c.actions) # TODO --- select from all actions not just from max seen. + new_c = c.copy() + new_c.actions[random.randint(0, len(c.actions) - 1)] = random.randint(0, max_action) + new_set.add(new_c) + new_set.add(c) + + return new_set + +# This is a much simpler method that produces a much smaller set for +# fixed-lenght sequences. The crossover_variable_length method +# has a tendency to explode under long action sequences. +def crossover_fixed_length(cands: Set[List[ActionType]]): + new_cands = set() + for c in cands: + cand_count = len(c.actions) * len(cands) * len(cands) + fraction_taken = float(FLAGS.expected_candidates) / float(cand_count) + + for c2 in cands: + for i in range(len(c.actions)): + should_add = random.random() < fraction_taken + if not should_add: + continue + + new_cand = Candidate(c.actions[:i] + c2.actions[i:]) + new_cands.add(new_cand) + + print("Generated ", len(new_cands), "candidates") + return new_cands + +def crossover_variable_length(cands: Set[List[ActionType]]): + # Is there a better way to do this? + # For the CGRA env, these are not fixed length. + + # Try to keep the number generated down a bit: + naive_number = 0 + for c in cands: + for c2 in cands: + naive_number += len(c.actions) * len(c2.actions) + + print ("Naively would add " + str(naive_number) + " candidates") + new_candidates = set() + for cand in cands: + for i in range(len(cand.actions)): + cand_head = cand.actions[:i] + + # Before we bother iterating, sandwich the range here. 
+ length_diff = len(cand.actions) - len(cand_head) + # So the mean length added should be that lenght diff + # formula is pow(factor, abs(act_lengh - length_diff)) + # Threshold at like 10% + bound_value = 0.10 + bounds = int(math.log(bound_value, FLAGS.length_preservation_factor)) + for other_cand in cands: + for j in range(max(length_diff - bounds, 0), min(length_diff + bounds, len(other_cand.actions))): + # Don't add everything with certainty. Exponential backoff on size. + probability = pow(FLAGS.length_preservation_factor, abs(len(cand.actions) - (len(cand_head) + j))) + if (random.random() < probability) and (random.random() < FLAGS.reduction_factor): + other_cand_tail = other_cand.actions[j:] + + new_candidates.add(Candidate(cand_head + other_cand_tail)) + else: + ga_stats['candidates_randomly_skipped'] += 1 + print("Generated " + str(len(new_candidates)) + " new candidates") + + return new_candidates.union(cands) + +def get_best(cands, count=1): + filtered_cands = list(filter(lambda f: (not f.failed), cands)) + print("Got ", len(filtered_cands), "filtered cands from ", len(cands), "original cands") + sorted_cands = sorted(filtered_cands, key=lambda e: -e.score) + assert count > 0 + best_score = sorted_cands[0].score + + result = set() + for cand in sorted_cands[:count]: + result.add(cand) + + return result, best_score + +def compute_individual_fitness(inps): + cand, env = inps + cand.compute_score(env, FLAGS.reward) + return cand + +def compute_set_fitness(inps): + cands, env = inps + env.reset() + for c in cands: + compute_individual_fitness((c, env)) + +# For splitting up array a for multi core +def split_array(a, n): + new_arrs = [] + for i in range(n): + new_arrs.append([]) + + ind = 0 + for elem in a: + new_arrs[ind].append(elem) + ind += 1 + ind = ind % n + + return new_arrs + +def compute_fitness(cands, benchmark): + with env_from_flags(benchmark=benchmark) as env: + env.reset() + for cand in cands: + compute_individual_fitness((cand, env)) + + 
return cands + + +def run_ga(benchmark: benchmark.Benchmark, step_count: int, initial_candidates: Set[List[ActionType]]) -> None: + # Create an optimizer + + iter_number = 0 + candidate_count = len(initial_candidates) + + current_candidates = initial_candidates + + while iter_number < step_count: + if (len(current_candidates)) < candidate_count and FLAGS.refill: + current_candidates += generate_random_candidates(candidate_count - len(current_candidates), benchmark) + + print ("Starting iteration", iter_number, "with", len(current_candidates), "candidates") + mutations = mutate(current_candidates.copy()) + if FLAGS.variable_length_sequences: + crossed = crossover_variable_length(mutations) + else: + crossed = crossover_fixed_length(mutations) + fitness = compute_fitness(crossed, benchmark) + print ("Iter: " + str(iter_number) + " with generation size " + str(len(fitness))) + + current_candidates, best = get_best(fitness, count=candidate_count) + print ("After iteration " + str(iter_number) + " best score is " + str(best)) + + iter_number += 1 + + # Get the best one. + return get_best(current_candidates, count=1) + +def generate_random_candidates(number, this_benchmark): + cands = [] + with env_from_flags(benchmark=this_benchmark) as env: + env.reset() + for i in range(number): + print("Init Candidate ", i) + env.reset() + cands.append(Candidate(run_random_walk(env, FLAGS.initialization_steps))) + + return cands + +def main(argv): + print("Starting GA") + assert len(argv) == 1, f"Unrecognized flags: {argv[1:]}" + + this_benchmark = benchmark_from_flags() + initial_candidates = set(generate_random_candidates(FLAGS.generation_size, this_benchmark)) + + result, best_score = run_ga(this_benchmark, FLAGS.iters, initial_candidates) + for elt in result: + # This loop should only go once. 
+ print ("Result is: ", elt) + + with env_from_flags(benchmark=this_benchmark) as env: + env.reset() + for action in elt.actions: + env.step(action) + + print ("Result env was", env, "best score was", best_score) + + if FLAGS.print_counters: + for field in ga_stats: + print (field, " = ", ga_stats[field]) + +if __name__ == "__main__": + app.run(main) diff --git a/examples/cgra/ga_scripts/.gitignore b/examples/cgra/ga_scripts/.gitignore new file mode 100644 index 000000000..04f7babeb --- /dev/null +++ b/examples/cgra/ga_scripts/.gitignore @@ -0,0 +1,2 @@ +output +cdfs.png diff --git a/examples/cgra/ga_scripts/ga_score_extractor.sh b/examples/cgra/ga_scripts/ga_score_extractor.sh new file mode 100755 index 000000000..f4bb070cc --- /dev/null +++ b/examples/cgra/ga_scripts/ga_score_extractor.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +typeset -a results +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " +fi + +output=$1 +echo "" -n > $output +shift +while [[ $# -gt 0 ]]; do + input=$1 + shift + + # Get the second to last line, which has the II for the graph. + ii=$(tail -n 2 $input | head -n 1 | cut -f 7 -d' ' ) + + echo "$ii, " >> $output +done diff --git a/examples/cgra/ga_scripts/plot_CDFs.py b/examples/cgra/ga_scripts/plot_CDFs.py new file mode 100644 index 000000000..275495999 --- /dev/null +++ b/examples/cgra/ga_scripts/plot_CDFs.py @@ -0,0 +1,67 @@ +import matplotlib.pyplot as plt +import argparse +import numpy + +def load_cdf_from_file(f): + with open(f) as fle: + lines = ''.join(fle.readlines()) + data = [] + # Should only be one-line files? 
+ for item in lines.split(','): + if item.strip(): + try: + data.append(int(item.strip())) + except: + # Plenty of reasons this could fail -- mostly due to + + # incomplete runs + pass + + print ("Loaded ", len(data), "items") + return data + +def compute_cdf(data): + sorted_data = sorted(data) + x_points = range(0, max(sorted_data)) + cdf = [0.0] * len(x_points) + sum_so_far = 0 + cdf_pointer = 0 + value_per_point = 1 #Treated as int to avoid FP accum issues. + for point in sorted_data: + cdf_pointer = int(float(len(x_points)) * float(point) / float(max(sorted_data))) - 1 + sum_so_far += value_per_point + + cdf[cdf_pointer] = float(sum_so_far) / float(len(sorted_data)) + + return x_points, cdf + + +def plot_datas(datas): + # First, compute the CDF from the raw data + cdfs = [] + xvals = [] + for data in datas: + xvalus, cdf = (compute_cdf(data)) + xvals.append(xvalus) + cdfs.append(cdf) + + xvs_max = 0 + for i in range(len(cdfs)): + plt.plot(xvals[i], cdfs[i]) + xvs_max = max(max(xvals[i]), xvs_max) + + plt.ylim([0.0, 1.0]) + plt.xlim([0, xvs_max]) + plt.savefig('cdfs.png') + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('files', nargs='+') + args = parser.parse_args() + + datas = [] + for file in args.files: + data = load_cdf_from_file(file) + datas.append(data) + + plot_datas(datas) diff --git a/examples/cgra/relative_placement_model.py b/examples/cgra/relative_placement_model.py new file mode 100644 index 000000000..7cb22545a --- /dev/null +++ b/examples/cgra/relative_placement_model.py @@ -0,0 +1,116 @@ +from unittest.result import failfast +from compiler_gym.wrappers.datasets import CycleOverBenchmarks +from compiler_gym.envs.compiler_env import CompilerEnv +from compiler_gym.wrappers import TimeLimit +import compiler_gym +import model +from ray import tune +from ray.rllib.agents.ppo import PPOTrainer +import ray +import matplotlib.pyplot as plt +from itertools import islice +import argparse + +class 
RelativePlacementModel(model.Model): + def __init__(self): + super().__init__() + + def get_action(observations): + return super().get_action() + +def make_env() -> compiler_gym.envs.CompilerEnv: + env = compiler_gym.make( + "relative-cgra-v0", + observation_space="RLMapObservations", + reward_space="II", + action_space="move", + benchmark='dfg_10/0' # I think this gets overwritten in the running loop. + ) + env = TimeLimit(env, max_episode_steps=5) + + return env + +def plot_results(rewards): + plt.bar(range(len(rewards)), rewards) + plt.ylabel("Reward (higher better)") + plt.savefig('rewards.png') + +def run_agent_on_benchmarks(bmarks): + with make_env() as env: + rewards = [] + for i, benchmark in enumerate(bmarks, start=1): + observation, done = env.reset(benchmark=benchmark), False + reward = 0 + while not done: + action = int(agent.compute_action(observation)) + print(type(action)) + observation, reward, done, _ = env.step(action) + # Just append the last reward, because that is the II. 
(or a large -ve noting + # failure) + rewards.append(reward) + print ("Exectuted ", i, "th benchmark of", len(bmarks)) + return rewards + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Run a recreation of the RLMap tool.") + parser.add_argument('--train', dest='train', default=False, action='store_true') + parser.add_argument('--test', dest='test', default=None) + args = parser.parse_args() + + with make_env() as env: + bench = env.datasets['dfg_10'] + train_benchmarks = list(islice(bench.benchmarks(), 650)) + train_benchmarks, val_benchmarks, test_benchmarks = train_benchmarks[:500], train_benchmarks[500:550], train_benchmarks[550:650] + + print("Number of benchmarks for training: ", len(train_benchmarks)) + print("Number of benchmarks for vlaidation: ", len(val_benchmarks)) + print("Number of benchmarks for testing:", len(test_benchmarks)) + + def make_training_env(*args) -> compiler_gym.envs.CompilerEnv: + del args + return CycleOverBenchmarks(make_env(), train_benchmarks) + + tune.register_env("RLMap", make_training_env) + + if args.train: + if ray.is_initialized(): + ray.shutdown() + ray.init(include_dashboard=False, ignore_reinit_error=True) + analysis = tune.run( + PPOTrainer, + checkpoint_at_end=True, + stop={ + "episodes_total": 500 + }, + config={ + "seed": 0xCC, + "num_workers": 1, + "env": "RLMap", + "rollout_fragment_length": 5, + "train_batch_size": 5, + "sgd_minibatch_size": 5, + }, + ) + best_checkpoint = analysis.get_best_checkpoint( + metric="episode_reward_mean", + mode="max", + trial=analysis.trials[0] + ) + print("Best checkpoint is '", best_checkpoint, "'") + if args.test: + checkpoint = args.test + agent = PPOTrainer( + env='RLMap', + config={ + "num_workers": 1, + "seed": 0xCC, + "explore": False + } + ) + + agent.restore(checkpoint) + val_rewards = run_agent_on_benchmarks(val_benchmarks) + + plot_results(val_rewards) + else: + print("Not testing (use --test to also test)") diff --git 
a/examples/cgra/run_ga_relative_placement.sh b/examples/cgra/run_ga_relative_placement.sh new file mode 100755 index 000000000..80f0fd10c --- /dev/null +++ b/examples/cgra/run_ga_relative_placement.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +echo "Starting iteration $index" +rm -rf ga_output +mkdir -p ga_output +parallel 'echo "Starting iter {}"; python ga.py --nomp --max_cands 100 --env=cgra-v0 --benchmark=dfg_10/{} --reward=II &> ga_output/out_{}' ::: $(seq 0 10000) diff --git a/examples/random_walk.py b/examples/random_walk.py index 44c1a6f33..19eadd93d 100644 --- a/examples/random_walk.py +++ b/examples/random_walk.py @@ -20,18 +20,22 @@ from compiler_gym.util.flags.env_from_flags import env_from_flags from compiler_gym.util.shell_format import emph from compiler_gym.util.timer import Timer +from compiler_gym.util.gym_type_hints import ActionType -flags.DEFINE_integer( - "step_min", - 12, - "The minimum number of steps. Fewer steps may be performed if the " - "environment ends the episode early.", -) -flags.DEFINE_integer("step_max", 256, "The maximum number of steps.") -FLAGS = flags.FLAGS +from typing import List +if __name__ == "__main__": + flags.DEFINE_integer( + "step_min", + 12, + "The minimum number of steps. Fewer steps may be performed if the " + "environment ends the episode early.", + ) + flags.DEFINE_integer("step_max", 256, "The maximum number of steps.") + FLAGS = flags.FLAGS -def run_random_walk(env: CompilerEnv, step_count: int) -> None: + +def run_random_walk(env: CompilerEnv, step_count: int) -> List[ActionType]: """Perform a random walk of the action space. :param env: The environment to use. @@ -40,12 +44,14 @@ def run_random_walk(env: CompilerEnv, step_count: int) -> None: environment to end the episode. 
""" rewards = [] + actions = [] step_num = 0 with Timer() as episode_time: env.reset() for step_num in range(1, step_count + 1): action_index = env.action_space.sample() + actions.append(action_index) with Timer() as step_time: observation, reward, done, info = env.step(action_index) print( @@ -76,6 +82,8 @@ def reward_percentage(reward, rewards): f"at step {humanize.intcomma(rewards.index(max(rewards)) + 1)})" ) + return actions + def main(argv): """Main entry point.""" diff --git a/setup.py b/setup.py index 24563f22b..ee9c9fe57 100644 --- a/setup.py +++ b/setup.py @@ -122,6 +122,9 @@ def wheel_filename(**kwargs): "compiler_gym.envs.gcc.datasets", "compiler_gym.envs.gcc.service", "compiler_gym.envs.gcc", + "compiler_gym.envs.cgra.datasets", + "compiler_gym.envs.cgra.service", + "compiler_gym.envs.cgra", "compiler_gym.envs.loop_tool", "compiler_gym.envs.loop_tool.service", "compiler_gym.envs", @@ -146,6 +149,8 @@ def wheel_filename(**kwargs): "package_data": { "compiler_gym": [ "envs/gcc/service/compiler_gym-gcc-service", + # "envs/cgra/service/compiler_gym-cgra-service", + "envs/cgra/service/*", "envs/loop_tool/service/compiler_gym-loop_tool-service", "third_party/csmith/csmith/bin/csmith", "third_party/csmith/csmith/include/csmith-2.3.0/*.h", From bcc5663b36366eea7c36b504d86adf13b34b22da Mon Sep 17 00:00:00 2001 From: Jackson Woodruff Date: Sat, 18 Jun 2022 23:36:30 -0700 Subject: [PATCH 2/2] Address most issues raised in merge request --- compiler_gym/datasets/datasets.py | 1 - compiler_gym/envs/cgra/.gitignore | 4 - compiler_gym/envs/cgra/BUILD | 4 +- compiler_gym/envs/cgra/CMakeLists.txt | 14 +- compiler_gym/envs/cgra/DFG.py | 63 ++- compiler_gym/envs/cgra/Driver.py | 98 ----- compiler_gym/envs/cgra/Model.py | 2 - compiler_gym/envs/cgra/Operations.py | 8 +- compiler_gym/envs/cgra/architectures/BUILD | 15 + compiler_gym/envs/cgra/architectures/CGRA.py | 154 +++++++ .../envs/cgra/architectures/CMakeLists.txt | 15 + .../envs/cgra/architectures/__init__.py | 4 + 
compiler_gym/envs/cgra/cgra_rewards.py | 21 +- compiler_gym/envs/cgra/compile_settings.py | 67 +++ compiler_gym/envs/cgra/compiler_gym_test.py | 30 -- compiler_gym/envs/cgra/datasets/BUILD | 5 + .../envs/cgra/datasets/CMakeLists.txt | 19 + compiler_gym/envs/cgra/service/cgra_env.py | 22 +- .../envs/cgra/service/cgra_service.py | 413 +++++++++++------- .../cgra/service/compiler_gym-cgra-service | 6 +- ...mpiler_gym-relative-placement-cgra-service | 4 + .../envs/cgra/service/relative_cgra_env.py | 11 +- .../service/relative_placement_service.py | 150 +++++-- examples/cgra/.gitignore | 3 +- .../cgra/ga_scripts/ga_score_extractor.sh | 2 +- examples/cgra/ga_scripts/plot.sh | 3 + examples/cgra/ga_scripts/plot_CDFs.py | 22 +- examples/cgra/relative_placement_model.py | 20 +- .../cgra/relative_placement_output/.gitignore | 1 + .../relative_placement_score_extractor.sh | 18 + examples/cgra/run_ga_relative_placement.sh | 1 - examples/cgra/run_relative_placement.sh | 12 + setup.py | 2 +- 33 files changed, 794 insertions(+), 420 deletions(-) delete mode 100644 compiler_gym/envs/cgra/.gitignore delete mode 100644 compiler_gym/envs/cgra/Driver.py delete mode 100644 compiler_gym/envs/cgra/Model.py create mode 100644 compiler_gym/envs/cgra/architectures/BUILD create mode 100644 compiler_gym/envs/cgra/architectures/CGRA.py create mode 100644 compiler_gym/envs/cgra/architectures/CMakeLists.txt create mode 100644 compiler_gym/envs/cgra/architectures/__init__.py create mode 100644 compiler_gym/envs/cgra/compile_settings.py delete mode 100644 compiler_gym/envs/cgra/compiler_gym_test.py create mode 100644 compiler_gym/envs/cgra/datasets/CMakeLists.txt create mode 100755 examples/cgra/ga_scripts/plot.sh create mode 100644 examples/cgra/relative_placement_output/.gitignore create mode 100755 examples/cgra/relative_placement_output/relative_placement_score_extractor.sh create mode 100755 examples/cgra/run_relative_placement.sh diff --git a/compiler_gym/datasets/datasets.py 
b/compiler_gym/datasets/datasets.py index 1cbb92e44..05ecf0e45 100644 --- a/compiler_gym/datasets/datasets.py +++ b/compiler_gym/datasets/datasets.py @@ -152,7 +152,6 @@ def dataset_from_parsed_uri(self, uri: BenchmarkUri) -> Dataset: key = self._dataset_key_from_uri(uri) if key not in self._datasets: - print("datasets are ", str(self._datasets)) raise LookupError(f"Dataset not found: {key}") return self._datasets[key] diff --git a/compiler_gym/envs/cgra/.gitignore b/compiler_gym/envs/cgra/.gitignore deleted file mode 100644 index 11e1f8d09..000000000 --- a/compiler_gym/envs/cgra/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -__pycache__ -env.sh -cart-pole.py -env diff --git a/compiler_gym/envs/cgra/BUILD b/compiler_gym/envs/cgra/BUILD index b545e4e8a..b6c28f62f 100644 --- a/compiler_gym/envs/cgra/BUILD +++ b/compiler_gym/envs/cgra/BUILD @@ -2,7 +2,7 @@ # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -load("@rules_python//python:defs.bzl", "py_library", "py_test") +load("@rules_python//python:defs.bzl", "py_library") py_library( name = "cgra", @@ -10,6 +10,7 @@ py_library( "__init__.py", "cgra_rewards.py", "Operations.py", + "compile_settings.py", "DFG.py" ], data = [ @@ -18,6 +19,7 @@ py_library( visibility = ["//visibility:public"], deps = [ "//compiler_gym/envs/cgra/datasets", + "//compiler_gym/envs/cgra/architectures", "//compiler_gym/errors", "//compiler_gym/service:client_service_compiler_env", "//compiler_gym/service/runtime", # Implicit dependency of service. 
diff --git a/compiler_gym/envs/cgra/CMakeLists.txt b/compiler_gym/envs/cgra/CMakeLists.txt index ecac8500f..7a522146c 100644 --- a/compiler_gym/envs/cgra/CMakeLists.txt +++ b/compiler_gym/envs/cgra/CMakeLists.txt @@ -7,17 +7,19 @@ cg_add_all_subdirs() cg_py_library( NAME - gcc + cgra SRCS "__init__.py" - "gcc.py" - "gcc_env.py" - "gcc_rewards.py" + "cgra_rewards.py" + "Operations.py" + "compile_settings.py" + "DFG.py" DATA - compiler_gym::envs::gcc::service::service + compiler_gym::envs::cgra::service::service DEPS compiler_gym::service::client_service_compiler_env - compiler_gym::envs::gcc::datasets::datasets + compiler_gym::envs::cgra::datasets::datasets + compiler_gym::engs::cgra::architectures::architectures compiler_gym::errors::errors compiler_gym::service::runtime::runtime compiler_gym::util::util diff --git a/compiler_gym/envs/cgra/DFG.py b/compiler_gym/envs/cgra/DFG.py index a03208296..ceb26db92 100644 --- a/compiler_gym/envs/cgra/DFG.py +++ b/compiler_gym/envs/cgra/DFG.py @@ -1,3 +1,8 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ import json from pathlib import Path import random @@ -24,7 +29,7 @@ def __str__(self): return "Node with name " + self.name + " and op " + str(self.operation) class DFG(object): - def __init__(self, working_directory: Optional[Path] = None, benchmark: Optional[Benchmark] = None, from_json: Optional[Path] = None, from_text: Optional[str] = None): + def __init__(self, working_directory: Optional[Path] = None, from_json: Optional[Path] = None, from_text: Optional[str] = None): # Copied from here: https://github.com/facebookresearch/CompilerGym/blob/development/examples/loop_optimizations_service/service_py/loops_opt_service.py # self.inst2vec = _INST2VEC_ENCODER @@ -32,33 +37,18 @@ def __init__(self, working_directory: Optional[Path] = None, benchmark: Optional self.load_dfg_from_json(from_json) elif from_text is not None: self.load_dfg_from_text(from_text) - elif benchmark is not None: - # Only re-create the JSON file if we aren't providing an existing one. - # The existing ones are mostly a debugging functionality. - with open(self.working_directory / "benchmark.c", "wb") as f: - f.write(benchmark.program.contents) - - # We use CGRA-Mapper to produce a DFG in JSON. - run_command( - ["cgra-mapper", self.src_path, self.dfg_path] - ) - - # Now, load in the DFG. - self.load_dfg_from_json(self.dfg_path) def __str__(self): res = "nodes are: " + str(self.nodes) + " and edges are " + str(self.adj) return res def load_dfg_from_json(self, path): - import json with open(path, 'r') as p: # This isnt' text, but I think the json.loads # that this calls just works? 
self.load_dfg_from_text(p) def load_dfg_from_text(self, text): - import json f = json.loads(text) self.nodes = {} self.node_names = [] @@ -97,7 +87,13 @@ def get_succs(self, node): succs.append(self.nodes[n]) return succs - # TODO -- fix this, because for a graph with multiple entry nodes, + def build_preds_lookup(self): + preds_lookup = {} + for n in self.node_names: + preds_lookup[n] = self.get_preds(self.nodes[n]) + return preds_lookup + + # TODO(jcw) -- fix this, because for a graph with multiple entry nodes, # this doesn't actually give the right answer :) # (should do in most cases) def bfs(self): @@ -106,6 +102,10 @@ def bfs(self): print(self.entry_points) seen = set() + # build a lookup based on the predecessors + # for each node. + preds_lookup = self.build_preds_lookup() + while len(to_explore) > 0: head = to_explore[0] to_explore = to_explore[1:] @@ -114,9 +114,30 @@ def bfs(self): seen.add(head) yield self.nodes[head] - # Get the following nodes. - following_nodes = self.adj[head] - to_explore += following_nodes + # Add the next batch of nodes that we have + # visited all the preds for if there are more + # nodes to explore. + if len(to_explore) == 0 and len(seen) != len(self.node_names): + for node_name in self.node_names: + if node_name in seen: + continue + else: + # Unseen --- have we seen all th preds? + failed = False + for p in preds_lookup[node_name]: + if p.name not in seen: + failed = True + if not failed: + to_explore.append(node_name) + if len(to_explore) == 0: # We added nothing despite trying + # to. + + # TODO(jcw) -- Fix this, as support for cyclical DFGs + # is important to be able to support loops with + # cross-loop dependencies. + print("Cyclical DFG --- Impossible to do a true BFS") + print("DFG is ", str(self)) + assert False # Generate a test DFG using the operations in # 'operations'. 
@@ -175,7 +196,7 @@ def generate_DFG(operations: List[Operation], size, seed=0): else: inputs.append(random.choice(nodes_list)) # If the node has no arguments, then we should add it - # as an entry point. --- todo --- should we just skip + # as an entry point. --- todo(jcw) --- should we just skip # this avoid creating graphs with too many constant loads? if operation.inputs == 0: entry_points.append(name) diff --git a/compiler_gym/envs/cgra/Driver.py b/compiler_gym/envs/cgra/Driver.py deleted file mode 100644 index 95374938c..000000000 --- a/compiler_gym/envs/cgra/Driver.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -# -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. -"""This script demonstrates how the Python example service without needing -to use the bazel build system. Usage: - - $ python example_compiler_gym_service/demo_without_bazel.py - -It is equivalent in behavior to the demo.py script in this directory. 
-""" -import logging -from pathlib import Path -from typing import Iterable - -import CGRA - -import gym - -from compiler_gym.datasets import Benchmark, Dataset -from compiler_gym.datasets.uri import BenchmarkUri -from compiler_gym.spaces import Reward -from compiler_gym.util.logging import init_logging -from compiler_gym.util.registration import register - -EXAMPLE_PY_SERVICE_BINARY: Path = Path( - "CompileCGRA.py" -) -assert EXAMPLE_PY_SERVICE_BINARY.is_file(), "Service script not found" - - -class ExampleDataset(Dataset): - def __init__(self, *args, **kwargs): - super().__init__( - name="benchmark://example-v0", - license="MIT", - description="An example dataset", - ) - self._benchmarks = { - "/foo": Benchmark.from_file_contents( - "benchmark://example-v0/foo", "Ir data".encode("utf-8") - ), - "/bar": Benchmark.from_file_contents( - "benchmark://example-v0/bar", "Ir data".encode("utf-8") - ), - } - - def benchmark_uris(self) -> Iterable[str]: - yield from (f"benchmark://example-v0{k}" for k in self._benchmarks.keys()) - - def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark: - if uri.path in self._benchmarks: - return self._benchmarks[uri.path] - else: - raise LookupError("Unknown program name") - - -# Register the environment for use with gym.make(...). -register( - id="example-v0", - entry_point="compiler_gym.envs:CompilerEnv", - kwargs={ - "service": EXAMPLE_PY_SERVICE_BINARY, - "rewards": [RuntimeReward()], - "datasets": [ExampleDataset()], - }, -) - -def main(): - # Use debug verbosity to print out extra logging information. - init_logging(level=logging.DEBUG) - - # Create the environment using the regular gym.make(...) interface. 
- iteration = 0 - with gym.make("example-v0") as env: - env.reset() - done = False - while not done: - # Not 100% sure why this needs to go in an array, but it seems - # to complain about dimensionality errors due to an issue - # in compiler_gym --- maybe once there is a - action = env.action_space.sample() - print("Starting Iteration " + str(iteration)) - print ("Action is:") - print(action) - observation, reward, done, info = env.step(action, observation_spaces=["ir", "CurrentInstruction", "CurrentInstructionIndex", "II"], reward_spaces=["II"]) - print ("Got observation") - print (observation) - print ("Got reward") - print (reward) - if done: - env.reset() - print ("Overall reward is ", reward) - iteration += 1 - - -if __name__ == "__main__": - main() diff --git a/compiler_gym/envs/cgra/Model.py b/compiler_gym/envs/cgra/Model.py deleted file mode 100644 index ca262325d..000000000 --- a/compiler_gym/envs/cgra/Model.py +++ /dev/null @@ -1,2 +0,0 @@ -class ScheduleModel(object): - def \ No newline at end of file diff --git a/compiler_gym/envs/cgra/Operations.py b/compiler_gym/envs/cgra/Operations.py index c4de467bd..927a8f8c3 100644 --- a/compiler_gym/envs/cgra/Operations.py +++ b/compiler_gym/envs/cgra/Operations.py @@ -1,3 +1,7 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. class Operation(object): def __init__(self, name, inputs, outputs, latency): @@ -10,7 +14,7 @@ def __str__(self): return self.name Operations = [ - # TODO --- should we support more operations as heterogeneous? + # TODO(jcw) --- should we support more operations as heterogeneous? # IMO most of the other things that are scheduled are # pretty vacuous, although we could explore supporting those. # Operation is: name, inputs, outputs, cycles. 
@@ -44,7 +48,7 @@ def operation_index_of(op): return -1 def operation_latency(op): - # TODO --- model latency --- or at least expost this + # TODO(jcw) --- model latency --- or at least expost this # to a configuration. return op.latency diff --git a/compiler_gym/envs/cgra/architectures/BUILD b/compiler_gym/envs/cgra/architectures/BUILD new file mode 100644 index 000000000..24a222a6f --- /dev/null +++ b/compiler_gym/envs/cgra/architectures/BUILD @@ -0,0 +1,15 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +load("@rules_python//python:defs.bzl", "py_library") + +py_library( + name = "architectures", + srcs = [ + "__init__.py", + "CGRA.py" + ], + visibility = ["//visibility:public"] +) \ No newline at end of file diff --git a/compiler_gym/envs/cgra/architectures/CGRA.py b/compiler_gym/envs/cgra/architectures/CGRA.py new file mode 100644 index 000000000..cedf9e02d --- /dev/null +++ b/compiler_gym/envs/cgra/architectures/CGRA.py @@ -0,0 +1,154 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Dict, List +from compiler_gym.envs.cgra.compile_settings import CGRACompileSettings + +""" +This is the most abstract representation of a CGRA ---- +a list of nodes, with an interconnect archtiecture. + +This can be inherited from to make things easier. 
+""" +class CGRA(object): + def __init__(self, nodes, noc): + self.nodes = nodes + self.noc = noc + self.dim = len(self.nodes) + + def __str__(self): + return "CGRA: (" + (str(self.nodes)) + " nodes)" + + def is_supported(self, node_index, op): + # TODO(jcw) -- support heterogeneity + return True + + def cells_as_list(self): + return self.nodes[:] + + def get_neighbour(self, direction, location_from): + return self.noc.get_neighbour(direction, location_from) + +class DataPath(object): + # Keeps track of a path through a NOC. + # Keeping track of the source node is important to allow + # the same bit of data to share the same path. + def __init__(self, source_node, start_cycle, path): + self.path = path + self.start_cycle = start_cycle + self.source_node = source_node + + def __len__(self): + return len(self.path) + + def __str__(self): + return str(self.path) + ": Starting at " + str(self.start_cycle) + ", Carrying results of " + str(self.source_node) + +# Abstract class for a NOC (network on chip). +class NOC(object): + def __init__(self): + pass + + def get_neighbour(self, direction, location): + assert False + + # Work out the shortest path from from_n to to_n in + # the current NoC. + def shortest_path(self, node, from_n, to_n) -> DataPath: + return self.shortest_available_path(0, node, from_n, to_n, None) + + def shortest_available_path(self, start_time, node, from_n, to_n, schedule) -> DataPath: + assert False # Abstract Class + +# A class representing a NOC (netowrk on chip). +class DictNOC(NOC): + def __init__(self, nodes, neighbours: Dict[str, List[str]]): + super().__init__() + # A list of all the nodes. + self.nodes = nodes + # A directed list of one-hop connections between nodes. + self.neighbours = neighbours + + # Returns the neighbour within a 3D space. I don't really + # know how best to set this up in reality ---- espc if a node + # doesn't really have e.g. an 'up' neighbour, but only a 'up and left at + # the same time neighbour'. 
The key constraint currently implemented + # here is that only six directions are supported (up, down, north, south, east + # west) + def get_neighbour(self, direction, location): + ns = self.neighbours[location] + index = None + # TODO(jcw) --- we need a better way of storing these + # so it isn't implicit in the connection --- this implies + # that everything that has a 'south' connection must + # also have a north connection. + if direction == 'north': + index = 0 + elif direction == 'south': + index = 1 + elif direction == 'east': + index = 2 + elif direction == 'west': + index = 3 + elif direction == 'up': + index = 4 + elif direction == 'down': + index = 5 + + if index is None: + print("Unknown index ", direction) + assert False + + if index < len(ns): + print("Returning a node ", len(ns)) + return ns[index] + else: + return None + + # Returns a DataPath object. + def shortest_available_path(self, start_time, source_dfg_node, from_n, to_n, schedule) -> DataPath: + # So we should obviously do this better. + # Just a hack-y BFS search. + seen = set() + # Keep track of node and path so far. + # Invariant: this is sorted by shortest + # path. + to_see = [(from_n, [])] + + while len(to_see) > 0: + n, path_to = to_see[0] + to_see = to_see[1:] + + if n == to_n: + # Found the path. By invariant, this is the shortest + # path. + return DataPath(source_dfg_node, start_time, path_to) + + nexts = self.neighbours[n] + for node in nexts: + if node in seen: + pass + else: + curr_time = start_time + len(path_to) + if schedule is not None: + if schedule.is_occupied(source_dfg_node, curr_time, (n, node)): + # Can't use this as a path if it's currently + # occupied. + # TODO(jcw) --- Add support for buffered delays. 
+ # continue + if CGRACompileSettings['IntroduceRequiredDelays']: + continue + else: + if CGRACompileSettings['DebugShortestPath']: + print("Shortest Path failued due to occupied slot") + return None + # This is BFS, so every new path is within one hop of the current + # search depth: it is the longest one, and can go at the back. + seen.add(node) # Mark on enqueue so a cyclic NoC cannot re-queue nodes forever. + to_see.append((node, path_to + [(source_dfg_node, n, node)])) + # No path between nodes. + if CGRACompileSettings['DebugShortestPath']: + print("Shortest Path failued due to no path found") + return None diff --git a/compiler_gym/envs/cgra/architectures/CMakeLists.txt b/compiler_gym/envs/cgra/architectures/CMakeLists.txt new file mode 100644 index 000000000..857c81bc3 --- /dev/null +++ b/compiler_gym/envs/cgra/architectures/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +cg_add_all_subdirs() + +cg_py_library( + NAME + architectures + SRCS + "__init__.py" + "CGRA.py" + PUBLIC +) diff --git a/compiler_gym/envs/cgra/architectures/__init__.py b/compiler_gym/envs/cgra/architectures/__init__.py new file mode 100644 index 000000000..7ac3f01f5 --- /dev/null +++ b/compiler_gym/envs/cgra/architectures/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. \ No newline at end of file diff --git a/compiler_gym/envs/cgra/cgra_rewards.py b/compiler_gym/envs/cgra/cgra_rewards.py index 3e3be6781..5e911ca7a 100644 --- a/compiler_gym/envs/cgra/cgra_rewards.py +++ b/compiler_gym/envs/cgra/cgra_rewards.py @@ -1,14 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree.
+ from compiler_gym.spaces import Reward -class IntermediateIIReward(Reward): +class IntermediateInitializationIntervalReward(Reward): """An example reward that uses changes in the "runtime" observation value to compute incremental reward. """ def __init__(self): super().__init__( - name="II", - observation_spaces=["II"], + name="InitializationInterval", + observation_spaces=["InitializationInterval"], default_value=0, default_negates_returns=True, deterministic=True, @@ -23,7 +28,7 @@ def update(self, action, observations, observation_view): del action del observation_view - print("Computing Reward: got II of ", observations[0]) + print("Computing Reward: got InitializationInterval of ", observations[0]) if observations[0] is None: # If we just failed to generate a valid schedule all together, # return a punishment. Not 100% sure what this punishment should @@ -37,11 +42,11 @@ def update(self, action, observations, observation_view): For algorithms where a 'right' answer is quick to arrive at, the intermediate rewards are less important. """ -class FinalIIReward(Reward): +class FinalInitializationIntervalReward(Reward): def __init__(self): super().__init__( - name='II', - observation_spaces=['II', 'Done'], + name='InitializationInterval', + observation_spaces=['InitializationInterval', 'Done'], default_value=0, default_negates_returns=True, deterministic=True, @@ -55,7 +60,7 @@ def update(self, action, observations, observation_view): del action del observation_view - print ("Computing Reward: get II of ", observations[0]) + print ("Computing Reward: get InitializationInterval of ", observations[0]) print ("Got finished: ", observations[1]) if observations[0] is None: diff --git a/compiler_gym/envs/cgra/compile_settings.py b/compiler_gym/envs/cgra/compile_settings.py new file mode 100644 index 000000000..8ef350602 --- /dev/null +++ b/compiler_gym/envs/cgra/compile_settings.py @@ -0,0 +1,67 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +CGRACompileSettings = { + # When this is set, the scheduler will take schedules + # that don't account for delays appropriately, and try + # to stretch them out to account for delays correctly. + # When this is false, the compiler will just reject + # such invalid schedules. + # (when set, something like x + (y * z), scheduled + # as +: PE0 on cycle 0, *: PE1 on cycle 0 is valid). + "IntroduceRequiredDelays": False, + + # How much buffering to assume each PE has. (i.e., + # how many operands can be waiting at that node?) + # Set to 0 for infinite buffering, which seems + # to be a fairly common assumption in literature, + # although is obviously bogus in real life. + "BufferLimits": 0, + + # The relative placement algorithm relies on an + # initial placement of the DFG nodes. + # There are several options: + # random: uses truly random node placement. This + # seems to perform poorly sometimes as it relies + # on the agent to untangle the placement without + # intermediate rewards --- it is a challenging + # environment for an agent, although it seems to work + # OK for a GA approach. + # first_avail: This uses the first valid slot + # for every node ordering. Under the current + # scheme, this is guaranteed to work (I think) + # as we have support for infinite buffering --- however + # with infinite buffering disabled, this can walk itself + # into a hole. + "InitialPlacementMode": 'first_avail', + + # If there is more gap between operations than required to transmit + # the operands, should we buffer before or after the transmission? + # Buffering after is more intuitive, but can lead to (rare) situations where + # the nth_avail assignment runs itself into a hole and can't generate + # an assignment. + # Options are before_transmit and after_transmit. + "BufferingMode": "before_transmit", + + # These are debug flags.
Done this way because various different + # frontends use this, and redefining all the flags seems + # needlessly tedious. + "DebugGetInitializationInterval": True, # Debug the Schedule:get_InitializationInterval() function. + "DebugGetValidSlots": True, # Debug the InternalSchedule:get_valid_slots function + "DebugShortestPath": True # Debug the DictNOC:shortest_available_path function +} + +# These are some settings for the relative placement algorithm. +RelativePlacementSettings = { + # Allow swaps that cause invalid states. This may allow a clever agent + # to perform well, but for less clever agents can result in a lot + # of failed compilations. (e.g., for a GA agent, this should + # probably be true, as that can handle lots of wrong compilations) + 'AllowInvalidIntermediateSchedules': False, + + # Number of times to iterate over the placement algorithm. + # We do Iterations * DFG Nodes iterations. + 'Iterations': 100, +} \ No newline at end of file diff --git a/compiler_gym/envs/cgra/compiler_gym_test.py b/compiler_gym/envs/cgra/compiler_gym_test.py deleted file mode 100644 index 0cefdd412..000000000 --- a/compiler_gym/envs/cgra/compiler_gym_test.py +++ /dev/null @@ -1,30 +0,0 @@ -import compiler_gym - -from compiler_gym.service.proto import ( - ActionSpace, - Benchmark, - DoubleRange, - Event, - Int64Box, - Int64Tensor, - NamedDiscreteSpace, - ObservationSpace, - Space, - StringSpace -) - -env.reset(benchmark="benchmark://npb-v0/50") -episode_reward = 0 - -while len(nodes_to_schedule) > 0: - - observation, reward, done, info = env.step(env.action_space.sample()) - if done: - break - - episode_reward += reward - - print(f"Ste {i}, quality={episode_reward:.2%}") - -with compiler_gym.make("llvm-autophase-ic-v0") as env: - env.reset() \ No newline at end of file diff --git a/compiler_gym/envs/cgra/datasets/BUILD
b/compiler_gym/envs/cgra/datasets/BUILD @@ -1,3 +1,8 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + load("@rules_python//python:defs.bzl", "py_library") py_library( diff --git a/compiler_gym/envs/cgra/datasets/CMakeLists.txt b/compiler_gym/envs/cgra/datasets/CMakeLists.txt new file mode 100644 index 000000000..1dde7f1a4 --- /dev/null +++ b/compiler_gym/envs/cgra/datasets/CMakeLists.txt @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +cg_add_all_subdirs() + +cg_py_library( + NAME + datasets + SRCS + "__init__.py" + "dfg_bench.py" + DEPS + compiler_gym::datasets::datasets + compiler_gym::service::proto::proto + compiler_gym::util::util + PUBLIC +) diff --git a/compiler_gym/envs/cgra/service/cgra_env.py b/compiler_gym/envs/cgra/service/cgra_env.py index 0dcf578db..1aec916a9 100644 --- a/compiler_gym/envs/cgra/service/cgra_env.py +++ b/compiler_gym/envs/cgra/service/cgra_env.py @@ -1,3 +1,8 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ import os import shutil from pathlib import Path @@ -16,7 +21,7 @@ from compiler_gym.spaces import Dict as DictSpace from compiler_gym.spaces import Scalar, Sequence -from compiler_gym.envs.cgra.cgra_rewards import IntermediateIIReward +from compiler_gym.envs.cgra.cgra_rewards import IntermediateInitializationIntervalReward class CgraEnv(ClientServiceCompilerEnv): def __init__(self, *args, datasets_site_path: Optional[Path] = None, benchmark: Optional[Union[str, Benchmark]] = None, datasets_set_path: Optional[Path] = None, **kwargs): @@ -25,14 +30,7 @@ def __init__(self, *args, datasets_site_path: Optional[Path] = None, benchmark: **kwargs, benchmark = benchmark or "dfg_10/1", datasets=get_cgra_datasets(site_data_base=datasets_site_path), - rewards=[IntermediateIIReward()] - , - derived_observation_spaces=[ - # { - # "id": "CurrentOperation", - # "base_id": - # } - ] + rewards=[IntermediateInitializationIntervalReward()] ) def reset(self, reward_space = OptionalArgumentValue.UNCHANGED, *args, **kwargs): @@ -40,12 +38,6 @@ def reset(self, reward_space = OptionalArgumentValue.UNCHANGED, *args, **kwargs) return observation - def make_benchmark( - self, inputs, copt, system_include: bool = True, timeout: int=600 - ): - # TOOD - return None - def render(self, mode="human"): if mode == "human": print("human-visible schedule") diff --git a/compiler_gym/envs/cgra/service/cgra_service.py b/compiler_gym/envs/cgra/service/cgra_service.py index f2231ea0b..522cfab75 100644 --- a/compiler_gym/envs/cgra/service/cgra_service.py +++ b/compiler_gym/envs/cgra/service/cgra_service.py @@ -1,15 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ import logging from re import I import pickle from typing import Optional, Tuple, List, Dict, Set, Union from pathlib import Path +from compiler_gym.envs.cgra.compile_settings import CGRACompileSettings from compiler_gym.views import ObservationSpaceSpec from compiler_gym.spaces import Reward from compiler_gym.envs.llvm.llvm_rewards import CostFunctionReward from compiler_gym.service.client_service_compiler_env import ClientServiceCompilerEnv from compiler_gym.util.gym_type_hints import ObservationType, OptionalArgumentValue from compiler_gym.service import CompilationSession +from compiler_gym.envs.cgra.architectures.CGRA import CGRA, NOC, DictNOC, DataPath from compiler_gym.util.commands import run_command from compiler_gym.service.proto import ( ActionSpace, @@ -32,47 +39,15 @@ from compiler_gym.service.proto.compiler_gym_service_pb2 import Int64SequenceSpace #from compiler_gym.service.runtime import create_and_run_compiler_gym_service -CompileSettings = { - # When this is set, the scheduler will take schedules - # that don't account for delays appropriately, and try - # to stretch them out to account for delays correctly. - # When this is false, the compiler will just reject - # such invalid schedules. - # (when set, something like x + (y * z), scheduled - # as +: PE0 on cycle 0, *: PE1 on cycle 0 is valid). - "IntroduceRequiredDelays": False -} - def load_CGRA(self, file): - # TODO -- properly load CGRA + # TODO(jcw) -- properly load CGRA return CGRA(5, 5) def load_NOC(self, file): - # TODO -- properly load NOC. + # TODO(jcw) -- properly load NOC (network on chip). # Initialize to a straight-line NOC - return NOC([(x, x + 1) for x in range(5)]) - -class CGRA(object): - # Assume a rectangular matrix with neighbour - # connections. We can handle the rest later. 
- def __init__(self, nodes, noc): - self.nodes = nodes - self.noc = noc - self.dim = len(self.nodes) - - def __str__(self): - return "CGRA: (" + (str(self.nodes)) + " nodes)" - - def is_supported(node_index, op): - # TODO -- support heterogeneity - return True - - def cells_as_list(self): - return self.nodes[:] - - def get_neighbour(self, direction, location_from): - return self.noc.get_neighbour(direction, location_from) + return DictNOC([(x, x + 1) for x in range(5)]) # This is just a wrapper around an actual object that # is a schedule --- see the Schedule object for something @@ -83,6 +58,120 @@ def __init__(self, cgra, dfg): self.cgra = cgra self.operations = self.initialize_schedule() + # Returns a complicated 4-tuple. + # (Timeslot, Location, Path Requirements, BufferRequirements) + # Path Requirements is a List[DataPath] + # BufferRequirements is List[(Start Time, End Time)] with + # the buffering location implicitly at the Location. + def get_valid_slots(self, dependencies, latency, noc_schedule, buffer_schedule): + # First, get the finished location of each of the dependencies. + min_time = 0 + dep_locations = [] + for dep in dependencies: + time, location = self.get_location(dep) + # The operation must be scheduled --- it's a dependency! (support for loops needed.) + if time is None: + print ("Operation dependency", dep, " has not been scheduled (cross-dependencies not supported).") + assert False + dep_latency = self.get_latency(time, location) # Own name: 'latency' is this op's latency and must survive the loop. + + min_time = max(min_time, time + dep_latency) + dep_locations.append((dep, time, location, dep_latency)) + + # Starting from the min time, iterate over every unoccupied + # tile and see if it's reachable. If it is both unoccupied + # and reachable by all the deps, then we can use it. + t = min_time + # Keep track of the number of tries we've had without + # finding a valid slot.
We fail if this gets too high--- + # the point is just to make this easier to debug, although + # perhaps some algorithms could make better use of this. + tries_since_slot_found = 0 + while True: + if tries_since_slot_found > 1000: + print("It has been more than 1000 slots looked at since we found a valid slot --- likely in an infinite loop.") + assert False + while t >= len(self.operations): + # Make sure we aren't past the end of the schedule. + self.operations.append(self.add_timestep()) + + for loc in range(len(self.operations[t])): + # Check first if this is occupied: + if CGRACompileSettings['DebugGetValidSlots']: + print("Searchign location ", loc, "at time", t) + is_free = self.slots_are_free(loc, t, t + latency) + if not is_free: + if CGRACompileSettings['DebugGetValidSlots']: + print("Location was not free.") + # The latency compute step is expensive, so skip + # if possible. + continue + + # Now, check to see if all of the operands can reach + # this --- note that we can't have them sharing routing + # resources, so we have to keep track of what we are using + # here. + used_resources = [] + arrival_times = [] + # We operate on a cloned copy becase we haven't actually + # scheduled the op yet --- just trying to look + # for valid locations! + noc_schedule_clone = noc_schedule.clone() + buffer_schedule_clone = buffer_schedule.clone() + earliest_execution_time = t + + failed = False + # Keep track fo the resrouces that are getting reserved + # within these clones so they can be + # returned and updated in a scheduling state. 
+ paths = [] + buffer_slots = [] + + for (dep, dep_time, dep_loc, dep_latency) in dep_locations: + if CGRACompileSettings['DebugGetValidSlots']: + print("Checking dependency from ", dep_time, "and location", dep_loc) + if CGRACompileSettings['BufferingMode'] == 'before_transmit': + pass + else: + pass + + finish_time = dep_time + dep_latency + path = self.cgra.noc.shortest_available_path(finish_time, dep, dep_loc, loc, noc_schedule_clone) + if path is None: + if CGRACompileSettings['DebugGetValidSlots']: + print("Path was not free.") + # Couldn't schedule the node here! + failed = True + break + else: + arrival_times.append(finish_time + len(path)) + # Reserve the routing resources in the NOC clone. + noc_schedule_clone.occupy_path(path) + paths.append(path) # Keep track of the paths this requies. + earliest_execution_time = max(earliest_execution_time, finish_time + len(path)) + for arrival_time in arrival_times: + # TODO(jcw) --- note that if the problem is that the buffers + # get full, it's unlikely that dealying further will solve + # the problem. Not 100% sure what the actual solution to + # this will be. + reserved = buffer_schedule_clone.occupy_buffer(loc, arrival_time, earliest_execution_time) + if CGRACompileSettings['DebugGetValidSlots']: + print("Trying to reserve buffering space from arrival time ", arrival_time, "...") + print("Reserved:", reserved) + buffer_slots.append((arrival_time, earliest_execution_time)) + if not reserved: + # Not enough buffering + failed = True + break + if not failed: + # We were able to route everything tof this possible placement. + tries_since_slot_found = 0 + yield t, loc, paths, buffer_slots + else: + tries_since_slot_found += 1 + + t += 1 + # Returns a fixed-length tensor for this schedule. # It focuses on the last few cycles. 
def to_rlmap_tensor(self, node, time_window_size=1): @@ -260,121 +349,75 @@ def alloc_times(self, x): else: free_before = True +class BufferSchedule(object): + def __init__(self): + self.schedule = [] + + def clone(self): + new_sched = BufferSchedule() + for bufs in self.schedule: + new_sched.schedule.append(dict(bufs)) + return new_sched + + def occupy_buffer(self, loc, from_time, to_time): + for t in range(from_time, to_time + 1): + while t >= len(self.schedule): + self.schedule.append({}) + + if loc in self.schedule[t]: + self.schedule[t][loc] += 1 + max_buf = CGRACompileSettings['BufferLimits'] + # if the max buffering is set to 0 or -ve, assume + # infinite buffering. + if max_buf > 0 and self.schedule[t][loc] > max_buf: + return False + else: + self.schedule[t][loc] = 1 + + return True + class NOCSchedule(object): def __init__(self): self.schedule = [] - def occupy_path(self, start_cycle, path): - for hop in path: - self.occupy_connection(start_cycle, hop) + def clone(self): + # Return a deep copy of this schedule. + new_schedule = NOCSchedule() + for conns in self.schedule: + new_schedule.schedule.append(dict(conns)) + return new_schedule + + def occupy_path(self, path: DataPath): + start_cycle = path.start_cycle + for hop in path.path: + self.occupy_connection(path.source_node, start_cycle, hop[-2:]) # Key by the (from, to) link, which is what is_occupied() is queried with. start_cycle += 1 - def occupy_connection(self, time, connection): + def occupy_connection(self, node: Node, time: int, connection): while time >= len(self.schedule): - self.schedule.append(set()) + self.schedule.append({}) - if connection in self.schedule[time]: + if connection in self.schedule[time] and self.schedule[time][connection] != node.name: # Can't occuoy an already occupied connection.
+ print("Tried to occupy connection already occupied by ", self.schedule[time][connection], "with node", node.name) assert False else: - self.schedule[time].add(connection) + self.schedule[time][connection] = node.name - def is_occupied(self, time, hop): + def is_occupied(self, source_dfg_node: Node, time, hop): if time >= len(self.schedule): # Not occupied if beyond current suecule return False else: - return hop in self.schedule[time] - -# A class representing a NOC. -class NOC(object): - def __init__(self, nodes, neighbours: Dict[str, List[str]]): - # A list of all the nodes. - self.nodes = nodes - # A directed list of one-hop connections between nodes. - self.neighbours = neighbours - - # Returns the neighbour within a 3D space. I don't really - # know how best to set this up in reality ---- espc if a node - # doesn't really have e.g. an 'up' neighbour, but only a 'up and left at - # the same time neighbour'. The key constraint currently implemented - # here is that only six directions are supported (up, down, north, south, east - # west) - def get_neighbour(self, direction, location): - ns = self.neighbours[location] - index = None - # TODO --- we need a better way of storing these - # so it isn't implicit in the connection --- this implies - # that everything that has a 'south' connection must - # also have a north connection. - if direction == 'north': - index = 0 - elif direction == 'south': - index = 1 - elif direction == 'east': - index = 2 - elif direction == 'west': - index = 3 - elif direction == 'up': - index = 4 - elif direction == 'down': - index = 5 - - if index is None: - print("Unknown index ", direction) - assert False - - if index < len(ns): - print("Returning a node ", len(ns)) - return ns[index] - else: - return None - - # Work out the shortest path from from_n to to_n in - # the current NoC.
- def shortest_path(self, from_n, to_n): - return self.shortest_available_path(0, from_n, to_n, None) - - def shortest_available_path(self, start_time, from_n, to_n, schedule): - # So we should obviously do this better. - # Just a hack-y BFS search. - seen = set() - # Keep track of node and path so far. - # Invariant: this is sorted by shortest - # path. - to_see = [(from_n, [])] - - while len(to_see) > 0: - n, path_to = to_see[0] - to_see = to_see[1:] - - if n == to_n: - # Found the path. By invariant, this is the shortest - # path. - return path_to - - nexts = self.neighbours[n] - for node in nexts: - if node in seen: - pass - else: - curr_time = start_time + len(path_to) - if schedule is not None: - if schedule.is_occupied(curr_time, (n, node)): - # Can't use this as a path if it's currently - # occupied. - # TODO --- Add support for buffered delays. - # continue - if CompileSettings['IntroduceRequiredDelays']: - continue - else: - return None - # This is BFS, so everything must bewithin - # one hop of the current search. Therefore - # this is the longest one, and can go at the back. - to_see.append((node, path_to + [(n, node)])) - # No path between nodes. - return None + if hop in self.schedule[time]: + if self.schedule[time][hop] == source_dfg_node.name: + return False # Technically occupied, but can be + # shared. + print("Slot is occupired with node ", self.schedule[time][hop]) + print("Looking to use it for node ", source_dfg_node.name) + return True + else: + return False class Schedule(object): def __init__(self, cgra, dfg): @@ -397,7 +440,7 @@ def to_rlmap_tensor(self, node, time_window_size=1): # of operations on nodes. return self.operations.to_rlmap_tensor(node, time_window_size=time_window_size) - def swap(self, origin_time, origin_index, target_time, target_index): + def swap(self, origin_time, origin_index, target_time, target_index, dfg, allow_invalid=True): # This is a slightly non-trivial function since operations may have non-one # latency.
We treat swap-points as the starting-points of the operation --- # if the target point is in the middle of another operation, we choose to @@ -415,14 +458,34 @@ def swap(self, origin_time, origin_index, target_time, target_index): target_window_is_clear = self.operations.set_operation(target_time, target_index, operation_node, operation_node.operation.latency) # Now do the swap of operations - if target_window_is_clear: - print("Doing swap between ", origin_index, 'at', origin_time, 'to', target_index, 'at', target_time, 'with latency', op_latency) + if target_window_is_clear and target_time >= 0: # Dont' swap into past! + print("Doing swap between ", origin_index, 'at', origin_time, 'to', target_index, 'at', target_time, 'with latency', op_latency, "(invalid swaps is allowed is ", allow_invalid, ")") + InitializationInterval, _ = self.get_InitializationInterval(dfg) + assert InitializationInterval is not None #We are tryign to preserve this invariant through the scheduling. self.operations.set_operation(target_time, target_index, operation_node, op_latency) self.operations.clear_operation(origin_time, origin_index, op_latency) + if not allow_invalid: + # Check that this produced a valid schedule. + # TODO --- make this check more efficient -- we don't have + # to recompute the whole InitializationInterval. + InitializationInterval, _ = self.get_InitializationInterval(dfg) + if InitializationInterval is None: + print("Undo the swap!") + # Undo the swap + self.operations.set_operation(origin_time, origin_index, operation_node, op_latency) + self.operations.clear_operation(target_time, target_index, op_latency) + InitializationInterval, _ = self.get_InitializationInterval(dfg) + assert InitializationInterval is not None + return False return True else: return False + # This returns an iterator that iterates over possible + # valid slots for an operation. This allows for things like + # random placement. 
+ def get_valid_slots(self, dependencies, latency, noc_schedule, buffer_schedule): + return self.operations.get_valid_slots(dependencies, latency, noc_schedule, buffer_schedule) def clear_operation(self, time, index, latency): self.operations.clear_operation(time, index, latency) @@ -436,7 +499,7 @@ def compute_and_reserve_communication_distance(self, cycle, n1, n2, noc_schedule n2_t, n2_loc = self.get_location(n2) # TODO -- a sanity-check that cycle is after this might be a good idea. - path = self.cgra.noc.shortest_available_path(cycle, n1_loc, n2_loc, noc_schedule) + path = self.cgra.noc.shortest_available_path(cycle, n1, n1_loc, n2_loc, noc_schedule) if path is None: # TODO --- we should probably punish the agent a lot here @@ -444,13 +507,13 @@ def compute_and_reserve_communication_distance(self, cycle, n1, n2, noc_schedule print("Schedule has not valid path between ", n1_loc, "and", n2_loc, "at time", cycle) return None else: - noc_schedule.occupy_path(cycle, path) + noc_schedule.occupy_path(path) # I think we don't need the whole path? Not too sure though. return len(path) - def get_II(self, dfg): - # Compute the II of the current schedule. + def get_InitializationInterval(self, dfg): + # Compute the InitializationInterval of the current schedule. # We don't require the placement part to be actually correct --- # do the actual schedule what we generate can differ @@ -459,6 +522,7 @@ def get_II(self, dfg): noc_schedule = NOCSchedule() # The NOC schedule is recomputed # every time because it is dependent on the actual # schedule. + buffer_schedule = BufferSchedule() # What cycle does this node get executed on? 
cycles_start = {} @@ -492,20 +556,26 @@ def get_II(self, dfg): # This is not a complete operation continue - print("Looking at node ", node) - print("Has preds ", [str(p) for p in preds]) + if CGRACompileSettings['DebugGetInitializationInterval']: + print("Looking at node ", node) + print("Has preds ", [str(p) for p in preds]) + + arrival_times = [] for pred in preds: if pred.name not in cycles_end: finished = False continue pred_cycle = cycles_end[pred.name] - print ("Have pred that finishes at cycle", pred_cycle) + if CGRACompileSettings['DebugGetInitializationInterval']: + print ("Have pred that finishes at cycle", pred_cycle) # Compute the time to this node, and # reserve those paths on the NoC. distance = self.compute_and_reserve_communication_distance(pred_cycle, pred, node, noc_schedule) + if CGRACompileSettings['DebugGetInitializationInterval']: + print("Failed due to distance not working", distance) if distance is None: # This schedule isn't possible due to conflicting memory requirements. return None, False @@ -513,9 +583,20 @@ def get_II(self, dfg): # Compute when this predecessor reaches this node: arrival_time = distance + pred_cycle earliest_time = max(earliest_time, arrival_time) + arrival_times.append(arrival_time) + + # Setup the buffering requirements: + for arr_time in arrival_times: + ntim, nloc = self.get_location(node) + reserved = buffer_schedule.occupy_buffer(nloc, arr_time, earliest_time) # Buffer each operand from its own arrival, not the last predecessor's. + if not reserved: + # This schedule isn't possible due to buffering requirements. + # TODO --- can we delay computation to get the buffering + # satisfied? + return None, False - # TODO --- compute a penalty based on the gap between - # operations to account for buffering. + # TODO --- compute a penalty based on the gap between + # operations to account for buffering. # Check that the PE is actually free at this time --- if it # isn't, push the operation back.
@@ -536,13 +617,14 @@ def get_II(self, dfg): cycles_start[node.name] = free_time cycles_end[node.name] = free_time + operation_latency(node.operation) - print ("Node ", node.name, "has earliest time", earliest_time) + if CGRACompileSettings['DebugGetInitializationInterval']: + print("Node ", node.name, "has earliest time", earliest_time) # Now that we've done that, we need to go through all the nodes and - # work out the II. + # work out the InitializationInterval. # When was this computation slot last used? (i.e. when could # we overlap the next iteration?) - min_II = 0 + min_InitializationInterval = 0 for loc in actual_schedule.locations(): # Now, we could achieve better performance # by overlapping these in a more fine-grained @@ -556,12 +638,13 @@ def get_II(self, dfg): first_alloc = min(actual_schedule.alloc_times(loc)) difference = last_free - first_alloc - print ("Diff at loc", loc, "is", difference) - min_II = max(min_II, difference) + if CGRACompileSettings['DebugGetInitializationInterval']: + print ("Diff at loc", loc, "is", difference) + min_InitializationInterval = max(min_InitializationInterval, difference) # TODO --- we should probably return some kind of object # that would enable final compilation also. 
- return min_II, finished + return min_InitializationInterval, finished # Create a dummy CGRA that is a bunch of PEs in a row with neighbor-wise communciations nodes = [1, 2, 3, 4] @@ -571,7 +654,7 @@ def get_II(self, dfg): neighbours_dict[0] = [n + 1] neighbours_dict[len(nodes)] = [n - 1] -compilation_session_noc = NOC(nodes, neighbours_dict) +compilation_session_noc = DictNOC(nodes, neighbours_dict) compilation_session_cgra = CGRA(nodes, compilation_session_noc) action_space = [ActionSpace(name="Schedule", @@ -626,7 +709,7 @@ def get_II(self, dfg): space=Space( int64_value=Int64Range(min=0, max=MAX_WINDOW_SIZE) )), - ObservationSpace(name="II", + ObservationSpace(name="InitializationInterval", space=Space( int64_value=Int64Range(min=0) )), @@ -731,10 +814,10 @@ def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool latency = operation_latency(node.operation) op_set = self.schedule.set_operation(self.time, response - 1, node, latency) - # Check that the II still exists: - II, finished = self.schedule.get_II(self.dfg) - has_II = II is not None - if not has_II: + # Check that the InitializationInterval still exists: + InitializationInterval, finished = self.schedule.get_InitializationInterval(self.dfg) + has_InitializationInterval = InitializationInterval is not None + if not has_InitializationInterval: # Unset that operation: print("Setting operation resulted in failed DFG mapping") print(self.schedule) @@ -742,14 +825,14 @@ def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool print("After clearning, have") print(self.schedule) op_set = False # Need to punish. - new_II, _ = self.schedule.get_II(self.dfg) - assert (new_II is not None) # This should not + new_InitializationInterval, _ = self.schedule.get_InitializationInterval(self.dfg) + assert (new_InitializationInterval is not None) # This should not # be non-existent after un-scheduling. 
if op_set: had_effect = True print("Scheduled operation", str(self.node_order[self.current_operation_index])) - print("Got an II of ", II) + print("Got an InitializationInterval of ", InitializationInterval) self.current_operation_index += 1 elif response == 0: self.time += 1 @@ -787,11 +870,11 @@ def get_observation(self, observation_space: ObservationSpace) -> Event: elif observation_space.name == "CurrentInstructionIndex": # Return a way to localize the instruction within the graph. return Event(int64_value=self.current_operation_index) - elif observation_space.name == "II": - print("Computing II for schedule:") + elif observation_space.name == "InitializationInterval": + print("Computing InitializationInterval for schedule:") print(self.schedule) - ii, finished = self.schedule.get_II(self.dfg) - print("Got II", ii) + ii, finished = self.schedule.get_InitializationInterval(self.dfg) + print("Got InitializationInterval", ii) print ("Finished is ", finished) return Event(int64_value=ii) elif observation_space.name == "RLMapObservations": diff --git a/compiler_gym/envs/cgra/service/compiler_gym-cgra-service b/compiler_gym/envs/cgra/service/compiler_gym-cgra-service index 06db57f41..e24392676 100755 --- a/compiler_gym/envs/cgra/service/compiler_gym-cgra-service +++ b/compiler_gym/envs/cgra/service/compiler_gym-cgra-service @@ -1,4 +1,8 @@ #!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. import os import traceback @@ -7,7 +11,7 @@ from compiler_gym.envs.cgra.service.cgra_service import make_cgra_compilation_se from compiler_gym.service.runtime import create_and_run_compiler_gym_service if __name__ == "__main__": - # TODO --- load a CGRA description? + # TODO(jcw) --- load a CGRA description? 
try: create_and_run_compiler_gym_service(make_cgra_compilation_session()) except: diff --git a/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service b/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service index 54300e6ef..756e79ab0 100755 --- a/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service +++ b/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service @@ -1,4 +1,8 @@ #!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. import os import traceback diff --git a/compiler_gym/envs/cgra/service/relative_cgra_env.py b/compiler_gym/envs/cgra/service/relative_cgra_env.py index 6387f7ed9..a6c325c48 100644 --- a/compiler_gym/envs/cgra/service/relative_cgra_env.py +++ b/compiler_gym/envs/cgra/service/relative_cgra_env.py @@ -1,4 +1,7 @@ - +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
from compiler_gym.util.gym_type_hints import OptionalArgumentValue from compiler_gym.service.client_service_compiler_env import ClientServiceCompilerEnv @@ -7,14 +10,14 @@ from compiler_gym.datasets import Benchmark from compiler_gym.envs.cgra.datasets import get_cgra_datasets -from compiler_gym.envs.cgra.cgra_rewards import IntermediateIIReward, FinalIIReward +from compiler_gym.envs.cgra.cgra_rewards import IntermediateInitializationIntervalReward, FinalInitializationIntervalReward class RelativeCgraEnv(ClientServiceCompilerEnv): def __init__(self, *args, punish_intermediate: bool = True, datasets_site_path: Optional[Path] = None, benchmark: Optional[Union[str, Benchmark]], **kwargs): if punish_intermediate: - reward = IntermediateIIReward() + reward = IntermediateInitializationIntervalReward() else: - reward = FinalIIReward() + reward = FinalInitializationIntervalReward() super().__init__( *args, **kwargs, diff --git a/compiler_gym/envs/cgra/service/relative_placement_service.py b/compiler_gym/envs/cgra/service/relative_placement_service.py index 0314d16f0..0344bfe82 100644 --- a/compiler_gym/envs/cgra/service/relative_placement_service.py +++ b/compiler_gym/envs/cgra/service/relative_placement_service.py @@ -1,6 +1,13 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ from pathlib import Path from typing import Tuple, Optional, Union, List -from compiler_gym.envs.cgra.service.cgra_service import CGRASession, observation_space, Schedule, CGRA, relative_placement_directions +from compiler_gym.envs.cgra.service.cgra_service import BufferSchedule, CGRASession, NOCSchedule, observation_space, Schedule, CGRA, relative_placement_directions +from compiler_gym.envs.cgra.compile_settings import CGRACompileSettings, RelativePlacementSettings +import random from compiler_gym.spaces import Reward import traceback from compiler_gym.service import CompilationSession @@ -48,7 +55,7 @@ def __init__(self, working_directory: Path, action_space: ActionSpace, benchmark # after every reset, it is important for some (e.g. genetic algorithms) self.dfg = pickle.loads(benchmark.program.contents) print("Loaded DFG " + str(self.dfg)) - # TODO --- support better seeds. + # TODO(jcw) --- support better seeds. self.schedule = Schedule(self.cgra, self.dfg) self.initial_placement = self.get_initial_placement(self.dfg, 0) @@ -85,40 +92,103 @@ def reset(self, raise e def get_initial_placement(self, dfg, seed): - try: - # For now, just place the nodes in order on the CGRA. - # Iterate through the PEs, and then increment the clock cycle - # if we can't place. - pe_ind = 0 - time = 0 - max_pe = self.cgra.cells_as_list() - nodes = dfg.bfs() - iterating = True - was_set = True - while iterating: - # only move to next node if we properly set the operation last time. - if was_set: - n = next(nodes, None) - if n is None: - # Finished scheduling! - iterating = False - continue - was_set = self.schedule.set_operation(time, pe_ind, n, n.operation.latency) - if was_set: - print("Set initial placement for node", str(n)) - print("Position is ", self.schedule.get_location(n)) - - # TODO -- should we check that this produces a schedule with an II? 
- # Aim is to start with a very spread-out schedule that should just work --- - # let the SA algorithm compress it, rather than trying to make - # the SA algorithm find a valid schedule. - pe_ind += 1 - time += n.operation.latency - if pe_ind >= len(max_pe): - pe_ind = 0 - except Exception as e: - print(traceback.format_exc()) - raise e + mode = CGRACompileSettings['InitialPlacementMode'] + if mode == 'random': + self.get_initial_placement_random(dfg, seed) + elif mode == 'first_avail': + self.get_initial_placement_nth_avail(dfg, seed, 1) + elif mode == 'second_avail': + # First avail results in compressed sequences. + # second avail spreads things out better? + self.get_initial_placement_nth_avail(dfg, seed, 2) + elif mode == 'lee2021': + self.get_initial_placement_linear(dfg, seed) + + def get_initial_placement_nth_avail(self, dfg, seed, n): + nodes = dfg.bfs() + noc_schedule = NOCSchedule() + buffer_schedule = BufferSchedule() + + for node in nodes: + # Take the first possible placmenent + dependences = dfg.get_preds(node) + lat = node.operation.latency + poss_placements = self.schedule.get_valid_slots(dependences, lat, noc_schedule, buffer_schedule) + i = n + t_placement, loc = None, None + while i > 0: + t_placement, loc, required_paths, required_buffer_placements = next(poss_placements) + i -= 1 + self.schedule.set_operation(t_placement, loc, node, node.operation.latency) + for path in required_paths: + noc_schedule.occupy_path(path) + for from_time, to_time in required_buffer_placements: + buffer_schedule.occupy_buffer(loc, from_time, to_time) + + # When using the first_avail placement, it should result + # in a valid schedule to start with. + initial_InitializationInterval, _ = self.schedule.get_InitializationInterval(dfg) + print("After initial placement (mode, first_avail), got InitializationInterval", initial_InitializationInterval) + assert initial_InitializationInterval is not None #should be a valid schedule. 
+ + + # Do a random initial placment --- requires + # extensively smart agents to then go and correct this. + def get_initial_placement_random(self, dfg, seed): + max_pe = self.cgra.cells_as_list() + nodes = dfg.bfs() + time = 0 + # RODO -- setup seed. + + was_set = False + iterating = True + while iterating: + if was_set: + n = next(nodes, None) + if n is None: + iterating = False + continue + else: + # Try at new time + time += 1 + pe_ind = random.randomint(0, len(max_pe) - 1) + was_set = self.schedule.set_operation(time, pe_ind, n, n.operation.latency) + + # This is like a crappy approxiation of Lee 2021 DAC (Crappy + # because it's not guaranteed to give you the right thing.) + # It's also not quite that --- because that was truly on + # he diagonal, while this is using a zig-zag approach. + def get_initial_placement_linear(self, dfg, seed): + # For now, just place the nodes in order on the CGRA. + # Iterate through the PEs, and then increment the clock cycle + # if we can't place. + pe_ind = 0 + time = 0 + max_pe = self.cgra.cells_as_list() + nodes = dfg.bfs() + iterating = True + was_set = True + while iterating: + # only move to next node if we properly set the operation last time. + if was_set: + n = next(nodes, None) + if n is None: + # Finished scheduling! + iterating = False + continue + was_set = self.schedule.set_operation(time, pe_ind, n, n.operation.latency) + if was_set: + print("Set initial placement for node", str(n)) + print("Position is ", self.schedule.get_location(n)) + + # TODO -- should we check that this produces a schedule with an InitializationInterval? + # Aim is to start with a very spread-out schedule that should just work --- + # let the SA algorithm compress it, rather than trying to make + # the SA algorithm find a valid schedule. 
+ pe_ind += 1 + time += n.operation.latency + if pe_ind >= len(max_pe): + pe_ind = 0 def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool]: try: @@ -149,9 +219,10 @@ def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool else: new_location = self.cgra.get_neighbour(action_to_do, current_location) + print("Before swap, InitializationInterval is ", self.schedule.get_InitializationInterval(self.dfg), "iteration is ", self.iteration_number) if new_location is not None: print("Swapping between", current_location, 'and', new_location) - swapped = self.schedule.swap(current_time, current_location, new_time, new_location) + swapped = self.schedule.swap(current_time, current_location, new_time, new_location, self.dfg, allow_invalid=RelativePlacementSettings['AllowInvalidIntermediateSchedules']) else: # If the new location is none, that means that we picked a direction # that is invalid (ie. doesn't exist for the node in question). To make @@ -167,6 +238,7 @@ def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool print("After iteration, schedule is ", self.schedule) print("Swapped is ", swapped) + print("InitializationInterval is ", self.schedule.get_InitializationInterval(self.dfg)) return False, None, swapped except Exception as e: @@ -176,8 +248,8 @@ def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool def get_observation(self, observation_space: ObservationSpace) -> Event: try: result = super().get_observation(observation_space=observation_space) - if observation_space.name == 'II': - ii, finished = self.schedule.get_II(self.dfg) + if observation_space.name == 'InitializationInterval': + ii, finished = self.schedule.get_InitializationInterval(self.dfg) if not finished: # The RLLib library can't handle nones, so # Just return a large punishment if this fails diff --git a/examples/cgra/.gitignore b/examples/cgra/.gitignore index 302961024..11584e79a 100644 --- 
a/examples/cgra/.gitignore +++ b/examples/cgra/.gitignore @@ -1,2 +1,3 @@ -rewards.png ga_output +relative_placement_output/out +relative_placement_output/rp_data \ No newline at end of file diff --git a/examples/cgra/ga_scripts/ga_score_extractor.sh b/examples/cgra/ga_scripts/ga_score_extractor.sh index f4bb070cc..ebea2697b 100755 --- a/examples/cgra/ga_scripts/ga_score_extractor.sh +++ b/examples/cgra/ga_scripts/ga_score_extractor.sh @@ -12,7 +12,7 @@ while [[ $# -gt 0 ]]; do input=$1 shift - # Get the second to last line, which has the II for the graph. + # Get the second to last line, which has the InitializationInterval for the graph. ii=$(tail -n 2 $input | head -n 1 | cut -f 7 -d' ' ) echo "$ii, " >> $output diff --git a/examples/cgra/ga_scripts/plot.sh b/examples/cgra/ga_scripts/plot.sh new file mode 100755 index 000000000..2101bcd82 --- /dev/null +++ b/examples/cgra/ga_scripts/plot.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python plot_CDFs.py ga_output GA ../relative_placement_output/rp_data RLMap diff --git a/examples/cgra/ga_scripts/plot_CDFs.py b/examples/cgra/ga_scripts/plot_CDFs.py index 275495999..c2159c236 100644 --- a/examples/cgra/ga_scripts/plot_CDFs.py +++ b/examples/cgra/ga_scripts/plot_CDFs.py @@ -10,7 +10,7 @@ def load_cdf_from_file(f): for item in lines.split(','): if item.strip(): try: - data.append(int(item.strip())) + data.append(abs(int(item.strip()))) except: # Plenty of reasons this could fail -- mostly due to @@ -36,7 +36,7 @@ def compute_cdf(data): return x_points, cdf -def plot_datas(datas): +def plot_datas(datas, names): # First, compute the CDF from the raw data cdfs = [] xvals = [] @@ -47,21 +47,31 @@ def plot_datas(datas): xvs_max = 0 for i in range(len(cdfs)): - plt.plot(xvals[i], cdfs[i]) + plt.plot(xvals[i], cdfs[i], label=names[i]) xvs_max = max(max(xvals[i]), xvs_max) plt.ylim([0.0, 1.0]) plt.xlim([0, xvs_max]) + plt.ylabel('CDF') + plt.xlabel('InitializationInterval') + plt.legend() plt.savefig('cdfs.png') if __name__ == 
"__main__": parser = argparse.ArgumentParser() + # Alternate between files and names. parser.add_argument('files', nargs='+') args = parser.parse_args() datas = [] + names = [] + name = False for file in args.files: - data = load_cdf_from_file(file) - datas.append(data) + if name: + names.append(file) + else: + data = load_cdf_from_file(file) + datas.append(data) + name = not name - plot_datas(datas) + plot_datas(datas, names) diff --git a/examples/cgra/relative_placement_model.py b/examples/cgra/relative_placement_model.py index 7cb22545a..415974724 100644 --- a/examples/cgra/relative_placement_model.py +++ b/examples/cgra/relative_placement_model.py @@ -3,7 +3,6 @@ from compiler_gym.envs.compiler_env import CompilerEnv from compiler_gym.wrappers import TimeLimit import compiler_gym -import model from ray import tune from ray.rllib.agents.ppo import PPOTrainer import ray @@ -11,18 +10,11 @@ from itertools import islice import argparse -class RelativePlacementModel(model.Model): - def __init__(self): - super().__init__() - - def get_action(observations): - return super().get_action() - def make_env() -> compiler_gym.envs.CompilerEnv: env = compiler_gym.make( "relative-cgra-v0", observation_space="RLMapObservations", - reward_space="II", + reward_space="InitializationInterval", action_space="move", benchmark='dfg_10/0' # I think this gets overwritten in the running loop. ) @@ -45,7 +37,7 @@ def run_agent_on_benchmarks(bmarks): action = int(agent.compute_action(observation)) print(type(action)) observation, reward, done, _ = env.step(action) - # Just append the last reward, because that is the II. (or a large -ve noting + # Just append the last reward, because that is the InitializationInterval. 
(or a large -ve noting # failure) rewards.append(reward) print ("Exectuted ", i, "th benchmark of", len(bmarks)) @@ -55,12 +47,14 @@ def run_agent_on_benchmarks(bmarks): parser = argparse.ArgumentParser(description="Run a recreation of the RLMap tool.") parser.add_argument('--train', dest='train', default=False, action='store_true') parser.add_argument('--test', dest='test', default=None) + parser.add_argument('--number', dest='number', default=50, help='Number of benchmarks to run test on.', type=int) + parser.add_argument('--train-size', dest='train_size', default=500, help='Number of benchmarks to train on', type=int) args = parser.parse_args() with make_env() as env: bench = env.datasets['dfg_10'] - train_benchmarks = list(islice(bench.benchmarks(), 650)) - train_benchmarks, val_benchmarks, test_benchmarks = train_benchmarks[:500], train_benchmarks[500:550], train_benchmarks[550:650] + train_benchmarks = list(islice(bench.benchmarks(), args.train_size + 50 + args.number)) + train_benchmarks, val_benchmarks, test_benchmarks = train_benchmarks[:args.train_size], train_benchmarks[args.train_size:args.train_size + 50], train_benchmarks[550:550 + args.number] print("Number of benchmarks for training: ", len(train_benchmarks)) print("Number of benchmarks for vlaidation: ", len(val_benchmarks)) @@ -109,7 +103,7 @@ def make_training_env(*args) -> compiler_gym.envs.CompilerEnv: ) agent.restore(checkpoint) - val_rewards = run_agent_on_benchmarks(val_benchmarks) + val_rewards = run_agent_on_benchmarks(test_benchmarks) plot_results(val_rewards) else: diff --git a/examples/cgra/relative_placement_output/.gitignore b/examples/cgra/relative_placement_output/.gitignore new file mode 100644 index 000000000..c585e1938 --- /dev/null +++ b/examples/cgra/relative_placement_output/.gitignore @@ -0,0 +1 @@ +out \ No newline at end of file diff --git a/examples/cgra/relative_placement_output/relative_placement_score_extractor.sh 
b/examples/cgra/relative_placement_output/relative_placement_score_extractor.sh new file mode 100755 index 000000000..e7c9d913c --- /dev/null +++ b/examples/cgra/relative_placement_output/relative_placement_score_extractor.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +typeset -a results +if [[ $# -ne 2 ]]; then + echo "Usage $0 " +fi + +output=$2 +input=$1 +if [[ -f $output ]]; then + echo "Output $output already exists (will be overwritten, please manually delete)" + exit 1 +fi +results=( $(grep -e 'Exectuted' -B 1 $input | grep -e 'Computing Reward' | cut -f7 -d' ') ) + +for r in ${results[@]}; do + echo "$r, " >> $output +done \ No newline at end of file diff --git a/examples/cgra/run_ga_relative_placement.sh b/examples/cgra/run_ga_relative_placement.sh index 80f0fd10c..f946c175b 100755 --- a/examples/cgra/run_ga_relative_placement.sh +++ b/examples/cgra/run_ga_relative_placement.sh @@ -1,6 +1,5 @@ #!/bin/bash -echo "Starting iteration $index" rm -rf ga_output mkdir -p ga_output parallel 'echo "Starting iter {}"; python ga.py --nomp --max_cands 100 --env=cgra-v0 --benchmark=dfg_10/{} --reward=II &> ga_output/out_{}' ::: $(seq 0 10000) diff --git a/examples/cgra/run_relative_placement.sh b/examples/cgra/run_relative_placement.sh new file mode 100755 index 000000000..39c43509f --- /dev/null +++ b/examples/cgra/run_relative_placement.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +if [[ $# -ne 1 ]]; then + echo "Usage: $0 " + echo "Use the --train flag to train (on the python script)" + exit 1 +fi + +rm -f relative_placement_output/out +mkdir -p relative_placement_output +echo "Starting program" +python relative_placement_model.py --number 10000 --test $1 &> relative_placement_output/out diff --git a/setup.py b/setup.py index ee9c9fe57..83e07779f 100644 --- a/setup.py +++ b/setup.py @@ -122,6 +122,7 @@ def wheel_filename(**kwargs): "compiler_gym.envs.gcc.datasets", "compiler_gym.envs.gcc.service", "compiler_gym.envs.gcc", + "compiler_gym.envs.cgra.architectures", 
"compiler_gym.envs.cgra.datasets", "compiler_gym.envs.cgra.service", "compiler_gym.envs.cgra", @@ -149,7 +150,6 @@ def wheel_filename(**kwargs): "package_data": { "compiler_gym": [ "envs/gcc/service/compiler_gym-gcc-service", - # "envs/cgra/service/compiler_gym-cgra-service", "envs/cgra/service/*", "envs/loop_tool/service/compiler_gym-loop_tool-service", "third_party/csmith/csmith/bin/csmith",