diff --git a/compiler_gym/envs/BUILD b/compiler_gym/envs/BUILD index 6d41958eb..707a36399 100644 --- a/compiler_gym/envs/BUILD +++ b/compiler_gym/envs/BUILD @@ -11,6 +11,7 @@ py_library( deps = [ ":compiler_env", "//compiler_gym:config", + "//compiler_gym/envs/cgra", "//compiler_gym/envs/gcc", "//compiler_gym/envs/llvm", "//compiler_gym/envs/loop_tool", diff --git a/compiler_gym/envs/CMakeLists.txt b/compiler_gym/envs/CMakeLists.txt index 79a115bf1..85acbb201 100644 --- a/compiler_gym/envs/CMakeLists.txt +++ b/compiler_gym/envs/CMakeLists.txt @@ -8,6 +8,7 @@ cg_add_all_subdirs() set(ENVS_DEPS ::compiler_env compiler_gym::envs::gcc::gcc + compiler_gym::envs::cgra::cgra compiler_gym::envs::loop_tool::loop_tool ) if(COMPILER_GYM_ENABLE_LLVM_ENV) diff --git a/compiler_gym/envs/__init__.py b/compiler_gym/envs/__init__.py index f8b8829df..4a85057a0 100644 --- a/compiler_gym/envs/__init__.py +++ b/compiler_gym/envs/__init__.py @@ -5,6 +5,7 @@ from compiler_gym import config from compiler_gym.envs.compiler_env import CompilerEnv from compiler_gym.envs.gcc import GccEnv +from compiler_gym.envs.cgra import CgraEnv if config.enable_llvm_env: from compiler_gym.envs.llvm.llvm_env import LlvmEnv # noqa: F401 @@ -18,6 +19,7 @@ "COMPILER_GYM_ENVS", "CompilerEnv", "GccEnv", + "CgraEnv", "LoopToolEnv", ] diff --git a/compiler_gym/envs/cgra/BUILD b/compiler_gym/envs/cgra/BUILD new file mode 100644 index 000000000..b6c28f62f --- /dev/null +++ b/compiler_gym/envs/cgra/BUILD @@ -0,0 +1,28 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+load("@rules_python//python:defs.bzl", "py_library")
+
+py_library(
+    name = "cgra",
+    srcs = [
+        "__init__.py",
+        "cgra_rewards.py",
+        "Operations.py",
+        "compile_settings.py",
+        "DFG.py"
+    ],
+    data = [
+        "//compiler_gym/envs/cgra/service",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//compiler_gym/envs/cgra/datasets",
+        "//compiler_gym/envs/cgra/architectures",
+        "//compiler_gym/errors",
+        "//compiler_gym/service:client_service_compiler_env",
+        "//compiler_gym/service/runtime",  # Implicit dependency of service.
+        "//compiler_gym/util"
+    ],
+)
\ No newline at end of file
diff --git a/compiler_gym/envs/cgra/CMakeLists.txt b/compiler_gym/envs/cgra/CMakeLists.txt
new file mode 100644
index 000000000..7a522146c
--- /dev/null
+++ b/compiler_gym/envs/cgra/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+cg_add_all_subdirs()
+
+cg_py_library(
+  NAME
+    cgra
+  SRCS
+    "__init__.py"
+    "cgra_rewards.py"
+    "Operations.py"
+    "compile_settings.py"
+    "DFG.py"
+  DATA
+    compiler_gym::envs::cgra::service::service
+  DEPS
+    compiler_gym::service::client_service_compiler_env
+    compiler_gym::envs::cgra::datasets::datasets
+    compiler_gym::envs::cgra::architectures::architectures
+    compiler_gym::errors::errors
+    compiler_gym::service::runtime::runtime
+    compiler_gym::util::util
+  PUBLIC
+)
diff --git a/compiler_gym/envs/cgra/DFG.py b/compiler_gym/envs/cgra/DFG.py
new file mode 100644
index 000000000..ceb26db92
--- /dev/null
+++ b/compiler_gym/envs/cgra/DFG.py
@@ -0,0 +1,227 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
import json
import logging
import random
from collections import deque
from pathlib import Path
from typing import List, Optional

logger = logging.getLogger(__name__)


class Operation(object):
    """A primitive operation that a DFG node can perform.

    Attributes:
        name: Mnemonic used to look the operation up (e.g. ``"add"``).
        inputs: Number of operands the operation consumes.
        outputs: Number of values the operation produces.
        latency: Cycle count reported by :func:`operation_latency`.
    """

    def __init__(self, name, inputs, outputs, latency):
        self.name = name
        self.inputs = inputs
        self.outputs = outputs
        self.latency = latency

    def __str__(self):
        return self.name


Operations = [
    # TODO(jcw) --- should we support more operations as heterogeneous?
    # IMO most of the other things that are scheduled are
    # pretty vacuous, although we could explore supporting those.
    # Operation is: name, inputs, outputs, cycles.
    Operation("add", 2, 1, 1),
    Operation("mul", 2, 1, 1),
    Operation("sub", 2, 1, 1),
    Operation("div", 2, 1, 1),
    Operation("and", 2, 1, 1),
    Operation("or", 2, 1, 1),
    Operation("xor", 2, 1, 1),
    Operation("fmul", 2, 1, 1),
    Operation("fsub", 2, 1, 1),
    Operation("fadd", 2, 1, 1),
    Operation("fdiv", 2, 1, 1),
    Operation("rsh", 2, 1, 1),
    Operation("lsh", 2, 1, 1),
    Operation("load", 1, 1, 1),
    Operation("store", 1, 1, 1),
    Operation("const", 0, 1, 1),
    Operation("noop", 0, 0, 1),
]


def operation_index_of(op):
    """Return the position in ``Operations`` of the entry named like ``op``.

    :param op: Either an :class:`Operation` or a plain operation-name string.
    :return: The index of the matching entry, or -1 if there is none.
    """
    wanted = op if isinstance(op, str) else op.name
    for index, candidate in enumerate(Operations):
        if candidate.name == wanted:
            return index
    return -1


def operation_latency(op):
    """Return the latency (in cycles) recorded on ``op``."""
    # TODO(jcw) --- model latency --- or at least expose this
    # to a configuration.
    return op.latency


def operation_from_name(n):
    """Return the entry of ``Operations`` whose name is ``n``.

    :param n: An operation-name string (an :class:`Operation` is accepted too).
    :raises ValueError: If no operation has that name. Without this check the
        -1 "not found" index would silently select the last table entry.
    """
    index = operation_index_of(n)
    if index < 0:
        raise ValueError("Unknown operation: " + str(n))
    return Operations[index]


class Edge(object):
    """An edge of the DFG; only its type string (e.g. 'data') is stored."""

    def __init__(self, type):
        self.type = type


class Node(object):
    """A DFG node: a unique name plus the :class:`Operation` it performs."""

    def __init__(self, name, operation):
        self.name = name
        self.operation = operation

    def __str__(self):
        return "Node with name " + self.name + " and op " + str(self.operation)


class DFG(object):
    """A data-flow graph stored as a forward adjacency list.

    Attributes:
        nodes: Maps node name -> :class:`Node`.
        node_names: Node names in insertion order.
        edges: One :class:`Edge` per connection (endpoints are not stored here).
        adj: Maps node name -> list of successor node names.
        entry_points: Names of the nodes a traversal starts from.
    """

    def __init__(self, working_directory: Optional[Path] = None, from_json: Optional[Path] = None, from_text: Optional[str] = None):
        # Start from an empty graph so that a bare DFG() is usable (and
        # printable) before a caller fills the attributes in by hand.
        self.nodes = {}
        self.node_names = []
        self.edges = []
        self.adj = {}
        self.entry_points = []

        if from_json is not None:
            self.load_dfg_from_json(from_json)
        elif from_text is not None:
            self.load_dfg_from_text(from_text)

    def __str__(self):
        res = "nodes are: " + str(self.nodes) + " and edges are " + str(self.adj)
        return res

    def load_dfg_from_json(self, path):
        """Load the graph from the JSON file at ``path``."""
        with open(path, 'r') as p:
            # json.loads() needs the text, not the file object.
            self.load_dfg_from_text(p.read())

    def load_dfg_from_text(self, text):
        """Replace this graph with the one described by the JSON string ``text``.

        The document must provide 'entry_points', 'nodes' (each with 'name' and
        'operation') and 'edges' (each with 'type', 'from' and 'to').

        :raises ValueError: If a node names an operation that is not in
            ``Operations``.
        """
        f = json.loads(text)
        self.nodes = {}
        self.node_names = []
        self.edges = []
        self.adj = {}
        self.entry_points = f['entry_points']

        # Build the nodes first, so that every edge endpoint already has an
        # adjacency list by the time the edges are processed.
        for node in f['nodes']:
            name = node['name']
            self.nodes[name] = Node(name, operation_from_name(node['operation']))
            self.adj[name] = []
            self.node_names.append(name)

        for edge in f['edges']:
            self.edges.append(Edge(edge['type']))
            self.adj[edge['from']].append(edge['to'])

    # Bit slow this one --- the adjacency list is backwards for it.
    def get_preds(self, node):
        """Return the nodes that have an edge into ``node`` (each at most once)."""
        return [self.nodes[n] for n in self.adj if node.name in self.adj[n]]

    def get_succs(self, node):
        """Return the nodes that ``node`` has an edge to, one entry per edge."""
        return [self.nodes[n] for n in self.adj[node.name]]

    def build_preds_lookup(self):
        """Return a dict mapping every node name to its list of predecessors."""
        return {n: self.get_preds(self.nodes[n]) for n in self.node_names}

    # TODO(jcw) -- fix this, because for a graph with multiple entry nodes,
    # this doesn't actually give the right answer :)
    # (should do in most cases)
    def bfs(self):
        """Yield the nodes in layers, starting from the entry points.

        Whenever the queue runs dry, every not-yet-visited node whose
        predecessors have all been visited is queued as the next layer.

        :raises ValueError: If nodes remain but none of them is ready, i.e. the
            remaining nodes depend on each other cyclically.
        """
        logger.debug("Doing BFS, entry points are %s", self.entry_points)
        preds_lookup = self.build_preds_lookup()
        seen = set()
        frontier = deque(self.entry_points)

        while frontier:
            head = frontier.popleft()
            if head not in seen:
                seen.add(head)
                yield self.nodes[head]

            if frontier or len(seen) == len(self.node_names):
                continue

            # The queue is empty but nodes remain: queue the next layer.
            ready = [
                name
                for name in self.node_names
                if name not in seen
                and all(p.name in seen for p in preds_lookup[name])
            ]
            if not ready:
                # TODO(jcw) -- Fix this, as support for cyclical DFGs
                # is important to be able to support loops with
                # cross-loop dependencies.
                raise ValueError(
                    "Cyclical DFG --- Impossible to do a true BFS. DFG is " + str(self)
                )
            frontier.extend(ready)


def generate_DFG(operations: List[Operation], size, seed=0):
    """Generate a random test DFG with ``size`` nodes using 'operations'.

    A private random generator seeded with ``seed`` is used, so the same
    arguments always give the same graph and the module-level ``random`` state
    is left untouched.

    :param operations: Candidate operations; at least one must take no inputs.
    :param size: Number of nodes to create (at least 1).
    :param seed: Seed selecting which graph is generated.
    :return: The generated :class:`DFG`. New nodes only take inputs from nodes
        created before them, so the graph has no cycles.
    :raises ValueError: If ``size`` is below 1 or no zero-input operation exists.
    """
    if size < 1:
        raise ValueError("size must be at least 1, got " + str(size))
    # Jump-start this --- in reality, these can be
    # phi nodes coming from previous tiers of the loop,
    # or variables coming from outside the loop.
    start_options = [op for op in operations if op.inputs == 0]
    if not start_options:
        raise ValueError("operations must contain at least one zero-input operation")

    rng = random.Random(seed)
    # Start with some 0-input ops:
    start_ops = rng.randint(1, min(size, 3))
    logger.debug("Generating DFG with %d starting nodes", start_ops)

    node_number = 0
    entry_points = []
    nodes = {}
    node_names = []
    nodes_list = []
    edges = []
    adj = {}
    # Keep track of variables that we should probably use somewhere.
    unused_outputs = deque()

    for _ in range(start_ops):
        name = "node" + str(node_number)
        node_number += 1
        n = Node(name, rng.choice(start_options))

        node_names.append(name)
        nodes[name] = n
        nodes_list.append(n)
        entry_points.append(name)
        unused_outputs.append(n)
        adj[name] = []

    while len(nodes) < size:
        # Generate a new node.
        operation = rng.choice(operations)
        name = "node" + str(node_number)
        node_names.append(name)
        node_number += 1

        # Get inputs for this:
        inputs = []
        while len(inputs) < operation.inputs:
            # Select random nodes: biased towards the unused ones. The random
            # draw happens first on purpose so the sequence of draws (and hence
            # the graph produced for a given seed) does not depend on whether
            # any unused outputs are left.
            if rng.randint(0, 10) > 6 and unused_outputs:
                inputs.append(unused_outputs.popleft())
            else:
                inputs.append(rng.choice(nodes_list))

        # If the node has no arguments, then we should add it
        # as an entry point. --- todo(jcw) --- should we just skip
        # this to avoid creating graphs with too many constant loads?
        if operation.inputs == 0:
            entry_points.append(name)

        # Now create the edges. Edge objects carry no endpoints; the
        # connectivity lives in the adjacency list.
        for inp in inputs:
            edges.append(Edge('data'))
            adj[inp.name].append(name)

        this_node = Node(name, operation)
        nodes[name] = this_node
        nodes_list.append(this_node)
        unused_outputs.append(this_node)
        adj[name] = []

    res = DFG()
    res.adj = adj
    res.nodes = nodes
    res.entry_points = entry_points
    res.edges = edges
    res.node_names = node_names
    logger.debug("Generated DFG nodes: %s", res.nodes)

    return res
+"""Register the CGRA compiler environments ("cgra-v0" and "relative-cgra-v0")."""
+from pathlib import Path
+
+from compiler_gym.envs.cgra.DFG import DFG
+from compiler_gym.envs.cgra.service.cgra_service import Schedule, CGRA
+from compiler_gym.envs.cgra.service.cgra_env import CgraEnv
+from compiler_gym.envs.cgra.service.relative_cgra_env import RelativeCgraEnv
+from compiler_gym.util.registration import register
+from compiler_gym.util.runfiles_path import runfiles_path
+
+CGRA_SERVICE_BINARY: Path = runfiles_path(
+    "compiler_gym/envs/cgra/service/compiler_gym-cgra-service"
+)
+RELATIVE_CGRA_SERVICE_BINARY: Path = runfiles_path(
+    "compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service"
+)
+
+register(
+    id="relative-cgra-v0",
+    entry_point="compiler_gym.envs.cgra:RelativeCgraEnv",
+    kwargs={ "service": RELATIVE_CGRA_SERVICE_BINARY },
+)
+
+register(
+    id="cgra-v0",
+    entry_point="compiler_gym.envs.cgra:CgraEnv",
+    kwargs={"service": CGRA_SERVICE_BINARY},
+)
+
+__all__ = ["CgraEnv", "DFG", "CGRA", "Schedule", "RelativeCgraEnv"]
diff --git a/compiler_gym/envs/cgra/architectures/BUILD b/compiler_gym/envs/cgra/architectures/BUILD
new file mode 100644
index 000000000..24a222a6f
--- /dev/null
+++ b/compiler_gym/envs/cgra/architectures/BUILD
@@ -0,0 +1,15 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+load("@rules_python//python:defs.bzl", "py_library")
+
+py_library(
+    name = "architectures",
+    srcs = [
+        "__init__.py",
+        "CGRA.py"
+    ],
+    visibility = ["//visibility:public"]
+)
\ No newline at end of file
diff --git a/compiler_gym/envs/cgra/architectures/CGRA.py b/compiler_gym/envs/cgra/architectures/CGRA.py
new file mode 100644
index 000000000..cedf9e02d
--- /dev/null
+++ b/compiler_gym/envs/cgra/architectures/CGRA.py
@@ -0,0 +1,154 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict, List
+from compiler_gym.envs.cgra.compile_settings import CGRACompileSettings
+
+"""
+This is the most abstract representation of a CGRA ----
+a list of nodes, with an interconnect architecture.
+
+This can be inherited from to make things easier.
+"""
+class CGRA(object):
+    def __init__(self, nodes, noc):
+        self.nodes = nodes
+        self.noc = noc
+        self.dim = len(self.nodes)
+
+    def __str__(self):
+        return "CGRA: (" + (str(self.nodes)) + " nodes)"
+
+    def is_supported(self, node_index, op):
+        # TODO(jcw) -- support heterogeneity; until then every op is accepted everywhere.
+        return True
+
+    def cells_as_list(self):
+        return self.nodes[:]
+
+    def get_neighbour(self, direction, location_from):
+        return self.noc.get_neighbour(direction, location_from)
+
+class DataPath(object):
+    # Keeps track of a path through a NOC: the hops, the cycle it starts on,
+    # and the source node. Keeping track of the source node is important to
+    # allow the same bit of data to share the same path.
+    def __init__(self, source_node, start_cycle, path):
+        self.path = path
+        self.start_cycle = start_cycle
+        self.source_node = source_node
+
+    def __len__(self):
+        return len(self.path)
+
+    def __str__(self):
+        return str(self.path) + ": Starting at " + str(self.start_cycle) + ", Carrying results of " + str(self.source_node)
+
+# Abstract class for a NOC (network on chip); subclasses supply the two raising methods.
+class NOC(object):
+    def __init__(self):
+        pass
+
+    def get_neighbour(self, direction, location):
+        raise NotImplementedError("NOC.get_neighbour must be overridden")  # unlike assert, survives -O
+
+    # Work out the shortest path from from_n to to_n in
+    # the current NoC, starting at cycle 0 and ignoring any schedule.
+    def shortest_path(self, node, from_n, to_n) -> DataPath:
+        return self.shortest_available_path(0, node, from_n, to_n, None)
+
+    def shortest_available_path(self, start_time, node, from_n, to_n, schedule) -> DataPath:
+        raise NotImplementedError("NOC.shortest_available_path must be overridden")  # Abstract Class
+
+# A class representing a NOC (network on chip).
+class DictNOC(NOC):
+    def __init__(self, nodes, neighbours: Dict[str, List[str]]):
+        super().__init__()
+        # A list of all the nodes.
+        self.nodes = nodes
+        # A directed list of one-hop connections between nodes.
+        self.neighbours = neighbours
+
+    # Returns the neighbour within a 3D space. I don't really
+    # know how best to set this up in reality ---- especially if a node
+    # doesn't really have e.g. an 'up' neighbour, but only a 'up and left at
+    # the same time neighbour'. The key constraint currently implemented
+    # here is that only six directions are supported (up, down, north, south, east
+    # west). Returns None when the location has no entry for that direction.
+    def get_neighbour(self, direction, location):
+        ns = self.neighbours[location]
+        index = None
+        # TODO(jcw) --- we need a better way of storing these
+        # so it isn't implicit in the connection --- this implies
+        # that everything that has a 'south' connection must
+        # also have a north connection.
+        if direction == 'north':
+            index = 0
+        elif direction == 'south':
+            index = 1
+        elif direction == 'east':
+            index = 2
+        elif direction == 'west':
+            index = 3
+        elif direction == 'up':
+            index = 4
+        elif direction == 'down':
+            index = 5
+
+        if index is None:
+            # Fail loudly even under `python -O`, where asserts are stripped.
+            raise ValueError("Unknown direction: " + str(direction))
+
+        if index < len(ns):
+            # The list may hold fewer than six entries; shorter lists fall through to None.
+            return ns[index]
+        else:
+            return None
+
+    # Returns the shortest DataPath from from_n to to_n, or None if there is none.
+    def shortest_available_path(self, start_time, source_dfg_node, from_n, to_n, schedule) -> DataPath:
+        # Breadth-first search. `seen` holds every node that has already been
+        # queued, so each node is expanded once and cyclic NoCs terminate.
+        seen = {from_n}
+        # Keep track of node and path so far.
+        # Invariant: this is sorted by shortest
+        # path.
+        to_see = [(from_n, [])]
+
+        while len(to_see) > 0:
+            n, path_to = to_see[0]
+            to_see = to_see[1:]
+
+            if n == to_n:
+                # Found the path. By invariant, this is the shortest
+                # path.
+                return DataPath(source_dfg_node, start_time, path_to)
+
+            nexts = self.neighbours[n]
+            for node in nexts:
+                if node in seen:
+                    continue
+                curr_time = start_time + len(path_to)
+                if schedule is not None:
+                    if schedule.is_occupied(source_dfg_node, curr_time, (n, node)):
+                        # Can't use this as a path if it's currently
+                        # occupied. The node is not marked as seen here, so
+                        # another (free) link may still reach it later.
+                        # TODO(jcw) --- Add support for buffered delays.
+                        if CGRACompileSettings['IntroduceRequiredDelays']:
+                            continue
+                        else:
+                            if CGRACompileSettings['DebugShortestPath']:
+                                print("Shortest Path failued due to occupied slot")
+                            return None
+                # This is BFS, so everything must be within
+                # one hop of the current search. Therefore
+                # this is the longest one, and can go at the back.
+                seen.add(node)
+                to_see.append((node, path_to + [(source_dfg_node, n, node)]))
+        # No path between nodes.
+        if CGRACompileSettings['DebugShortestPath']:
+            print("Shortest Path failued due to no path found")
+        return None
diff --git a/compiler_gym/envs/cgra/architectures/CMakeLists.txt b/compiler_gym/envs/cgra/architectures/CMakeLists.txt
new file mode 100644
index 000000000..857c81bc3
--- /dev/null
+++ b/compiler_gym/envs/cgra/architectures/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+cg_add_all_subdirs()
+
+cg_py_library(
+  NAME
+    architectures
+  SRCS
+    "__init__.py"
+    "CGRA.py"
+  PUBLIC
+)
diff --git a/compiler_gym/envs/cgra/architectures/__init__.py b/compiler_gym/envs/cgra/architectures/__init__.py
new file mode 100644
index 000000000..7ac3f01f5
--- /dev/null
+++ b/compiler_gym/envs/cgra/architectures/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
\ No newline at end of file
diff --git a/compiler_gym/envs/cgra/cgra_rewards.py b/compiler_gym/envs/cgra/cgra_rewards.py
new file mode 100644
index 000000000..5e911ca7a
--- /dev/null
+++ b/compiler_gym/envs/cgra/cgra_rewards.py
@@ -0,0 +1,71 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from compiler_gym.spaces import Reward
+
+class IntermediateInitializationIntervalReward(Reward):
+    """Reward computed on every update from the "InitializationInterval"
+    observation: -1.0 when it is None, otherwise -(observation) - 0.1.
+    """
+
+    def __init__(self):
+        super().__init__(
+            name="InitializationInterval",
+            observation_spaces=["InitializationInterval"],
+            default_value=0,
+            default_negates_returns=True,
+            deterministic=True,
+            platform_dependent=True,
+        )
+        pass
+
+    def reset(self, benchmark: str, observation_view):
+        del benchmark  # unused
+
+    def update(self, action, observations, observation_view):
+        del action
+        del observation_view
+
+        print("Computing Reward: got InitializationInterval of ", observations[0])
+        if observations[0] is None:
+            # If we just failed to generate a valid schedule altogether,
+            # return a punishment. Not 100% sure what this punishment should
+            # be though.
+            return -1.0
+        # Add a constant negative reward for not figuring it out?
+        return -float(observations[0]) - 0.1
+
+
+"""
+For algorithms where a 'right' answer is quick to arrive at,
+the intermediate rewards are less important.
+"""
+class FinalInitializationIntervalReward(Reward):
+    def __init__(self):
+        super().__init__(
+            name='InitializationInterval',
+            observation_spaces=['InitializationInterval', 'Done'],
+            default_value=0,
+            default_negates_returns=True,
+            deterministic=True,
+            platform_dependent=True
+        )
+
+    def reset(self, benchmark: str, observation_view):
+        del benchmark
+
+    def update(self, action, observations, observation_view):
+        del action
+        del observation_view
+
+        print ("Computing Reward: get InitializationInterval of ", observations[0])
+        print ("Got finished: ", observations[1])
+
+        if observations[0] is None:
+            return -0.1
+        if observations[1]:
+            return -float(observations[0]) - 0.1
+        else:
+            return -0.1
\ No newline at end of file
diff --git a/compiler_gym/envs/cgra/compile_settings.py b/compiler_gym/envs/cgra/compile_settings.py
new file mode 100644
index 000000000..8ef350602
--- /dev/null
+++ b/compiler_gym/envs/cgra/compile_settings.py
@@ -0,0 +1,67 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+CGRACompileSettings = {
+    # When this is set, the scheduler will take schedules
+    # that don't account for delays appropriately, and try
+    # to stretch them out to account for delays correctly.
+    # When this is false, the compiler will just reject
+    # such invalid schedules.
+    # (when set, something like x + (y * z), scheduled
+    # as +: PE0 on cycle 0, *: PE1 on cycle 0 is valid).
+    "IntroduceRequiredDelays": False,
+
+    # How much buffering to assume each PE has. (i.e.,
+    # how many operands can be waiting at that node?)
+    # Set to 0 for infinite buffering, which seems
+    # to be a fairly common assumption in literature,
+    # although is obviously bogus in real life.
+    "BufferLimits": 0,
+
+    # The relative placement algorithm relies on an
+    # initial placement of the DFG nodes.
+    # There are several options:
+    # random: uses truly random node placement. This
+    # seems to perform poorly sometimes as it relies
+    # on the agent to untangle the placement without
+    # intermediate rewards --- it is a challenging
+    # environment for an agent, although it seems to work
+    # OK for a GA approach.
+    # first_avail: This uses the first valid slot
+    # for every node ordering. Under the current
+    # scheme, this is guaranteed to work (I think)
+    # as we have support for infinite buffering --- however
+    # with infinite buffering disabled, this can walk itself
+    # into a hole.
+    "InitialPlacementMode": 'first_avail',
+
+    # If there is more gap between operations than required to transmit
+    # the operands, should we buffer before or after the transmission?
+    # Buffering after is more intuitive, but can lead to (rare) situations where
+    # the nth_avail assignment runs itself into a hole and can't generate
+    # an assignment.
+    # Options are before_transmit and after_transmit.
+    "BufferingMode": "before_transmit",
+
+    # These are debug flags. Done this way because various different
+    # frontends use this, and redefining all the flags in each of them
+    # would be tedious.
+    "DebugGetInitializationInterval": True, # Debug the Schedule:get_InitializationInterval() function.
+    "DebugGetValidSlots": True, # Debug the InternalSchedule:get_valid_slots function
+    "DebugShortestPath": True # Debug the DictNOC:shortest_available_path function
+}
+
+# These are some settings for the relative placement algorithm.
+RelativePlacementSettings = {
+    # Allow swaps that cause invalid states. This may allow a clever agent
+    # to perform well, but for less clever agents can result in a lot
+    # of failed compilations. (e.g., for a GA agent, this should
+    # probably be true, as that can handle lots of wrong compilations)
+    'AllowInvalidIntermediateSchedules': False,
+
+    # Number of times to iterate over the placement algorithm.
+    # We do Iterations * DFG Nodes iterations.
+    'Iterations': 100,
+}
\ No newline at end of file
diff --git a/compiler_gym/envs/cgra/datasets/BUILD b/compiler_gym/envs/cgra/datasets/BUILD
new file mode 100644
index 000000000..028f1a03d
--- /dev/null
+++ b/compiler_gym/envs/cgra/datasets/BUILD
@@ -0,0 +1,20 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+load("@rules_python//python:defs.bzl", "py_library")
+
+py_library(
+    name = "datasets",
+    srcs = [
+        "__init__.py",
+        "dfg_bench.py"
+    ],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//compiler_gym/datasets",
+        "//compiler_gym/service/proto",
+        "//compiler_gym/util",
+    ]
+)
\ No newline at end of file
diff --git a/compiler_gym/envs/cgra/datasets/CMakeLists.txt b/compiler_gym/envs/cgra/datasets/CMakeLists.txt
new file mode 100644
index 000000000..1dde7f1a4
--- /dev/null
+++ b/compiler_gym/envs/cgra/datasets/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+cg_add_all_subdirs()
+
+cg_py_library(
+  NAME
+    datasets
+  SRCS
+    "__init__.py"
+    "dfg_bench.py"
+  DEPS
+    compiler_gym::datasets::datasets
+    compiler_gym::service::proto::proto
+    compiler_gym::util::util
+  PUBLIC
+)
diff --git a/compiler_gym/envs/cgra/datasets/__init__.py b/compiler_gym/envs/cgra/datasets/__init__.py
new file mode 100644
index 000000000..40992c67d
--- /dev/null
+++ b/compiler_gym/envs/cgra/datasets/__init__.py
@@ -0,0 +1,45 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from functools import lru_cache
+from pathlib import Path
+from typing import Iterable, List, Optional, Union
+
+from compiler_gym.datasets import Dataset
+from compiler_gym.envs.cgra.datasets.dfg_bench import GeneratedDFGs, GeneratedDFGs10, GeneratedDFGs15, GeneratedDFGs20, GeneratedDFGs5
+from compiler_gym.util.runfiles_path import site_data_path
+
+
+def _get_cgra_datasets(
+    site_data_base: Optional[Path] = None
+) -> Iterable[Dataset]:
+    site_data_base = site_data_base or site_data_path("cgra-v0")
+
+    yield GeneratedDFGs5(site_data_base=site_data_base)
+    yield GeneratedDFGs10(site_data_base=site_data_base)
+    yield GeneratedDFGs15(site_data_base=site_data_base)
+    yield GeneratedDFGs20(site_data_base=site_data_base)
+
+
+@lru_cache(maxsize=16)
+def get_cgra_datasets(
+    site_data_base: Optional[Path] = None
+) -> List[Dataset]:
+    """Instantiate the builtin cgra datasets.
+
+    :param site_data_base: The root of the site data path.
+
+    :return: A list of :class:`Dataset
+        <compiler_gym.datasets.Dataset>` instances.
+    """
+    return list(_get_cgra_datasets(site_data_base))
+
+
+__all__ = [
+    "GeneratedDFGs5",
+    "GeneratedDFGs10",
+    "GeneratedDFGs15",
+    "GeneratedDFGs20",
+    "get_cgra_datasets",
+]
diff --git a/compiler_gym/envs/cgra/datasets/dfg_bench.py b/compiler_gym/envs/cgra/datasets/dfg_bench.py
new file mode 100644
index 000000000..13749f53c
--- /dev/null
+++ b/compiler_gym/envs/cgra/datasets/dfg_bench.py
@@ -0,0 +1,84 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+import enum
+import io
+import logging
+import os
+import re
+import shutil
+import subprocess
+import sys
+import tarfile
+import tempfile
+import numpy as np
+from collections import defaultdict
+from pathlib import Path
+from threading import Lock
+from typing import Callable, Dict, Iterable, List, NamedTuple, Optional
+
+import fasteners
+from compiler_gym.datasets.dataset import Dataset
+
+from compiler_gym.datasets import Benchmark, TarDatasetWithManifest
+from compiler_gym.datasets.benchmark import ValidationCallback
+from compiler_gym.datasets.uri import BenchmarkUri
+from compiler_gym.envs.llvm import llvm_benchmark
+from compiler_gym.errors import ValidationError
+from compiler_gym.service.proto import BenchmarkDynamicConfig, Command
+from compiler_gym.third_party import llvm
+from compiler_gym.util.commands import Popen
+from compiler_gym.util.download import download
+from compiler_gym.util.runfiles_path import cache_path, site_data_path
+from compiler_gym.util.timer import Timer
+
+from compiler_gym.envs.cgra.Operations import Operations
+from compiler_gym.envs.cgra.DFG import generate_DFG
+import pickle
+
+class GeneratedDFGs(Dataset):  # Unbounded dataset: benchmark <index> is the DFG generated with seed <index>.
+    def __init__(self, size: int, site_data_base=None):
+        super().__init__(
+            "benchmark://dfg_" + str(size),
+            "A dataset of automatically generated DFGs of a particular size.",
+            "None",
+            site_data_base=site_data_base
+        )
+
+        self.dfg_size = size
+
+    def benchmark_uris_without_index(self):
+        return "benchmark://dfg_" + str(self.dfg_size) + "/"
+
+    def benchmark_uris(self) -> Iterable[str]:
+        ind = 0
+        while True:  # Never terminates on its own: every non-negative index is a valid benchmark.
+            yield (self.benchmark_uris_without_index() + str(ind))
+            ind += 1
+
+    def benchmark_from_index(self, dfg_index, uri):
+        dfg = generate_DFG(Operations, self.dfg_size, seed=dfg_index)
+
+        return Benchmark.from_file_contents(uri=uri, data=pickle.dumps(dfg))
+
+    def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
+        dfg_index = int(uri.path[1:])  # uri.path is "/<index>"; drop the leading slash.
+        return self.benchmark_from_index(dfg_index, uri)
+
+    def _random_benchmark(self, random_state: np.random.Generator) -> Benchmark:
+        index = int(random_state.integers(10000000000))  # Generator has no randomint(); int() because the value seeds `random`.
+        return self.benchmark_from_index(index, self.benchmark_uris_without_index() + str(index))
+
+class GeneratedDFGs5(GeneratedDFGs):
+    def __init__(self, site_data_base=None):
+        super().__init__(5, site_data_base)
+class GeneratedDFGs10(GeneratedDFGs):
+    def __init__(self, site_data_base=None):
+        super().__init__(10, site_data_base)
+class GeneratedDFGs15(GeneratedDFGs):
+    def __init__(self, site_data_base=None):
+        super().__init__(15, site_data_base)
+class GeneratedDFGs20(GeneratedDFGs):
+    def __init__(self, site_data_base=None):
+        super().__init__(20, site_data_base)
\ No newline at end of file
diff --git a/compiler_gym/envs/cgra/service/BUILD b/compiler_gym/envs/cgra/service/BUILD
new file mode 100644
index 000000000..d7991242d
--- /dev/null
+++ b/compiler_gym/envs/cgra/service/BUILD
@@ -0,0 +1,17 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+filegroup(
+    name = "service",
+    srcs = [
+        "compiler_gym-cgra-service",
+        "compiler_gym-relative-placement-cgra-service",
+        "cgra_service.py",
+        "cgra_env.py",
+        "relative_cgra_env.py",
+        "relative_placement_service.py",
+    ],
+    visibility = ["//visibility:public"],
+)
\ No newline at end of file
diff --git a/compiler_gym/envs/cgra/service/CMakeLists.txt b/compiler_gym/envs/cgra/service/CMakeLists.txt
new file mode 100644
index 000000000..3537a51e3
--- /dev/null
+++ b/compiler_gym/envs/cgra/service/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
class CgraEnv(ClientServiceCompilerEnv):
    """Client-side environment for the CGRA scheduling service.

    Wires the CGRA datasets and the intermediate initialization-interval
    reward into ``ClientServiceCompilerEnv``.
    """

    def __init__(
        self,
        *args,
        datasets_site_path: Optional[Path] = None,
        benchmark: Optional[Union[str, Benchmark]] = None,
        datasets_set_path: Optional[Path] = None,
        **kwargs,
    ):
        """Construct the environment.

        Args:
            datasets_site_path: Passed to ``get_cgra_datasets`` as
                ``site_data_base``.
            benchmark: Benchmark to use; defaults to ``"dfg_10/1"``.
            datasets_set_path: Accepted for backward compatibility only; it
                is not used.
        """
        super().__init__(
            *args,
            **kwargs,
            benchmark=benchmark or "dfg_10/1",
            datasets=get_cgra_datasets(site_data_base=datasets_site_path),
            rewards=[IntermediateInitializationIntervalReward()],
        )

    def reset(self, reward_space=OptionalArgumentValue.UNCHANGED, *args, **kwargs):
        """Reset the environment and return the initial observation."""
        return super().reset(*args, reward_space=reward_space, **kwargs)

    def render(self, mode="human"):
        """Render the environment.

        In "human" mode a placeholder line is printed and ``None`` is
        returned. Every other mode is delegated to the parent class.
        """
        if mode == "human":
            print("human-visible schedule")
            return None
        # Delegate to the base class: calling self.render() here would
        # recurse without end.
        return super().render(mode)
# This is just a wrapper around an actual object that
# is a schedule --- see the Schedule object for something
# that can be interacted with.
class InternalSchedule(object):
    """Time x location grid recording which DFG node occupies each PE.

    ``self.operations[t][loc]`` holds the node running on location ``loc``
    at timestep ``t`` or ``None`` when the slot is free. Every timestep has
    ``cgra.dim + 1`` locations and the grid grows on demand.
    """

    def __init__(self, cgra, dfg):
        self.dfg = dfg  # For tensorization.
        self.cgra = cgra
        self.operations = self.initialize_schedule()

    def get_valid_slots(self, dependencies, latency, noc_schedule, buffer_schedule):
        """Lazily enumerate the slots in which an operation could be placed.

        Args:
            dependencies: Already-scheduled nodes whose results are consumed.
            latency: Number of timesteps the operation to place occupies.
            noc_schedule: Current NOC reservations; only clones are modified.
            buffer_schedule: Current buffer reservations; only clones are
                modified.

        Yields:
            ``(timeslot, location, paths, buffer_slots)`` where ``paths`` is
            the list of data paths reserved for the operands and
            ``buffer_slots`` is a list of ``(arrival_time, execution_time)``
            pairs buffered at ``location``.

        Raises:
            AssertionError: If a dependency is unscheduled, or if more than
                1000 candidate slots in a row were rejected.
        """
        debug = CGRACompileSettings['DebugGetValidSlots']

        # First, get the finished location of each of the dependencies.
        min_time = 0
        dep_locations = []
        for dep in dependencies:
            dep_time, dep_loc = self.get_location(dep)
            # The operation must be scheduled --- it's a dependency!
            # (support for loops needed.)
            if dep_time is None:
                raise AssertionError(
                    "Operation dependency " + str(dep) + " has not been "
                    "scheduled (cross-dependencies not supported)."
                )
            # Keep the dependency's latency in its own variable: the
            # `latency` argument describes the operation being placed and
            # is still needed for the occupancy check below.
            dep_latency = self.get_latency(dep_time, dep_loc)

            min_time = max(min_time, dep_time + dep_latency)
            dep_locations.append((dep, dep_time, dep_loc, dep_latency))

        # Starting from the min time, iterate over every unoccupied
        # tile and see if it's reachable by all the dependencies.
        t = min_time
        # Number of candidate slots rejected since the last valid one; used
        # only to turn a probable infinite search into an error.
        tries_since_slot_found = 0
        while True:
            if tries_since_slot_found > 1000:
                raise AssertionError(
                    "It has been more than 1000 slots looked at since we "
                    "found a valid slot --- likely in an infinite loop."
                )
            while t >= len(self.operations):
                # Make sure we aren't past the end of the schedule.
                self.operations.append(self.add_timestep())

            for loc in range(len(self.operations[t])):
                if debug:
                    print("Searchign location ", loc, "at time", t)
                if not self.slots_are_free(loc, t, t + latency):
                    if debug:
                        print("Location was not free.")
                    # Routing is expensive, so skip it when possible.
                    continue

                # Operands must not share routing resources, so the
                # reservations are tried out on clones: nothing has been
                # scheduled for real yet.
                noc_schedule_clone = noc_schedule.clone()
                buffer_schedule_clone = buffer_schedule.clone()
                earliest_execution_time = t
                arrival_times = []
                failed = False
                # Resources reserved in the clones, returned to the caller
                # so they can be committed to a real scheduling state.
                paths = []
                buffer_slots = []

                # TODO: CGRACompileSettings['BufferingMode'] is not honoured
                # yet; buffering always happens at the destination.
                for (dep, dep_time, dep_loc, dep_latency) in dep_locations:
                    if debug:
                        print("Checking dependency from ", dep_time, "and location", dep_loc)

                    finish_time = dep_time + dep_latency
                    path = self.cgra.noc.shortest_available_path(
                        finish_time, dep, dep_loc, loc, noc_schedule_clone
                    )
                    if path is None:
                        if debug:
                            print("Path was not free.")
                        # Couldn't schedule the node here!
                        failed = True
                        break

                    arrival = finish_time + len(path)
                    arrival_times.append(arrival)
                    # Reserve the routing resources in the NOC clone.
                    noc_schedule_clone.occupy_path(path)
                    paths.append(path)
                    earliest_execution_time = max(earliest_execution_time, arrival)

                for arrival_time in arrival_times:
                    # TODO(jcw) --- if the buffers are full, delaying
                    # further is unlikely to solve the problem.
                    reserved = buffer_schedule_clone.occupy_buffer(
                        loc, arrival_time, earliest_execution_time
                    )
                    if debug:
                        print("Trying to reserve buffering space from arrival time ", arrival_time, "...")
                        print("Reserved:", reserved)
                    buffer_slots.append((arrival_time, earliest_execution_time))
                    if not reserved:
                        # Not enough buffering
                        failed = True
                        break

                if not failed:
                    # Everything could be routed to this placement.
                    tries_since_slot_found = 0
                    yield t, loc, paths, buffer_slots
                else:
                    tries_since_slot_found += 1

            t += 1

    def to_rlmap_tensor(self, node, time_window_size=1):
        """Encode the schedule around ``node`` as a flat, fixed-length list.

        For each of the ``time_window_size`` timesteps centred on the
        timestep of ``node`` and for each location, a row of
        ``cgra.dim + 1`` integers is emitted: entry ``i`` is 1 when
        location ``i`` hosts a predecessor of the slot's occupant and 2
        when it hosts a successor (successors win when both apply). Free
        slots and timesteps outside the schedule contribute zeros.
        Neighbours that are not scheduled yet are ignored.

        NOTE(review): ``node`` itself must already be scheduled; otherwise
        the window centre is ``None`` and the arithmetic below fails.
        """
        width = self.cgra.dim + 1
        centre, _ = self.get_location(node)
        # Aim is to be symmetric around the central time window.
        start_time = centre - (time_window_size // 2)
        end_time = centre + ((time_window_size - 1) // 2)

        result_tensor = []
        for t in range(start_time, end_time + 1):
            if t < 0 or t >= len(self.operations):
                # Before the start or past the end of the schedule.
                result_tensor += [0] * (width * width)
                continue

            for loc in range(width):
                occupant = self.operations[t][loc]
                state_vector = [0] * width
                if occupant is not None:
                    for pred in self.dfg.get_preds(occupant):
                        _, pred_loc = self.get_location(pred)
                        if pred_loc is not None:
                            state_vector[pred_loc] = 1
                    for succ in self.dfg.get_succs(occupant):
                        _, succ_loc = self.get_location(succ)
                        # Successors are commonly unscheduled while the
                        # schedule is being built; skip those.
                        if succ_loc is not None:
                            state_vector[succ_loc] = 2
                result_tensor += state_vector

        return result_tensor

    def __str__(self):
        lines = ["Schedule is \n"]
        for t, row in enumerate(self.operations):
            lines.append("time " + str(t) + ": " + str([str(n) for n in row]) + "\n")
        return "".join(lines)

    def locations(self):
        """Yield every location index, ``0 .. cgra.dim`` inclusive."""
        for x in range(self.cgra.dim + 1):
            yield x

    def add_timestep(self):
        """Return a new, entirely free timestep row (it is not appended)."""
        return [None] * (self.cgra.dim + 1)

    def initialize_schedule(self):
        """Return a schedule consisting of a single free timestep."""
        return [self.add_timestep()]

    def get_node(self, optime, oploc):
        """Return the occupant of ``(optime, oploc)``; None past the end."""
        if optime < len(self.operations):
            return self.operations[optime][oploc]
        return None

    # See how long the thing scheduled at (T, X) lasts
    # for --- note that if you pass in T + N, and the op
    # started at T, you'll get true_latency - N.
    def get_latency(self, optime, oploc):
        """Count consecutive timesteps from ``optime`` held by the same node."""
        op = self.get_node(optime, oploc)
        old_op = op
        t = optime

        while op is not None and op == old_op:
            t += 1
            old_op = op
            op = self.get_node(t, oploc)

        return t - optime

    def slots_are_free(self, x, start_time, end_time):
        """Return True if location ``x`` is free for ``[start_time, end_time)``.

        The schedule is extended with empty timesteps as needed.
        """
        for t in range(start_time, end_time):
            # Add more timesteps to the schedule as required.
            while t >= len(self.operations):
                self.operations.append(self.add_timestep())

            # Debug-level logging only: this runs inside the slot search.
            logging.debug(
                "Looking at time %s op location %s (row length %s)",
                t, x, len(self.operations[t]),
            )
            if self.operations[t][x] is not None:
                return False
        return True

    def get_free_time(self, earliest_time, length, loc):
        """Return the first time >= ``earliest_time`` with ``length`` free slots at ``loc``."""
        while not self.slots_are_free(loc, earliest_time, earliest_time + length):
            earliest_time += 1
        return earliest_time

    def set_operation(self, time, loc, node, latency):
        """Place ``node`` at ``loc`` for ``[time, time + latency)``.

        Returns:
            True if the slots were free and are now occupied, else False
            (in which case nothing is modified apart from growing the grid).
        """
        while time + latency >= len(self.operations):
            self.operations.append(self.add_timestep())

        if not self.slots_are_free(loc, time, time + latency):
            return False

        for t in range(time, time + latency):
            self.operations[t][loc] = node
        return True

    def clear_operation(self, time, loc, latency):
        """Blindly free location ``loc`` for ``[time, time + latency)``.

        Raises:
            AssertionError: If the range is empty (``latency`` <= 0).
        """
        while time + latency >= len(self.operations):
            self.operations.append(self.add_timestep())

        cleared = False
        for t in range(time, time + latency):
            cleared = True
            self.operations[t][loc] = None

        # Sanity-check that we actually did something.
        if not cleared:
            raise AssertionError("clear_operation() was asked to clear an empty range")

    def get_location(self, node: "Node"):
        """Return ``(time, location)`` of the first slot whose occupant has
        the same name as ``node``, or ``(None, None)`` if there is none."""
        # TODO -- make a hash table or something more efficient if required.
        for t, row in enumerate(self.operations):
            for x in range(self.cgra.dim + 1):
                occupant = row[x]
                if occupant is not None and occupant.name == node.name:
                    return t, x
        return None, None

    def free_times(self, x):
        """Yield each timestep at which location ``x`` turns from occupied to free."""
        occupied_before = False
        for t in range(len(self.operations)):
            if self.operations[t][x] is not None:
                occupied_before = True
            elif occupied_before:
                # Occupied at the previous timestep, freed at this one.
                occupied_before = False
                yield t

    def has_use(self, x):
        """Return True if location ``x`` is occupied at any timestep."""
        return any(row[x] is not None for row in self.operations)

    def alloc_times(self, x):
        """Yield each timestep at which location ``x`` turns from free to occupied."""
        free_before = True
        for t in range(len(self.operations)):
            if self.operations[t][x] is not None:
                if free_before:
                    free_before = False
                    yield t
            else:
                free_before = True


class BufferSchedule(object):
    """Per-timestep count of values buffered at each location.

    ``self.schedule[t]`` maps a location to the number of values buffered
    there at timestep ``t``.
    """

    def __init__(self):
        self.schedule = []

    def clone(self):
        """Return a copy whose per-timestep dicts are independent."""
        new_sched = BufferSchedule()
        for bufs in self.schedule:
            new_sched.schedule.append(dict(bufs))
        return new_sched

    def occupy_buffer(self, loc, from_time, to_time):
        """Reserve one buffer entry at ``loc`` for ``[from_time, to_time]``.

        Returns:
            False as soon as a timestep exceeds
            ``CGRACompileSettings['BufferLimits']`` (reservations made for
            earlier timesteps are kept), True otherwise.
        """
        for t in range(from_time, to_time + 1):
            while t >= len(self.schedule):
                self.schedule.append({})

            if loc in self.schedule[t]:
                self.schedule[t][loc] += 1
                max_buf = CGRACompileSettings['BufferLimits']
                # if the max buffering is set to 0 or -ve, assume
                # infinite buffering.
                if max_buf > 0 and self.schedule[t][loc] > max_buf:
                    return False
            else:
                self.schedule[t][loc] = 1

        return True
class NOCSchedule(object):
    """Per-timestep reservations of network-on-chip connections.

    ``self.schedule[t]`` maps a connection to the name of the DFG node
    whose value travels over it at timestep ``t``.
    """

    def __init__(self):
        self.schedule = []

    def clone(self):
        """Return a copy whose per-timestep dicts are independent."""
        new_schedule = NOCSchedule()
        for conns in self.schedule:
            new_schedule.schedule.append(dict(conns))
        return new_schedule

    def occupy_path(self, path: "DataPath"):
        """Reserve every hop of ``path``, one hop per cycle from ``path.start_cycle``."""
        cycle = path.start_cycle
        for hop in path.path:
            self.occupy_connection(path.source_node, cycle, hop)
            cycle += 1

    def occupy_connection(self, node: "Node", time: int, connection):
        """Reserve ``connection`` at ``time`` for the value produced by ``node``.

        Re-reserving a connection for the same node is allowed.

        Raises:
            AssertionError: If the connection is held by a different node.
        """
        while time >= len(self.schedule):
            self.schedule.append({})

        owner = self.schedule[time].get(connection)
        if connection in self.schedule[time] and owner != node.name:
            # Can't occupy an already occupied connection.
            raise AssertionError(
                "Tried to occupy connection already occupied by "
                + str(owner) + " with node " + str(node.name)
            )
        self.schedule[time][connection] = node.name

    def is_occupied(self, source_dfg_node: "Node", time, hop):
        """Return True if ``hop`` at ``time`` is held by another node.

        A hop already carrying the value of ``source_dfg_node`` counts as
        free, because the same value can share the connection.
        """
        if time >= len(self.schedule):
            # Not occupied if beyond current schedule
            return False
        if hop not in self.schedule[time]:
            return False

        # Compare against the owner of this hop, not the whole timestep
        # dict (which never equals a node name).
        owner = self.schedule[time][hop]
        if owner == source_dfg_node.name:
            return False  # Technically occupied, but can be shared.
        logging.debug(
            "Slot is occupied with node %s; looking to use it for node %s",
            owner, source_dfg_node.name,
        )
        return True


class Schedule(object):
    """Public wrapper around InternalSchedule for one CGRA and one DFG."""

    def __init__(self, cgra, dfg):
        # The DFG is stored so the schedule can be tensorized, not because
        # this class creates the schedule from it.
        self.dfg = dfg
        self.cgra = cgra

        self.operations = InternalSchedule(cgra, self.dfg)

    def __str__(self):
        return "CGRA:" + str(self.operations)

    def set_operation(self, time, index, node, latency):
        """Place ``node``; see ``InternalSchedule.set_operation``."""
        return self.operations.set_operation(time, index, node, latency)

    def to_rlmap_tensor(self, node, time_window_size=1):
        """Return the RLMap tensor centred on ``node``.

        The tensor is node dependent because operations can be
        time-multiplexed onto the same location.
        """
        return self.operations.to_rlmap_tensor(node, time_window_size=time_window_size)

    def swap(self, origin_time, origin_index, target_time, target_index, dfg, allow_invalid=True):
        """Move the operation at the origin slot to the target slot.

        Swap points are the starting points of operations, which may span
        several timesteps.

        Returns:
            True if the operation was moved. False if the target window was
            occupied, the target time is negative, or (with
            ``allow_invalid=False``) the move left no valid initialization
            interval and was undone.
        """
        op_latency = self.operations.get_latency(origin_time, origin_index)
        operation_node = self.operations.get_node(origin_time, origin_index)
        # If the target is not clear, the easiest thing to do is a no-op.
        assert target_time is not None
        assert target_index is not None
        assert operation_node is not None

        # NOTE(review): this "check" already writes the node into the
        # target window when it is free, which makes the second
        # set_operation() below a no-op; it also runs before the
        # target_time >= 0 test. Statement order is kept exactly as is.
        target_window_is_clear = self.operations.set_operation(target_time, target_index, operation_node, operation_node.operation.latency)
        # Now do the swap of operations
        if target_window_is_clear and target_time >= 0:  # Don't swap into past!
            print("Doing swap between ", origin_index, 'at', origin_time, 'to', target_index, 'at', target_time, 'with latency', op_latency, "(invalid swaps is allowed is ", allow_invalid, ")")
            InitializationInterval, _ = self.get_InitializationInterval(dfg)
            assert InitializationInterval is not None  # Invariant preserved through the scheduling.
            self.operations.set_operation(target_time, target_index, operation_node, op_latency)
            self.operations.clear_operation(origin_time, origin_index, op_latency)
            if not allow_invalid:
                # Check that this produced a valid schedule.
                # TODO --- make this check more efficient -- we don't have
                # to recompute the whole InitializationInterval.
                InitializationInterval, _ = self.get_InitializationInterval(dfg)
                if InitializationInterval is None:
                    print("Undo the swap!")
                    # Undo the swap
                    self.operations.set_operation(origin_time, origin_index, operation_node, op_latency)
                    self.operations.clear_operation(target_time, target_index, op_latency)
                    InitializationInterval, _ = self.get_InitializationInterval(dfg)
                    assert InitializationInterval is not None
                    return False
            return True
        else:
            return False

    def get_valid_slots(self, dependencies, latency, noc_schedule, buffer_schedule):
        """Return an iterator over possible valid slots for an operation."""
        return self.operations.get_valid_slots(dependencies, latency, noc_schedule, buffer_schedule)

    def clear_operation(self, time, index, latency):
        self.operations.clear_operation(time, index, latency)

    def get_location(self, node):
        return self.operations.get_location(node)

    def compute_and_reserve_communication_distance(self, cycle, n1, n2, noc_schedule):
        """Route the value of ``n1`` to ``n2`` starting at ``cycle``.

        Returns:
            The length of the reserved path, or None when no path is
            available (nothing is reserved in that case).
        """
        _, n1_loc = self.get_location(n1)
        _, n2_loc = self.get_location(n2)

        # TODO -- a sanity-check that cycle is after this might be a good idea.
        path = self.cgra.noc.shortest_available_path(cycle, n1, n1_loc, n2_loc, noc_schedule)

        if path is None:
            # TODO --- we should probably punish the agent a lot here
            # rather than crashing?
            print("Schedule has not valid path between ", n1_loc, "and", n2_loc, "at time", cycle)
            return None

        noc_schedule.occupy_path(path)
        return len(path)

    def get_InitializationInterval(self, dfg):
        """Compute the initialization interval (II) of the current placement.

        The placement is replayed into a fresh schedule: operands are routed
        over the NOC, buffered at the consumer, and each operation is pushed
        back until its location is free.

        Returns:
            ``(ii, finished)``. ``ii`` is None when routing or buffering
            fails, otherwise the largest span between the first allocation
            and the last release over all used locations. ``finished`` is
            False when a node or one of its predecessors was skipped.
        """
        debug = CGRACompileSettings['DebugGetInitializationInterval']

        # The schedule that results can differ from the one held
        # internally, and the NOC/buffer schedules depend on it, so all
        # three are rebuilt on every call.
        actual_schedule = InternalSchedule(self.cgra, dfg)
        noc_schedule = NOCSchedule()
        buffer_schedule = BufferSchedule()

        # Cycle on which the result of each node becomes available.
        cycles_end = {}

        # Whether scheduling is finished is tracked elsewhere --- this is
        # just a sanity-check.
        finished = True

        for node in dfg.bfs():
            preds = dfg.get_preds(node)

            scheduled_time, loc = self.get_location(node)
            if scheduled_time is None:
                # This is not a complete operation
                finished = False
                continue
            earliest_time = scheduled_time

            if debug:
                print("Looking at node ", node)
                print("Has preds ", [str(p) for p in preds])

            arrival_times = []
            for pred in preds:
                if pred.name not in cycles_end:
                    finished = False
                    continue

                pred_cycle = cycles_end[pred.name]
                if debug:
                    print("Have pred that finishes at cycle", pred_cycle)

                # Compute the time to this node, and
                # reserve those paths on the NoC.
                distance = self.compute_and_reserve_communication_distance(pred_cycle, pred, node, noc_schedule)
                if distance is None:
                    if debug:
                        print("Failed due to distance not working", distance)
                    # This schedule isn't possible: no route is available.
                    return None, False

                # Compute when this predecessor reaches this node:
                arrival_time = distance + pred_cycle
                earliest_time = max(earliest_time, arrival_time)
                arrival_times.append(arrival_time)

            # Buffer every operand from its own arrival until execution.
            for arr_time in arrival_times:
                reserved = buffer_schedule.occupy_buffer(loc, arr_time, earliest_time)
                if not reserved:
                    # This schedule isn't possible due to buffering requirements.
                    # TODO --- can we delay computation to get the buffering
                    # satisfied?
                    return None, False

            # Check that the PE is actually free at this time --- if it
            # isn't, push the operation back.
            latency = operation_latency(node.operation)
            free_time = actual_schedule.get_free_time(earliest_time, latency, loc)
            actual_schedule.set_operation(free_time, loc, node, latency)
            if free_time != earliest_time:
                # We should probably punish the agent for this.
                print("Place failed to place node in a sensible place: it is already in use!")

            cycles_end[node.name] = free_time + latency

            if debug:
                print("Node ", node.name, "has earliest time", earliest_time)

        # The II is bounded by how long each location is tied up: from its
        # first allocation to its last release.
        min_InitializationInterval = 0
        for loc in actual_schedule.locations():
            # Can only do this for PEs that actually have uses!
            if not actual_schedule.has_use(loc):
                continue
            last_free = max(actual_schedule.free_times(loc))
            first_alloc = min(actual_schedule.alloc_times(loc))

            difference = last_free - first_alloc
            if debug:
                print("Diff at loc", loc, "is", difference)
            min_InitializationInterval = max(min_InitializationInterval, difference)

        # TODO --- we should probably return some kind of object
        # that would enable final compilation also.
        return min_InitializationInterval, finished
# Number of schedule timesteps encoded in one RLMap observation.
rlmap_time_depth = 20
# Have an entry for each cell in the compilation_session CGRA and also
# a note of the current operation: (dim + 1)^2 values per encoded timestep,
# plus one leading element.
rlmap_tensor_size = ((compilation_session_cgra.dim + 1) * (compilation_session_cgra.dim + 1)) * rlmap_time_depth + 1
# Names of the relative-placement moves.
# NOTE(review): presumably consumed by the relative-placement environment;
# confirm, as no use is visible next to this definition.
relative_placement_directions = ["no_action", "up", "down", "north", "south", "east", "west", "sooner", "later"]
class CGRASession(CompilationSession):
    """Compilation session that schedules the nodes of a DFG onto a CGRA.

    Actions: ``-1`` restarts the schedule, ``0`` advances the current
    timestep, and ``k > 0`` places the current operation on location
    ``k - 1`` at the current timestep.
    """

    cgra = compilation_session_cgra
    action_spaces = action_space
    observation_spaces = observation_space
    # TODO --- a new observation space corresponding to previous actions

    def __init__(self, working_directory: Path, action_space: ActionSpace, benchmark: Benchmark):
        super().__init__(working_directory, action_space, benchmark)
        logging.info("Starting a compilation session for CGRA" + str(self.cgra))
        # NOTE(security): the benchmark payload is a pickled DFG. Unpickling
        # can execute arbitrary code, so benchmarks must come from a
        # trusted source.
        self.dfg = pickle.loads(benchmark.program.contents)
        self.schedule = Schedule(self.cgra, self.dfg)

        self.current_operation_index = 0
        # Starting scheduling time --- the agent steps through time rather
        # than choosing a raw time.
        self.time = 0
        self.dfg_to_ops_list()

    def reset(self,
              benchmark: Optional[Union[str, Benchmark]] = None,
              action_space: Optional[str] = None,
              observation_space: Union[
                  OptionalArgumentValue, str, ObservationSpaceSpec
              ] = OptionalArgumentValue.UNCHANGED,
              reward_space: Union[
                  OptionalArgumentValue, str, Reward
              ] = OptionalArgumentValue.UNCHANGED,
              ):
        """Discard the current schedule and start again.

        Args:
            benchmark: When given, its pickled DFG replaces the current one;
                when None, the current DFG is kept.
            action_space, observation_space, reward_space: Accepted for
                signature compatibility; not used.
        """
        print("Reset started")
        if benchmark is not None:
            # NOTE(security): see __init__ --- trusted benchmarks only.
            self.dfg = pickle.loads(benchmark.program.contents)
        # Without a new benchmark the existing DFG is kept: dropping it
        # would make every later II computation fail.
        self.schedule = Schedule(self.cgra, self.dfg)
        self.current_operation_index = 0
        self.time = 0
        # Keep the op list in sync with the (possibly new) DFG.
        self.dfg_to_ops_list()
        print("Reset complete")

    def dfg_to_ops_list(self):
        """Flatten the DFG in BFS order.

        Fills ``self.node_order`` with the nodes and ``self.ops`` with the
        index of each node's operation within the Operations list.

        Raises:
            AssertionError: If an operation is not found in Operations.
        """
        self.ops = []
        self.node_order = []
        for op in self.dfg.bfs():
            # Do we need to do a topo-sort here?
            ind = operation_index_of(op.operation)
            if ind == -1:
                raise AssertionError(
                    "Did not find operation " + str(op.operation) + " in the set of Operations"
                )

            self.ops.append(ind)
            self.node_order.append(op)

    def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool]:
        """Apply one scheduling action.

        Returns:
            ``(end_of_session, new_action_space, action_had_no_effect)``:
            whether every operation has been scheduled, always None, and
            whether the action left the session state unchanged.
        """
        print("Action is {}".format(str(action)))

        response = action.int64_value
        if response == -1:
            # Do a reset of the env:
            self.reset()
            return False, None, False

        had_effect = False
        if response > 0:
            if self.current_operation_index >= len(self.node_order):
                # We've scheduled past the end: nothing left to place.
                return True, None, True

            # Location `response - 1` is the target; 0 is the no-op/advance.
            node = self.node_order[self.current_operation_index]
            latency = operation_latency(node.operation)
            op_set = self.schedule.set_operation(self.time, response - 1, node, latency)

            # Only a placement that succeeded can have broken the
            # schedule, and only then is the slot ours to clear.
            if op_set:
                InitializationInterval, _ = self.schedule.get_InitializationInterval(self.dfg)
                if InitializationInterval is None:
                    # Unset that operation:
                    print("Setting operation resulted in failed DFG mapping")
                    print(self.schedule)
                    self.schedule.clear_operation(self.time, response - 1, latency)
                    print("After clearning, have")
                    print(self.schedule)
                    op_set = False  # Need to punish.
                    new_InitializationInterval, _ = self.schedule.get_InitializationInterval(self.dfg)
                    # This should not be non-existent after un-scheduling.
                    assert (new_InitializationInterval is not None)
                else:
                    had_effect = True
                    print("Scheduled operation", str(node))
                    print("Got an InitializationInterval of ", InitializationInterval)
                    self.current_operation_index += 1
        elif response == 0:
            # Advancing time changes the state the next placement uses.
            self.time += 1
            had_effect = True

        done = self.current_operation_index >= len(self.ops)

        print("At end of cycle, have schedule")
        print(self.schedule)
        print("Done is ", done)

        # The third element of the contract is "had NO effect".
        return done, None, not had_effect

    def get_observation(self, observation_space: ObservationSpace) -> Event:
        """Compute the observation named by ``observation_space.name``.

        Raises:
            KeyError: If the observation space is not one handled here.
        """
        logging.info("Computing an observation over the space")

        if observation_space.name == "ir":
            # TODO --- This should be a DFG?
            return Event(int64_tensor=Int64Tensor(shape=[len(self.ops)], value=self.ops))
        elif observation_space.name == "Schedule":
            # NOTE(review): no "Schedule" space is registered and Schedule
            # defines no `current_iteration`, so this branch looks dead.
            box_value = self.schedule.current_iteration
            return Event(int64_box_value=box_value)
        elif observation_space.name == "CurrentInstruction":
            # Once every operation is scheduled there is no current
            # instruction; report -1 so the agent can tell.
            if self.current_operation_index >= len(self.ops):
                return Event(int64_value=-1)
            return Event(int64_value=self.ops[self.current_operation_index])
        elif observation_space.name == "CurrentInstructionIndex":
            # Return a way to localize the instruction within the graph.
            return Event(int64_value=self.current_operation_index)
        elif observation_space.name == "InitializationInterval":
            print("Computing InitializationInterval for schedule:")
            print(self.schedule)
            ii, finished = self.schedule.get_InitializationInterval(self.dfg)
            print("Got InitializationInterval", ii)
            print("Finished is ", finished)
            return Event(int64_value=ii)
        elif observation_space.name == "RLMapObservations":
            print("Getting RLMap Observations")
            print("Observation space is " + str(type(observation_space)))
            current_operation_index = self.current_operation_index
            # NOTE(review): indexes past the end once all operations are
            # scheduled --- confirm how callers avoid that.
            node = self.node_order[current_operation_index]
            # TODO --- add encoding of the CGRA constraints (not required for
            # faithful reimplementation of RLMap, but probably required for
            # a fair comparison.)
            schedule_encoding = self.schedule.to_rlmap_tensor(node, time_window_size=rlmap_time_depth)

            full_res = [current_operation_index] + schedule_encoding
            if len(full_res) != rlmap_tensor_size:
                raise AssertionError(
                    "Tensor sizes don't match! " + str(len(full_res))
                    + " and " + str(rlmap_tensor_size)
                )

            return Event(int64_tensor=Int64Tensor(shape=[len(full_res)], value=full_res))

        # Fail loudly instead of silently returning None.
        raise KeyError(observation_space.name)


def make_cgra_compilation_session():
    """Return the CompilationSession class implementing the CGRA service."""
    return CGRASession
and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +import os +import traceback + +from compiler_gym.envs.cgra.service.cgra_service import make_cgra_compilation_session +from compiler_gym.service.runtime import create_and_run_compiler_gym_service + +if __name__ == "__main__": + # TODO(jcw) --- load a CGRA description? + try: + create_and_run_compiler_gym_service(make_cgra_compilation_session()) + except: + print(traceback.format_exc()) + raise \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service b/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service new file mode 100755 index 000000000..756e79ab0 --- /dev/null +++ b/compiler_gym/envs/cgra/service/compiler_gym-relative-placement-cgra-service @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. +import os +import traceback + +from compiler_gym.envs.cgra.service.relative_placement_service import make_cgra_compilation_session +from compiler_gym.service.runtime import create_and_run_compiler_gym_service + +if __name__ == "__main__": + try: + create_and_run_compiler_gym_service(make_cgra_compilation_session()) + except: + print(traceback.format_exc()) + raise \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/relative_cgra_env.py b/compiler_gym/envs/cgra/service/relative_cgra_env.py new file mode 100644 index 000000000..a6c325c48 --- /dev/null +++ b/compiler_gym/envs/cgra/service/relative_cgra_env.py @@ -0,0 +1,42 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from compiler_gym.util.gym_type_hints import OptionalArgumentValue +from compiler_gym.service.client_service_compiler_env import ClientServiceCompilerEnv +from pathlib import Path +from typing import Iterable, List, Optional, Union, cast +from compiler_gym.datasets import Benchmark + +from compiler_gym.envs.cgra.datasets import get_cgra_datasets +from compiler_gym.envs.cgra.cgra_rewards import IntermediateInitializationIntervalReward, FinalInitializationIntervalReward + +class RelativeCgraEnv(ClientServiceCompilerEnv): + def __init__(self, *args, punish_intermediate: bool = True, datasets_site_path: Optional[Path] = None, benchmark: Optional[Union[str, Benchmark]], **kwargs): + if punish_intermediate: + reward = IntermediateInitializationIntervalReward() + else: + reward = FinalInitializationIntervalReward() + super().__init__( + *args, + **kwargs, + benchmark = benchmark or "dfg_10/1", + datasets=get_cgra_datasets(site_data_base=datasets_site_path), + rewards=[reward], + derived_observation_spaces=[] + ) + + def reset(self, reward_space = OptionalArgumentValue.UNCHANGED, *args, **kwargs): + observation = super().reset(reward_space=reward_space, *args, **kwargs) + + return observation + + def make_benchmark(self, inputs, copt, system_include: bool = True, timeout: int=600): + return None + + def render(self, mode="human"): + if mode == "human": + print("Human visible schedule") + else: + return self.render(mode) \ No newline at end of file diff --git a/compiler_gym/envs/cgra/service/relative_placement_service.py b/compiler_gym/envs/cgra/service/relative_placement_service.py new file mode 100644 index 000000000..0344bfe82 --- /dev/null +++ b/compiler_gym/envs/cgra/service/relative_placement_service.py @@ -0,0 +1,270 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +from pathlib import Path +from typing import Tuple, Optional, Union, List +from compiler_gym.envs.cgra.service.cgra_service import BufferSchedule, CGRASession, NOCSchedule, observation_space, Schedule, CGRA, relative_placement_directions +from compiler_gym.envs.cgra.compile_settings import CGRACompileSettings, RelativePlacementSettings +import random +from compiler_gym.spaces import Reward +import traceback +from compiler_gym.service import CompilationSession +from compiler_gym.util.gym_type_hints import ObservationType, OptionalArgumentValue +from compiler_gym.views import ObservationSpaceSpec +from compiler_gym.service.proto import ( +ActionSpace, +Benchmark, +DoubleRange, +Event, +Int64Box, +Int64Range, +Int64Tensor, +NamedDiscreteSpace, +ObservationSpace, +Space, +StringSpace +) +import pickle + +""" +Unlike in direct placement, in relative placement, we take an operation and schedule it +to it's nearby neighbours that support the operation. + +""" + +action_space = [ + ActionSpace(name="move", + space=Space( + named_discrete=NamedDiscreteSpace( + # has a max of 9 connection dimensions (none, up, down, n, s, e, w, sooner, later) + name=relative_placement_directions + ) + )) +] + +class RelativePlacementCGRASession(CGRASession): + def __init__(self, working_directory: Path, action_space: ActionSpace, benchmark: Benchmark): + try: + print("Initailziing relplace session") + super().__init__(working_directory, action_space, benchmark) + + # For the relative placmenet CGRA, we need to come up with an initial placement strategy. + # While it may not be important for all classes of algorithm that this is consistent + # after every reset, it is important for some (e.g. genetic algorithms) + self.dfg = pickle.loads(benchmark.program.contents) + print("Loaded DFG " + str(self.dfg)) + # TODO(jcw) --- support better seeds. 
+ self.schedule = Schedule(self.cgra, self.dfg) + self.initial_placement = self.get_initial_placement(self.dfg, 0) + + # At the same time, the results of this are sensitive to the starting position, + # so, it's important that we can control the starting position. + self.current_operation_index = 0 + + # This is a constant that says how many times we should iterate over the array. + self.max_iterations = 10 + + self.iteration_number = 0 + except Exception as e: + print(traceback.format_exc()) + raise e + + + observation_spaces: List[ObservationSpace] = observation_space + action_spaces = action_space + + def reset(self, + benchmark: Optional[Union[str, Benchmark]] = None, + action_space: Optional[str] = None, + observation_space: Union[ + OptionalArgumentValue, str, ObservationSpaceSpec + ] = OptionalArgumentValue.UNCHANGED, + reward_space: Union[ + OptionalArgumentValue, str, Reward + ] = OptionalArgumentValue.UNCHANGED, + ): + try: + return super().reset(benchmark, action_space, observation_space, reward_space) + except Exception as e: + print(traceback.format_exc()) + raise e + + def get_initial_placement(self, dfg, seed): + mode = CGRACompileSettings['InitialPlacementMode'] + if mode == 'random': + self.get_initial_placement_random(dfg, seed) + elif mode == 'first_avail': + self.get_initial_placement_nth_avail(dfg, seed, 1) + elif mode == 'second_avail': + # First avail results in compressed sequences. + # second avail spreads things out better? 
+ self.get_initial_placement_nth_avail(dfg, seed, 2) + elif mode == 'lee2021': + self.get_initial_placement_linear(dfg, seed) + + def get_initial_placement_nth_avail(self, dfg, seed, n): + nodes = dfg.bfs() + noc_schedule = NOCSchedule() + buffer_schedule = BufferSchedule() + + for node in nodes: + # Take the first possible placmenent + dependences = dfg.get_preds(node) + lat = node.operation.latency + poss_placements = self.schedule.get_valid_slots(dependences, lat, noc_schedule, buffer_schedule) + i = n + t_placement, loc = None, None + while i > 0: + t_placement, loc, required_paths, required_buffer_placements = next(poss_placements) + i -= 1 + self.schedule.set_operation(t_placement, loc, node, node.operation.latency) + for path in required_paths: + noc_schedule.occupy_path(path) + for from_time, to_time in required_buffer_placements: + buffer_schedule.occupy_buffer(loc, from_time, to_time) + + # When using the first_avail placement, it should result + # in a valid schedule to start with. + initial_InitializationInterval, _ = self.schedule.get_InitializationInterval(dfg) + print("After initial placement (mode, first_avail), got InitializationInterval", initial_InitializationInterval) + assert initial_InitializationInterval is not None #should be a valid schedule. + + + # Do a random initial placment --- requires + # extensively smart agents to then go and correct this. + def get_initial_placement_random(self, dfg, seed): + max_pe = self.cgra.cells_as_list() + nodes = dfg.bfs() + time = 0 + # RODO -- setup seed. + + was_set = False + iterating = True + while iterating: + if was_set: + n = next(nodes, None) + if n is None: + iterating = False + continue + else: + # Try at new time + time += 1 + pe_ind = random.randomint(0, len(max_pe) - 1) + was_set = self.schedule.set_operation(time, pe_ind, n, n.operation.latency) + + # This is like a crappy approxiation of Lee 2021 DAC (Crappy + # because it's not guaranteed to give you the right thing.) 
+ # It's also not quite that --- because that was truly on + # he diagonal, while this is using a zig-zag approach. + def get_initial_placement_linear(self, dfg, seed): + # For now, just place the nodes in order on the CGRA. + # Iterate through the PEs, and then increment the clock cycle + # if we can't place. + pe_ind = 0 + time = 0 + max_pe = self.cgra.cells_as_list() + nodes = dfg.bfs() + iterating = True + was_set = True + while iterating: + # only move to next node if we properly set the operation last time. + if was_set: + n = next(nodes, None) + if n is None: + # Finished scheduling! + iterating = False + continue + was_set = self.schedule.set_operation(time, pe_ind, n, n.operation.latency) + if was_set: + print("Set initial placement for node", str(n)) + print("Position is ", self.schedule.get_location(n)) + + # TODO -- should we check that this produces a schedule with an InitializationInterval? + # Aim is to start with a very spread-out schedule that should just work --- + # let the SA algorithm compress it, rather than trying to make + # the SA algorithm find a valid schedule. + pe_ind += 1 + time += n.operation.latency + if pe_ind >= len(max_pe): + pe_ind = 0 + + def apply_action(self, action: Event) -> Tuple[bool, Optional[ActionSpace], bool]: + try: + if self.iteration_number == self.max_iterations: + # The iteration is finished. + return True, None, False + + step = action.int64_value + print("Got step ", step) + action_to_do = relative_placement_directions[step] + print("Got step", step, "which entails moving in direction", action_to_do) + # Get the ndoe --- the dfg.nodes is a dict, so need to access through the names + # list. 
+ current_operation_node_name = self.dfg.node_names[self.current_operation_index] + current_operation = self.dfg.nodes[current_operation_node_name] + + current_time, current_location = self.schedule.get_location(current_operation) + print("For node ", current_operation, "found location", current_location) + new_time = current_time + new_location = current_location + if action_to_do == "sooner": + new_time -= 1 + elif action_to_do == "later": + new_time += 1 + else: + if action_to_do == "no_action": + new_location = None + else: + new_location = self.cgra.get_neighbour(action_to_do, current_location) + + print("Before swap, InitializationInterval is ", self.schedule.get_InitializationInterval(self.dfg), "iteration is ", self.iteration_number) + if new_location is not None: + print("Swapping between", current_location, 'and', new_location) + swapped = self.schedule.swap(current_time, current_location, new_time, new_location, self.dfg, allow_invalid=RelativePlacementSettings['AllowInvalidIntermediateSchedules']) + else: + # If the new location is none, that means that we picked a direction + # that is invalid (ie. doesn't exist for the node in question). To make + # it easier on the RL/GA algorithms, we'll just silently skiup this here. + swapped = False + + # Prepare for next iteration: + self.current_operation_index += 1 + if self.current_operation_index > len(self.dfg.nodes) - 1: + # Wrap around for another pass through the nodes. 
+ self.current_operation_index = 0 + self.iteration_number += 1 + + print("After iteration, schedule is ", self.schedule) + print("Swapped is ", swapped) + print("InitializationInterval is ", self.schedule.get_InitializationInterval(self.dfg)) + + return False, None, swapped + except Exception as e: + print(traceback.format_exc()) + raise e + + def get_observation(self, observation_space: ObservationSpace) -> Event: + try: + result = super().get_observation(observation_space=observation_space) + if observation_space.name == 'InitializationInterval': + ii, finished = self.schedule.get_InitializationInterval(self.dfg) + if not finished: + # The RLLib library can't handle nones, so + # Just return a large punishment if this fails + # to schedule. + result = Event(int64_value=-100) + else: + result = Event(int64_value=-ii) + print ("got the result.", result) + elif observation_space.name == 'RLMapObservations': + # print("Got RLMap Observations", result.int64_tensor) + pass + return result + except Exception as e: + print(traceback.format_exc()) + raise e + +def make_cgra_compilation_session(): + return RelativePlacementCGRASession \ No newline at end of file diff --git a/compiler_gym/envs/cgra/test/test_dfg.json b/compiler_gym/envs/cgra/test/test_dfg.json new file mode 100644 index 000000000..739212a55 --- /dev/null +++ b/compiler_gym/envs/cgra/test/test_dfg.json @@ -0,0 +1,54 @@ +{ + "entry_points": ["n3", "n5"], + "nodes": [{ + "operation": "add", + "name": "n1" + }, + {"operation": "mul", + "name": "n2"}, + { + "operation": "load", + "name": "n3" + }, + { + "operation": "store", + "name": "n4" + }, + { + "operation": "load", + "name": "n5" + } + ], + "edges": [ + { + "name": "e1", + "from": "n3", + "to": "n1", + "type": "data" + }, + { + "name": "e2", + "from": "n5", + "to": "n1", + "type": "data" + }, + { + "name": "e3", + "from": "n1", + "to": "n2", + "type": "data" + }, + { + "name": "e4", + "from": "n3", + "to": "n2", + "type": "data" + }, + { + "name": "e5", + 
"from": "n2", + "to": "n4", + "type": "data" + } + ] +} diff --git a/examples/cgra/.gitignore b/examples/cgra/.gitignore new file mode 100644 index 000000000..11584e79a --- /dev/null +++ b/examples/cgra/.gitignore @@ -0,0 +1,3 @@ +ga_output +relative_placement_output/out +relative_placement_output/rp_data \ No newline at end of file diff --git a/examples/cgra/ga.py b/examples/cgra/ga.py new file mode 100644 index 000000000..1afdd997e --- /dev/null +++ b/examples/cgra/ga.py @@ -0,0 +1,332 @@ +"""Perform a GA of the action space of a CompilerGym environment. + +Use the RandomWalk python script to generate the initial candidates, then +use a GA to select the best one. + +To make this somewhat tractable, this assumes that the environemnt +takes -1 as an action, and that -1 resets the environment. +TODO -- Support environments that don't do that. +""" + +import random +import math + +import humanize +from random_walk import run_random_walk +from compiler_gym.datasets import benchmark +from absl import app, flags + +from typing import Set, List + +from compiler_gym.envs import CompilerEnv +from compiler_gym.util.gym_type_hints import ActionType +from compiler_gym.util.flags.benchmark_from_flags import benchmark_from_flags +from compiler_gym.util.flags.env_from_flags import env_from_flags +from compiler_gym.util.shell_format import emph +from compiler_gym.util.timer import Timer + +from compiler_gym.random_search import random_search + +import numpy as np + +# Dict for keeping track of various debugging/information counters. 
+ga_stats = { } + +def reset_ga_stats(): + global ga_stats + + ga_stats['valid_candidates_during_compute_score'] = 0 + ga_stats['invalid_candidates_during_compute_score'] = 0 + ga_stats['incomplete_candidates_during_compute_score'] = 0 + ga_stats['candidates_randomly_skipped'] = 0 + ga_stats['candidates_scored'] = 0 + +reset_ga_stats() +print("Initialized Counters") + +if __name__ == "__main__": + flags.DEFINE_boolean( + "variable_length_sequences", + False, + "Use a crossover algorithm that supports generation of variable length sequences" + ) + flags.DEFINE_boolean( + "print_counters", + False, + "Print Internal Compile Conters" + ) + flags.DEFINE_integer( + "iters", + 12, + "Min numbrt og iterations" + ) + flags.DEFINE_integer( + "generation_size", + 32, + "number of candidates to track" + ) + flags.DEFINE_integer( + "initialization_steps", + 100, + "number of steps to initialize the initial elements." + ) + flags.DEFINE_integer( + "max_cands", + 1000, + "max number of candidates to add in crossover." + ) + flags.DEFINE_float( + "length_preservation_factor", + 0.9, + "how much to discount different length crossovers. 
(formual is N^(length difference))" + ) + flags.DEFINE_float( + "reduction_factor", + 0.1, + "what fraction to reduce the number of generated candidates by (0.1 is 10 percent of generated candidates are carried forward to evaluation)" + ) + flags.DEFINE_boolean( + "refill", + False, + "refill the candidates list using randomly generated candidates if it is too small after each generation" + ) + flags.DEFINE_integer( + "expected_candidates", + 1000, + "How many candidates to take during crossover (in expectation) (only for fixed length --- see reduction factor for variable length)" + ) + FLAGS = flags.FLAGS + +class Candidate: + def __init__(self, actions): + self.actions = actions + self.score = None + self.failed = True + + def copy(self): + new_cand = Candidate(self.actions[:]) + new_cand.score = self.score + new_cand.failed = self.failed + + return new_cand + + def __str__(self): + return "Actions: " + str(self.actions) + ", score " + str(self.score) + " (failed: " + str(self.failed) + ")" + + def score(self): + return self.score + + def compute_score(self, env, reward_space_name): + ga_stats['candidates_scored'] += 1 + env.reset() + assert len(self.actions) > 0 + + print ("Computing score for actions " + str(self.actions)) + reward = -100000 # Largest reward is best + failed = True # Empty schedules marked as failing (Is this a good idea?) + try: + done = False + for action in self.actions: + failed = False + observation, reward, done, info = env.step(action) + # TODO -- Should we check if we finsihed early? + if done: + ga_stats['valid_candidates_during_compute_score'] += 1 + failed = False + else: + ga_stats['incomplete_candidates_during_compute_score'] += 1 + failed = True + except: + # TODO -- can we do better? 
+ failed = True + ga_stats['invalid_candidates_during_compute_score'] += 1 + + if failed: + reward = -10000000 + self.score = reward + self.failed = failed + return reward, failed + +def mutate(cands: Set[List[ActionType]]): + new_set = set() + for c in cands: + if random.randint(0, 1) == 0: + # TODO -- pick a better probability of that? + max_action = max(c.actions) # TODO --- select from all actions not just from max seen. + new_c = c.copy() + new_c.actions[random.randint(0, len(c.actions) - 1)] = random.randint(0, max_action) + new_set.add(new_c) + new_set.add(c) + + return new_set + +# This is a much simpler method that produces a much smaller set for +# fixed-lenght sequences. The crossover_variable_length method +# has a tendency to explode under long action sequences. +def crossover_fixed_length(cands: Set[List[ActionType]]): + new_cands = set() + for c in cands: + cand_count = len(c.actions) * len(cands) * len(cands) + fraction_taken = float(FLAGS.expected_candidates) / float(cand_count) + + for c2 in cands: + for i in range(len(c.actions)): + should_add = random.random() < fraction_taken + if not should_add: + continue + + new_cand = Candidate(c.actions[:i] + c2.actions[i:]) + new_cands.add(new_cand) + + print("Generated ", len(new_cands), "candidates") + return new_cands + +def crossover_variable_length(cands: Set[List[ActionType]]): + # Is there a better way to do this? + # For the CGRA env, these are not fixed length. + + # Try to keep the number generated down a bit: + naive_number = 0 + for c in cands: + for c2 in cands: + naive_number += len(c.actions) * len(c2.actions) + + print ("Naively would add " + str(naive_number) + " candidates") + new_candidates = set() + for cand in cands: + for i in range(len(cand.actions)): + cand_head = cand.actions[:i] + + # Before we bother iterating, sandwich the range here. 
+ length_diff = len(cand.actions) - len(cand_head) + # So the mean length added should be that lenght diff + # formula is pow(factor, abs(act_lengh - length_diff)) + # Threshold at like 10% + bound_value = 0.10 + bounds = int(math.log(bound_value, FLAGS.length_preservation_factor)) + for other_cand in cands: + for j in range(max(length_diff - bounds, 0), min(length_diff + bounds, len(other_cand.actions))): + # Don't add everything with certainty. Exponential backoff on size. + probability = pow(FLAGS.length_preservation_factor, abs(len(cand.actions) - (len(cand_head) + j))) + if (random.random() < probability) and (random.random() < FLAGS.reduction_factor): + other_cand_tail = other_cand.actions[j:] + + new_candidates.add(Candidate(cand_head + other_cand_tail)) + else: + ga_stats['candidates_randomly_skipped'] += 1 + print("Generated " + str(len(new_candidates)) + " new candidates") + + return new_candidates.union(cands) + +def get_best(cands, count=1): + filtered_cands = list(filter(lambda f: (not f.failed), cands)) + print("Got ", len(filtered_cands), "filtered cands from ", len(cands), "original cands") + sorted_cands = sorted(filtered_cands, key=lambda e: -e.score) + assert count > 0 + best_score = sorted_cands[0].score + + result = set() + for cand in sorted_cands[:count]: + result.add(cand) + + return result, best_score + +def compute_individual_fitness(inps): + cand, env = inps + cand.compute_score(env, FLAGS.reward) + return cand + +def compute_set_fitness(inps): + cands, env = inps + env.reset() + for c in cands: + compute_individual_fitness((c, env)) + +# For splitting up array a for multi core +def split_array(a, n): + new_arrs = [] + for i in range(n): + new_arrs.append([]) + + ind = 0 + for elem in a: + new_arrs[ind].append(elem) + ind += 1 + ind = ind % n + + return new_arrs + +def compute_fitness(cands, benchmark): + with env_from_flags(benchmark=benchmark) as env: + env.reset() + for cand in cands: + compute_individual_fitness((cand, env)) + + 
return cands + + +def run_ga(benchmark: benchmark.Benchmark, step_count: int, initial_candidates: Set[List[ActionType]]) -> None: + # Create an optimizer + + iter_number = 0 + candidate_count = len(initial_candidates) + + current_candidates = initial_candidates + + while iter_number < step_count: + if (len(current_candidates)) < candidate_count and FLAGS.refill: + current_candidates += generate_random_candidates(candidate_count - len(current_candidates), benchmark) + + print ("Starting iteration", iter_number, "with", len(current_candidates), "candidates") + mutations = mutate(current_candidates.copy()) + if FLAGS.variable_length_sequences: + crossed = crossover_variable_length(mutations) + else: + crossed = crossover_fixed_length(mutations) + fitness = compute_fitness(crossed, benchmark) + print ("Iter: " + str(iter_number) + " with generation size " + str(len(fitness))) + + current_candidates, best = get_best(fitness, count=candidate_count) + print ("After iteration " + str(iter_number) + " best score is " + str(best)) + + iter_number += 1 + + # Get the best one. + return get_best(current_candidates, count=1) + +def generate_random_candidates(number, this_benchmark): + cands = [] + with env_from_flags(benchmark=this_benchmark) as env: + env.reset() + for i in range(number): + print("Init Candidate ", i) + env.reset() + cands.append(Candidate(run_random_walk(env, FLAGS.initialization_steps))) + + return cands + +def main(argv): + print("Starting GA") + assert len(argv) == 1, f"Unrecognized flags: {argv[1:]}" + + this_benchmark = benchmark_from_flags() + initial_candidates = set(generate_random_candidates(FLAGS.generation_size, this_benchmark)) + + result, best_score = run_ga(this_benchmark, FLAGS.iters, initial_candidates) + for elt in result: + # This loop should only go once. 
+ print ("Result is: ", elt) + + with env_from_flags(benchmark=this_benchmark) as env: + env.reset() + for action in elt.actions: + env.step(action) + + print ("Result env was", env, "best score was", best_score) + + if FLAGS.print_counters: + for field in ga_stats: + print (field, " = ", ga_stats[field]) + +if __name__ == "__main__": + app.run(main) diff --git a/examples/cgra/ga_scripts/.gitignore b/examples/cgra/ga_scripts/.gitignore new file mode 100644 index 000000000..04f7babeb --- /dev/null +++ b/examples/cgra/ga_scripts/.gitignore @@ -0,0 +1,2 @@ +output +cdfs.png diff --git a/examples/cgra/ga_scripts/ga_score_extractor.sh b/examples/cgra/ga_scripts/ga_score_extractor.sh new file mode 100755 index 000000000..ebea2697b --- /dev/null +++ b/examples/cgra/ga_scripts/ga_score_extractor.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +typeset -a results +if [[ $# -lt 2 ]]; then + echo "Usage: $0 " +fi + +output=$1 +echo "" -n > $output +shift +while [[ $# -gt 0 ]]; do + input=$1 + shift + + # Get the second to last line, which has the InitializationInterval for the graph. + ii=$(tail -n 2 $input | head -n 1 | cut -f 7 -d' ' ) + + echo "$ii, " >> $output +done diff --git a/examples/cgra/ga_scripts/plot.sh b/examples/cgra/ga_scripts/plot.sh new file mode 100755 index 000000000..2101bcd82 --- /dev/null +++ b/examples/cgra/ga_scripts/plot.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python plot_CDFs.py ga_output GA ../relative_placement_output/rp_data RLMap diff --git a/examples/cgra/ga_scripts/plot_CDFs.py b/examples/cgra/ga_scripts/plot_CDFs.py new file mode 100644 index 000000000..c2159c236 --- /dev/null +++ b/examples/cgra/ga_scripts/plot_CDFs.py @@ -0,0 +1,77 @@ +import matplotlib.pyplot as plt +import argparse +import numpy + +def load_cdf_from_file(f): + with open(f) as fle: + lines = ''.join(fle.readlines()) + data = [] + # Should only be one-line files? 
+ for item in lines.split(','): + if item.strip(): + try: + data.append(abs(int(item.strip()))) + except: + # Plenty of reasons this could fail -- mostly due to + + # incomplete runs + pass + + print ("Loaded ", len(data), "items") + return data + +def compute_cdf(data): + sorted_data = sorted(data) + x_points = range(0, max(sorted_data)) + cdf = [0.0] * len(x_points) + sum_so_far = 0 + cdf_pointer = 0 + value_per_point = 1 #Treated as int to avoid FP accum issues. + for point in sorted_data: + cdf_pointer = int(float(len(x_points)) * float(point) / float(max(sorted_data))) - 1 + sum_so_far += value_per_point + + cdf[cdf_pointer] = float(sum_so_far) / float(len(sorted_data)) + + return x_points, cdf + + +def plot_datas(datas, names): + # First, compute the CDF from the raw data + cdfs = [] + xvals = [] + for data in datas: + xvalus, cdf = (compute_cdf(data)) + xvals.append(xvalus) + cdfs.append(cdf) + + xvs_max = 0 + for i in range(len(cdfs)): + plt.plot(xvals[i], cdfs[i], label=names[i]) + xvs_max = max(max(xvals[i]), xvs_max) + + plt.ylim([0.0, 1.0]) + plt.xlim([0, xvs_max]) + plt.ylabel('CDF') + plt.xlabel('InitializationInterval') + plt.legend() + plt.savefig('cdfs.png') + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + # Alternate between files and names. 
+ parser.add_argument('files', nargs='+') + args = parser.parse_args() + + datas = [] + names = [] + name = False + for file in args.files: + if name: + names.append(file) + else: + data = load_cdf_from_file(file) + datas.append(data) + name = not name + + plot_datas(datas, names) diff --git a/examples/cgra/relative_placement_model.py b/examples/cgra/relative_placement_model.py new file mode 100644 index 000000000..415974724 --- /dev/null +++ b/examples/cgra/relative_placement_model.py @@ -0,0 +1,110 @@ +from unittest.result import failfast +from compiler_gym.wrappers.datasets import CycleOverBenchmarks +from compiler_gym.envs.compiler_env import CompilerEnv +from compiler_gym.wrappers import TimeLimit +import compiler_gym +from ray import tune +from ray.rllib.agents.ppo import PPOTrainer +import ray +import matplotlib.pyplot as plt +from itertools import islice +import argparse + +def make_env() -> compiler_gym.envs.CompilerEnv: + env = compiler_gym.make( + "relative-cgra-v0", + observation_space="RLMapObservations", + reward_space="InitializationInterval", + action_space="move", + benchmark='dfg_10/0' # I think this gets overwritten in the running loop. + ) + env = TimeLimit(env, max_episode_steps=5) + + return env + +def plot_results(rewards): + plt.bar(range(len(rewards)), rewards) + plt.ylabel("Reward (higher better)") + plt.savefig('rewards.png') + +def run_agent_on_benchmarks(bmarks): + with make_env() as env: + rewards = [] + for i, benchmark in enumerate(bmarks, start=1): + observation, done = env.reset(benchmark=benchmark), False + reward = 0 + while not done: + action = int(agent.compute_action(observation)) + print(type(action)) + observation, reward, done, _ = env.step(action) + # Just append the last reward, because that is the InitializationInterval. 
(or a large -ve noting + # failure) + rewards.append(reward) + print ("Exectuted ", i, "th benchmark of", len(bmarks)) + return rewards
+
+if __name__ == "__main__":
+    # Command-line driver: optionally train a PPO agent on the 'dfg_10'
+    # benchmarks (--train) and/or evaluate a saved checkpoint (--test PATH).
+    parser = argparse.ArgumentParser(description="Run a recreation of the RLMap tool.")
+    parser.add_argument('--train', dest='train', default=False, action='store_true')
+    parser.add_argument('--test', dest='test', default=None)
+    parser.add_argument('--number', dest='number', default=50, help='Number of benchmarks to run test on.', type=int)
+    parser.add_argument('--train-size', dest='train_size', default=500, help='Number of benchmarks to train on', type=int)
+    args = parser.parse_args()
+
+    # Number of benchmarks held out for validation, between the training and
+    # the test split.
+    val_size = 50
+
+    with make_env() as env:
+        bench = env.datasets['dfg_10']
+        benchmarks = list(islice(bench.benchmarks(), args.train_size + val_size + args.number))
+        # Derive every split boundary from --train-size. The test split used to
+        # start at a hard-coded 550, which overlapped the training data or came
+        # up short for any --train-size other than the default 500.
+        val_end = args.train_size + val_size
+        train_benchmarks = benchmarks[:args.train_size]
+        val_benchmarks = benchmarks[args.train_size:val_end]
+        test_benchmarks = benchmarks[val_end:val_end + args.number]
+
+    print("Number of benchmarks for training: ", len(train_benchmarks))
+    print("Number of benchmarks for validation: ", len(val_benchmarks))
+    print("Number of benchmarks for testing:", len(test_benchmarks))
+
+    def make_training_env(*_env_config) -> compiler_gym.envs.CompilerEnv:
+        """Return a fresh environment wrapped in CycleOverBenchmarks over the training split.
+
+        Any positional arguments supplied by the caller are accepted and
+        ignored; the parameter is named so that it does not shadow the parsed
+        command-line ``args``.
+        """
+        return CycleOverBenchmarks(make_env(), train_benchmarks)
+
+    tune.register_env("RLMap", make_training_env)
+
+    if args.train:
+        # Start from a clean Ray instance so a previous session cannot leak in.
+        if ray.is_initialized():
+            ray.shutdown()
+        ray.init(include_dashboard=False, ignore_reinit_error=True)
+        analysis = tune.run(
+            PPOTrainer,
+            checkpoint_at_end=True,
+            stop={
+                "episodes_total": 500
+            },
+            config={
+                "seed": 0xCC,
+                "num_workers": 1,
+                "env": "RLMap",
+                "rollout_fragment_length": 5,
+                "train_batch_size": 5,
+                "sgd_minibatch_size": 5,
+            },
+        )
+        best_checkpoint = analysis.get_best_checkpoint(
+            metric="episode_reward_mean",
+            mode="max",
+            trial=analysis.trials[0]
+        )
+        print("Best checkpoint is '", best_checkpoint, "'")
+    if args.test:
+        checkpoint = args.test
+
+        # NOTE(review): `agent` is not passed to run_agent_on_benchmarks()
+        # below, so that function presumably reads it as a module-level
+        # global -- keep this name and scope.
+        agent = PPOTrainer(
+            env='RLMap',
+            config={
+                "num_workers": 1,
+                "seed": 0xCC,
+                "explore": False
+            }
+        )
+
+        agent.restore(checkpoint)
+        test_rewards = run_agent_on_benchmarks(test_benchmarks)
+
+        plot_results(test_rewards)
+    else:
+        print("Not testing (use --test to also test)")
diff --git a/examples/cgra/relative_placement_output/.gitignore b/examples/cgra/relative_placement_output/.gitignore
new file mode 100644
index 000000000..c585e1938
--- /dev/null
+++ b/examples/cgra/relative_placement_output/.gitignore
@@ -0,0 +1 @@
+out
\ No newline at end of file
diff --git a/examples/cgra/relative_placement_output/relative_placement_score_extractor.sh b/examples/cgra/relative_placement_output/relative_placement_score_extractor.sh
new file mode 100755
index 000000000..e7c9d913c
--- /dev/null
+++ b/examples/cgra/relative_placement_output/relative_placement_score_extractor.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Usage: relative_placement_score_extractor.sh <input log> <output file>
+#
+# For every 'Exectuted' progress line in <input log>, take the preceding line
+# if it contains 'Computing Reward' and append its 7th space-separated field
+# (presumably the reward value) to <output file>, one "value, " per line.
+
+typeset -a results
+if [[ $# -ne 2 ]]; then
+    echo "Usage: $0 <input log> <output file>"
+    # Bail out: $1/$2 are not both set, so nothing below can work.
+    exit 1
+fi
+
+output=$2
+input=$1
+# Refuse to touch an existing file: the loop below appends with >>.
+if [[ -f $output ]]; then
+    echo "Output $output already exists (will not be overwritten, please manually delete)"
+    exit 1
+fi
+# 'Exectuted' is spelled exactly as the Python driver prints it; keep the two in sync.
+results=( $(grep -e 'Exectuted' -B 1 "$input" | grep -e 'Computing Reward' | cut -f7 -d' ') )
+
+for r in "${results[@]}"; do
+    echo "$r, " >> "$output"
+done
\ No newline at end of file
diff --git a/examples/cgra/run_ga_relative_placement.sh b/examples/cgra/run_ga_relative_placement.sh
new file mode 100755
index 000000000..f946c175b
--- /dev/null
+++ b/examples/cgra/run_ga_relative_placement.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+rm -rf ga_output
+mkdir -p ga_output
+parallel 'echo "Starting iter {}"; python ga.py --nomp --max_cands 100 --env=cgra-v0 --benchmark=dfg_10/{} --reward=II &> ga_output/out_{}' ::: $(seq 0 10000)
diff --git a/examples/cgra/run_relative_placement.sh b/examples/cgra/run_relative_placement.sh
new file mode 100755
index 000000000..39c43509f
--- /dev/null
+++ b/examples/cgra/run_relative_placement.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+if [[
$# -ne 1 ]]; then
+    echo "Usage: $0 <checkpoint>"
+    echo "Use the --train flag to train (on the python script)"
+    exit 1
+fi
+
+rm -f relative_placement_output/out
+mkdir -p relative_placement_output
+echo "Starting program"
+python relative_placement_model.py --number 10000 --test "$1" &> relative_placement_output/out
diff --git a/examples/random_walk.py b/examples/random_walk.py
index 44c1a6f33..19eadd93d 100644
--- a/examples/random_walk.py
+++ b/examples/random_walk.py
@@ -20,18 +20,22 @@
 from compiler_gym.util.flags.env_from_flags import env_from_flags
 from compiler_gym.util.shell_format import emph
 from compiler_gym.util.timer import Timer
+from compiler_gym.util.gym_type_hints import ActionType
 
-flags.DEFINE_integer(
-    "step_min",
-    12,
-    "The minimum number of steps. Fewer steps may be performed if the "
-    "environment ends the episode early.",
-)
-flags.DEFINE_integer("step_max", 256, "The maximum number of steps.")
-FLAGS = flags.FLAGS
+from typing import List
 
+if __name__ == "__main__":
+    flags.DEFINE_integer(
+        "step_min",
+        12,
+        "The minimum number of steps. Fewer steps may be performed if the "
+        "environment ends the episode early.",
+    )
+    flags.DEFINE_integer("step_max", 256, "The maximum number of steps.")
+    FLAGS = flags.FLAGS
+
 
-def run_random_walk(env: CompilerEnv, step_count: int) -> None:
+def run_random_walk(env: CompilerEnv, step_count: int) -> List[ActionType]:
     """Perform a random walk of the action space.
 
     :param env: The environment to use.
@@ -40,12 +44,15 @@ def run_random_walk(env: CompilerEnv, step_count: int) -> None:
         environment to end the episode.
+    :return: The action sampled at each step, in the order they were taken.
     """
     rewards = []
+    actions = []
 
     step_num = 0
     with Timer() as episode_time:
         env.reset()
         for step_num in range(1, step_count + 1):
             action_index = env.action_space.sample()
+            actions.append(action_index)
             with Timer() as step_time:
                 observation, reward, done, info = env.step(action_index)
             print(
@@ -76,6 +83,8 @@ def reward_percentage(reward, rewards):
         f"at step {humanize.intcomma(rewards.index(max(rewards)) + 1)})"
     )
 
+    return actions
+
 
 def main(argv):
     """Main entry point."""
diff --git a/setup.py b/setup.py
index 24563f22b..83e07779f 100644
--- a/setup.py
+++ b/setup.py
@@ -122,6 +122,10 @@ def wheel_filename(**kwargs):
         "compiler_gym.envs.gcc.datasets",
         "compiler_gym.envs.gcc.service",
         "compiler_gym.envs.gcc",
+        "compiler_gym.envs.cgra.architectures",
+        "compiler_gym.envs.cgra.datasets",
+        "compiler_gym.envs.cgra.service",
+        "compiler_gym.envs.cgra",
         "compiler_gym.envs.loop_tool",
         "compiler_gym.envs.loop_tool.service",
         "compiler_gym.envs",
@@ -146,6 +150,7 @@ def wheel_filename(**kwargs):
     "package_data": {
         "compiler_gym": [
             "envs/gcc/service/compiler_gym-gcc-service",
+            "envs/cgra/service/*",
             "envs/loop_tool/service/compiler_gym-loop_tool-service",
             "third_party/csmith/csmith/bin/csmith",
             "third_party/csmith/csmith/include/csmith-2.3.0/*.h",