From 18ea9bdfea73f9652eea72b9bc03f097a4f0e28a Mon Sep 17 00:00:00 2001 From: Pierre-Luc Bacon Date: Thu, 20 Feb 2014 13:23:22 -0500 Subject: [PATCH 1/2] Fixes Pinball domain Fix some inconsistencies with RL-Glue. Add the ability to replay saved trajectories. --- pyrl/environments/pinball.py | 292 +++++++++++++++++++++-------------- 1 file changed, 177 insertions(+), 115 deletions(-) diff --git a/pyrl/environments/pinball.py b/pyrl/environments/pinball.py index 483405d..cb61537 100644 --- a/pyrl/environments/pinball.py +++ b/pyrl/environments/pinball.py @@ -8,8 +8,9 @@ """ +import sys import random -import argparse, os +import argparse import numpy as np from itertools import * @@ -34,25 +35,26 @@ class BallModel: """ DRAG = 0.995 - def __init__(self, start_position, radius): - """ - :param start_position: The initial position - :type start_position: float - :param radius: The ball radius - :type radius: float - """ + def __init__(self, start_position, radius, noise=0.03): + """ + :param start_position: The initial position + :type start_position: float + :param radius: The ball radius + :type radius: float + """ self.position = start_position self.radius = radius + self.noise = noise self.xdot = 0.0 self.ydot = 0.0 def add_impulse(self, delta_xdot, delta_ydot): - """ Change the momentum of the ball + """ Change the momentum of the ball :param delta_xdot: The change in velocity in the x direction - :type delta_xdot: float - :param delta_ydot: The change in velocity in the y direction - :type delta_ydot: float - """ + :type delta_xdot: float + :param delta_ydot: The change in velocity in the y direction + :type delta_ydot: float + """ self.xdot += delta_xdot/5.0 self.ydot += delta_ydot/5.0 self._clip(self.xdot) @@ -63,13 +65,20 @@ def add_drag(self): self.xdot *= self.DRAG self.ydot *= self.DRAG + def add_noise(self): + """ Corrupt the velocity with some Gaussian noise """ + self.xdot += np.random.normal(0.0, self.noise) + self.ydot += np.random.normal(0.0, self.noise) + self._clip(self.xdot) + self._clip(self.ydot) + def step(self): - """ Move the ball by one increment """ + """ Move the ball by one increment """ self.position[0] += self.xdot*self.radius/20.0 self.position[1] += self.ydot*self.radius/20.0 def _clip(self, val, low=-1, high=1): - """ Clip a value in a given range """ + """ Clip a value in a given range """ if val > high: val = high if val < low: @@ -84,10 +93,10 @@ class PinballObstacle: compute the appropriate effect to apply on the ball. """ def __init__(self, points): - """ - :param points: A list of points defining the polygon - :type points: list of lists - """ + """ + :param points: A list of points defining the polygon + :type points: list of lists + """ self.points = points self.min_x = min(self.points, key=lambda pt: pt[0])[0] self.max_x = max(self.points, key=lambda pt: pt[0])[0] @@ -100,8 +109,8 @@ def __init__(self, points): def collision(self, ball): """ Determines if the ball hits this obstacle - :param ball: An instance of :class:`BallModel` - :type ball: :class:`BallModel` + :param ball: An instance of :class:`BallModel` + :type ball: :class:`BallModel` """ self._double_collision = False @@ -131,11 +140,11 @@ def collision(self, ball): def collision_effect(self, ball): """ Based of the collision detection result triggered - in :func:`PinballObstacle.collision`, compute the + in :func:`PinballObstacle.collision`, compute the change in velocity. - :param ball: An instance of :class:`BallModel` - :type ball: :class:`BallModel` + :param ball: An instance of :class:`BallModel` + :type ball: :class:`BallModel` """ if self._double_collision: @@ -164,12 +173,12 @@ def collision_effect(self, ball): def _select_edge(self, intersect1, intersect2, ball): """ If the ball hits a corner, select one of two edges. - :param intersect1: A pair of points defining an edge of the polygon - :type intersect1: list of lists - :param intersect2: A pair of points defining an edge of the polygon - :type intersect2: list of lists - :returns: The edge with the smallest angle with the velocity vector - :rtype: list of lists + :param intersect1: A pair of points defining an edge of the polygon + :type intersect1: list of lists + :param intersect2: A pair of points defining an edge of the polygon + :type intersect2: list of lists + :returns: The edge with the smallest angle with the velocity vector + :rtype: list of lists """ velocity = np.array([ball.xdot, ball.ydot]) @@ -189,15 +198,15 @@ def _select_edge(self, intersect1, intersect2, ball): return intersect2 def _angle(self, v1, v2): - """ Compute the angle difference between two vectors + """ Compute the angle difference between two vectors - :param v1: The x,y coordinates of the vector - :type: v1: list - :param v2: The x,y coordinates of the vector - :type: v2: list - :rtype: float + :param v1: The x,y coordinates of the vector + :type: v1: list + :param v2: The x,y coordinates of the vector + :type: v2: list + :rtype: float - """ + """ angle_diff = np.arctan2(v1[0], v1[1]) - np.arctan2(v2[0], v2[1]) if angle_diff < 0: angle_diff += 2*np.pi @@ -206,13 +215,13 @@ def _angle(self, v1, v2): def _intercept_edge(self, pt_pair, ball): """ Compute the projection on and edge and find out - if it intercept with the ball. - :param pt_pair: The pair of points defining an edge - :type pt_pair: list of lists - :param ball: An instance of :class:`BallModel` - :type ball: :class:`BallModel` - :returns: True if the ball has hit an edge of the polygon - :rtype: bool + if it intercept with the ball. + :param pt_pair: The pair of points defining an edge + :type pt_pair: list of lists + :param ball: An instance of :class:`BallModel` + :type ball: :class:`BallModel` + :returns: True if the ball has hit an edge of the polygon + :rtype: bool """ # Find the projection on an edge @@ -268,9 +277,9 @@ class PinballModel: def __init__(self, configuration): """ Read a configuration file for Pinball and draw the domain to screen - :param configuration: a configuration file containing the polygons, + :param configuration: a configuration file containing the polygons, source(s) and target location. - :type configuration: str + :type configuration: str """ self.action_effects = {self.ACC_X:(1, 0), self.ACC_Y:(0, 1), self.DEC_X:(-1, 0), self.DEC_Y:(0, -1), self.ACC_NONE:(0, 0)} @@ -296,31 +305,50 @@ def __init__(self, configuration): self.target_pos = [float(tokens[1]), float(tokens[2])] self.target_rad = float(tokens[3]) elif tokens[0] == 'start': - start_pos = zip(*[iter(map(float, tokens[1:]))]*2) + start_pos = zip(*[iter(map(float, tokens[1:]))]*2) elif tokens[0] == 'ball': ball_rad = float(tokens[1]) self.ball = BallModel(list(random.choice(start_pos)), ball_rad) + def set_start_position(self, position): + """ Set the initial position of the ball + + :param position: The ball's initial position + :type position: list of float + """ + self.ball.position = position + + def set_start_velocity(self, velocity): + """ Set the initial ball velocity + + :param velocity: The ball's initial velocity + :type velocity: list of float + + """ + self.ball.xdot = velocity[0] + self.ball.ydot = velocity[1] + def get_state(self): - """ Access the current 4-dimensional state vector + """ Access the current 4-dimensional state vector - :returns: a list containing the x position, y position, xdot, ydot - :rtype: list + :returns: a list containing the x position, y position, xdot, ydot + :rtype: list - """ + """ return [self.ball.position[0], self.ball.position[1], self.ball.xdot, self.ball.ydot] def take_action(self, action): """ Take a step in the environment - :param action: The action to apply over the ball + :param action: The action to apply over the ball :type action: int - """ + """ for i in xrange(20): - if i == 0: + if i == 0 and action != self.ACC_NONE: self.ball.add_impulse(*self.action_effects[action]) + self.ball.add_noise() self.ball.step() @@ -354,16 +382,16 @@ def take_action(self, action): return self.THRUST_PENALTY def episode_ended(self): - """ Find out if the ball reached the target + """ Find out if the ball reached the target :returns: True if the ball reched the target position - :rtype: bool + :rtype: bool - """ + """ return np.linalg.norm(np.array(self.ball.position)-np.array(self.target_pos)) < self.target_rad def _check_bounds(self): - """ Make sure that the ball stays within the environment """ + """ Make sure that the ball stays within the environment """ if self.ball.position[0] > 1.0: self.ball.position[0] = 0.95 if self.ball.position[0] < 0.0: @@ -378,25 +406,27 @@ class PinballRLGlue(Environment): """This class is an RL-Glue adapter for :class:`pinball.PinballModel` """ name = "Pinball" + domain_name = 'pinball for reinforcement learning' - def __init__(self, configuration=os.path.join(os.path.dirname(__file__), - 'configs', 'pinball', 'pinball_simple_single.cfg')): + def __init__(self, configuration): """ This class exposes a Pinball environment over RL-Glue - :param configuration: a configuration file for this environment - :type configuration: str + :param configuration: a configuration file for this environment + :type configuration: str - """ - self.pinball = None + """ + self.pinball = None + self.initial_state = None + self.target_location = None self.configuration = configuration def make_taskspec(self): - """ Create a task specification string for this environment + """ Create a task specification string for this environment - :returns: a task specfication string - :rtype: str + :returns: a task specfication string + :rtype: str - """ + """ ts = TaskSpecRLGlue.TaskSpec(discount_factor=1.0, reward_range=(-5, 10000)) ts.addDiscreteAction((0, 4)) @@ -407,39 +437,45 @@ def make_taskspec(self): ts.addContinuousObservation((0.0, 1)) ts.setEpisodic() - ts.setExtra(self.name) + ts.setExtra(self.domain_name) return ts.toTaskSpec() def env_init(self): """ Declare the parameters for this environment - :returns: A string describing the environment - :rtype: str + :returns: A string describing the environment + :rtype: str - """ + """ return self.make_taskspec() def env_start(self): """ Instantiate a new :class:`PinballModel` environment :returns: The initial state - :rtype: :class:`Observation` + :rtype: :class:`Observation` - """ - self.pinball = PinballModel(self.configuration) - obs = Observation() + """ + self.pinball = PinballModel(self.configuration) + if self.initial_state: + self.pinball.set_start_position(self.initial_state[:2]) + self.pinball.set_start_velocity(self.initial_state[2:]) + if self.target_location: + self.pinball.target_pos = self.target_location + + obs = Observation() obs.doubleArray = self.pinball.get_state() - return obs + return obs def env_step(self, action): - """ Take a step in the environment + """ Take a step in the environment - :param action: The action that the agent wants to take - :returns: The next state, reward and whether the current state is terminal - :rtype: :class:`Reward_observation_terminal` + :param action: The action that the agent wants to take + :returns: The next state, reward and whether the current state is terminal + :rtype: :class:`Reward_observation_terminal` - """ + """ returnRO = Reward_observation_terminal() returnRO.r = self.pinball.take_action(action.intArray[0]) @@ -449,28 +485,35 @@ def env_step(self, action): returnRO.o = obs returnRO.terminal = self.pinball.episode_ended() + return returnRO def env_cleanup(self): - """ Do nothing. Called once the episode has terminated """ + """ Do nothing. Called once the episode has terminated """ pass - def env_message(message): - """ Handle a custom message sent over RL-Glue + def env_message(self, message): + """ Handle a custom message sent over RL-Glue - :param message: A message containing the action to execute - :returns: The current configuration filename if the message - is of the form ``config file=`` or ``config file=``. In the - later case, the string following the ``=`` symbol is the - path to a new configuration file. - :rtype: str + :param message: A message containing the action to execute + :returns: The current configuration filename if the message + is of the form ``config file=`` or ``config file=``. In the + later case, the string following the ``=`` symbol is the + path to a new configuration file. + :rtype: str - """ - if message == 'config file?': - return self.configuration + """ + if message == 'config file?': + return self.configuration if message.startswith('config file='): - self.configuration = message.split('=')[1] - return self.configuration + self.configuration = message.split('=')[1] + return self.configuration + if message.startswith('set-start-state'): + self.initial_state = map(float, message.split()[1:]) + return self.configuration + if message.startswith('set-goal-location'): + self.target_location = map(float, message.split()[1:]) + return self.configuration return "I don't know how to respond to your message" @@ -484,12 +527,12 @@ class PinballView: """ def __init__(self, screen, model): - """ - :param screen: a pygame surface - :type screen: :class:`pygame.Surface` - :param model: an instance of a :class:`PinballModel` - :type model: :class:`PinballModel` - """ + """ + :param screen: a pygame surface + :type screen: :class:`pygame.Surface` + :param model: an instance of a :class:`PinballModel` + :type model: :class:`PinballModel` + """ self.screen = screen self.model = model @@ -508,22 +551,39 @@ def __init__(self, screen, model): self.background_surface, self.TARGET_COLOR, self._to_pixels(self.model.target_pos), int(self.model.target_rad*self.screen.get_width())) def _to_pixels(self, pt): - """ Converts from real units in the 0-1 range to pixel units + """ Converts from real units in the 0-1 range to pixel units - :param pt: a point in real units - :type pt: list - :returns: the input point in pixel units - :rtype: list + :param pt: a point in real units + :type pt: list + :returns: the input point in pixel units + :rtype: list - """ + """ return [int(pt[0] * self.screen.get_width()), int(pt[1] * self.screen.get_height())] def blit(self): - """ Blit the ball onto the background surface """ + """ Blit the ball onto the background surface """ self.screen.blit(self.background_surface, (0, 0)) pygame.draw.circle(self.screen, self.BALL_COLOR, self._to_pixels(self.model.ball.position), int(self.model.ball.radius*self.screen.get_width())) +def run_trajectoryview(width, height, configuration, trajectory_file): + pygame.init() + pygame.display.set_caption('Pinball Domain') + screen = pygame.display.set_mode([width, height]) + + environment = PinballModel(configuration) + environment_view = PinballView(screen, environment) + + observations = np.loadtxt(trajectory_file) + + for obs in observations: + pygame.time.wait(50) + environment_view.model.ball.position = obs[:2] + environment_view.blit() + pygame.display.flip() + + pygame.quit() def run_pinballview(width, height, configuration): """ Controller function for a :class:`PinballView` @@ -558,8 +618,8 @@ def run_pinballview(width, height, configuration): if event.type == pygame.KEYUP or event.type == pygame.KEYDOWN: user_action = actions.get(event.key, PinballModel.ACC_NONE) - if environment.take_action(user_action) == environment.END_EPISODE: - done = True + if environment.take_action(user_action) == environment.END_EPISODE: + done = True environment_view.blit() @@ -574,12 +634,14 @@ def run_pinballview(width, height, configuration): default=500, help='screen width (default: 500)') parser.add_argument('--height', action='store', type=int, default=500, help='screen height (default: 500)') - parser.add_argument('-r', '--rlglue', action='store_true', help='expose the environment through RL-Glue') + parser.add_argument('--rlglue', action='store_true', help='expose the environment through RL-Glue') + parser.add_argument('--trajectory', help='replay a trajectory') args = parser.parse_args() if args.rlglue: - print 'Starting rl-glue' - EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration)) + print 'Starting rl-glue' + EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration)) + elif args.trajectory: + run_trajectoryview(args.width, args.height, args.configuration, args.trajectory) else: run_pinballview(args.width, args.height, args.configuration) - From e25f7fb02d0bf1de6b2b0e9b0f6f2adb6c00e725 Mon Sep 17 00:00:00 2001 From: Pierre-Luc Bacon Date: Thu, 20 Feb 2014 13:25:06 -0500 Subject: [PATCH 2/2] Add example experiment for the Pinball domain This example shows how to use the special TrajectoryRecorder decorator for saving trajectories. Furthermore, it uses IPython parallel to compute the performance of many agents in parallel. It can be used over a guerilla SSH cluster over the computers in your lab, or more cleanly on an MPI, PBS or AWS cluster. For local parallel computation, execute in a separate tab: $ ipcluster start -n 4 followed by $ python learn-flat-policy.py -a 100 -n 100 pinball_simple_single.cfg --- pyrl/experiments/learn-flat-policy.py | 72 ++++++++++++++++++++++ pyrl/experiments/pinball_simple_single.cfg | 15 +++++ pyrl/misc/benchmark.py | 53 ++++++++++++++++ 3 files changed, 140 insertions(+) create mode 100644 pyrl/experiments/learn-flat-policy.py create mode 100644 pyrl/experiments/pinball_simple_single.cfg create mode 100644 pyrl/misc/benchmark.py diff --git a/pyrl/experiments/learn-flat-policy.py b/pyrl/experiments/learn-flat-policy.py new file mode 100644 index 0000000..c7ca7da --- /dev/null +++ b/pyrl/experiments/learn-flat-policy.py @@ -0,0 +1,72 @@ +#!/usr/bin/python +import os +import cPickle +import argparse + +from IPython.parallel import Client + +def learn_policy(agentid): + from pyrl.agents.sarsa_lambda import sarsa_lambda + from pyrl.rlglue import RLGlueLocal as RLGlueLocal + from pyrl.environments.pinball import PinballRLGlue + from pyrl.misc.benchmark import TrajectoryRecorder + import cPickle + import csv + import os + + prefix = 'flat-policy-%d-agent%d'%(os.getpid(),agentid) + + # Create agent and environments + agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=1.0, lmbda=0.9, + params={'name':'fourier', 'order':4}) + + environment = TrajectoryRecorder(PinballRLGlue(environment_name), prefix + '-trajectory') + + score_file = csv.writer(open(prefix + '-scores.csv', 'wb')) + + # Connect to RL-Glue + rlglue = RLGlueLocal.LocalGlue(environment, agent) + rlglue.RL_init() + + # Execute episodes + scores = [] + for i in xrange(nepisodes): + print 'Episode ', i + terminated = rlglue.RL_episode(max_steps) + total_steps = rlglue.RL_num_steps() + total_reward = rlglue.RL_return() + + print '\t %d steps, %d reward, %d terminated'%(total_steps, total_reward, terminated) + score = [i, total_steps, total_reward, terminated] + scores.append(score) + score_file.writerow(score) + + rlglue.RL_cleanup() + + cPickle.dump(agent, open(prefix + '.pl', 'wb')) + + return scores + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Learn the behavior policy over options') + parser.add_argument('environment', help='environment configuration') + parser.add_argument('-n', '--number-episodes', dest='nepisodes', type=int, + default=100, help='the number of episodes to execute for\ + learning the policy over options (default: 100)') + parser.add_argument('-s', '--max-steps', dest='max_steps', type=int, + default=10000, help='the maximum number of steps that the\ + agent is allowed to take in the environment') + parser.add_argument('-a', '--number-agents', type=int, default=100, help='number of agents to average over') + parser.add_argument('-p', '--prefix', action='store', type=str, + dest='prefix', help="output prefix (default: dataset)") + args = parser.parse_args() + + if not args.prefix: + args.prefix = os.path.splitext(os.path.basename(args.environment))[0] + + rc = Client() + dview = rc[:] + dview.block = True + dview.push(dict({'environment_name':args.environment, 'nepisodes':args.nepisodes, 'max_steps': args.max_steps})) + dview.map(learn_policy, range(args.number_agents)) + diff --git a/pyrl/experiments/pinball_simple_single.cfg b/pyrl/experiments/pinball_simple_single.cfg new file mode 100644 index 0000000..5c34b7d --- /dev/null +++ b/pyrl/experiments/pinball_simple_single.cfg @@ -0,0 +1,15 @@ +ball 0.02 +target 0.9 0.2 0.04 +start 0.2 0.9 + +polygon 0.0 0.0 0.0 0.01 1.0 0.01 1.0 0.0 +polygon 0.0 0.0 0.01 0.0 0.01 1.0 0.0 1.0 +polygon 0.0 1.0 0.0 0.99 1.0 0.99 1.0 1.0 +polygon 1.0 1.0 0.99 1.0 0.99 0.0 1.0 0.0 + +polygon 0.35 0.4 0.45 0.55 0.43 0.65 0.3 0.7 0.45 0.7 0.5 0.6 0.45 0.35 +polygon 0.2 0.6 0.25 0.55 0.15 0.5 0.15 0.45 0.2 0.3 0.12 0.27 0.075 0.35 0.09 0.55 +polygon 0.3 0.8 0.6 0.75 0.8 0.8 0.8 0.9 0.6 0.85 0.3 0.9 +polygon 0.8 0.7 0.975 0.65 0.75 0.5 0.9 0.3 0.7 0.35 0.63 0.65 +polygon 0.6 0.25 0.3 0.07 0.15 0.175 0.15 0.2 0.3 0.175 0.6 0.3 +polygon 0.75 0.025 0.8 0.24 0.725 0.27 0.7 0.025 diff --git a/pyrl/misc/benchmark.py b/pyrl/misc/benchmark.py new file mode 100644 index 0000000..49fb8aa --- /dev/null +++ b/pyrl/misc/benchmark.py @@ -0,0 +1,53 @@ +from rlglue.environment.Environment import Environment +from itertools import product +from functools import partial, reduce +import operator + +class TrajectoryRecorder(Environment): + """ Records trajectories taken in the environment """ + + def __init__(self, decorated, filename): + """ This class provides a decorator wrapper to seamlessly record + a trajectory taken in the environment. + + :param environment: an rlglue environment + :type environment: Environment + + """ + self.decorated = decorated + self.filename = filename + self.trajectory_count = 0 + self.trajectory_file = None + + def env_init(self): + return self.decorated.env_init() + + def env_start(self): + if self.trajectory_file and not self.trajectory_file.closed: + self.trajectory_file.close() + self.trajectory_count += 1 + + obs = self.decorated.env_start() + self.trajectory_file = open('%s-%d.dat'%(self.filename, self.trajectory_count), 'wb') + self.trajectory_file.write(' '.join(map(str, obs.doubleArray)) + '\n') + self.trajectory_file.flush() + + return obs + + def env_step(self, action): + returnRO = self.decorated.env_step(action) + self.trajectory_file.write(' '.join(map(str, returnRO.o.doubleArray)) + '\n') + self.trajectory_file.flush() + + if returnRO.terminal: + self.trajectory_count += 1 + self.trajectory_file.close() + + return returnRO + + def env_cleanup(self): + self.decorated.env_cleanup() + + def env_message(self, message): + return self.decorated.env_message(message) +