diff --git a/pyrl/environments/pinball.py b/pyrl/environments/pinball.py index 483405d..cb61537 100644 --- a/pyrl/environments/pinball.py +++ b/pyrl/environments/pinball.py @@ -8,8 +8,9 @@ """ +import sys import random -import argparse, os +import argparse import numpy as np from itertools import * @@ -34,25 +35,26 @@ class BallModel: """ DRAG = 0.995 - def __init__(self, start_position, radius): - """ - :param start_position: The initial position - :type start_position: float - :param radius: The ball radius - :type radius: float - """ + def __init__(self, start_position, radius, noise=0.03): + """ + :param start_position: The initial position + :type start_position: float + :param radius: The ball radius + :type radius: float + """ self.position = start_position self.radius = radius + self.noise = noise self.xdot = 0.0 self.ydot = 0.0 def add_impulse(self, delta_xdot, delta_ydot): - """ Change the momentum of the ball + """ Change the momentum of the ball :param delta_xdot: The change in velocity in the x direction - :type delta_xdot: float - :param delta_ydot: The change in velocity in the y direction - :type delta_ydot: float - """ + :type delta_xdot: float + :param delta_ydot: The change in velocity in the y direction + :type delta_ydot: float + """ self.xdot += delta_xdot/5.0 self.ydot += delta_ydot/5.0 self._clip(self.xdot) @@ -63,13 +65,20 @@ def add_drag(self): self.xdot *= self.DRAG self.ydot *= self.DRAG + def add_noise(self): + """ Corrupt the velocity with some Gaussian noise """ + self.xdot += np.random.normal(0.0, self.noise) + self.ydot += np.random.normal(0.0, self.noise) + self._clip(self.xdot) + self._clip(self.ydot) + def step(self): - """ Move the ball by one increment """ + """ Move the ball by one increment """ self.position[0] += self.xdot*self.radius/20.0 self.position[1] += self.ydot*self.radius/20.0 def _clip(self, val, low=-1, high=1): - """ Clip a value in a given range """ + """ Clip a value in a given range """ if val > high: val = high if val < low: @@ -84,10 +93,10 @@ class PinballObstacle: compute the appropriate effect to apply on the ball. """ def __init__(self, points): - """ - :param points: A list of points defining the polygon - :type points: list of lists - """ + """ + :param points: A list of points defining the polygon + :type points: list of lists + """ self.points = points self.min_x = min(self.points, key=lambda pt: pt[0])[0] self.max_x = max(self.points, key=lambda pt: pt[0])[0] @@ -100,8 +109,8 @@ def __init__(self, points): def collision(self, ball): """ Determines if the ball hits this obstacle - :param ball: An instance of :class:`BallModel` - :type ball: :class:`BallModel` + :param ball: An instance of :class:`BallModel` + :type ball: :class:`BallModel` """ self._double_collision = False @@ -131,11 +140,11 @@ def collision(self, ball): def collision_effect(self, ball): """ Based of the collision detection result triggered - in :func:`PinballObstacle.collision`, compute the + in :func:`PinballObstacle.collision`, compute the change in velocity. - :param ball: An instance of :class:`BallModel` - :type ball: :class:`BallModel` + :param ball: An instance of :class:`BallModel` + :type ball: :class:`BallModel` """ if self._double_collision: @@ -164,12 +173,12 @@ def collision_effect(self, ball): def _select_edge(self, intersect1, intersect2, ball): """ If the ball hits a corner, select one of two edges. - :param intersect1: A pair of points defining an edge of the polygon - :type intersect1: list of lists - :param intersect2: A pair of points defining an edge of the polygon - :type intersect2: list of lists - :returns: The edge with the smallest angle with the velocity vector - :rtype: list of lists + :param intersect1: A pair of points defining an edge of the polygon + :type intersect1: list of lists + :param intersect2: A pair of points defining an edge of the polygon + :type intersect2: list of lists + :returns: The edge with the smallest angle with the velocity vector + :rtype: list of lists """ velocity = np.array([ball.xdot, ball.ydot]) @@ -189,15 +198,15 @@ def _select_edge(self, intersect1, intersect2, ball): return intersect2 def _angle(self, v1, v2): - """ Compute the angle difference between two vectors + """ Compute the angle difference between two vectors - :param v1: The x,y coordinates of the vector - :type: v1: list - :param v2: The x,y coordinates of the vector - :type: v2: list - :rtype: float + :param v1: The x,y coordinates of the vector + :type: v1: list + :param v2: The x,y coordinates of the vector + :type: v2: list + :rtype: float - """ + """ angle_diff = np.arctan2(v1[0], v1[1]) - np.arctan2(v2[0], v2[1]) if angle_diff < 0: angle_diff += 2*np.pi @@ -206,13 +215,13 @@ def _angle(self, v1, v2): def _intercept_edge(self, pt_pair, ball): """ Compute the projection on and edge and find out - if it intercept with the ball. - :param pt_pair: The pair of points defining an edge - :type pt_pair: list of lists - :param ball: An instance of :class:`BallModel` - :type ball: :class:`BallModel` - :returns: True if the ball has hit an edge of the polygon - :rtype: bool + if it intercept with the ball. + :param pt_pair: The pair of points defining an edge + :type pt_pair: list of lists + :param ball: An instance of :class:`BallModel` + :type ball: :class:`BallModel` + :returns: True if the ball has hit an edge of the polygon + :rtype: bool """ # Find the projection on an edge @@ -268,9 +277,9 @@ class PinballModel: def __init__(self, configuration): """ Read a configuration file for Pinball and draw the domain to screen - :param configuration: a configuration file containing the polygons, + :param configuration: a configuration file containing the polygons, source(s) and target location. - :type configuration: str + :type configuration: str """ self.action_effects = {self.ACC_X:(1, 0), self.ACC_Y:(0, 1), self.DEC_X:(-1, 0), self.DEC_Y:(0, -1), self.ACC_NONE:(0, 0)} @@ -296,31 +305,50 @@ def __init__(self, configuration): self.target_pos = [float(tokens[1]), float(tokens[2])] self.target_rad = float(tokens[3]) elif tokens[0] == 'start': - start_pos = zip(*[iter(map(float, tokens[1:]))]*2) + start_pos = zip(*[iter(map(float, tokens[1:]))]*2) elif tokens[0] == 'ball': ball_rad = float(tokens[1]) self.ball = BallModel(list(random.choice(start_pos)), ball_rad) + def set_start_position(self, position): + """ Set the initial position of the ball + + :param position: The ball's initial position + :type position: list of float + """ + self.ball.position = position + + def set_start_velocity(self, velocity): + """ Set the initial ball velocity + + :param velocity: The ball's initial velocity + :type velocity: list of float + + """ + self.ball.xdot = velocity[0] + self.ball.ydot = velocity[1] + def get_state(self): - """ Access the current 4-dimensional state vector + """ Access the current 4-dimensional state vector - :returns: a list containing the x position, y position, xdot, ydot - :rtype: list + :returns: a list containing the x position, y position, xdot, ydot + :rtype: list - """ + """ return [self.ball.position[0], self.ball.position[1], self.ball.xdot, self.ball.ydot] def take_action(self, action): """ Take a step in the environment - :param action: The action to apply over the ball + :param action: The action to apply over the ball :type action: int - """ + """ for i in xrange(20): - if i == 0: + if i == 0 and action != self.ACC_NONE: self.ball.add_impulse(*self.action_effects[action]) + self.ball.add_noise() self.ball.step() @@ -354,16 +382,16 @@ def take_action(self, action): return self.THRUST_PENALTY def episode_ended(self): - """ Find out if the ball reached the target + """ Find out if the ball reached the target :returns: True if the ball reched the target position - :rtype: bool + :rtype: bool - """ + """ return np.linalg.norm(np.array(self.ball.position)-np.array(self.target_pos)) < self.target_rad def _check_bounds(self): - """ Make sure that the ball stays within the environment """ + """ Make sure that the ball stays within the environment """ if self.ball.position[0] > 1.0: self.ball.position[0] = 0.95 if self.ball.position[0] < 0.0: @@ -378,25 +406,27 @@ class PinballRLGlue(Environment): """This class is an RL-Glue adapter for :class:`pinball.PinballModel` """ name = "Pinball" + domain_name = 'pinball for reinforcement learning' - def __init__(self, configuration=os.path.join(os.path.dirname(__file__), - 'configs', 'pinball', 'pinball_simple_single.cfg')): + def __init__(self, configuration): """ This class exposes a Pinball environment over RL-Glue - :param configuration: a configuration file for this environment - :type configuration: str + :param configuration: a configuration file for this environment + :type configuration: str - """ - self.pinball = None + """ + self.pinball = None + self.initial_state = None + self.target_location = None self.configuration = configuration def make_taskspec(self): - """ Create a task specification string for this environment + """ Create a task specification string for this environment - :returns: a task specfication string - :rtype: str + :returns: a task specfication string + :rtype: str - """ + """ ts = TaskSpecRLGlue.TaskSpec(discount_factor=1.0, reward_range=(-5, 10000)) ts.addDiscreteAction((0, 4)) @@ -407,39 +437,45 @@ def make_taskspec(self): ts.addContinuousObservation((0.0, 1)) ts.setEpisodic() - ts.setExtra(self.name) + ts.setExtra(self.domain_name) return ts.toTaskSpec() def env_init(self): """ Declare the parameters for this environment - :returns: A string describing the environment - :rtype: str + :returns: A string describing the environment + :rtype: str - """ + """ return self.make_taskspec() def env_start(self): """ Instantiate a new :class:`PinballModel` environment :returns: The initial state - :rtype: :class:`Observation` + :rtype: :class:`Observation` - """ - self.pinball = PinballModel(self.configuration) - obs = Observation() + """ + self.pinball = PinballModel(self.configuration) + if self.initial_state: + self.pinball.set_start_position(self.initial_state[:2]) + self.pinball.set_start_velocity(self.initial_state[2:]) + if self.target_location: + self.pinball.target_pos = self.target_location + + obs = Observation() obs.doubleArray = self.pinball.get_state() - return obs + return obs def env_step(self, action): - """ Take a step in the environment + """ Take a step in the environment - :param action: The action that the agent wants to take - :returns: The next state, reward and whether the current state is terminal - :rtype: :class:`Reward_observation_terminal` + :param action: The action that the agent wants to take + :returns: The next state, reward and whether the current state is terminal + :rtype: :class:`Reward_observation_terminal` - """ + """ returnRO = Reward_observation_terminal() returnRO.r = self.pinball.take_action(action.intArray[0]) @@ -449,28 +485,35 @@ def env_step(self, action): returnRO.o = obs returnRO.terminal = self.pinball.episode_ended() + return returnRO def env_cleanup(self): - """ Do nothing. Called once the episode has terminated """ + """ Do nothing. Called once the episode has terminated """ pass - def env_message(message): - """ Handle a custom message sent over RL-Glue + def env_message(self, message): + """ Handle a custom message sent over RL-Glue - :param message: A message containing the action to execute - :returns: The current configuration filename if the message - is of the form ``config file=`` or ``config file=``. In the - later case, the string following the ``=`` symbol is the - path to a new configuration file. - :rtype: str + :param message: A message containing the action to execute + :returns: The current configuration filename if the message + is of the form ``config file=`` or ``config file=``. In the + later case, the string following the ``=`` symbol is the + path to a new configuration file. + :rtype: str - """ - if message == 'config file?': - return self.configuration + """ + if message == 'config file?': + return self.configuration if message.startswith('config file='): - self.configuration = message.split('=')[1] - return self.configuration + self.configuration = message.split('=')[1] + return self.configuration + if message.startswith('set-start-state'): + self.initial_state = map(float, message.split()[1:]) + return self.configuration + if message.startswith('set-goal-location'): + self.target_location = map(float, message.split()[1:]) + return self.configuration return "I don't know how to respond to your message" @@ -484,12 +527,12 @@ class PinballView: """ def __init__(self, screen, model): - """ - :param screen: a pygame surface - :type screen: :class:`pygame.Surface` - :param model: an instance of a :class:`PinballModel` - :type model: :class:`PinballModel` - """ + """ + :param screen: a pygame surface + :type screen: :class:`pygame.Surface` + :param model: an instance of a :class:`PinballModel` + :type model: :class:`PinballModel` + """ self.screen = screen self.model = model @@ -508,22 +551,39 @@ def __init__(self, screen, model): self.background_surface, self.TARGET_COLOR, self._to_pixels(self.model.target_pos), int(self.model.target_rad*self.screen.get_width())) def _to_pixels(self, pt): - """ Converts from real units in the 0-1 range to pixel units + """ Converts from real units in the 0-1 range to pixel units - :param pt: a point in real units - :type pt: list - :returns: the input point in pixel units - :rtype: list + :param pt: a point in real units + :type pt: list + :returns: the input point in pixel units + :rtype: list - """ + """ return [int(pt[0] * self.screen.get_width()), int(pt[1] * self.screen.get_height())] def blit(self): - """ Blit the ball onto the background surface """ + """ Blit the ball onto the background surface """ self.screen.blit(self.background_surface, (0, 0)) pygame.draw.circle(self.screen, self.BALL_COLOR, self._to_pixels(self.model.ball.position), int(self.model.ball.radius*self.screen.get_width())) +def run_trajectoryview(width, height, configuration, trajectory_file): + pygame.init() + pygame.display.set_caption('Pinball Domain') + screen = pygame.display.set_mode([width, height]) + + environment = PinballModel(configuration) + environment_view = PinballView(screen, environment) + + observations = np.loadtxt(trajectory_file) + + for obs in observations: + pygame.time.wait(50) + environment_view.model.ball.position = obs[:2] + environment_view.blit() + pygame.display.flip() + + pygame.quit() def run_pinballview(width, height, configuration): """ Controller function for a :class:`PinballView` @@ -558,8 +618,8 @@ def run_pinballview(width, height, configuration): if event.type == pygame.KEYUP or event.type == pygame.KEYDOWN: user_action = actions.get(event.key, PinballModel.ACC_NONE) - if environment.take_action(user_action) == environment.END_EPISODE: - done = True + if environment.take_action(user_action) == environment.END_EPISODE: + done = True environment_view.blit() @@ -574,12 +634,14 @@ def run_pinballview(width, height, configuration): default=500, help='screen width (default: 500)') parser.add_argument('--height', action='store', type=int, default=500, help='screen height (default: 500)') - parser.add_argument('-r', '--rlglue', action='store_true', help='expose the environment through RL-Glue') + parser.add_argument('--rlglue', action='store_true', help='expose the environment through RL-Glue') + parser.add_argument('--trajectory', help='replay a trajectory') args = parser.parse_args() if args.rlglue: - print 'Starting rl-glue' - EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration)) + print 'Starting rl-glue' + EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration)) + elif args.trajectory: + run_trajectoryview(args.width, args.height, args.configuration, args.trajectory) else: run_pinballview(args.width, args.height, args.configuration) - diff --git a/pyrl/experiments/learn-flat-policy.py b/pyrl/experiments/learn-flat-policy.py new file mode 100644 index 0000000..c7ca7da --- /dev/null +++ b/pyrl/experiments/learn-flat-policy.py @@ -0,0 +1,72 @@ +#!/usr/bin/python +import os +import cPickle +import argparse + +from IPython.parallel import Client + +def learn_policy(agentid): + from pyrl.agents.sarsa_lambda import sarsa_lambda + from pyrl.rlglue import RLGlueLocal as RLGlueLocal + from pyrl.environments.pinball import PinballRLGlue + from pyrl.misc.benchmark import TrajectoryRecorder + import cPickle + import csv + import os + + prefix = 'flat-policy-%d-agent%d'%(os.getpid(),agentid) + + # Create agent and environments + agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=1.0, lmbda=0.9, + params={'name':'fourier', 'order':4}) + + environment = TrajectoryRecorder(PinballRLGlue(environment_name), prefix + '-trajectory') + + score_file = csv.writer(open(prefix + '-scores.csv', 'wb')) + + # Connect to RL-Glue + rlglue = RLGlueLocal.LocalGlue(environment, agent) + rlglue.RL_init() + + # Execute episodes + scores = [] + for i in xrange(nepisodes): + print 'Episode ', i + terminated = rlglue.RL_episode(max_steps) + total_steps = rlglue.RL_num_steps() + total_reward = rlglue.RL_return() + + print '\t %d steps, %d reward, %d terminated'%(total_steps, total_reward, terminated) + score = [i, total_steps, total_reward, terminated] + scores.append(score) + score_file.writerow(score) + + rlglue.RL_cleanup() + + cPickle.dump(agent, open(prefix + '.pl', 'wb')) + + return scores + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Learn the behavior policy over options') + parser.add_argument('environment', help='environment configuration') + parser.add_argument('-n', '--number-episodes', dest='nepisodes', type=int, + default=100, help='the number of episodes to execute for\ + learning the policy over options (default: 100)') + parser.add_argument('-s', '--max-steps', dest='max_steps', type=int, + default=10000, help='the maximum number of steps that the\ + agent is allowed to take in the environment') + parser.add_argument('-a', '--number-agents', type=int, default=100, help='number of agents to average over') + parser.add_argument('-p', '--prefix', action='store', type=str, + dest='prefix', help="output prefix (default: dataset)") + args = parser.parse_args() + + if not args.prefix: + args.prefix = os.path.splitext(os.path.basename(args.environment))[0] + + rc = Client() + dview = rc[:] + dview.block = True + dview.push(dict({'environment_name':args.environment, 'nepisodes':args.nepisodes, 'max_steps': args.max_steps})) + dview.map(learn_policy, range(args.number_agents)) + diff --git a/pyrl/experiments/pinball_simple_single.cfg b/pyrl/experiments/pinball_simple_single.cfg new file mode 100644 index 0000000..5c34b7d --- /dev/null +++ b/pyrl/experiments/pinball_simple_single.cfg @@ -0,0 +1,15 @@ +ball 0.02 +target 0.9 0.2 0.04 +start 0.2 0.9 + +polygon 0.0 0.0 0.0 0.01 1.0 0.01 1.0 0.0 +polygon 0.0 0.0 0.01 0.0 0.01 1.0 0.0 1.0 +polygon 0.0 1.0 0.0 0.99 1.0 0.99 1.0 1.0 +polygon 1.0 1.0 0.99 1.0 0.99 0.0 1.0 0.0 + +polygon 0.35 0.4 0.45 0.55 0.43 0.65 0.3 0.7 0.45 0.7 0.5 0.6 0.45 0.35 +polygon 0.2 0.6 0.25 0.55 0.15 0.5 0.15 0.45 0.2 0.3 0.12 0.27 0.075 0.35 0.09 0.55 +polygon 0.3 0.8 0.6 0.75 0.8 0.8 0.8 0.9 0.6 0.85 0.3 0.9 +polygon 0.8 0.7 0.975 0.65 0.75 0.5 0.9 0.3 0.7 0.35 0.63 0.65 +polygon 0.6 0.25 0.3 0.07 0.15 0.175 0.15 0.2 0.3 0.175 0.6 0.3 +polygon 0.75 0.025 0.8 0.24 0.725 0.27 0.7 0.025 diff --git a/pyrl/misc/benchmark.py b/pyrl/misc/benchmark.py new file mode 100644 index 0000000..49fb8aa --- /dev/null +++ b/pyrl/misc/benchmark.py @@ -0,0 +1,53 @@ +from rlglue.environment.Environment import Environment +from itertools import product +from functools import partial, reduce +import operator + +class TrajectoryRecorder(Environment): + """ Records trajectories taken in the environment """ + + def __init__(self, decorated, filename): + """ This class provides a decorator wrapper to seamlessly record + a trajectory taken in the environment. + + :param environment: an rlglue environment + :type environment: Environment + + """ + self.decorated = decorated + self.filename = filename + self.trajectory_count = 0 + self.trajectory_file = None + + def env_init(self): + return self.decorated.env_init() + + def env_start(self): + if self.trajectory_file and not self.trajectory_file.closed: + self.trajectory_file.close() + self.trajectory_count += 1 + + obs = self.decorated.env_start() + self.trajectory_file = open('%s-%d.dat'%(self.filename, self.trajectory_count), 'wb') + self.trajectory_file.write(' '.join(map(str, obs.doubleArray)) + '\n') + self.trajectory_file.flush() + + return obs + + def env_step(self, action): + returnRO = self.decorated.env_step(action) + self.trajectory_file.write(' '.join(map(str, returnRO.o.doubleArray)) + '\n') + self.trajectory_file.flush() + + if returnRO.terminal: + self.trajectory_count += 1 + self.trajectory_file.close() + + return returnRO + + def env_cleanup(self): + self.decorated.env_cleanup() + + def env_message(self, message): + return self.decorated.env_message(message) +