diff --git a/pyrl/environments/pinball.py b/pyrl/environments/pinball.py
index 483405d..cb61537 100644
--- a/pyrl/environments/pinball.py
+++ b/pyrl/environments/pinball.py
@@ -8,8 +8,9 @@
 
 """
 
+import sys
 import random
-import argparse, os
+import argparse
 import numpy as np
 from itertools import *
 
@@ -34,25 +35,26 @@ class BallModel:
     """
     DRAG = 0.995
 
-    def __init__(self, start_position, radius):
-	"""
-	:param start_position: The initial position
-	:type start_position: float
-	:param radius: The ball radius
-	:type radius: float
-	"""
+    def __init__(self, start_position, radius, noise=0.03):
+        """
+        :param start_position: The initial position
+        :type start_position: float
+        :param radius: The ball radius
+        :type radius: float
+        """
         self.position = start_position
         self.radius = radius
+        self.noise = noise
         self.xdot = 0.0
         self.ydot = 0.0
 
     def add_impulse(self, delta_xdot, delta_ydot):
-	""" Change the momentum of the ball
+        """ Change the momentum of the ball
         :param delta_xdot: The change in velocity in the x direction
-	:type delta_xdot: float
-	:param delta_ydot: The change in velocity in the y direction
-	:type delta_ydot: float
-	"""
+        :type delta_xdot: float
+        :param delta_ydot: The change in velocity in the y direction
+        :type delta_ydot: float
+        """
         self.xdot += delta_xdot/5.0
         self.ydot += delta_ydot/5.0
         self._clip(self.xdot)
@@ -63,13 +65,20 @@ def add_drag(self):
         self.xdot *= self.DRAG
         self.ydot *= self.DRAG
 
+    def add_noise(self):
+        """ Corrupt the velocity with zero-mean Gaussian noise of scale
+        ``self.noise`` and keep each component within [-1, 1] """
+        # Assign the clamped value: calling _clip alone discards its result
+        self.xdot = min(1.0, max(-1.0, self.xdot + np.random.normal(0.0, self.noise)))
+        self.ydot = min(1.0, max(-1.0, self.ydot + np.random.normal(0.0, self.noise)))
+
     def step(self):
-	""" Move the ball by one increment """
+        """ Move the ball by one increment """
         self.position[0] += self.xdot*self.radius/20.0
         self.position[1] += self.ydot*self.radius/20.0
 
     def _clip(self, val, low=-1, high=1):
-	""" Clip a value in a given range """
+        """ Clip a value in a given range """
         if val > high:
             val = high
         if val < low:
@@ -84,10 +93,10 @@ class PinballObstacle:
     compute the appropriate effect to apply on the ball.
     """
     def __init__(self, points):
-	"""
-	:param points: A list of points defining the polygon
-	:type points: list of lists
-	"""
+        """
+        :param points: A list of points defining the polygon
+        :type points: list of lists
+        """
         self.points = points
         self.min_x = min(self.points, key=lambda pt: pt[0])[0]
         self.max_x = max(self.points, key=lambda pt: pt[0])[0]
@@ -100,8 +109,8 @@ def __init__(self, points):
     def collision(self, ball):
         """ Determines if the ball hits this obstacle
 
-	:param ball: An instance of :class:`BallModel`
-	:type ball: :class:`BallModel`
+        :param ball: An instance of :class:`BallModel`
+        :type ball: :class:`BallModel`
         """
         self._double_collision = False
 
@@ -131,11 +140,11 @@ def collision(self, ball):
 
     def collision_effect(self, ball):
         """ Based of the collision detection result triggered
-	in :func:`PinballObstacle.collision`, compute the
+        in :func:`PinballObstacle.collision`, compute the
         change in velocity.
 
-	:param ball: An instance of :class:`BallModel`
-	:type ball: :class:`BallModel`
+        :param ball: An instance of :class:`BallModel`
+        :type ball: :class:`BallModel`
 
         """
         if self._double_collision:
@@ -164,12 +173,12 @@ def collision_effect(self, ball):
     def _select_edge(self, intersect1, intersect2, ball):
         """ If the ball hits a corner, select one of two edges.
 
-	:param intersect1: A pair of points defining an edge of the polygon
-	:type intersect1: list of lists
-	:param intersect2: A pair of points defining an edge of the polygon
-	:type intersect2: list of lists
-	:returns: The edge with the smallest angle with the velocity vector
-	:rtype: list of lists
+        :param intersect1: A pair of points defining an edge of the polygon
+        :type intersect1: list of lists
+        :param intersect2: A pair of points defining an edge of the polygon
+        :type intersect2: list of lists
+        :returns: The edge with the smallest angle with the velocity vector
+        :rtype: list of lists
 
         """
         velocity = np.array([ball.xdot, ball.ydot])
@@ -189,15 +198,15 @@ def _select_edge(self, intersect1, intersect2, ball):
         return intersect2
 
     def _angle(self, v1, v2):
-	""" Compute the angle difference between two vectors
+        """ Compute the angle difference between two vectors
 
-	:param v1: The x,y coordinates of the vector
-	:type: v1: list
-	:param v2: The x,y coordinates of the vector
-	:type: v2: list
-	:rtype: float
+        :param v1: The x,y coordinates of the vector
+        :type: v1: list
+        :param v2: The x,y coordinates of the vector
+        :type: v2: list
+        :rtype: float
 
-	"""
+        """
         angle_diff = np.arctan2(v1[0], v1[1]) - np.arctan2(v2[0], v2[1])
         if angle_diff < 0:
             angle_diff += 2*np.pi
@@ -206,13 +215,13 @@ def _angle(self, v1, v2):
     def _intercept_edge(self, pt_pair, ball):
         """ Compute the projection on and edge and find out
 
-	if it intercept with the ball.
-	:param pt_pair: The pair of points defining an edge
-	:type pt_pair: list of lists
-	:param ball: An instance of :class:`BallModel`
-	:type ball: :class:`BallModel`
-	:returns: True if the ball has hit an edge of the polygon
-	:rtype: bool
+        if it intercept with the ball.
+        :param pt_pair: The pair of points defining an edge
+        :type pt_pair: list of lists
+        :param ball: An instance of :class:`BallModel`
+        :type ball: :class:`BallModel`
+        :returns: True if the ball has hit an edge of the polygon
+        :rtype: bool
 
         """
         # Find the projection on an edge
@@ -268,9 +277,9 @@ class PinballModel:
     def __init__(self, configuration):
         """ Read a configuration file for Pinball and draw the domain to screen
 
-	:param configuration: a configuration file containing the polygons,
+        :param configuration: a configuration file containing the polygons,
         source(s) and target location.
-	:type configuration: str
+        :type configuration: str
 
         """
         self.action_effects = {self.ACC_X:(1, 0), self.ACC_Y:(0, 1), self.DEC_X:(-1, 0), self.DEC_Y:(0, -1), self.ACC_NONE:(0, 0)}
@@ -296,31 +305,50 @@ def __init__(self, configuration):
                     self.target_pos = [float(tokens[1]), float(tokens[2])]
                     self.target_rad = float(tokens[3])
                 elif tokens[0] == 'start':
-		    start_pos = zip(*[iter(map(float, tokens[1:]))]*2)
+                    start_pos = zip(*[iter(map(float, tokens[1:]))]*2)
                 elif tokens[0] == 'ball':
                     ball_rad = float(tokens[1])
 
         self.ball = BallModel(list(random.choice(start_pos)), ball_rad)
 
+    def set_start_position(self, position):
+        """ Set the initial position of the ball
+        :param position: The ball's initial position
+        :type position: sequence of float
+        """
+        # Copy into a list: BallModel.step updates the position in place
+        self.ball.position = list(position)
+
+    def set_start_velocity(self, velocity):
+        """ Overwrite the velocity of the ball
+
+        :param velocity: The x and y velocity components, in that order
+        :type velocity: list of float
+        """
+        ball = self.ball
+        ball.xdot = velocity[0]
+        ball.ydot = velocity[1]
+
     def get_state(self):
-	""" Access the current 4-dimensional state vector
+        """ Access the current 4-dimensional state vector
 
-	:returns: a list containing the x position, y position, xdot, ydot
-	:rtype: list
+        :returns: a list containing the x position, y position, xdot, ydot
+        :rtype: list
 
-	"""
+        """
         return [self.ball.position[0], self.ball.position[1], self.ball.xdot, self.ball.ydot]
 
     def take_action(self, action):
         """ Take a step in the environment
 
-	:param action: The action to apply over the ball
+        :param action: The action to apply over the ball
         :type action: int
 
-	"""
+        """
         for i in xrange(20):
-            if i == 0:
+            if i == 0 and action != self.ACC_NONE:
                 self.ball.add_impulse(*self.action_effects[action])
+                self.ball.add_noise()
 
             self.ball.step()
 
@@ -354,16 +382,16 @@ def take_action(self, action):
         return self.THRUST_PENALTY
 
     def episode_ended(self):
-	""" Find out if the ball reached the target
+        """ Find out if the ball reached the target
 
         :returns: True if the ball reched the target position
-	:rtype: bool
+        :rtype: bool
 
-	"""
+        """
         return np.linalg.norm(np.array(self.ball.position)-np.array(self.target_pos)) < self.target_rad
 
     def _check_bounds(self):
-	""" Make sure that the ball stays within the environment """
+        """ Make sure that the ball stays within the environment """
         if self.ball.position[0] > 1.0:
             self.ball.position[0] = 0.95
         if self.ball.position[0] < 0.0:
@@ -378,25 +406,27 @@ class PinballRLGlue(Environment):
     """This class is an RL-Glue adapter for :class:`pinball.PinballModel` """
 
     name = "Pinball"
+    domain_name = 'pinball for reinforcement learning'
 
-    def __init__(self, configuration=os.path.join(os.path.dirname(__file__),
-                                                  'configs', 'pinball', 'pinball_simple_single.cfg')):
+    def __init__(self, configuration):
         """ This class exposes a Pinball environment over RL-Glue
 
-	:param configuration: a configuration file for this environment
-	:type configuration: str
+        :param configuration: a configuration file for this environment
+        :type configuration: str
 
-	"""
-	self.pinball = None
+        """
+        self.pinball = None
+        self.initial_state = None
+        self.target_location = None
         self.configuration = configuration
 
     def make_taskspec(self):
-	""" Create a task specification string for this environment
+        """ Create a task specification string for this environment
 
-	:returns: a task specfication string
-	:rtype: str
+        :returns: a task specfication string
+        :rtype: str
 
-	"""
+        """
         ts = TaskSpecRLGlue.TaskSpec(discount_factor=1.0, reward_range=(-5, 10000))
 
         ts.addDiscreteAction((0, 4))
@@ -407,39 +437,45 @@ def make_taskspec(self):
         ts.addContinuousObservation((0.0, 1))
 
         ts.setEpisodic()
-        ts.setExtra(self.name)
+        ts.setExtra(self.domain_name)
 
         return ts.toTaskSpec()
 
     def env_init(self):
         """ Declare the parameters for this environment
 
-	:returns: A string describing the environment
-	:rtype: str
+        :returns: A string describing the environment
+        :rtype: str
 
-	"""
+        """
         return self.make_taskspec()
 
     def env_start(self):
         """ Instantiate a new :class:`PinballModel` environment
 
         :returns: The initial state
-	:rtype: :class:`Observation`
+        :rtype: :class:`Observation`
 
-	"""
-	self.pinball = PinballModel(self.configuration)
-	obs = Observation()
+        """
+        self.pinball = PinballModel(self.configuration)
+        if self.initial_state:
+            self.pinball.set_start_position(self.initial_state[:2])
+            self.pinball.set_start_velocity(self.initial_state[2:])
+        if self.target_location:
+            self.pinball.target_pos = self.target_location
+
+        obs = Observation()
         obs.doubleArray = self.pinball.get_state()
-	return obs
+        return obs
 
     def env_step(self, action):
-	""" Take a step in the environment
+        """ Take a step in the environment
 
-	:param action: The action that the agent wants to take
-	:returns: The next state, reward and whether the current state is terminal
-	:rtype: :class:`Reward_observation_terminal`
+        :param action: The action that the agent wants to take
+        :returns: The next state, reward and whether the current state is terminal
+        :rtype: :class:`Reward_observation_terminal`
 
-	"""
+        """
         returnRO = Reward_observation_terminal()
 
         returnRO.r = self.pinball.take_action(action.intArray[0])
@@ -449,28 +485,35 @@ def env_step(self, action):
         returnRO.o = obs
 
         returnRO.terminal = self.pinball.episode_ended()
+
         return returnRO
 
     def env_cleanup(self):
-	""" Do nothing. Called once the episode has terminated """
+        """ Do nothing. Called once the episode has terminated """
         pass
 
-    def env_message(message):
-	""" Handle a custom message sent over RL-Glue
+    def env_message(self, message):
+        """ Handle a custom message sent over RL-Glue
 
-	:param message: A message containing the action to execute
-	:returns: The current configuration filename if the message
-	is of the form ``config file=`` or ``config file=``. In the
-	later case, the string following the ``=`` symbol is the
-	path to a new configuration file.
-	:rtype: str
+        :param message: A message containing the action to execute
+        :returns: The current configuration filename if the message
+        is of the form ``config file=`` or ``config file=``. In the
+        later case, the string following the ``=`` symbol is the
+        path to a new configuration file.
+        :rtype: str
 
-	"""
-	if message == 'config file?':
-	    return self.configuration
+        """
+        if message == 'config file?':
+            return self.configuration
         if message.startswith('config file='):
-	    self.configuration = message.split('=')[1]
-	    return self.configuration
+            self.configuration = message.split('=')[1]
+            return self.configuration
+        if message.startswith('set-start-state'):
+            self.initial_state = map(float, message.split()[1:])
+            return self.configuration
+        if message.startswith('set-goal-location'):
+            self.target_location = map(float, message.split()[1:])
+            return self.configuration
 
         return "I don't know how to respond to your message"
 
@@ -484,12 +527,12 @@ class PinballView:
 
     """
     def __init__(self, screen, model):
-	"""
-	:param screen: a pygame surface
-	:type screen: :class:`pygame.Surface`
-	:param model: an instance of a :class:`PinballModel`
-	:type model: :class:`PinballModel`
-	"""
+        """
+        :param screen: a pygame surface
+        :type screen: :class:`pygame.Surface`
+        :param model: an instance of a :class:`PinballModel`
+        :type model: :class:`PinballModel`
+        """
         self.screen = screen
         self.model = model
 
@@ -508,22 +551,39 @@ def __init__(self, screen, model):
             self.background_surface, self.TARGET_COLOR, self._to_pixels(self.model.target_pos), int(self.model.target_rad*self.screen.get_width()))
 
     def _to_pixels(self, pt):
-	""" Converts from real units in the 0-1 range to pixel units
+        """ Converts from real units in the 0-1 range to pixel units
 
-	:param pt: a point in real units
-	:type pt: list
-	:returns: the input point in pixel units
-	:rtype: list
+        :param pt: a point in real units
+        :type pt: list
+        :returns: the input point in pixel units
+        :rtype: list
 
-	"""
+        """
         return [int(pt[0] * self.screen.get_width()), int(pt[1] * self.screen.get_height())]
 
     def blit(self):
-	""" Blit the ball onto the background surface """
+        """ Blit the ball onto the background surface """
         self.screen.blit(self.background_surface, (0, 0))
         pygame.draw.circle(self.screen, self.BALL_COLOR,
                            self._to_pixels(self.model.ball.position), int(self.model.ball.radius*self.screen.get_width()))
 
+def run_trajectoryview(width, height, configuration, trajectory_file):
+    """ Replay the trajectory stored in a text file, one observation per line
+    :param width, height: The size of the window in pixels
+    :param configuration: A Pinball configuration file
+    :param trajectory_file: The file of observations, x and y coming first
+    """
+    pygame.init()
+    pygame.display.set_caption('Pinball Domain')
+    screen = pygame.display.set_mode([width, height])
+    environment_view = PinballView(screen, PinballModel(configuration))
+    # ndmin=2 keeps a file holding a single observation iterable by row
+    for obs in np.loadtxt(trajectory_file, ndmin=2):
+        pygame.time.wait(50)
+        environment_view.model.ball.position = obs[:2]
+        environment_view.blit()
+        pygame.display.flip()
+    pygame.quit()
 
 def run_pinballview(width, height, configuration):
     """ Controller function for a :class:`PinballView`
@@ -558,8 +618,8 @@ def run_pinballview(width, height, configuration):
             if event.type == pygame.KEYUP or event.type == pygame.KEYDOWN:
                 user_action = actions.get(event.key, PinballModel.ACC_NONE)
 
-	if environment.take_action(user_action) == environment.END_EPISODE:
-	    done = True
+        if environment.take_action(user_action) == environment.END_EPISODE:
+            done = True
 
         environment_view.blit()
 
@@ -574,12 +634,14 @@ def run_pinballview(width, height, configuration):
                         default=500, help='screen width (default: 500)')
     parser.add_argument('--height', action='store', type=int,
                         default=500, help='screen height (default: 500)')
-    parser.add_argument('-r', '--rlglue', action='store_true', help='expose the environment through RL-Glue')
+    parser.add_argument('--rlglue', action='store_true', help='expose the environment through RL-Glue')
+    parser.add_argument('--trajectory', help='replay a trajectory')
     args = parser.parse_args()
 
     if args.rlglue:
-	print 'Starting rl-glue'
-	EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration))
+        print 'Starting rl-glue'
+        EnvironmentLoader.loadEnvironment(PinballRLGlue(args.configuration))
+    elif args.trajectory:
+        run_trajectoryview(args.width, args.height, args.configuration, args.trajectory)
     else:
         run_pinballview(args.width, args.height, args.configuration)
-
diff --git a/pyrl/experiments/learn-flat-policy.py b/pyrl/experiments/learn-flat-policy.py
new file mode 100644
index 0000000..c7ca7da
--- /dev/null
+++ b/pyrl/experiments/learn-flat-policy.py
@@ -0,0 +1,92 @@
+#!/usr/bin/python
+import os
+import cPickle
+import argparse
+
+from IPython.parallel import Client
+
+def learn_policy(agentid):
+    """ Train one SARSA(lambda) agent on Pinball and record its scores
+
+    The scores, the trajectories and the pickled agent are written to
+    files whose names start with ``flat-policy-<pid>-agent<agentid>``.
+
+    This function is mapped over IPython engines. The names
+    ``environment_name``, ``nepisodes`` and ``max_steps`` are not
+    parameters: they are read from the namespace pushed to the engines.
+
+    :param agentid: Identifier used in the names of the output files
+    :type agentid: int
+    :returns: One ``[episode, steps, reward, terminated]`` row per episode
+    :rtype: list of lists
+
+    """
+    from pyrl.agents.sarsa_lambda import sarsa_lambda
+    from pyrl.rlglue import RLGlueLocal as RLGlueLocal
+    from pyrl.environments.pinball import PinballRLGlue
+    from pyrl.misc.benchmark import TrajectoryRecorder
+    import cPickle
+    import csv
+    import os
+
+    prefix = 'flat-policy-%d-agent%d'%(os.getpid(),agentid)
+
+    # Create agent and environments
+    agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=1.0, lmbda=0.9,
+    params={'name':'fourier', 'order':4})
+
+    environment = TrajectoryRecorder(PinballRLGlue(environment_name), prefix + '-trajectory')
+
+    # Connect to RL-Glue
+    rlglue = RLGlueLocal.LocalGlue(environment, agent)
+    rlglue.RL_init()
+
+    # Execute episodes; the with block closes the score file even if an
+    # episode raises
+    scores = []
+    with open(prefix + '-scores.csv', 'wb') as score_handle:
+        score_file = csv.writer(score_handle)
+        for i in xrange(nepisodes):
+            print 'Episode ', i
+            terminated = rlglue.RL_episode(max_steps)
+            total_steps = rlglue.RL_num_steps()
+            total_reward = rlglue.RL_return()
+
+            print '\t %d steps, %d reward, %d terminated'%(total_steps, total_reward, terminated)
+            score = [i, total_steps, total_reward, terminated]
+            scores.append(score)
+            score_file.writerow(score)
+
+    rlglue.RL_cleanup()
+
+    # Save the trained agent, closing the file once it is written
+    with open(prefix + '.pl', 'wb') as agent_file:
+        cPickle.dump(agent, agent_file)
+
+    return scores
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Learn the behavior policy over options')
+    parser.add_argument('environment', help='environment configuration')
+    parser.add_argument('-n', '--number-episodes', dest='nepisodes', type=int,
+                    default=100, help='the number of episodes to execute for\
+                    learning the policy over options (default: 100)')
+    parser.add_argument('-s', '--max-steps', dest='max_steps', type=int,
+                    default=10000, help='the maximum number of steps that the\
+                    agent is allowed to take in the environment')
+    parser.add_argument('-a', '--number-agents', type=int, default=100, help='number of agents to average over')
+    parser.add_argument('-p', '--prefix', action='store', type=str,
+                    dest='prefix', help="output prefix (default: dataset)")
+    args = parser.parse_args()
+
+    # NOTE(review): args.prefix is resolved here but never sent to the
+    # engines; learn_policy builds its own file prefix -- confirm intent
+    if not args.prefix:
+        args.prefix = os.path.splitext(os.path.basename(args.environment))[0]
+
+    rc = Client()
+    dview = rc[:]
+    # Make the push and map calls below synchronous
+    dview.block = True
+    dview.push({'environment_name':args.environment, 'nepisodes':args.nepisodes, 'max_steps': args.max_steps})
+    dview.map(learn_policy, range(args.number_agents))
diff --git a/pyrl/experiments/pinball_simple_single.cfg b/pyrl/experiments/pinball_simple_single.cfg
new file mode 100644
index 0000000..5c34b7d
--- /dev/null
+++ b/pyrl/experiments/pinball_simple_single.cfg
@@ -0,0 +1,15 @@
+ball 0.02
+target 0.9 0.2 0.04
+start 0.2 0.9
+
+polygon 0.0 0.0 0.0 0.01 1.0 0.01 1.0 0.0
+polygon 0.0 0.0 0.01 0.0 0.01 1.0 0.0 1.0
+polygon 0.0 1.0 0.0 0.99 1.0 0.99 1.0 1.0
+polygon 1.0 1.0 0.99 1.0 0.99 0.0 1.0 0.0
+
+polygon 0.35 0.4 0.45 0.55 0.43 0.65 0.3 0.7 0.45 0.7 0.5 0.6 0.45 0.35
+polygon 0.2 0.6 0.25 0.55 0.15 0.5 0.15 0.45 0.2 0.3 0.12 0.27 0.075 0.35 0.09 0.55
+polygon 0.3 0.8 0.6 0.75 0.8 0.8 0.8 0.9 0.6 0.85 0.3 0.9
+polygon 0.8 0.7 0.975 0.65 0.75 0.5 0.9 0.3 0.7 0.35 0.63 0.65
+polygon 0.6 0.25 0.3 0.07 0.15 0.175 0.15 0.2 0.3 0.175 0.6 0.3
+polygon 0.75 0.025 0.8 0.24 0.725 0.27 0.7 0.025
diff --git a/pyrl/misc/benchmark.py b/pyrl/misc/benchmark.py
new file mode 100644
index 0000000..49fb8aa
--- /dev/null
+++ b/pyrl/misc/benchmark.py
@@ -0,0 +1,104 @@
+from rlglue.environment.Environment import Environment
+from itertools import product
+from functools import partial, reduce
+import operator
+
+class TrajectoryRecorder(Environment):
+    """ Records trajectories taken in the environment
+
+    Each episode is written to its own file, named
+    ``<filename>-<episode index>.dat``, holding one line of
+    space-separated observation values per recorded observation.
+
+    """
+
+    def __init__(self, decorated, filename):
+        """ This class provides a decorator wrapper to seamlessly record
+        a trajectory taken in the environment.
+
+        :param decorated: the rlglue environment to wrap
+        :type decorated: Environment
+        :param filename: prefix of the trajectory file names
+        :type filename: str
+
+        """
+        self.decorated = decorated
+        self.filename = filename
+        self.trajectory_count = 0
+        self.trajectory_file = None
+
+    def _close_trajectory(self):
+        """ Close the current trajectory file, if one is still open
+
+        The episode counter only advances when a file is actually closed,
+        so that the next episode is written under a new name.
+
+        """
+        if self.trajectory_file and not self.trajectory_file.closed:
+            self.trajectory_file.close()
+            self.trajectory_count += 1
+
+    def _record(self, observation):
+        """ Append one observation to the current trajectory file
+
+        :param observation: the observation whose ``doubleArray`` is written
+
+        """
+        self.trajectory_file.write(' '.join(map(str, observation.doubleArray)) + '\n')
+        # Flush right away so the file on disk follows the episode
+        self.trajectory_file.flush()
+
+    def env_init(self):
+        """ Forward the initialization to the decorated environment
+
+        :returns: whatever the decorated environment returns
+
+        """
+        return self.decorated.env_init()
+
+    def env_start(self):
+        """ Start a new episode and open a new trajectory file
+
+        A file left open by an episode that did not reach a terminal
+        state is closed first.
+
+        :returns: the initial observation of the decorated environment
+
+        """
+        self._close_trajectory()
+
+        obs = self.decorated.env_start()
+        self.trajectory_file = open('%s-%d.dat'%(self.filename, self.trajectory_count), 'wb')
+        self._record(obs)
+
+        return obs
+
+    def env_step(self, action):
+        """ Take a step in the decorated environment and record it
+
+        :param action: the action that the agent wants to take
+        :returns: the result of the decorated environment, unchanged
+
+        """
+        returnRO = self.decorated.env_step(action)
+        self._record(returnRO.o)
+
+        if returnRO.terminal:
+            self._close_trajectory()
+
+        return returnRO
+
+    def env_cleanup(self):
+        """ Close any open trajectory file and clean up the environment """
+        # An episode cut short (no terminal step) leaves its file open
+        self._close_trajectory()
+        self.decorated.env_cleanup()
+
+    def env_message(self, message):
+        """ Forward a custom message to the decorated environment
+
+        :param message: the message to hand to the decorated environment
+        :returns: the answer of the decorated environment
+
+        """
+        return self.decorated.env_message(message)