""" Continuous bandit: one state, always the same, 1-timestep episodes, and the reward is based on sin(action)
"""
def __init__(self):
    """ Declare the spaces of the bandit: a 1-element observation in [0, 1] and a 1-element action in [-1, 1]
    """
    super().__init__()

    # gym.spaces.Box stores float32 unless told otherwise. Building the bounds
    # directly in float32 (and naming the dtype) avoids an implicit down-cast
    # of float64 bounds when the space is constructed.
    self.observation_space = gym.spaces.Box(
        low=np.zeros((1,), dtype=np.float32),
        high=np.ones((1,), dtype=np.float32),
        dtype=np.float32,
    )
    self.action_space = gym.spaces.Box(
        low=-np.ones((1,), dtype=np.float32),
        high=np.ones((1,), dtype=np.float32),
        dtype=np.float32,
    )
def reset(self, *, seed=None, options=None):
    """ Start a new episode. The bandit has a single state, so the observation is always the zero vector

    Args:
        seed: accepted so that callers using the keyword-based Gym reset
            signature (``env.reset(seed=...)``) work; unused because nothing
            in this environment is random.
        options: accepted for the same reason; unused.

    Returns:
        ``(observation, info)`` where ``observation`` is a float32 array of
        shape ``(1,)`` filled with zeros and ``info`` is an empty dict.
    """
    # float32 matches the default dtype of the Box observation space, so the
    # observation is a member of the declared space.
    observation = np.zeros((1,), dtype=np.float32)
    return (observation, {})
def step(self, a):
    """ Play the single step of the episode: the reward is sum(sin(pi * a)) over the action components

    Args:
        a: the action, as an array or any array-like (list, tuple, scalar).

    Returns:
        The Gym 5-tuple ``(observation, reward, terminated, truncated, info)``.
        The observation is the constant zero vector, ``terminated`` is always
        True (1-timestep episodes), ``truncated`` is always False and ``info``
        is an empty dict.
    """
    # np.asarray lets plain Python sequences be used as actions (a list times
    # a float would otherwise raise), and leaves ndarray inputs untouched.
    action = np.asarray(a)

    # The action is scaled by pi (previously the mistyped literal 3.1516), so
    # that an action of +/-0.5 gives the extreme reward of +/-1 per component
    # and +/-1 gives 0.
    reward = np.sin(action * np.pi).sum()

    observation = np.zeros((1,), dtype=np.float32)
    return observation, reward, True, False, {}
class SimpleLargeActionEnv(gym.Env):
    """ Continuous bandit with high-dimensional action

    One constant observation, 1-timestep episodes, a 16-dimensional action in
    [-1, 1] and a reward equal to sum(sin(pi * a)) over the action components.
    """

    def __init__(self):
        """ Declare the 1-element observation space and the 16-element action space
        """
        super().__init__()

        # gym.spaces.Box stores float32 unless told otherwise. Building the
        # bounds directly in float32 (and naming the dtype) avoids an implicit
        # down-cast of float64 bounds when the space is constructed.
        self.observation_space = gym.spaces.Box(
            low=np.zeros((1,), dtype=np.float32),
            high=np.ones((1,), dtype=np.float32),
            dtype=np.float32,
        )
        self.action_space = gym.spaces.Box(
            low=-np.ones((16,), dtype=np.float32),
            high=np.ones((16,), dtype=np.float32),
            dtype=np.float32,
        )

    def reset(self, *, seed=None, options=None):
        """ Start a new episode; the observation is always the zero vector

        Args:
            seed: accepted so that callers using the keyword-based Gym reset
                signature (``env.reset(seed=...)``) work; unused because
                nothing in this environment is random.
            options: accepted for the same reason; unused.

        Returns:
            ``(observation, info)``: a float32 zero array of shape ``(1,)``
            and an empty dict.
        """
        # float32 matches the dtype of the observation space declared above.
        return (np.zeros((1,), dtype=np.float32), {})

    def step(self, a):
        """ Play the single step of the episode

        Args:
            a: the action, as an array or any array-like of 16 values.

        Returns:
            The Gym 5-tuple ``(observation, reward, terminated, truncated,
            info)``. ``terminated`` is always True (1-timestep episodes),
            ``truncated`` is always False and ``info`` is an empty dict.
        """
        # np.asarray lets plain Python sequences be used as actions and leaves
        # ndarray inputs untouched.
        action = np.asarray(a)

        # The action is scaled by pi (previously the mistyped literal 3.1516),
        # so each component contributes at most 1, reached at 0.5.
        reward = np.sin(action * np.pi).sum()

        return np.zeros((1,), dtype=np.float32), reward, True, False, {}
classSimpleTwoStatesEnv(SimpleActionOnlyEnv):
""" Contextual bandit. 1-timestep episodes, each episode is in one of two possible states (selected at random). The reward function depends on the state
""" 2 states, 10-timestep episodes, the action sometimes causes the agent to change state. Allows to check that V(s_t+1) is computed correctly by the agent
""" Continuous navigation task: observe (x, y), produce (dx, dy), and aim at reaching (0, 0). The range of observations is [0, 1] (so we try to reach a corner)
"""
def __init__(self):
    """ Declare the spaces of the navigation task: a 2-element observation in [0, 1] and a 2-element action in [-1, 1]
    """
    super().__init__()

    # gym.spaces.Box stores float32 unless told otherwise. Building the bounds
    # directly in float32 (and naming the dtype) avoids an implicit down-cast
    # of float64 bounds when the space is constructed.
    self.observation_space = gym.spaces.Box(
        low=np.zeros((2,), dtype=np.float32),
        high=np.ones((2,), dtype=np.float32),
        dtype=np.float32,
    )
    self.action_space = gym.spaces.Box(
        low=-np.ones((2,), dtype=np.float32),
        high=np.ones((2,), dtype=np.float32),
        dtype=np.float32,
    )

    # Current (x, y) position; placeholder at the origin until reset() draws
    # a real starting point.
    self.state = np.zeros((2,), dtype=np.float32)

    # Step counter of the current episode. Initialised here so the attribute
    # exists on every instance, not only after the first reset().
    self._timestep = 0
def reset(self, *, seed=None, options=None):
    """ Start a new episode from a random position, with x and y each drawn uniformly in [0, 1)

    Args:
        seed: optional seed. When given, a private random generator is created
            from it and used for this and all later resets, which makes the
            starting positions reproducible. When never given, the global
            ``random`` module is used.
        options: accepted so that callers using the keyword-based Gym reset
            signature work; unused.

    Returns:
        ``(observation, info)``: the new position as a float32 array of shape
        ``(2,)`` and an empty dict.
    """
    if seed is not None:
        # A private generator gives reproducibility without reseeding the
        # process-wide ``random`` state that other code may rely on.
        self._rng = random.Random(seed)
    rng = getattr(self, "_rng", random)

    # A fresh array is allocated on every reset (x is drawn first, then y)
    # so that arrays handed out earlier as observations are never
    # overwritten in place.
    self.state = np.array([rng.random(), rng.random()], dtype=np.float32)
    self._timestep = 0

    return (self.state, {})
defstep(self,a):
defdistance_to_goal(s):
# The goal is 0, 0, so the distance to goal is sqrt(self.state^2)
returnnp.sqrt((s**2).sum())
old_d=distance_to_goal(self.state)
self.state=np.clip(self.state+0.1*a,0.0,1.0)
new_d=distance_to_goal(self.state)
x,y=self.state
reward=10.*(old_d-new_d)# Reward for getting closer to the goal