""" Continuous navigation task: observe (x, y, speed_x, speed_y), produce (dx, dy), and aim at reaching (0, 0). The range of observations is [0, 1] (so we try to reach a corner)
"""
def __init__(self):
    """Declare the observation/action spaces and allocate the state vector.

    The state is a 4-element float32 array: slots 0-1 hold the position and
    slots 2-3 hold the speed. Observations are bounded to [0, 1] per
    component and actions to [-1, 1] per component.
    """
    super().__init__()
    # Position + speed.
    # Bounds are built as float32 (np.zeros/np.ones default to float64) so
    # they match the state dtype instead of relying on Box to cast them down.
    self.observation_space = gym.spaces.Box(
        low=np.zeros((4,), dtype=np.float32),
        high=np.ones((4,), dtype=np.float32),
        dtype=np.float32,
    )
    # Acceleration.
    self.action_space = gym.spaces.Box(
        low=-np.ones((2,), dtype=np.float32),
        high=np.ones((2,), dtype=np.float32),
        dtype=np.float32,
    )
    self.state = np.zeros((4,), dtype=np.float32)
    # reset() zeroes this counter too; it is created here so the attribute
    # exists on every instance, even before the first reset().
    self._timestep = 0
def reset(self, **kwargs):
    """Start a new episode at a random position with zero speed.

    Args:
        **kwargs: Accepted for call compatibility; not read by this method.
            NOTE(review): a ``seed`` passed here therefore has no effect,
            since the position is drawn from the global ``random`` module.

    Returns:
        An ``(observation, info)`` tuple: a copy of the 4-element state
        array and an empty dict.
    """
    # Position: each coordinate is drawn independently from [0, 1).
    self.state[0] = random.random()
    self.state[1] = random.random()
    # Speed: every episode starts at rest.
    self.state[2] = 0
    self.state[3] = 0
    self._timestep = 0
    # Return a copy rather than the buffer itself: self.state is overwritten
    # in place on every reset, so handing out the buffer would let a later
    # reset silently rewrite observations the caller has kept.
    return self.state.copy(), {}
defstep(self,a):
defdistance_to_goal(s):
# The goal is 0, 0, so the distance to goal is sqrt(self.state^2)