Skip to content
Snippets Groups Projects
Commit aca53d17 authored by Denis Steckelmacher's avatar Denis Steckelmacher
Browse files

Make Program compute a monte-carlo average of its output

parent 5f19c2fb
No related branches found
No related tags found
No related merge requests found
......@@ -61,24 +61,23 @@ class Args:
"""the discount factor gamma"""
tau: float = 0.005
"""target smoothing coefficient (default: 0.005)"""
batch_size: int = 128
batch_size: int = 512
"""the batch size of sample from the reply memory"""
policy_noise: float = 0.2
"""the scale of policy noise"""
learning_starts: int = 2000
"""timestep to start learning"""
policy_frequency: int = 500
policy_frequency: int = 512
"""the frequency of training policy (delayed)"""
noise_clip: float = 0.5
"""noise clip parameter of the Target Policy Smoothing Regularization"""
# Parameters for the program optimizer
num_individuals: int = 300
num_individuals: int = 100
num_genes: int = 5
num_eval_runs: int = 2
num_generations: int = 20
num_parents_mating: int = 100
num_parents_mating: int = 50
mutation_probability: float = 0.1
def make_env(env_id, seed, idx, capture_video, run_name):
......@@ -112,11 +111,11 @@ def get_state_actions(program_optimizers, obs, env, args):
for i, o in enumerate(obs):
action = np.zeros(env.action_space.shape, dtype=np.float32)
for i in range(20):
for action_index in range(env.action_space.shape[0]):
action[action_index] += program_optimizers[action_index].get_action(o)
for action_index in range(env.action_space.shape[0]):
action[action_index] = program_optimizers[action_index].get_action(o)
program_actions.append(action / 20)
action = np.clip(action, env.action_space.low, env.action_space.high)
program_actions.append(action)
return np.array(program_actions)
......@@ -189,6 +188,7 @@ def run_synthesis(args: Args):
with torch.no_grad():
action = get_state_actions(program_optimizers, obs[None, :], env, args)[0]
action = np.random.normal(loc=action, scale=args.policy_noise)
print('ACTION', action)
# TRY NOT TO MODIFY: execute the game and log data.
next_obs, reward, termination, truncation, info = env.step(action)
......
......@@ -42,28 +42,21 @@ class ProgramOptimizer:
try:
# Num input variables looked at
expected_lookedat = self.states.shape[1]
lookedat = 0.0
for i in range(100):
# This is a stochastic process
lookedat += program.num_inputs_looked_at(expected_lookedat)
looked_proportion = (lookedat / 100) / expected_lookedat
lookedat = program.num_inputs_looked_at()
looked_proportion = lookedat / self.state_dim
# Evaluate the program several times, because evaluations are stochastic
batch_size = self.states.shape[0]
sum_error = 0.0
for eval_run in range(self.config.num_eval_runs):
for index in range(batch_size):
# MSE for the loss
action = program(self.states[index])
desired_action = self.actions[index]
for index in range(batch_size):
# MSE for the loss
action = program(self.states[index])
desired_action = self.actions[index]
sum_error += np.mean((action - desired_action) ** 2)
sum_error += np.mean((action - desired_action) ** 2)
avg_error = (sum_error / (batch_size * self.config.num_eval_runs))
avg_error = (sum_error / batch_size)
fitness = (1.0 - avg_error) * looked_proportion
except InvalidProgramException:
fitness = -1000.0
......
# x 2 + 2 * <end> <end>
#
# Literals # positive
# Operators # negative, we have a finite number of them
# Input variables # negative, we can have many of them
# <end> # OPERATOR_END
#
# 1. PyGAD produces numpy arrays (lists of floats). Look at them in pairs of (mean, variance),
#    sample a token from that normal distribution, and transform the sample into one
#    of the tokens listed above.
# 2. Run the resulting sequence of tokens as a program.
import math
import numpy as np
......@@ -57,9 +46,6 @@ class Program:
def on_literal_func(stack, token):
stack.append(f"±{token}")
def on_input_func(stack, input_index):
stack.append(f"x[{input_index}]")
def on_operator_func(stack, operator, operands):
# Put a string representation of the operator on the stack
if len(operands) == 1:
......@@ -75,8 +61,8 @@ class Program:
stack.append(result)
return self._visit_program(
init_func=lambda: [f"x[{i}]" for i in range(self.state_dim)] * 20,
on_literal_func=on_literal_func,
on_input_func=on_input_func,
on_operator_func=on_operator_func
)
......@@ -88,26 +74,26 @@ class Program:
stack.append(token)
def on_input_func(stack, input_index):
stack.append(inp[input_index])
def on_operator_func(stack, operator, operands):
result = operator.function(*operands)
stack.append(result)
return self._visit_program(
on_literal_func=on_literal_func,
on_input_func=on_input_func,
on_operator_func=on_operator_func
)
AVG = 500
x = 0.0
for i in range(AVG):
x += self._visit_program(
init_func=lambda: list(inp) * 20,
on_literal_func=on_literal_func,
on_operator_func=on_operator_func
)
def num_inputs_looked_at(self, state_vars):
return x / AVG
def num_inputs_looked_at(self):
def on_literal_func(stack, token):
stack.append(set([])) # Literals don't look at inputs
def on_input_func(stack, input_index):
stack.append(set([input_index])) # Inputs look at inputs
def on_operator_func(stack, operator, operands):
looked_at = set([])
......@@ -117,13 +103,13 @@ class Program:
stack.append(looked_at)
return len(self._visit_program(
init_func=lambda: [set([i]) for i in range(self.state_dim)] * 20,
on_literal_func=on_literal_func,
on_input_func=on_input_func,
on_operator_func=on_operator_func
))
def _visit_program(self, on_literal_func, on_input_func, on_operator_func):
stack = []
def _visit_program(self, init_func, on_literal_func, on_operator_func):
stack = init_func()
for token in self.tokens:
if token >= 0.0:
......@@ -132,20 +118,10 @@ class Program:
# Now, cast token to an int, but with stochasticity so that a value
# close to x.5 is always cast to x, but other values may end up on x+1 or x-1
token = int(token + 0.498 * (np.random.random() - 0.5))
# Input variable
if token < -NUM_OPERATORS:
input_index = -token - NUM_OPERATORS - 1
if input_index >= self.state_dim:
raise InvalidProgramException()
on_input_func(stack, input_index)
continue
token = int(token + (np.random.random() - 0.5))
# Operators
operator_index = -token - 1
operator_index = (-token - 1) % len(OPERATORS)
operator = OPERATORS[operator_index]
# Pop the operands
......@@ -164,15 +140,49 @@ class Program:
return stack[-1]
if __name__ == '__main__':
def dbg_average():
# Compute the average output of programs
values = []
for l in range(20):
for i in range(100000):
dna = np.random.random((l,))
dna[0:-1:2] *= -(NUM_OPERATORS + 1) # Tokens between -NUM_OPERATORS - state_dim and 0
p = Program(dna)
values.append(p([]))
dna *= -(NUM_OPERATORS + 1) # Tokens between -NUM_OPERATORS - state_dim and 0
p = Program(dna, 1)
try:
values.append(p([0.0]))
except InvalidProgramException:
values.append(0.0)
print('Average output of random programs of size', l, ':', np.mean(values), '+-', np.std(values))
def dbg_random_functions():
    """Display the outputs of random programs as grayscale images.

    Debugging helper that never returns. Each iteration:

    1. builds a ``Program`` from 5 random tokens and prints its string form;
    2. evaluates it on a 20x20 grid of ``[x, y]`` inputs, each in [0, 1);
    3. prints the standard deviation of the outputs;
    4. min-max normalises the outputs and shows them, upscaled to 200x200,
       in an OpenCV window for about 100 ms.

    Interrupt the process (e.g. Ctrl-C) to stop it.
    """
    # Imported locally so that OpenCV is only required for this debug helper
    import cv2

    grid = 20

    while True:
        data = np.zeros((grid, grid), dtype=np.float32)

        # np.random.random() is in [0, 1), so after scaling every token lies
        # in (-(NUM_OPERATORS + 1), 0]
        dna = np.random.random((5,))
        dna *= -(NUM_OPERATORS + 1)

        p = Program(dna, 2)
        print(p.to_string())

        for y in range(grid):
            for x in range(grid):
                data[y, x] = p([x / grid, y / grid])

        print(data.std())

        # Rescale to [0, 1); the small epsilon avoids a division by zero when
        # the program outputs the same value everywhere
        data -= data.min()
        data /= data.max() + 1e-3

        image = (data * 255).astype(np.uint8)
        image = cv2.resize(image, (200, 200))

        cv2.imshow('image', image)
        cv2.waitKey(100)
if __name__ == '__main__':
dbg_random_functions()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment