Project: Senne Deproost / CM-GP
Commit aca53d17
Authored 7 months ago by Denis Steckelmacher
Make Program compute a monte-carlo average of its output
Parent: 5f19c2fb
No related branches, tags, or merge requests found.
Showing 3 changed files with 73 additions and 70 deletions:

  TD3_program_synthesis.py   +9  −9
  optim.py                   +8  −15
  postfix_program.py         +56 −46
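
The core of the commit: evaluating a Program is stochastic (tokens are decoded with random perturbations), so Program.__call__ now returns the mean of many internal evaluations instead of leaving the averaging to each caller. Below is a minimal standalone sketch of that pattern; the StochasticProgram class and its evaluate_once method are hypothetical stand-ins, and only the averaging loop mirrors the real Program.__call__ (AVG = 500) in the diff further down.

import numpy as np

class StochasticProgram:
    """Toy stand-in: each evaluation adds noise, mimicking the stochastic token decoding."""

    def __init__(self, rng=None):
        self.rng = rng or np.random.default_rng()

    def evaluate_once(self, inp):
        # Hypothetical single noisy evaluation (the real code re-decodes the token list each time).
        return float(np.sum(inp)) + self.rng.normal(scale=0.1)

    def __call__(self, inp, avg=500):
        # Monte-Carlo average, as introduced in Program.__call__ (AVG = 500 in the commit).
        total = 0.0
        for _ in range(avg):
            total += self.evaluate_once(inp)
        return total / avg

p = StochasticProgram()
print(p([0.5, 1.0]))   # close to 1.5; the per-evaluation noise averages out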
TD3_program_synthesis.py  (+9 −9)
@@ -61,24 +61,23 @@ class Args:
     """
     the discount factor gamma
     """
     tau: float = 0.005
     """
     target smoothing coefficient (default: 0.005)
     """
-    batch_size: int = 128
+    batch_size: int = 512
     """
     the batch size of sample from the reply memory
     """
     policy_noise: float = 0.2
     """
     the scale of policy noise
     """
     learning_starts: int = 2000
     """
     timestep to start learning
     """
-    policy_frequency: int = 500
+    policy_frequency: int = 512
     """
     the frequency of training policy (delayed)
     """
     noise_clip: float = 0.5
     """
     noise clip parameter of the Target Policy Smoothing Regularization
     """
     # Parameters for the program optimizer
-    num_individuals: int = 300
+    num_individuals: int = 100
     num_genes: int = 5
-    num_eval_runs: int = 2
     num_generations: int = 20
-    num_parents_mating: int = 100
+    num_parents_mating: int = 50
     mutation_probability: float = 0.1


 def make_env(env_id, seed, idx, capture_video, run_name):

@@ -112,11 +111,11 @@ def get_state_actions(program_optimizers, obs, env, args):
     for i, o in enumerate(obs):
         action = np.zeros(env.action_space.shape, dtype=np.float32)

-        for i in range(20):
-            for action_index in range(env.action_space.shape[0]):
-                action[action_index] += program_optimizers[action_index].get_action(o)
+        for action_index in range(env.action_space.shape[0]):
+            action[action_index] = program_optimizers[action_index].get_action(o)

-        program_actions.append(action / 20)
+        action = np.clip(action, env.action_space.low, env.action_space.high)
+        program_actions.append(action)

     return np.array(program_actions)

@@ -189,6 +188,7 @@ def run_synthesis(args: Args):
         with torch.no_grad():
             action = get_state_actions(program_optimizers, obs[None, :], env, args)[0]
+            action = np.random.normal(loc=action, scale=args.policy_noise)

         print('ACTION', action)

         # TRY NOT TO MODIFY: execute the game and log data.
         next_obs, reward, termination, truncation, info = env.step(action)
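
Because Program.__call__ now averages internally, get_state_actions no longer loops 20 times per observation: it queries each per-dimension optimizer once, clips the result to the action bounds, and run_synthesis adds Gaussian exploration noise afterwards. A hedged sketch of the resulting flow, assuming the Gym-style env and the ProgramOptimizer.get_action interface used in the repository:

import numpy as np

def get_state_actions_sketch(program_optimizers, obs, env, args):
    # One action vector per observation; each action dimension is produced by its own
    # program optimizer, then clipped to the environment's action bounds.
    program_actions = []

    for o in obs:
        action = np.zeros(env.action_space.shape, dtype=np.float32)

        for action_index in range(env.action_space.shape[0]):
            action[action_index] = program_optimizers[action_index].get_action(o)

        # Programs may return values outside the valid range, so clip explicitly.
        action = np.clip(action, env.action_space.low, env.action_space.high)
        program_actions.append(action)

    return np.array(program_actions)

# During the rollout (run_synthesis), Gaussian exploration noise is then added on top:
#     action = np.random.normal(loc=actions[0], scale=args.policy_noise)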
optim.py  (+8 −15)
@@ -42,28 +42,21 @@ class ProgramOptimizer:
         try:
             # Num input variables looked at
-            expected_lookedat = self.states.shape[1]
-            lookedat = 0.0
-
-            for i in range(100):
-                # This is a stochastic process
-                lookedat += program.num_inputs_looked_at(expected_lookedat)
-
-            looked_proportion = (lookedat / 100) / expected_lookedat
+            lookedat = program.num_inputs_looked_at()
+            looked_proportion = lookedat / self.state_dim

-            # Evaluate the program several times, because evaluations are stochastic
             batch_size = self.states.shape[0]
             sum_error = 0.0

-            for eval_run in range(self.config.num_eval_runs):
-                for index in range(batch_size):
-                    # MSE for the loss
-                    action = program(self.states[index])
-                    desired_action = self.actions[index]
+            for index in range(batch_size):
+                # MSE for the loss
+                action = program(self.states[index])
+                desired_action = self.actions[index]

-                    sum_error += np.mean((action - desired_action) ** 2)
+                sum_error += np.mean((action - desired_action) ** 2)

-            avg_error = (sum_error / (batch_size * self.config.num_eval_runs))
+            avg_error = (sum_error / batch_size)

             fitness = (1.0 - avg_error) * looked_proportion
         except InvalidProgramException:
             fitness = -1000.0
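
With the averaging done inside Program, the fitness needs only one pass over the batch and a single num_inputs_looked_at() call. A compact restatement of the new fitness logic; the import path for InvalidProgramException is an assumption, everything else follows the diff above:

import numpy as np

# InvalidProgramException is raised by the Program class; assuming it is importable
# from postfix_program as in the rest of the repository.
from postfix_program import InvalidProgramException

def fitness_sketch(program, states, actions, state_dim):
    # Mirrors the updated fitness: invalid programs get a large penalty; valid ones
    # combine (1 - mean squared error) with the proportion of inputs they actually read.
    try:
        looked_proportion = program.num_inputs_looked_at() / state_dim

        batch_size = states.shape[0]
        sum_error = 0.0

        for index in range(batch_size):
            action = program(states[index])        # already a Monte-Carlo average internally
            desired_action = actions[index]
            sum_error += np.mean((action - desired_action) ** 2)

        avg_error = sum_error / batch_size
        return (1.0 - avg_error) * looked_proportion
    except InvalidProgramException:
        return -1000.0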
postfix_program.py  (+56 −46)
 # x 2 + 2 * <end> <end>
 #
 # Literals          # positive
 # Operators         # negative, we have a finite number of them
 # Input variables   # negative, we can have many of them
 # <end>             # OPERATOR_END
 #
 # 1. PyGAD produces numpy arrays (lists of floats). Look at them in pairs of (mean, variance).
 #    sample a token from that normal distribution, and transform the sample to one
 #    of the tokens listed above
 # 2. Run that
 import math

 import numpy as np

@@ -57,9 +46,6 @@ class Program:
         def on_literal_func(stack, token):
             stack.append(f"±{token}")

         def on_input_func(stack, input_index):
             stack.append(f"x[{input_index}]")

         def on_operator_func(stack, operator, operands):
             # Put a string representation of the operator on the stack
             if len(operands) == 1:

@@ -75,8 +61,8 @@ class Program:
             stack.append(result)

         return self._visit_program(init_func=lambda: [f"x[{i}]" for i in range(self.state_dim)] * 20,
                                    on_literal_func=on_literal_func,
                                    on_input_func=on_input_func,
                                    on_operator_func=on_operator_func)

@@ -88,26 +74,26 @@ class Program:
             stack.append(token)

         def on_input_func(stack, input_index):
             stack.append(inp[input_index])

         def on_operator_func(stack, operator, operands):
             result = operator.function(*operands)
             stack.append(result)

-        return self._visit_program(on_literal_func=on_literal_func,
-                                   on_input_func=on_input_func,
-                                   on_operator_func=on_operator_func)
+        AVG = 500
+        x = 0.0
+
+        for i in range(AVG):
+            x += self._visit_program(init_func=lambda: list(inp) * 20,
+                                     on_literal_func=on_literal_func,
+                                     on_operator_func=on_operator_func)
+
+        return x / AVG

-    def num_inputs_looked_at(self, state_vars):
+    def num_inputs_looked_at(self):
         def on_literal_func(stack, token):
             stack.append(set([]))  # Literals don't look at inputs

         def on_input_func(stack, input_index):
             stack.append(set([input_index]))  # Inputs look at inputs

         def on_operator_func(stack, operator, operands):
             looked_at = set([])

@@ -117,13 +103,13 @@ class Program:
             stack.append(looked_at)

         return len(self._visit_program(init_func=lambda: [set([i]) for i in range(self.state_dim)] * 20,
                                        on_literal_func=on_literal_func,
                                        on_input_func=on_input_func,
                                        on_operator_func=on_operator_func))

-    def _visit_program(self, on_literal_func, on_input_func, on_operator_func):
-        stack = []
+    def _visit_program(self, init_func, on_literal_func, on_operator_func):
+        stack = init_func()

         for token in self.tokens:
             if token >= 0.0:

@@ -132,20 +118,10 @@ class Program:
             # Now, cast token to an int, but with stochasticity so that a value
             # close to x.5 is always cast to x, but other values may end up on x+1 or x-1
-            token = int(token + 0.498 * (np.random.random() - 0.5))
-
-            # Input variable
-            if token < -NUM_OPERATORS:
-                input_index = -token - NUM_OPERATORS - 1
-
-                if input_index >= self.state_dim:
-                    raise InvalidProgramException()
-
-                on_input_func(stack, input_index)
-                continue
+            token = int(token + (np.random.random() - 0.5))

             # Operators
-            operator_index = -token - 1
+            operator_index = (-token - 1) % len(OPERATORS)
             operator = OPERATORS[operator_index]

             # Pop the operands

@@ -164,15 +140,49 @@ class Program:
         return stack[-1]


-if __name__ == '__main__':
+def dbg_average():
     # Compute the average output of programs
     values = []

     for l in range(20):
         for i in range(100000):
             dna = np.random.random((l,))
-            dna[0:-1:2] *= -(NUM_OPERATORS + 1)  # Tokens between -NUM_OPERATORS - state_dim and 0
-            p = Program(dna)
-            values.append(p([]))
+            dna *= -(NUM_OPERATORS + 1)  # Tokens between -NUM_OPERATORS - state_dim and 0
+            p = Program(dna, 1)
+
+            try:
+                values.append(p([0.0]))
+            except InvalidProgramException:
+                values.append(0.0)

         print('Average output of random programs of size', l, ':', np.mean(values), '+-', np.std(values))

+
+def dbg_random_functions():
+    import cv2
+
+    AVG = 1000
+
+    while True:
+        data = np.zeros((20, 20), dtype=np.float32)
+        dna = np.random.random((5,))
+        dna *= -(NUM_OPERATORS + 1)  # Tokens between -NUM_OPERATORS - state_dim and 0
+        p = Program(dna, 2)
+
+        print(p.to_string())
+
+        for y in range(20):
+            for x in range(20):
+                data[y, x] = p([x / 20, y / 20])
+
+        print(data.std())
+        data -= data.min()
+        data /= data.max() + 1e-3
+
+        image = (data * 255).astype(np.uint8)
+        image = cv2.resize(image, (200, 200))
+
+        cv2.imshow('image', image)
+        cv2.waitKey(100)
+
+
+if __name__ == '__main__':
+    dbg_random_functions()
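
The reworked _visit_program seeds the stack from init_func (the inputs, repeated 20 times), treats non-negative tokens as literals, and decodes negative tokens with a stochastic integer cast followed by a modulo into the operator table. Below is a self-contained toy decoder illustrating that mechanism; the OPERATORS entries and their arities here are hypothetical, the real table lives in postfix_program.py.

import numpy as np

# Toy operator table standing in for OPERATORS in postfix_program.py (hypothetical entries).
OPERATORS = [
    ("add", 2, lambda a, b: a + b),
    ("mul", 2, lambda a, b: a * b),
    ("neg", 1, lambda a: -a),
]

def run_tokens_sketch(tokens, inp, rng):
    # The stack starts with many copies of the inputs (init_func in the diff uses list(inp) * 20),
    # so operators can always find operands to pop even early in the program.
    stack = list(inp) * 20

    for token in tokens:
        if token >= 0.0:
            stack.append(token)              # non-negative tokens are literals
            continue

        # Stochastic cast: perturb by up to +-0.5 before truncating to an int.
        token = int(token + (rng.random() - 0.5))

        # Negative tokens select an operator; the modulo keeps the index in range.
        name, arity, fn = OPERATORS[(-token - 1) % len(OPERATORS)]
        operands = [stack.pop() for _ in range(arity)]
        stack.append(fn(*operands))

    return stack[-1]

rng = np.random.default_rng(0)
# Averaging many stochastic runs, as Program.__call__ now does with AVG = 500:
outs = [run_tokens_sketch([-1.3, 2.0, -2.6], [0.5, 1.0], rng) for _ in range(500)]
print(np.mean(outs))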