Skip to content
Snippets Groups Projects
Commit 3e058052 authored by Mats Gottenbos
Browse files

Trying some different parameters

parent 2339e05c
No related branches found
No related tags found
No related merge requests found
...@@ -19,9 +19,9 @@ class MuZeroConfig: ...@@ -19,9 +19,9 @@ class MuZeroConfig:
### Game ### Game
self.observation_shape = (3, 96, 96) # Dimensions of the game observation, must be 3D (channel, height, width). For a 1D array, please reshape it to (1, 1, length of array) self.observation_shape = (3, 96, 96) # Dimensions of the game observation, must be 3D (channel, height, width). For a 1D array, please reshape it to (1, 1, length of array)
self.action_space = list(range(5)) # Fixed list of all possible actions. You should only edit the length self.action_space = list(range(7)) # Fixed list of all possible actions. You should only edit the length
self.players = list(range(1)) # List of players. You should only edit the length self.players = list(range(1)) # List of players. You should only edit the length
self.stacked_observations = 5 # Number of previous observations and previous actions to add to the current observation self.stacked_observations = 20 # Number of previous observations and previous actions to add to the current observation
# Evaluate # Evaluate
self.muzero_player = 0 # Turn Muzero begins to play (0: MuZero plays first, 1: MuZero plays second) self.muzero_player = 0 # Turn Muzero begins to play (0: MuZero plays first, 1: MuZero plays second)
...@@ -30,9 +30,9 @@ class MuZeroConfig: ...@@ -30,9 +30,9 @@ class MuZeroConfig:
### Self-Play ### Self-Play
self.num_workers = 1 # Number of simultaneous threads/workers self-playing to feed the replay buffer self.num_workers = 10 # Number of simultaneous threads/workers self-playing to feed the replay buffer
self.selfplay_on_gpu = False self.selfplay_on_gpu = False
self.max_moves = 500 # Maximum number of moves if game is not finished before self.max_moves = 2000 # Maximum number of moves if game is not finished before
self.num_simulations = 50 # Number of future moves self-simulated self.num_simulations = 50 # Number of future moves self-simulated
self.discount = 0.999 # Chronological discount of the reward self.discount = 0.999 # Chronological discount of the reward
self.temperature_threshold = None # Number of moves before dropping the temperature given by visit_softmax_temperature_fn to 0 (ie selecting the best action). If None, visit_softmax_temperature_fn is used every time self.temperature_threshold = None # Number of moves before dropping the temperature given by visit_softmax_temperature_fn to 0 (ie selecting the best action). If None, visit_softmax_temperature_fn is used every time
...@@ -77,7 +77,7 @@ class MuZeroConfig: ...@@ -77,7 +77,7 @@ class MuZeroConfig:
self.save_model = True # Save the checkpoint in results_path as model.checkpoint self.save_model = True # Save the checkpoint in results_path as model.checkpoint
self.training_steps = 100000 # Total number of training steps (ie weights update according to a batch) self.training_steps = 100000 # Total number of training steps (ie weights update according to a batch)
self.batch_size = 128 # Number of parts of games to train on at each training step self.batch_size = 128 # Number of parts of games to train on at each training step
self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing self.checkpoint_interval = 100 # Number of training steps before using the model for self-playing
self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze)
self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available
...@@ -165,7 +165,7 @@ class Game(AbstractGame): ...@@ -165,7 +165,7 @@ class Game(AbstractGame):
Returns: Returns:
An array of integers, subset of the action space. An array of integers, subset of the action space.
""" """
return list(range(5)) return list(range(7))
def reset(self): def reset(self):
""" """
...@@ -229,11 +229,13 @@ class Game(AbstractGame): ...@@ -229,11 +229,13 @@ class Game(AbstractGame):
""" """
possibleActions = [ possibleActions = [
[0, 0, 0], [0, 0, 0], # No action
[-1, 0, 0], [-1, 0, 0], # Steer left
[1, 0, 0], [1, 0, 0], # Steer right
[0, 1, 0], [0, 1, 0], # Gas
[0, 0, 1], [0, 0, 1], # Brake
[-1, 1, 0], # Steer left + gas
[1, 1, 0], # Steer right + gas
] ]
actionFormatted = possibleActions[action_number] actionFormatted = possibleActions[action_number]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment