diff --git a/games/racecar.py b/games/racecar.py
index 2007bea337d41537ecd1341abb8954928c7638d8..a78f96000261bb011a2b553e92c2e406775248fa 100644
--- a/games/racecar.py
+++ b/games/racecar.py
@@ -19,9 +19,9 @@ class MuZeroConfig:
 
         ### Game
         self.observation_shape = (3, 96, 96)  # Dimensions of the game observation, must be 3D (channel, height, width). For a 1D array, please reshape it to (1, 1, length of array)
-        self.action_space = list(range(5))  # Fixed list of all possible actions. You should only edit the length
+        self.action_space = list(range(7))  # Fixed list of all possible actions. You should only edit the length
         self.players = list(range(1))  # List of players. You should only edit the length
-        self.stacked_observations = 5  # Number of previous observations and previous actions to add to the current observation
+        self.stacked_observations = 20  # Number of previous observations and previous actions to add to the current observation
 
         # Evaluate
         self.muzero_player = 0  # Turn Muzero begins to play (0: MuZero plays first, 1: MuZero plays second)
@@ -30,9 +30,9 @@ class MuZeroConfig:
 
 
         ### Self-Play
-        self.num_workers = 1  # Number of simultaneous threads/workers self-playing to feed the replay buffer
+        self.num_workers = 10  # Number of simultaneous threads/workers self-playing to feed the replay buffer
         self.selfplay_on_gpu = False
-        self.max_moves = 500  # Maximum number of moves if game is not finished before
+        self.max_moves = 2000  # Maximum number of moves if game is not finished before
         self.num_simulations = 50  # Number of future moves self-simulated
         self.discount = 0.999  # Chronological discount of the reward
         self.temperature_threshold = None  # Number of moves before dropping the temperature given by visit_softmax_temperature_fn to 0 (ie selecting the best action). If None, visit_softmax_temperature_fn is used every time
@@ -77,7 +77,7 @@ class MuZeroConfig:
         self.save_model = True  # Save the checkpoint in results_path as model.checkpoint
         self.training_steps = 100000  # Total number of training steps (ie weights update according to a batch)
         self.batch_size = 128  # Number of parts of games to train on at each training step
-        self.checkpoint_interval = 10  # Number of training steps before using the model for self-playing
+        self.checkpoint_interval = 100  # Number of training steps before using the model for self-playing
         self.value_loss_weight = 0.25  # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze)
         self.train_on_gpu = torch.cuda.is_available()  # Train on GPU if available
 
@@ -165,7 +165,7 @@ class Game(AbstractGame):
         Returns:
             An array of integers, subset of the action space.
         """
-        return list(range(5))
+        return list(range(7))
 
     def reset(self):
         """
@@ -229,11 +229,13 @@ class Game(AbstractGame):
         """
 
         possibleActions = [
-            [0, 0, 0],
-            [-1, 0, 0],
-            [1, 0, 0],
-            [0, 1, 0],
-            [0, 0, 1],
+            [0, 0, 0],  # No action
+            [-1, 0, 0],  # Steer left
+            [1, 0, 0],  # Steer right
+            [0, 1, 0],  # Gas
+            [0, 0, 1],  # Brake
+            [-1, 1, 0],  # Steer left + gas
+            [1, 1, 0],  # Steer right + gas
         ]
 
         actionFormatted = possibleActions[action_number]
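
Note on the action-space change: CarRacing drives on a continuous `Box(3)` control vector `[steering, gas, brake]`, and each discrete MuZero action is just an index into `possibleActions` that gets translated to that vector before being passed to the environment. The two new entries (5 and 6) let the agent steer and accelerate in the same move. A minimal sketch of the mapping, assuming the standard `gym` CarRacing-v0 environment (the environment name is not visible in this diff):

```python
import gym
import numpy as np

# Same table as the diff: discrete action index -> [steering, gas, brake].
POSSIBLE_ACTIONS = [
    [0, 0, 0],   # No action
    [-1, 0, 0],  # Steer left
    [1, 0, 0],   # Steer right
    [0, 1, 0],   # Gas
    [0, 0, 1],   # Brake
    [-1, 1, 0],  # Steer left + gas
    [1, 1, 0],   # Steer right + gas
]

env = gym.make("CarRacing-v0")  # assumed environment; racecar.py does not show it here
observation = env.reset()
for action_number in [3, 5, 6, 0]:  # gas, left+gas, right+gas, coast
    action_formatted = np.array(POSSIBLE_ACTIONS[action_number], dtype=np.float32)
    observation, reward, done, info = env.step(action_formatted)
env.close()
```

Since `self.action_space` and `legal_actions` now both report 7 entries, the lookup table and the two `range(...)` lengths stay consistent.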
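A rough back-of-the-envelope on `stacked_observations = 20`: if this file follows muzero-general's usual stacking scheme (an assumption; the model code is outside this diff), the representation network sees the current frame, the 20 previous frames, and one action plane per stacked step:

```python
# Hypothetical check of the input size implied by the new config values,
# assuming muzero-general's stacking rule (an assumption, not shown in this diff):
#   planes = channels * (stacked_observations + 1) + stacked_observations
channels, height, width = 3, 96, 96  # observation_shape from the config
stacked_observations = 20

input_planes = channels * (stacked_observations + 1) + stacked_observations
print(input_planes)  # 83 planes of 96x96, up from 23 with stacked_observations = 5
```

Each replay-buffer sample grows roughly in proportion (83 vs 23 planes), which is worth weighing alongside the larger `max_moves` and `num_workers` values.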