diff --git a/games/racecar.py b/games/racecar.py
index 2ee3bc6f52ba1a2466be53f410f3ba569930191a..de4908b094445017286582d384f0abd99d593d19 100644
--- a/games/racecar.py
+++ b/games/racecar.py
@@ -33,7 +33,7 @@ class MuZeroConfig:
         self.num_workers = 1 # Number of simultaneous threads/workers self-playing to feed the replay buffer
         self.selfplay_on_gpu = False
         self.max_moves = 1000 # Maximum number of moves if game is not finished before
-        self.num_simulations = 50 # Number of future moves self-simulated
+        self.num_simulations = 100 # Number of future moves self-simulated
         self.discount = 0.999 # Chronological discount of the reward
         self.temperature_threshold = None # Number of moves before dropping the temperature given by visit_softmax_temperature_fn to 0 (ie selecting the best action). If None, visit_softmax_temperature_fn is used every time
@@ -77,7 +77,7 @@ class MuZeroConfig:
         self.save_model = True # Save the checkpoint in results_path as model.checkpoint
         self.training_steps = 100000 # Total number of training steps (ie weights update according to a batch)
         self.batch_size = 128 # Number of parts of games to train on at each training step
-        self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing
+        self.checkpoint_interval = 100 # Number of training steps before using the model for self-playing
         self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze)
         self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available
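
To make the effect of the two raised values concrete, the sketch below (illustrative only, not code from this repository) shows what they govern: num_simulations is the number of MCTS rollouts run per move during self-play, and checkpoint_interval is the number of training steps between pushes of updated weights to the self-play workers, so over the configured 100000 training steps the new interval yields 1000 weight publishes instead of 10000.

```python
# Minimal, runnable sketch (illustrative only; not the code in this repository)
# of what the two raised values govern. num_simulations is the number of MCTS
# rollouts the self-play workers run per move; checkpoint_interval is how many
# training steps pass between pushes of fresh weights to those workers.

class Config:
    num_simulations = 100      # was 50: each move is searched with twice as many rollouts
    checkpoint_interval = 100  # was 10: weights are published 10x less often
    training_steps = 100_000

def weight_publishes(config):
    # Count how often the trainer would hand updated weights to self-play
    # over a full run (a real loop would also do a gradient step each iteration).
    return sum(1 for step in range(config.training_steps)
               if step % config.checkpoint_interval == 0)

print(weight_publishes(Config))  # 1000 publishes, versus 10000 with the old interval of 10
```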