-        self.num_workers = 10  # Number of simultaneous threads/workers self-playing to feed the replay buffer
+        self.num_workers = 50  # Number of simultaneous threads/workers self-playing to feed the replay buffer
         self.selfplay_on_gpu = False
-        self.max_moves = 2000  # Maximum number of moves if game is not finished before
+        self.max_moves = 1000  # Maximum number of moves if game is not finished before
         self.num_simulations = 50  # Number of future moves self-simulated
         self.discount = 0.999  # Chronological discount of the reward
         self.temperature_threshold = None  # Number of moves before dropping the temperature given by visit_softmax_temperature_fn to 0 (ie selecting the best action). If None, visit_softmax_temperature_fn is used every time
...
...
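For context, here is a minimal sketch of how the `temperature_threshold` and `visit_softmax_temperature_fn` fields above are typically combined when picking an action during self-play; the helper function itself is assumed for illustration and is not part of the diff.

```python
# Sketch only: field names follow the config above, the helper is hypothetical.
def current_temperature(config, trained_steps, move_count):
    # With temperature_threshold=None the schedule function is consulted on every move;
    # otherwise the temperature drops to 0 (greedy, most-visited action) once the game
    # has lasted temperature_threshold moves.
    if config.temperature_threshold is None or move_count < config.temperature_threshold:
        return config.visit_softmax_temperature_fn(trained_steps)
    return 0
```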
@@ -77,7 +77,7 @@ class MuZeroConfig:
         self.save_model = True  # Save the checkpoint in results_path as model.checkpoint
         self.training_steps = 100000  # Total number of training steps (ie weights update according to a batch)
         self.batch_size = 128  # Number of parts of games to train on at each training step
-        self.checkpoint_interval = 100  # Number of training steps before using the model for self-playing
+        self.checkpoint_interval = 10  # Number of training steps before using the model for self-playing
         self.value_loss_weight = 0.25  # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze)
         self.train_on_gpu = torch.cuda.is_available()  # Train on GPU if available
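To see where `checkpoint_interval` fits, here is a hedged sketch of a training loop that pushes fresh weights to the self-play side every `checkpoint_interval` steps; `trainer`, `shared_storage`, and `replay_buffer` are hypothetical stand-ins, not objects shown in the diff. Lowering the interval from 100 to 10 simply means self-play workers pick up updated weights more often.

```python
# Sketch under assumptions: trainer, shared_storage, and replay_buffer are
# hypothetical stand-ins for the real training components.
for training_step in range(config.training_steps):
    batch = replay_buffer.sample(config.batch_size)  # parts of games, batch_size=128
    trainer.update_weights(batch)                    # one weights update per step
    if training_step % config.checkpoint_interval == 0:
        # Every checkpoint_interval steps the current weights become the ones
        # used for self-play (and are saved when save_model is True).
        shared_storage.set_weights(trainer.get_weights())
```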