Commit b5a3620e authored by Mats Gottenbos

More experimenting

parent d2eefde2
@@ -33,7 +33,7 @@ class MuZeroConfig:
         self.num_workers = 1 # Number of simultaneous threads/workers self-playing to feed the replay buffer
         self.selfplay_on_gpu = False
         self.max_moves = 1000 # Maximum number of moves if game is not finished before
-        self.num_simulations = 50 # Number of future moves self-simulated
+        self.num_simulations = 100 # Number of future moves self-simulated
         self.discount = 0.999 # Chronological discount of the reward
         self.temperature_threshold = None # Number of moves before dropping the temperature given by visit_softmax_temperature_fn to 0 (ie selecting the best action). If None, visit_softmax_temperature_fn is used every time
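
num_simulations is the per-move MCTS budget: each self-play move runs that many simulated rollouts through the learned model before an action is chosen, so raising it from 50 to 100 roughly doubles self-play cost per move. Below is a minimal runnable sketch of how such a budget is typically consumed; Node and simulate() are simplified stand-ins, not this repository's actual MCTS implementation.

import random

class Node:
    def __init__(self, actions):
        self.visit_count = {a: 0 for a in actions}

    def simulate(self):
        # One placeholder rollout; a real MCTS would descend the tree
        # through the learned dynamics model and back up a value.
        a = random.choice(list(self.visit_count))
        self.visit_count[a] += 1

def select_action(root, num_simulations):
    for _ in range(num_simulations):  # the budget this commit raises from 50 to 100
        root.simulate()
    # Temperature-0 choice: pick the most-visited action.
    return max(root.visit_count, key=root.visit_count.get)

root = Node(actions=[0, 1, 2])
print(select_action(root, num_simulations=100))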
@@ -77,7 +77,7 @@ class MuZeroConfig:
         self.save_model = True # Save the checkpoint in results_path as model.checkpoint
         self.training_steps = 100000 # Total number of training steps (ie weights update according to a batch)
         self.batch_size = 128 # Number of parts of games to train on at each training step
-        self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing
+        self.checkpoint_interval = 100 # Number of training steps before using the model for self-playing
         self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze)
         self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available
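
checkpoint_interval controls how often freshly trained weights are handed to the self-play workers; widening it from 10 to 100 steps means workers refresh less often, generating data with slightly staler networks in exchange for less checkpointing overhead. A minimal runnable sketch of that gating follows, assuming a hypothetical SharedStorage stand-in rather than this repository's actual API.

class SharedStorage:
    # Stand-in for the shared weight store that self-play workers read from.
    def __init__(self):
        self.weights = None

    def set_weights(self, weights):
        self.weights = weights

def train(training_steps, checkpoint_interval, storage):
    for step in range(training_steps):
        weights = {"step": step}  # stand-in for one optimizer update on a batch
        if step % checkpoint_interval == 0:  # the gate this commit widens from 10 to 100
            storage.set_weights(weights)

storage = SharedStorage()
train(training_steps=1000, checkpoint_interval=100, storage=storage)
print(storage.weights)  # last checkpoint published to self-play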