Skip to content
Snippets Groups Projects
Commit 405609e8 authored by Mats Gottenbos
Browse files

Removed game zooming, decreased observation size, more tuning

parent 75b96602
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,8 @@ import torch
from .abstract_game import AbstractGame
from .racecar_deps.car_racing import CarRacing
class MuZeroConfig:
def __init__(self):
......@@ -18,7 +20,7 @@ class MuZeroConfig:
### Game
self.observation_shape = (3, 96, 96) # Dimensions of the game observation, must be 3D (channel, height, width). For a 1D array, please reshape it to (1, 1, length of array)
self.observation_shape = (1, 12, 12) # Dimensions of the game observation, must be 3D (channel, height, width). For a 1D array, please reshape it to (1, 1, length of array)
self.action_space = list(range(7)) # Fixed list of all possible actions. You should only edit the length
self.players = list(range(1)) # List of players. You should only edit the length
self.stacked_observations = 20 # Number of previous observations and previous actions to add to the current observation
......@@ -33,7 +35,7 @@ class MuZeroConfig:
self.num_workers = 1 # Number of simultaneous threads/workers self-playing to feed the replay buffer
self.selfplay_on_gpu = False
self.max_moves = 1000 # Maximum number of moves if game is not finished before
self.num_simulations = 100 # Number of future moves self-simulated
self.num_simulations = 50 # Number of future moves self-simulated
self.discount = 0.999 # Chronological discount of the reward
self.temperature_threshold = None # Number of moves before dropping the temperature given by visit_softmax_temperature_fn to 0 (ie selecting the best action). If None, visit_softmax_temperature_fn is used every time
......@@ -77,7 +79,7 @@ class MuZeroConfig:
self.save_model = True # Save the checkpoint in results_path as model.checkpoint
self.training_steps = 100000 # Total number of training steps (ie weights update according to a batch)
self.batch_size = 128 # Number of parts of games to train on at each training step
self.checkpoint_interval = 100 # Number of training steps before using the model for self-playing
self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing
self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze)
self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available
......@@ -134,7 +136,7 @@ class Game(AbstractGame):
"""
def __init__(self, seed=None):
self.env = gym.make("CarRacing-v0")
self.env = CarRacing()
if seed is not None:
self.env.seed(seed)
......@@ -212,10 +214,23 @@ class Game(AbstractGame):
observation: the (96, 96, 3) numpy array of the observation to format
Returns:
The corresponding (3, 96, 96) numpy array of the observation
The corresponding (1, 12, 12) numpy array of the grayscale, downscaled observation
"""
return numpy.transpose(observation, [2, 0, 1])
# Change order of dimensions
observationFormattedRgb = numpy.transpose(observation, [2, 0, 1])
# Sum over RGB values to create a single grayscale channel
observationFormattedGrayscale = numpy.sum(observationFormattedRgb, axis=0, dtype=numpy.uint8)
# Scale down
observationFormatted = numpy.zeros((12, 12), dtype = numpy.uint8)
for y in range(12):
for x in range(12):
observationFormatted[x, y] = observationFormattedGrayscale[x * 8, y * 8]
return numpy.array([observationFormatted])
def formatAction(self, action_number):
"""
......
......@@ -414,7 +414,8 @@ class CarRacing(gym.Env, EzPickle):
return # reset() not called yet
# Animate zoom first second:
zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
# zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
zoom = ZOOM * SCALE
scroll_x = self.car.hull.position[0]
scroll_y = self.car.hull.position[1]
angle = -self.car.hull.angle
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment