diff --git a/games/racecar.py b/games/racecar.py index de4908b094445017286582d384f0abd99d593d19..84183004bb9e204953e7797ea3b4a03cdeda0b21 100644 --- a/games/racecar.py +++ b/games/racecar.py @@ -7,6 +7,8 @@ import torch from .abstract_game import AbstractGame +from .racecar_deps.car_racing import CarRacing + class MuZeroConfig: def __init__(self): @@ -18,7 +20,7 @@ class MuZeroConfig: ### Game - self.observation_shape = (3, 96, 96) # Dimensions of the game observation, must be 3D (channel, height, width). For a 1D array, please reshape it to (1, 1, length of array) + self.observation_shape = (1, 12, 12) # Dimensions of the game observation, must be 3D (channel, height, width). For a 1D array, please reshape it to (1, 1, length of array) self.action_space = list(range(7)) # Fixed list of all possible actions. You should only edit the length self.players = list(range(1)) # List of players. You should only edit the length self.stacked_observations = 20 # Number of previous observations and previous actions to add to the current observation @@ -33,7 +35,7 @@ class MuZeroConfig: self.num_workers = 1 # Number of simultaneous threads/workers self-playing to feed the replay buffer self.selfplay_on_gpu = False self.max_moves = 1000 # Maximum number of moves if game is not finished before - self.num_simulations = 100 # Number of future moves self-simulated + self.num_simulations = 50 # Number of future moves self-simulated self.discount = 0.999 # Chronological discount of the reward self.temperature_threshold = None # Number of moves before dropping the temperature given by visit_softmax_temperature_fn to 0 (ie selecting the best action). 
If None, visit_softmax_temperature_fn is used every time @@ -77,7 +79,7 @@ class MuZeroConfig: self.save_model = True # Save the checkpoint in results_path as model.checkpoint self.training_steps = 100000 # Total number of training steps (ie weights update according to a batch) self.batch_size = 128 # Number of parts of games to train on at each training step - self.checkpoint_interval = 100 # Number of training steps before using the model for self-playing + self.checkpoint_interval = 10 # Number of training steps before using the model for self-playing self.value_loss_weight = 0.25 # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (See paper appendix Reanalyze) self.train_on_gpu = torch.cuda.is_available() # Train on GPU if available @@ -134,7 +136,7 @@ class Game(AbstractGame): """ def __init__(self, seed=None): - self.env = gym.make("CarRacing-v0") + self.env = CarRacing() if seed is not None: self.env.seed(seed) @@ -212,10 +214,23 @@ class Game(AbstractGame): observation: the (96, 96, 3) numpy array of the observation to format Returns: - The corresponding (3, 96, 96) numpy array of the observation + The corresponding (1, 12, 12) numpy array of the observation """ - return numpy.transpose(observation, [2, 0, 1]) + + # Change order of dimensions + observationFormattedRgb = numpy.transpose(observation, [2, 0, 1]) + + # Sum over RGB values to create a single grayscale channel + observationFormattedGrayscale = numpy.sum(observationFormattedRgb, axis=0, dtype=numpy.uint8) + + # Scale down + observationFormatted = numpy.zeros((12, 12), dtype = numpy.uint8) + for y in range(12): + for x in range(12): + observationFormatted[x, y] = observationFormattedGrayscale[x * 8, y * 8] + + return numpy.array([observationFormatted]) def formatAction(self, action_number): """ diff --git a/car_racing.py b/games/racecar_deps/car_racing.py similarity index 99% rename from car_racing.py rename to games/racecar_deps/car_racing.py index 
78a0af5538c2c8bee743a26cbc8a25dfa1678ada..64f157323eab4ddd43e7fe1898cecc2912fcbb3c 100644 --- a/car_racing.py +++ b/games/racecar_deps/car_racing.py @@ -414,7 +414,8 @@ class CarRacing(gym.Env, EzPickle): return # reset() not called yet # Animate zoom first second: - zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1) + # zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1) + zoom = ZOOM * SCALE scroll_x = self.car.hull.position[0] scroll_y = self.car.hull.position[1] angle = -self.car.hull.angle