-        self.observation_shape = (3, 96, 96)  # Dimensions of the game observation, must be 3D (channel, height, width). For a 1D array, please reshape it to (1, 1, length of array)
+        self.observation_shape = (1, 12, 12)  # Dimensions of the game observation, must be 3D (channel, height, width). For a 1D array, please reshape it to (1, 1, length of array)
         self.action_space = list(range(7))  # Fixed list of all possible actions. You should only edit the length
         self.players = list(range(1))  # List of players. You should only edit the length
         self.stacked_observations = 20  # Number of previous observations and previous actions to add to the current observation
...
...
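For reference, a minimal sketch of the 1D-to-3D reshape the observation_shape comment asks for (the array name and length here are illustrative, not from this diff):

import numpy as np

flat_obs = np.zeros(8, dtype=np.float32)  # hypothetical 1D observation of length 8
obs_3d = flat_obs.reshape(1, 1, -1)       # (channel, height, width) = (1, 1, 8)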
@@ -33,7 +35,7 @@ class MuZeroConfig:
         self.num_workers = 1  # Number of simultaneous threads/workers self-playing to feed the replay buffer
         self.selfplay_on_gpu = False
         self.max_moves = 1000  # Maximum number of moves if game is not finished before
-        self.num_simulations = 100  # Number of future moves self-simulated
+        self.num_simulations = 50  # Number of future moves self-simulated
         self.discount = 0.999  # Chronological discount of the reward
         self.temperature_threshold = None  # Number of moves before dropping the temperature given by visit_softmax_temperature_fn to 0 (i.e. selecting the best action). If None, visit_softmax_temperature_fn is used every time
...
...
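The temperature_threshold comment refers to visit_softmax_temperature_fn; a sketch of what such an annealing schedule typically looks like (the step cutoffs and return values are illustrative, not taken from this diff):

def visit_softmax_temperature_fn(self, trained_steps):
    # Drop the exploration temperature as training progresses
    # (illustrative schedule; the real one is defined elsewhere in the config)
    if trained_steps < 0.5 * self.training_steps:
        return 1.0
    elif trained_steps < 0.75 * self.training_steps:
        return 0.5
    else:
        return 0.25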
@@ -77,7 +79,7 @@ class MuZeroConfig:
         self.save_model = True  # Save the checkpoint in results_path as model.checkpoint
         self.training_steps = 100000  # Total number of training steps (i.e. one weight update per batch)
         self.batch_size = 128  # Number of parts of games to train on at each training step
-        self.checkpoint_interval = 100  # Number of training steps before using the model for self-playing
+        self.checkpoint_interval = 10  # Number of training steps before using the model for self-playing
         self.value_loss_weight = 0.25  # Scale the value loss to avoid overfitting of the value function, paper recommends 0.25 (see paper appendix Reanalyze)
         self.train_on_gpu = torch.cuda.is_available()  # Train on GPU if available
...
...
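As the value_loss_weight comment notes, only the value term of the loss is scaled; schematically (variable names are illustrative):

# Schematic per-batch loss combination; only the value term is down-weighted
loss = self.value_loss_weight * value_loss + reward_loss + policy_loss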
@@ -134,7 +136,7 @@ class Game(AbstractGame):
"""
def__init__(self,seed=None):
self.env=gym.make("CarRacing-v0")
self.env=CarRacing()
ifseedisnotNone:
self.env.seed(seed)
...
...
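The diff does not show where CarRacing is imported from; one plausible reading is a direct import of the class behind "CarRacing-v0", bypassing gym's registry:

# Assumed import (not shown in this diff)
from gym.envs.box2d.car_racing import CarRacing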
@@ -212,10 +214,23 @@ class Game(AbstractGame):
             observation: the (96, 96, 3) numpy array of the observation to format
         Returns:
-            The corresponding (3, 96, 96) numpy array of the observation
+            The corresponding (1, 96, 96) numpy array of the observation
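A minimal sketch of a (96, 96, 3) -> (1, 96, 96) conversion consistent with the updated docstring, assuming a standard luminance grayscale (the PR's actual implementation is not shown here):

import numpy as np

def format_observation(observation):
    # RGB -> grayscale using the usual luminance weights (an assumption)
    gray = np.dot(observation[..., :3], [0.299, 0.587, 0.114])
    # Add a leading channel axis: (96, 96) -> (1, 96, 96)
    return gray[np.newaxis, :, :].astype(np.float32)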