Kautenja · ItaiBear · Oct 13, 2022 · Oct 13, 2022 · Oct 13, 2022 · Oct 13, 2022
diff --git a/backup_restore.py b/backup_restore.py
@@ -10,7 +10,8 @@
             state = env.reset()
             done = False
         else:
-            state, reward, done, info = env.step(env.action_space.sample())
+            state, reward, terminated, truncated, info = env.step(env.action_space.sample())
+            done = terminated or truncated
         if (i + 1) % 12:
             env._backup()
         if (i + 1) % 27:

diff --git a/nes_py/app/cli.py b/nes_py/app/cli.py
@@ -21,6 +21,10 @@ def _get_args():
         choices=['human', 'random'],
         help='The execution mode for the emulation.',
     )
+    parser.add_argument('--seed', '-S',
+        type=int,
+        help='the random number seed to use'
+    )
     # add the argument for the number of steps to take in random mode
     parser.add_argument('--steps', '-s',
         type=int,
@@ -34,13 +38,15 @@ def main():
     """The main entry point for the command line interface."""
     # get arguments from the command line
     args = _get_args()
-    # create the environment
-    env = NESEnv(args.rom)
     # play the environment with the given mode
     if args.mode == 'human':
-        play_human(env)
+        # environment is initialized without a rendering mode, as play_human creates its own
+        env = NESEnv(args.rom)
+        play_human(env, seed=args.seed)
     else:
-        play_random(env, args.steps)
+        # create the environment
+        env = NESEnv(args.rom, render_mode='human')
+        play_random(env, args.steps, seed=args.seed)
 
 
 # explicitly define the outward facing API of this module

diff --git a/nes_py/app/play_human.py b/nes_py/app/play_human.py
@@ -1,5 +1,5 @@
 """A method to play gym environments using human IO inputs."""
-import gym
+import gymnasium as gym
 import time
 from pyglet import clock
 from .._image_viewer import ImageViewer
@@ -9,7 +9,7 @@
 _NOP = 0
 
 
-def play_human(env: gym.Env, callback=None):
+def play_human(env: gym.Env, callback=None, seed=None):
     """
     Play the environment using keyboard as a human.
 
@@ -44,7 +44,11 @@ def play_human(env: gym.Env, callback=None):
         relevant_keys=set(sum(map(list, keys_to_action.keys()), []))
     )
     # create a done flag for the environment
-    done = True
+    done = False
+    # reset the environment with the given seed
+    state, _ = env.reset(seed=seed)
+    # render the initial state
+    viewer.show(env.unwrapped.screen)
     # prepare frame rate limiting
     target_frame_duration = 1 / env.metadata['video.frames_per_second']
     last_frame_time = 0
@@ -62,15 +66,16 @@ def play_human(env: gym.Env, callback=None):
             # reset if the environment is done
             if done:
                 done = False
-                state = env.reset()
+                state, _ = env.reset()
                 viewer.show(env.unwrapped.screen)
             # unwrap the action based on pressed relevant keys
             action = keys_to_action.get(viewer.pressed_keys, _NOP)
-            next_state, reward, done, _ = env.step(action)
+            next_state, reward, terminated, truncated, _ = env.step(action)
+            done = terminated or truncated
             viewer.show(env.unwrapped.screen)
             # pass the observation data through the callback
             if callback is not None:
-                callback(state, action, reward, done, next_state)
+                callback(state, action, reward, terminated, truncated, next_state)
             state = next_state
             # shutdown if the escape key is pressed
             if viewer.is_escape_pressed:

diff --git a/nes_py/app/play_random.py b/nes_py/app/play_random.py
@@ -2,7 +2,7 @@
 from tqdm import tqdm
 
 
-def play_random(env, steps):
+def play_random(env, steps, seed=None):
     """
     Play the environment making uniformly random decisions.
 
@@ -15,13 +15,15 @@ def play_random(env, steps):
 
     """
     try:
-        done = True
+        done = False
+        _, _ = env.reset(seed=seed)
         progress = tqdm(range(steps))
         for _ in progress:
             if done:
-                _ = env.reset()
+                _, _ = env.reset()
             action = env.action_space.sample()
-            _, reward, done, info = env.step(action)
+            _, reward, terminated, truncated, info = env.step(action)
+            done = terminated or truncated
             progress.set_postfix(reward=reward, info=info)
             env.render()
     except KeyboardInterrupt: