From 69d1f3487fb499e4d1e6e5cc9d174ea5984ff3ed Mon Sep 17 00:00:00 2001 From: dnddnjs Date: Sun, 17 May 2026 16:33:41 +0900 Subject: [PATCH] Use loop break instead of sys.exit() on early stop sys.exit() raises SystemExit which short-circuits env.close() and any outer cleanup, can't be unit-tested, and kills the kernel under Jupyter/IPython. Replace with a `solved` flag (DQN/A2C, nested loops) or a plain break (PPO, single loop) so the function returns normally and the final save runs through the same path as the EPISODES-exhausted case. Credit to @AlexisBogroff in #84 for flagging the pattern; applied to the current PyTorch tree. --- 2-cartpole/1-dqn.py | 9 +++++---- 2-cartpole/2-a2c.py | 10 +++++----- 2-cartpole/3-ppo.py | 5 +---- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/2-cartpole/1-dqn.py b/2-cartpole/1-dqn.py index e7c0eaa..ddbb219 100644 --- a/2-cartpole/1-dqn.py +++ b/2-cartpole/1-dqn.py @@ -18,7 +18,6 @@ L(theta) = ( Q_theta(s)[a] - y )^2 """ import random -import sys from collections import deque import numpy as np @@ -135,8 +134,11 @@ def train_model(self): run_test_loop(env, agent.get_action) scores = [] + solved = False for e in range(EPISODES): + if solved: + break done = False score = 0 state, _ = env.reset() @@ -168,9 +170,8 @@ def train_model(self): # Early stop when consistently near max episode length. if np.mean(scores[-min(10, len(scores)):]) > 490: - torch.save(agent.model.state_dict(), SAVE_PATH) - print(f"Saved trained model to {SAVE_PATH}") - sys.exit() + solved = True + break torch.save(agent.model.state_dict(), SAVE_PATH) print(f"Saved trained model to {SAVE_PATH}") diff --git a/2-cartpole/2-a2c.py b/2-cartpole/2-a2c.py index 1a92b9b..a7d145a 100644 --- a/2-cartpole/2-a2c.py +++ b/2-cartpole/2-a2c.py @@ -19,7 +19,6 @@ Subtracting V_w(s) is the variance-reduction baseline; using a learned V (rather than the Monte-Carlo return) is what makes this *actor-critic*. """ -import sys import numpy as np import torch @@ -125,8 +124,11 @@ def train_model(self, state, action, reward, next_state, done): run_test_loop(env, agent.get_action) scores = [] + solved = False for e in range(EPISODES): + if solved: + break done = False score = 0 state, _ = env.reset() @@ -149,10 +151,8 @@ def train_model(self, state, action, reward, next_state, done): scores.append(score) print(f"episode: {e} score: {score}") if np.mean(scores[-min(10, len(scores)):]) > 490: - torch.save({"actor": agent.actor.state_dict(), - "critic": agent.critic.state_dict()}, SAVE_PATH) - print(f"Saved trained model to {SAVE_PATH}") - sys.exit() + solved = True + break torch.save({"actor": agent.actor.state_dict(), "critic": agent.critic.state_dict()}, SAVE_PATH) diff --git a/2-cartpole/3-ppo.py b/2-cartpole/3-ppo.py index 6850d72..174d266 100644 --- a/2-cartpole/3-ppo.py +++ b/2-cartpole/3-ppo.py @@ -27,7 +27,6 @@ L = L^CLIP - c_v * MSE(V, returns) + c_e * H[pi] """ -import sys import numpy as np import torch @@ -211,9 +210,7 @@ def pick(state): recent = ep_returns[-10:] print(f"update: {episode} recent_mean_return: {np.mean(recent):.1f} episodes: {len(ep_returns)}") if len(recent) >= 10 and np.mean(recent) > 490: - torch.save(model.state_dict(), SAVE_PATH) - print(f"Saved trained model to {SAVE_PATH}") - sys.exit() + break torch.save(model.state_dict(), SAVE_PATH) print(f"Saved trained model to {SAVE_PATH}")