Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions experiments/hybrid_cv01.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Validated hybrid reward: energy + crane_velocity penalty + position return.
# energy=1.0 (KE+PE physics signal), crane_velocity=0.1 (-x_dot^2 penalty to
# damp trolley oscillation), position=0.02 (-|x| to encourage return to origin).
# energy=1.0 (KE+PE physics signal), crane_velocity=-0.1 (x_dot^2 penalty to
# damp trolley oscillation; negative because rc.crane_velocity * x_dot^2 is used),
# position=0.1 (-|x| to encourage return to origin).
# terminal_penalty=-5.0 provides a one-shot crash signal propagated ~100 steps
# back by gamma=0.99. Trained with randomize_start=True on speeds +-[0.1, 1.0].
# Seeds 2718, 3141, 31415 achieve 6/6 OOD generalisation at start_speed=7.0.
Expand All @@ -13,7 +14,7 @@ reward:
terminal_penalty: -5.0
angle: 0.0
angular_velocity: 0.0
crane_velocity: 0.1
crane_velocity: -0.1
crane_acceleration: 0.0
angular_acceleration: 0.0
t_min_crane: 0.0
Expand Down
2 changes: 1 addition & 1 deletion experiments/hybrid_t_min.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ reward:
terminal_penalty: -5.0
angle: 0.0
angular_velocity: 0.0
crane_velocity: 0.1
crane_velocity: -0.1
crane_acceleration: 0.0
angular_acceleration: 0.0
t_min_crane: 0.01
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ dependencies = [
"matplotlib>=3.10",
"seaborn>=0.13.2",
"tqdm>=4.67",
"pyment>=0.3.3",
]

[project.urls]
Expand Down
6 changes: 3 additions & 3 deletions scripts/analyse_q.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import numpy as np

from crane_controller.crane_factory import build_crane
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumEnv
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumConfig, AntiPendulumEnv
from crane_controller.q_agent import QLearningAgent

LOGGER = logging.getLogger(__name__)
Expand All @@ -36,7 +36,7 @@ def _build_dummy_env() -> AntiPendulumEnv:
AntiPendulumEnv
Environment with discrete observation space.
"""
return AntiPendulumEnv(build_crane, discrete=AntiPendulumEnv.DEFAULT_DISCRETE.copy())
return AntiPendulumEnv(build_crane, conf=AntiPendulumConfig(discrete="energy"))


def main() -> None:
Expand All @@ -53,7 +53,7 @@ def main() -> None:
args = parser.parse_args()

env = _build_dummy_env()
agent = QLearningAgent(env, trained=(args.model_path, True))
agent = QLearningAgent(env, filename=args.model_path, use_file="r")

logging.basicConfig(level=logging.INFO, format="%(message)s")

Expand Down
20 changes: 11 additions & 9 deletions scripts/play_ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import numpy as np

from crane_controller.crane_factory import build_crane
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumEnv
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumConfig, AntiPendulumEnv
from crane_controller.experiment_config import load_training_sidecar
from crane_controller.ppo_agent import EpisodeResult, ProximalPolicyOptimizationAgent

Expand Down Expand Up @@ -191,13 +191,15 @@ def main() -> None:
model_path=args.model_path,
env_kwargs={
"crane": build_crane,
"start_speed": speeds[0],
"randomize_start": args.randomize_start,
"render_mode": args.render_mode,
"reward_fac": config.reward,
"rail_limit": config.training.rail_limit,
"reward_limit": config.training.reward_limit,
"continuous_actions": args.continuous_actions,
"conf": AntiPendulumConfig(
start_speed=speeds[0],
randomize_start=args.randomize_start,
render_mode=args.render_mode,
reward_fac=config.reward,
rail_limit=config.training.rail_limit,
reward_limit=config.training.reward_limit,
continuous_actions=args.continuous_actions,
),
},
max_episode_steps=mep,
)
Expand All @@ -206,7 +208,7 @@ def main() -> None:
all_results: list[EpisodeResult] = []

for speed in speeds:
agent.env.unwrapped.start_speed = speed # type: ignore[attr-defined]
agent.env.unwrapped.conf = dataclasses.replace(agent.env.unwrapped.conf, start_speed=speed) # type: ignore[attr-defined]
for episode in range(args.episodes):
LOGGER.info("Episode %s/%s speed=%+.1f", episode + 1, args.episodes, speed)
png_path: str | None = None
Expand Down
12 changes: 7 additions & 5 deletions scripts/play_q.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import logging

from crane_controller.crane_factory import build_crane
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumEnv
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumConfig, AntiPendulumEnv
from crane_controller.q_agent import QLearningAgent

LOGGER = logging.getLogger(__name__)
Expand All @@ -31,11 +31,13 @@ def main() -> None:

env = AntiPendulumEnv(
build_crane,
start_speed=args.v0,
render_mode=args.render_mode,
discrete=AntiPendulumEnv.DEFAULT_DISCRETE.copy(),
conf=AntiPendulumConfig(
start_speed=args.v0,
render_mode=args.render_mode,
discrete="energy",
),
)
agent = QLearningAgent(env, trained=(args.model_path, True))
agent = QLearningAgent(env, filename=args.model_path, use_file="r")

logging.basicConfig(level=logging.INFO, format="%(message)s")
for episode in range(args.episodes):
Expand Down
37 changes: 20 additions & 17 deletions scripts/train_ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pathlib import Path

from crane_controller.crane_factory import build_crane
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumEnv
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumConfig, AntiPendulumEnv
from crane_controller.experiment_config import (
ExperimentConfig,
RewardConfig,
Expand Down Expand Up @@ -185,8 +185,7 @@ def main() -> None: # noqa: PLR0915
n_envs=1,
env_kwargs={
"crane": build_crane,
"start_speed": -1.0,
"render_mode": "reward-tracking",
"conf": AntiPendulumConfig(start_speed=-1.0, render_mode="reward-tracking"),
},
)
agent.do_training(1000, progress_bar=False)
Expand Down Expand Up @@ -229,13 +228,15 @@ def main() -> None: # noqa: PLR0915
model_path=args.resume_from,
env_kwargs={
"crane": build_crane,
"start_speed": args.start_speed,
"randomize_start": args.randomize_start,
"render_mode": args.render_mode,
"reward_fac": resume_config.reward,
"rail_limit": args.rail_limit,
"reward_limit": resume_config.training.reward_limit,
"continuous_actions": args.continuous_actions,
"conf": AntiPendulumConfig(
start_speed=args.start_speed,
randomize_start=args.randomize_start,
render_mode=args.render_mode,
reward_fac=resume_config.reward,
rail_limit=args.rail_limit,
reward_limit=resume_config.training.reward_limit,
continuous_actions=args.continuous_actions,
),
},
save_path=args.save_path,
n_envs=args.n_envs,
Expand All @@ -254,13 +255,15 @@ def main() -> None: # noqa: PLR0915
n_envs=args.n_envs,
env_kwargs={
"crane": build_crane,
"start_speed": args.start_speed,
"randomize_start": args.randomize_start,
"render_mode": args.render_mode,
"reward_fac": experiment_config.reward,
"rail_limit": experiment_config.training.rail_limit,
"reward_limit": experiment_config.training.reward_limit,
"continuous_actions": args.continuous_actions,
"conf": AntiPendulumConfig(
start_speed=args.start_speed,
randomize_start=args.randomize_start,
render_mode=args.render_mode,
reward_fac=experiment_config.reward,
rail_limit=experiment_config.training.rail_limit,
reward_limit=experiment_config.training.reward_limit,
continuous_actions=args.continuous_actions,
),
},
save_path=args.save_path,
gamma=args.gamma,
Expand Down
20 changes: 12 additions & 8 deletions scripts/train_q.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pathlib import Path

from crane_controller.crane_factory import build_crane
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumEnv
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumConfig, AntiPendulumEnv
from crane_controller.q_agent import QLearningAgent

logging.basicConfig(level=logging.INFO, format="%(message)s")
Expand Down Expand Up @@ -55,20 +55,24 @@ def main() -> None:

env = AntiPendulumEnv(
build_crane,
start_speed=args.v0,
render_mode="plot" if args.dry_run else "none",
reward_limit=args.reward_limit,
discrete=AntiPendulumEnv.DEFAULT_DISCRETE.copy(),
conf=AntiPendulumConfig(
start_speed=args.v0,
render_mode="plot" if args.dry_run else "none",
reward_limit=args.reward_limit,
discrete="energy",
),
)

if args.dry_run:
agent = QLearningAgent(env, trained=None)
agent = QLearningAgent(env)
agent.do_episodes(n_episodes=50, max_steps=1000)

else:
Path(args.save_path).parent.mkdir(parents=True, exist_ok=True)
trained = (args.trained, True) if args.trained else (args.save_path, False)
agent = QLearningAgent(env, trained=trained)
if args.trained:
agent = QLearningAgent(env, filename=Path(args.trained), use_file="rw")
else:
agent = QLearningAgent(env, filename=Path(args.save_path), use_file="w")
agent.do_episodes(n_episodes=args.episodes, max_steps=5000)
LOGGER.info(f"Model saved to {args.save_path}")

Expand Down
28 changes: 15 additions & 13 deletions scripts/use_q_ide.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from typing import Any

from crane_controller.crane_factory import build_crane
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumEnv
from crane_controller.envs.controlled_crane_pendulum import AntiPendulumConfig, AntiPendulumEnv
from crane_controller.envs.simple_test_env import SimpleTestEnv
from crane_controller.experiment_config import RewardConfig
from crane_controller.q_agent import QLearningAgent
Expand Down Expand Up @@ -72,20 +72,22 @@ def do_use(conf: Config | dict[str, Any] | None = None) -> None:
_conf = Config() if conf is None else (Config(**conf) if isinstance(conf, dict) else conf)
env = AntiPendulumEnv(
build_crane,
start_speed=_conf.v0,
randomize_start=_conf.randomize_start,
seed=_conf.seed,
dt=_conf.dt,
render_mode=_conf.render,
discrete=_conf.discretization,
reward_fac=_conf.rc,
reward_limit=_conf.r_limit,
discount=_conf.discount,
conf=AntiPendulumConfig(
start_speed=_conf.v0,
randomize_start=_conf.randomize_start,
seed=_conf.seed,
dt=_conf.dt,
render_mode=_conf.render,
discrete=_conf.discretization,
reward_fac=_conf.rc,
reward_limit=_conf.r_limit,
discount=_conf.discount,
),
)

filename = _conf.file
filename = Path(_conf.file) if _conf.file is not None else None
if filename is not None:
Path(filename).parent.mkdir(parents=True, exist_ok=True)
filename.parent.mkdir(parents=True, exist_ok=True)
agent = QLearningAgent(env, filename=filename, use_file=_conf.use_file, strategy=_conf.strategy)
LOGGER.info(f"DISCRETE: {agent.env.discrete}")
agent.do_episodes(n_episodes=_conf.episodes, max_steps=_conf.steps, show=0)
Expand All @@ -110,7 +112,7 @@ def simple_env(episodes: int, render: str, file: str, use: str, r_limit: float |
dt=1.0,
render_mode=render,
)
agent = QLearningAgent(env, filename=file, use_file=use)
agent = QLearningAgent(env, filename=Path(file), use_file=use)
agent.do_episodes(n_episodes=episodes, max_steps=steps)


Expand Down
1 change: 1 addition & 0 deletions src/crane_controller/envs/controlled_crane_pendulum.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def __init__(self, crane: Callable[..., Crane], conf: AntiPendulumConfig | None
"""
self.crane_maker = crane
self.conf = AntiPendulumConfig() if conf is None else conf
self.render_mode: str | None = self.conf.render_mode # gymnasium convention: expose as direct attribute
self.crane: Crane = crane()
self.wire: Wire = self.crane.boom_by_name("wire") # type: ignore[assignment] # Wire is a sub-class of Boom
assert isinstance(self.wire, Wire), "Need a crane wire!"
Expand Down
3 changes: 2 additions & 1 deletion src/crane_controller/experiment_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ class RewardConfig:
Uses pure angular velocity ``(cm_v[0] - origin_v[0]) / wire.length``,
excluding crane translation.
crane_velocity : float
Weight for the squared crane velocity penalty ``-x_dot^2`` (default 0.0).
Weight for the squared crane velocity term ``+x_dot^2`` (default 0.0).
Positive values reward crane velocity; use a negative value to penalise it.
crane_acceleration : float
Weight for the squared crane acceleration penalty ``-x_ddot^2`` (default 0.0).
Equals the control action squared (acc = action * self.acc).
Expand Down
4 changes: 2 additions & 2 deletions src/crane_controller/ppo_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,8 @@ def do_one_episode(
self.env.unwrapped.render(save_path=save_png) # type: ignore[attr-defined, call-arg]
env_u = self.env.unwrapped
energy_final = 0.5 * float(env_u.wire.cm_v[0]) ** 2 # type: ignore[attr-defined]
if env_u.continuous_actions: # type: ignore[attr-defined]
acc_final = float(np.asarray(last_action).flat[0]) * float(env_u.acc) # type: ignore[attr-defined]
if env_u.conf.continuous_actions: # type: ignore[attr-defined]
acc_final = float(np.asarray(last_action).flat[0]) * float(env_u.conf.acc) # type: ignore[attr-defined]
else:
acc_final = float(env_u.action_to_acc[int(last_action)]) # type: ignore[attr-defined]
return EpisodeResult(
Expand Down
3 changes: 2 additions & 1 deletion src/crane_controller/q_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ def do_episodes(self, n_episodes: int = 1000, max_steps: int = 5000, show: int =
num_truncated += int(trunc)
if _episode >= n_episodes - 100:
log_r0 = np.log(-self.env.rewards[0]) # type: ignore[attr-defined] ## extended class
_t = [-i * self.env.conf.dt / (np.log(-r) - log_r0) for i, r in enumerate(self.env.rewards[1:])] # type: ignore[attr-defined] ## extended class
_env_dt = getattr(getattr(self.env, "conf", self.env), "dt", 1.0)
_t = [-i * _env_dt / (np.log(-r) - log_r0) for i, r in enumerate(self.env.rewards[1:])] # type: ignore[attr-defined] ## extended class
tau.append(np.average(_t))
rewards[0].extend(list(range(len(self.env.rewards)))) # type: ignore[attr-defined] ## extended class
rewards[1].extend([np.log(-x) - log_r0 for x in self.env.rewards]) # type: ignore[attr-defined] ## extended class
Expand Down
8 changes: 6 additions & 2 deletions tests/test_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
logger = logging.getLogger(__name__)


@pytest.mark.skip(reason="Test needs to be updated")
@pytest.mark.skip(
reason="AlgorithmAgent uses obs[1]/obs[2] for pos/speed; 'energy' discretization moved them to obs[2]/obs[3]"
)
def test_algorithm_strategies(
crane: Callable[..., Crane],
*,
Expand All @@ -29,7 +31,9 @@ def test_algorithm_strategies(
agent.do_strategies(max_steps=5000 if show else 10)


@pytest.mark.skip(reason="Test needs to be updated.")
@pytest.mark.skip(
reason="AlgorithmAgent uses obs[1]/obs[2] for pos/speed; 'energy' discretization moved them to obs[2]/obs[3]"
)
def test_algorithm(crane: Callable[..., Crane], *, show: bool) -> None:
env = AntiPendulumEnv(
crane,
Expand Down
18 changes: 0 additions & 18 deletions tests/test_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,24 +256,6 @@ def test_step_accepts_correct_action(crane: Callable[..., Crane], continuous_act
assert obs.shape == (4,)


@pytest.mark.skip(reason="The t_min reward term is not in use any more")
def test_t_min_crane_reward_term(crane: Callable[..., Crane]) -> None:
"""t_min_crane weight adds -t_min to the reward; zero at origin at rest."""
rc = RewardConfig(energy=0.0, positional=0.0, position=0.0, acceleration=0.0, t_min_crane=1.0)
env = AntiPendulumEnv(crane, conf=AntiPendulumConfig(start_speed=1.0, reward_fac=rc, continuous_actions=False))
_ = env.reset()
# Displace crane so t_min > 0
env.crane.position[0] = 1.0
env.crane.velocity[0] = 0.0
_, reward, _, _, _ = env.step(1) # coast — minimal physics change
assert reward < 0.0, f"Expected negative reward from t_min penalty, got {reward}"
# At origin at rest t_min = 0 → contribution is exactly 0
env.crane.position[0] = 0.0
env.crane.velocity[0] = 0.0
t_min = env._t_min_crane() # type: ignore[reportPrivateUsage]
assert t_min == 0.0, f"Expected t_min=0 at origin at rest, got {t_min}"


if __name__ == "__main__":
import os
from pathlib import Path
Expand Down
Loading