evaluate_performance.py
import gymnasium
import torch
import torch.nn as nn
import numpy as np
import sys
import os
import argparse
from tqdm import tqdm
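
# Evaluates a trained DQN agent (or a rule-based baseline) on CartPole-v1 and prints a
# 100-point score combining survival reward, cart centering, and pole-angle smoothness.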
# --- Neural Network Definition (Must match training) ---
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, action_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)
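
# Note: the layer sizes above must match the checkpoint produced by the training script;
# otherwise load_state_dict() in evaluate() will raise a shape-mismatch error.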

def rule_based_policy(state):
    # Simple logic: Push left if leaning left, push right if leaning right
    pole_angle = state[2]
    return 0 if pole_angle < 0 else 1

def evaluate(model_path="student_model.pth", num_episodes=50, render=False, use_baseline=False):
    if not use_baseline and not os.path.exists(model_path):
        print(f"Error: Model file '{model_path}' not found. Please train your model first.")
        return

    render_mode = "human" if render else None
    env = gymnasium.make('CartPole-v1', render_mode=render_mode)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = None
    if not use_baseline:
        model = DQN(state_dim, action_dim)
        try:
            model.load_state_dict(torch.load(model_path, map_location=device))
            model.to(device)
            model.eval()
        except Exception as e:
            print(f"Error loading model structure: {e}")
            return
        print(f"Evaluating Model: {model_path}")
    else:
        print("Evaluating Baseline: Rule-Based Agent")

    print(f"Episodes: {num_episodes}")
    print("-" * 30)

    stats = {
        "rewards": [],
        "steps": [],
        "avg_abs_position": [],  # Metric for Centering
        "avg_abs_angle_vel": []  # Metric for Smoothness
    }
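
    # CartPole-v1 observation layout: [cart position, cart velocity, pole angle, pole angular velocity].
    # Indices 0 and 3 are recorded below for the centering and smoothness metrics.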
    for i in tqdm(range(num_episodes), desc="Evaluating Episodes"):
        state, _ = env.reset()
        done = False
        episode_reward = 0
        steps = 0

        # Episode metrics
        positions = []
        angle_vels = []

        while not done:
            if use_baseline:
                action = rule_based_policy(state)
            else:
                state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
                with torch.no_grad():
                    q_values = model(state_tensor)
                action = torch.argmax(q_values).item()

            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated

            # Record state data
            positions.append(next_state[0])
            angle_vels.append(next_state[3])

            state = next_state
            episode_reward += reward
            steps += 1

        # Store episode stats
        stats["rewards"].append(episode_reward)
        stats["steps"].append(steps)
        stats["avg_abs_position"].append(np.mean(np.abs(positions)) if positions else 0)
        stats["avg_abs_angle_vel"].append(np.mean(np.abs(angle_vels)) if angle_vels else 0)
    env.close()

    # --- Calculate Final Scores ---
    avg_reward = np.mean(stats["rewards"])
    success_rate = sum(r >= 475 for r in stats["rewards"]) / num_episodes * 100
    avg_centering_error = np.mean(stats["avg_abs_position"])
    avg_smoothness_error = np.mean(stats["avg_abs_angle_vel"])
    # Scoring Logic
    # 1. Survival Score (0-40 pts): surviving is easy, so fewer points are awarded here.
    score_survival = min(40, avg_reward / 500 * 40)

    # 2. Centering Score (0-30 pts):
    #    Target: average position deviation < 0.2 (it was 0.5 before)
    #    If avg_centering_error > 0.2, the score is 0.
    score_centering = 30 * max(0, 1 - (avg_centering_error / 0.2))

    # 3. Smoothness Score (0-30 pts):
    #    Target: average angular velocity < 0.2 rad/s (it was 1.0 before)
    #    If avg_smoothness_error > 0.2, the score is 0.
    score_smoothness = 30 * max(0, 1 - (avg_smoothness_error / 0.2))

    total_score = score_survival + score_centering + score_smoothness
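
    # Worked example with hypothetical results: avg_reward = 480, centering error = 0.10,
    # smoothness error = 0.15  ->  38.4 (survival) + 15.0 (centering) + 7.5 (smoothness) = 60.9 / 100.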
print("\n" + "="*40)
print(" PERFORMANCE REPORT ")
print("="*40)
print(f"Metrics (Average over {num_episodes} episodes):")
print(f" - Reward (Survival): {avg_reward:.2f} / 500.0")
print(f" - Success Rate: {success_rate:.1f}%")
print(f" - Centering Error: {avg_centering_error:.4f} (Target < 0.2)")
print(f" - Smoothness Error: {avg_smoothness_error:.4f} (Target < 0.2)")
print("-" * 40)
print("Scoring Breakdown:")
print(f" [+] Survival Score: {score_survival:.1f} / 40")
print(f" [+] Centering Score: {score_centering:.1f} / 30")
print(f" [+] Smoothness Score: {score_smoothness:.1f} / 30")
print("="*40)
print(f" FINAL SCORE: {total_score:.1f} / 100")
print("="*40)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--baseline", action="store_true", help="Evaluate the rule-based baseline instead of the trained model")
    parser.add_argument("--render", action="store_true", help="Render the environment")
    args = parser.parse_args()

    evaluate(render=args.render, use_baseline=args.baseline)
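
# Example usage (assuming the file is saved as evaluate_performance.py):
#   python evaluate_performance.py              # evaluate student_model.pth
#   python evaluate_performance.py --baseline   # score the rule-based agent instead
#   python evaluate_performance.py --render     # watch the episodes in a window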