KT_Q_Table/usecases/evaluate_agent_usecase.py

17 lines
654 B
Python

from agents.offline_agent import QLearningAgent
from negotiation_agent.environment import NegotiationEnv
class EvaluateAgentUseCase:
    """Evaluate an agent by running whole episodes and averaging the returns."""

    def execute(
        self,
        agent: "QLearningAgent",
        env: "NegotiationEnv",
        num_episodes: int,
    ) -> float:
        """Return the mean undiscounted episode reward over ``num_episodes`` runs.

        Args:
            agent: Policy queried via ``agent.get_action(obs)`` on every step.
            env: Environment whose ``reset()`` returns ``(obs, info)`` and whose
                ``step(action)`` returns
                ``(obs, reward, terminated, truncated, info)``.
            num_episodes: Number of episodes to run; must be positive.

        Returns:
            The sum of every reward collected divided by ``num_episodes``.

        Raises:
            ValueError: If ``num_episodes`` is not positive.
        """
        if num_episodes <= 0:
            # Fail with a clear message instead of an incidental
            # ZeroDivisionError (or a meaningless average) further down.
            raise ValueError(
                f"num_episodes must be a positive integer, got {num_episodes!r}"
            )

        total_reward = 0.0
        for _ in range(num_episodes):
            obs, _info = env.reset()
            episode_reward = 0.0
            terminated = truncated = False
            # An episode is over when the environment reports either flag;
            # watching only `terminated` would keep stepping (possibly forever)
            # once the episode has been cut short.
            while not (terminated or truncated):
                action = agent.get_action(obs)
                obs, reward, terminated, truncated, _info = env.step(action)
                episode_reward += reward
            total_reward += episode_reward
        return total_reward / num_episodes