from agents.offline_agent import QLearningAgent
from negotiation_agent.environment import NegotiationEnv


class EvaluateAgentUseCase:
    """Measure an agent's average per-episode reward in an environment."""

    def execute(self, agent: QLearningAgent, env: NegotiationEnv, num_episodes: int):
        """Run ``num_episodes`` full episodes and return the mean episode reward.

        Each episode starts with ``env.reset()`` and repeatedly feeds the
        current observation to ``agent.get_action`` and the resulting action
        to ``env.step`` until the environment reports the episode is over.

        Args:
            agent: Object exposing ``get_action(obs)``.
            env: Object exposing ``reset()`` returning a 2-tuple whose first
                item is the initial observation, and ``step(action)``
                returning a 5-tuple
                ``(obs, reward, terminated, truncated, info)``.
            num_episodes: Number of episodes to average over; must be >= 1.

        Returns:
            The sum of all rewards collected across every episode divided by
            ``num_episodes``.

        Raises:
            ValueError: If ``num_episodes`` is less than 1 (the mean over
                zero episodes is undefined).
        """
        if num_episodes < 1:
            # Fail with a clear message instead of a ZeroDivisionError at the
            # final division.
            raise ValueError(
                f"num_episodes must be a positive integer, got {num_episodes!r}"
            )

        total_rewards = 0
        for _ in range(num_episodes):
            obs, _ = env.reset()
            episode_reward = 0
            done = False
            while not done:
                action = agent.get_action(obs)
                obs, reward, terminated, truncated, _ = env.step(action)
                episode_reward += reward
                # NOTE(review): the 5-tuple is assumed to follow the Gymnasium
                # convention, where the fourth value flags truncation (e.g. a
                # step limit). Ending on either flag prevents an endless loop
                # when an episode is cut short without ever "terminating".
                done = terminated or truncated
            total_rewards += episode_reward

        return total_rewards / num_episodes