from typing import Type

import numpy as np

import config
from negotiation_agent.agent import QLearningAgent
from negotiation_agent.environment import NegotiationEnv


class TrainAgentUseCase:
    """Use case that encapsulates the "train the agent" business logic.

    Holds the environment and the agent and drives the episode loop using
    the hyper-parameters read from the ``config`` module.
    """

    def __init__(self, env: NegotiationEnv, agent: QLearningAgent):
        """Store the collaborators used by :meth:`execute`.

        Args:
            env: Environment the agent interacts with; it is reset once per
                episode and closed when training finishes.
            agent: Agent whose ``get_action`` / ``learn`` methods and
                ``epsilon`` attribute are driven by the training loop.
        """
        self.env = env
        self.agent = agent

    def execute(self):
        """Run the use case: train for ``config.TOTAL_EPISODES`` episodes.

        For every episode the environment is reset and stepped until it
        reports the episode as over, calling ``agent.learn`` on each
        transition.  After each episode ``agent.epsilon`` is recomputed with
        an exponential decay from ``config.EPSILON_START`` towards
        ``config.EPSILON_END``.  Progress is printed every 1000 episodes.
        On success the Q-table is saved to ``config.Q_TABLE_SAVE_PATH``.

        The environment is closed even if training raises; the Q-table is
        saved only when every episode completed.
        """
        print("--- [UseCase] 학습 시작 ---")

        try:
            for episode in range(config.TOTAL_EPISODES):
                state, info = self.env.reset()
                done = False

                while not done:
                    action = self.agent.get_action(state)
                    next_state, reward, terminated, truncated, info = (
                        self.env.step(action)
                    )
                    self.agent.learn(state, action, reward, next_state)
                    state = next_state
                    # An episode is over when it is terminated OR truncated.
                    # Ignoring `truncated` would keep stepping an environment
                    # that has already ended (and may never exit the loop).
                    done = terminated or truncated

                # Epsilon update: exponential decay indexed by episode number.
                self.agent.epsilon = config.EPSILON_END + (
                    config.EPSILON_START - config.EPSILON_END
                ) * np.exp(-config.EPSILON_DECAY_RATE * episode)

                if (episode + 1) % 1000 == 0:
                    print(
                        f"Episode {episode + 1}/{config.TOTAL_EPISODES} | Epsilon: {self.agent.epsilon:.4f}"
                    )

            print("\n✅ [UseCase] 학습 완료!")
            self.agent.save_q_table(config.Q_TABLE_SAVE_PATH)
        finally:
            # Release environment resources on both success and failure.
            self.env.close()