from negotiation_agent.environment import NegotiationEnv
from negotiation_agent.agent import QLearningAgent
import config


def evaluate():
    """Run one evaluation episode with the saved Q-table and print the results.

    Builds a ``NegotiationEnv`` from the values in ``config``, creates a
    ``QLearningAgent`` with ``learning_rate``, ``gamma`` and ``epsilon`` all
    set to 0, loads the Q-table stored at ``config.Q_TABLE_SAVE_PATH``, and
    steps the environment until the episode is reported as terminated or
    truncated. Every step and a final summary are printed to stdout.

    Returns:
        None. All results are reported via ``print``.
    """
    env = NegotiationEnv(
        scenario=config.SCENARIO,
        target_price=config.TARGET_PRICE,
        threshold_price=config.THRESHOLD_PRICE,
    )

    # Release the environment even if loading the Q-table or stepping raises.
    try:
        # Create the agent, then load the trained Q-table into it.
        agent = QLearningAgent(
            state_dims=env.observation_space.nvec,
            action_size=env.action_space.n,
            learning_rate=0,  # no learning during evaluation
            gamma=0,
            epsilon=0,  # no exploration during evaluation; pick the best action only
        )
        agent.load_q_table(config.Q_TABLE_SAVE_PATH)

        print("--- 학습된 에이전트 평가 시작 ---")
        state, info = env.reset()
        done = False
        total_reward = 0

        while not done:
            action = agent.get_action(state)
            state, reward, terminated, truncated, info = env.step(action)
            total_reward += reward
            # NOTE: `state` was just overwritten by the step result, so the
            # value printed here is the observation *after* the action.
            print(f"상태: {state}, 선택한 행동: {action}, 보상: {reward:.4f}")
            # The step result carries a separate `truncated` flag; stop on it
            # too, otherwise an episode that ends by truncation would keep
            # being stepped and the loop might never exit.
            done = terminated or truncated

        print("\n✅ 평가 종료!")
        print(f"최종 협상 가격: {env.current_price:.2f} (목표가: {env.target_price})")
        print(f"총 보상: {total_reward:.4f}")
    finally:
        env.close()


# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    evaluate()