"""Evaluate a trained Q-learning agent on the negotiation environment.

Loads the environment configuration and a saved Q-table, then runs a fixed
number of greedy (argmax) evaluation episodes, printing per-step details and
the average episode reward.
"""
from negotiation_agent.environment import NegotiationEnv
from agents.offline_agent import QLearningAgent
import yaml
import numpy as np


def main():
    """Run greedy evaluation episodes with a loaded Q-table and report rewards."""
    # Load the environment configuration.
    with open('configs/offline_env_config.yaml', 'r') as f:
        config = yaml.safe_load(f)

    # Initialize the environment.
    env = NegotiationEnv(
        scenario=config['env']['scenario'],
        target_price=config['env']['target_price'],
        threshold_price=config['env']['threshold_price']
    )

    # Initialize the agent and load the trained Q-table.
    state_dims = env.observation_space.nvec
    state_size = int(np.prod(state_dims))  # total size of the flattened state space
    action_size = env.action_space.n
    agent = QLearningAgent(config['agent'], state_size, action_size)
    agent.load_q_table('saved_models/q_table.npy')

    print(f"State space size: {state_size}")
    print(f"Action space size: {action_size}")
    print(f"Q-table shape: {agent.q_table.shape}")

    # Run evaluation.
    num_episodes = 10
    total_rewards = []

    for episode in range(num_episodes):
        state, _ = env.reset()
        episode_reward = 0
        done = False

        while not done:
            # Flatten the multi-discrete state into a single Q-table row index.
            state_idx = np.ravel_multi_index(tuple(state), env.observation_space.nvec)

            # Greedy action selection (no exploration during evaluation).
            action = np.argmax(agent.q_table[state_idx])

            # Step the environment. BUG FIX: the original discarded the
            # `truncated` flag from the 5-tuple step() return, so a
            # time-limit-truncated episode would never end; treat either
            # termination signal as episode end.
            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            episode_reward += reward
            state = next_state

            # Print the current step (note: `state` is already the post-step
            # observation here, matching the original print order).
            print(f"Episode {episode + 1}")
            print(f"State: {env.spaces.get_state_description(state)}")
            print(f"Action: {env.spaces.get_action_description(action)}")
            print(f"Reward: {reward:.2f}")
            print(f"Current Price: {env.current_price:.2f}")
            print("--------------------")

        total_rewards.append(episode_reward)
        print(f"Episode {episode + 1} finished with total reward: {episode_reward:.2f}")
        print("========================================")

    print(f"Average reward over {num_episodes} episodes: {np.mean(total_rewards):.2f}")


if __name__ == "__main__":
    main()