# KT_Q_Table/negotiation_agent/environment.py
import gymnasium as gym
from gymnasium import spaces
import numpy as np
class NegotiationEnv(gym.Env):
    """Custom negotiation environment based on the 'Q-Table design report'.

    Observation: MultiDiscrete([4, 3, 3]) =
        (scenario, price_zone, acceptance_rate_level).
    Action: Discrete(9) — negotiation tactics. NOTE(review): the seller
    response below ignores the action entirely (simplified simulation).
    """

    def __init__(self, scenario=0, target_price=100, threshold_price=120):
        super().__init__()
        # (scenario, price_zone, acceptance_rate_level)
        self.observation_space = spaces.MultiDiscrete([4, 3, 3])
        self.action_space = spaces.Discrete(9)
        self.initial_scenario = scenario
        self.target_price = target_price        # buyer's ideal price
        self.threshold_price = threshold_price  # buyer's walk-away price
        self.current_price = None   # set in reset()
        self.initial_price = None   # seller's opening price, set in reset()
        self.state = None

    def _get_state(self):
        """Compute the state array from the current negotiation info."""
        # Price zone: 0 = at/below target, 1 = target..threshold, 2 = above.
        if self.current_price <= self.target_price:
            price_zone = 0
        elif self.target_price < self.current_price <= self.threshold_price:
            price_zone = 1
        else:
            price_zone = 2
        # Fraction of the opening price the seller has conceded so far.
        acceptance_rate_val = (
            self.initial_price - self.current_price
        ) / self.initial_price
        if acceptance_rate_val < 0.1:
            acceptance_rate_level = 0
        elif 0.1 <= acceptance_rate_val < 0.25:
            acceptance_rate_level = 1
        else:
            acceptance_rate_level = 2
        return np.array([self.initial_scenario, price_zone, acceptance_rate_level])

    def reset(self, seed=None, options=None):
        """Reset the environment to its initial state.

        Returns (observation, info) per the Gymnasium API.
        """
        super().reset(seed=seed)  # seeds self.np_random for this episode
        # Seller opens 20% above the buyer's walk-away threshold.
        self.initial_price = self.threshold_price * 1.2
        self.current_price = self.initial_price
        self.state = self._get_state()
        return self.state, {}

    def step(self, action):
        """Apply an action; return (obs, reward, terminated, truncated, info)."""
        # Very simple seller-response simulation: the price drops slightly
        # regardless of the action taken.
        # BUGFIX: use the seeded self.np_random (initialised by
        # reset(seed=...)) instead of the global np.random, so that a given
        # seed yields a reproducible episode.
        price_drop = self.np_random.uniform(0.02, 0.08) * self.current_price
        self.current_price -= price_drop
        # Episode ends once the price falls clearly below the target.
        terminated = self.current_price < self.target_price * 0.95
        # Compute the new state once and reuse it for the reward
        # (the original called _get_state() twice).
        self.state = self._get_state()
        reward = self._calculate_reward(self.state, terminated)
        return self.state, reward, terminated, False, {}

    def _calculate_reward(self, state, terminated):
        """Report-based reward function (simplified version)."""
        scenario_idx, price_zone_idx, _ = state
        s_n_map = {0: 1.0, 1: 0.75, 2: 0.5, 3: 0.25}
        pz_n_map = {0: 0.1, 1: 0.5, 2: 1.0}
        # W blends scenario importance with how favourable the price zone is.
        W = (s_n_map[scenario_idx] + pz_n_map[price_zone_idx]) / 2
        # Price-ratio term weighted by W, plus a terminal bonus weighted by
        # (1 - W); max(..., 1) guards against division by a near-zero price.
        reward = W * (self.target_price / max(self.current_price, 1)) + (1 - W) * (
            1 if terminated else 0
        )
        return reward