diff --git a/agents/__pycache__/offline_agent.cpython-312.pyc b/agents/__pycache__/offline_agent.cpython-312.pyc index b4aac63..7365f1f 100644 Binary files a/agents/__pycache__/offline_agent.cpython-312.pyc and b/agents/__pycache__/offline_agent.cpython-312.pyc differ diff --git a/agents/__pycache__/offline_agent.cpython-39.pyc b/agents/__pycache__/offline_agent.cpython-39.pyc index e453d77..fd97048 100644 Binary files a/agents/__pycache__/offline_agent.cpython-39.pyc and b/agents/__pycache__/offline_agent.cpython-39.pyc differ diff --git a/agents/__pycache__/policy.cpython-312.pyc b/agents/__pycache__/policy.cpython-312.pyc new file mode 100644 index 0000000..037abe4 Binary files /dev/null and b/agents/__pycache__/policy.cpython-312.pyc differ diff --git a/agents/offline_agent.py b/agents/offline_agent.py index 3b91715..aaf0c09 100644 --- a/agents/offline_agent.py +++ b/agents/offline_agent.py @@ -1,31 +1,45 @@ import numpy as np -import random import os +from .policy import EpisodePolicy + class QLearningAgent: def __init__(self, agent_params, state_size, action_size): self.state_size = state_size self.action_size = action_size - self.lr = agent_params['learning_rate'] - self.gamma = agent_params['discount_factor'] - self.epsilon = agent_params.get('epsilon', 0.1) # Add epsilon for exploration/evaluation + self.lr = agent_params["learning_rate"] + self.gamma = agent_params["discount_factor"] + # Initialize policy + self.episode_policy = EpisodePolicy(epsilon=agent_params.get("epsilon", 0.1)) self.q_table = np.zeros((state_size, action_size)) - def get_action(self, state): - if random.uniform(0, 1) < self.epsilon: - return random.randint(0, self.action_size - 1) - else: - return np.argmax(self.q_table[state, :]) + def get_action(self, state, action_mask=None): + q_values = self.q_table[state, :] + if action_mask is None: + action_mask = self.episode_policy.get_action_mask() + action = self.episode_policy.select_action(q_values, action_mask) + + if action is None: + # All actions have been taken in this episode + return None + + return action def learn(self, batch): for state, action, reward, next_state, terminated in zip( - batch['observations'], batch['actions'], batch['rewards'], batch['next_observations'], batch['terminals'] + batch["observations"], + batch["actions"], + batch["rewards"], + batch["next_observations"], + batch["terminals"], ): old_value = self.q_table[state, action] next_max = np.max(self.q_table[next_state, :]) - new_value = old_value + self.lr * (reward + self.gamma * next_max * (1 - terminated) - old_value) + new_value = old_value + self.lr * ( + reward + self.gamma * next_max * (1 - terminated) - old_value + ) self.q_table[state, action] = new_value def save_model(self, path): @@ -38,3 +52,7 @@ class QLearningAgent: print(f"Q-Table loaded from {file_path}") else: print(f"Error: No Q-Table found at {file_path}") + + def reset_episode(self): + """Reset agent for new episode""" + self.policy.reset_episode() diff --git a/agents/policy.py b/agents/policy.py new file mode 100644 index 0000000..597a818 --- /dev/null +++ b/agents/policy.py @@ -0,0 +1,75 @@ +from abc import ABC, abstractmethod +import numpy as np +import random + + +class Policy(ABC): + @abstractmethod + def select_action(self, q_values, action_mask=None): + pass + + +class EpisodePolicy(Policy): + def __init__(self, epsilon=0.1): + self.epsilon = epsilon + self.episode_actions = set() # Track actions taken in current episode + self.current_idx = 0 # For sequential action selection + + def get_action_mask(self): + # Create a mask with all actions available + action_mask = np.ones(9) # Assuming 9 actions + + # Mask already taken actions + for action in self.episode_actions: + action_mask[action] = 0 + + return action_mask + + def select_action(self, q_values, action_mask=None): + # Create default mask if none provided + if action_mask is None: + action_mask = self.get_action_mask() + + # Apply action mask + masked_q_values = q_values * action_mask + + # Check for available actions + valid_actions = np.where(action_mask)[0] + if len(valid_actions) == 0: + self.reset_episode() + return None + + # Get Q-values for valid actions + masked_q_values = q_values * action_mask + max_q = np.max(masked_q_values) + + # When all Q-values are effectively zero (very small), select actions sequentially + if np.allclose(masked_q_values[action_mask > 0], 0, atol=1e-10): + # Find the first available action in sequence + while self.current_idx in self.episode_actions and self.current_idx < len( + q_values + ): + self.current_idx += 1 + + if self.current_idx >= len(q_values): + self.reset_episode() + return None + + action = self.current_idx + self.episode_actions.add(action) + return action + + # Epsilon-greedy with masking for non-zero Q-values + if random.uniform(0, 1) < self.epsilon: + action = np.random.choice(valid_actions) + else: + max_actions = np.where(masked_q_values == max_q)[0] + action = np.random.choice(max_actions) + + self.episode_actions.add(action) + return action + + def reset_episode(self): + """Reset for new episode""" + self.episode_actions.clear() + self.current_idx = 0 # Reset sequential index diff --git a/interactive_negotiation.py b/interactive_negotiation.py new file mode 100644 index 0000000..b41749b --- /dev/null +++ b/interactive_negotiation.py @@ -0,0 +1,145 @@ +import random +import numpy as np +from negotiation_agent.environment import NegotiationEnv +from negotiation_agent.spaces import State, PriceZone, AcceptanceRate, Scenario +from agents.offline_agent import QLearningAgent +from usecases.initialize_env_usecase import initialize_environment_usecase + + +def convert_action_to_response(action_idx, proposed_price): + """에이전트의 행동을 상황에 맞는 응답 텍스트로 변환""" + action_responses = { + 0: [ + "강한 수락 (Action 0: STRONG_ACCEPT): 제안을 매우 흡족하게 수락하겠습니다." + ], + 1: ["중간 수락 (Action 1: MEDIUM_ACCEPT): 제안을 수락하겠습니다."], + 2: ["약한 수락 (Action 2: WEAK_ACCEPT): 고민 끝에 제안을 수락하겠습니다."], + 3: [ + f"강한 거절 (Action 3: STRONG_REJECT): {proposed_price}은(는) 너무 높은 가격입니다. 대폭 낮춰주셔야 합니다." + ], + 4: [ + f"중간 거절 (Action 4: MEDIUM_REJECT): {proposed_price}은(는) 높습니다. 더 낮은 가격을 제안해주세요." + ], + 5: [ + f"약한 거절 (Action 5: WEAK_REJECT): {proposed_price}은(는) 조금 높습니다. 더 조정이 필요합니다." + ], + 6: ["강한 가격 제안 (Action 6: STRONG_PROPOSE): 대폭 낮은 가격을 제안합니다."], + 7: ["중간 가격 제안 (Action 7: MEDIUM_PROPOSE): 조정된 가격을 제안합니다."], + 8: ["약한 가격 제안 (Action 8: WEAK_PROPOSE): 소폭 조정된 가격을 제안합니다."], + } + + response = random.choice( + action_responses.get( + action_idx, [f"Action {action_idx}: 가격 조정이 필요합니다."] + ) + ) + return f"{proposed_price}에 대한 응답 - {response}" + + +def run_interactive_negotiation(): + """대화형 협상 시뮬레이션 실행""" + # 환경 및 에이전트 초기화 + env = initialize_environment_usecase() + agent_params = { + "learning_rate": 0.001, + "discount_factor": 0.99, + "epsilon": 0.0, # 평가 모드에서는 탐험하지 않음 + } + # MultiDiscrete 공간의 크기 계산 + state_size = np.prod(env.observation_space.nvec) # 상태 공간의 전체 크기 + action_size = ( + env.action_space.n + if hasattr(env.action_space, "n") + else np.prod(env.action_space.nvec) + ) # 행동 공간의 전체 크기 + + agent = QLearningAgent(agent_params, state_size, action_size) + + # Q-table 로드 + agent.load_q_table("saved_models/q_table.npy") + + while True: + # 새로운 에피소드 시작 + state = env.reset() + target_price = env.target_price + threshold_price = env.threshold_price + episode_done = False + + print("\n=== 새로운 협상 시작 ===") + print(f"목표 가격: {target_price}") + print(f"임계 가격: {threshold_price}") + print("\n협상을 시작합니다. 가격을 제안해주세요.") + + while not episode_done: + # 사용자 입력 받기 + try: + user_price = float(input("\n당신의 제안 가격을 입력하세요: ")) + + # 목표가격 이하로 제안이 들어오면 즉시 수락 및 종료 + if user_price <= target_price: + print("\n=== 협상 성공! ===") + print( + f"제안된 가격 ({user_price})이 목표가격 ({target_price}) 이하입니다." + ) + print("에이전트: 즉시 수락 (특별 행동: 즉시 수락)") + print("\n시뮬레이션을 종료합니다.") + return # 전체 시뮬레이션 종료 + + except ValueError: + print("올바른 가격을 입력해주세요") + continue + + # 현재 가격 업데이트 및 상태 계산 + env.current_price = user_price + next_state = env._get_state() + + # 상태 인덱스 계산 + try: + state_idx = np.ravel_multi_index(next_state, env.observation_space.nvec) + except ValueError as e: + print(f"\n디버그 정보:") + print(f"현재 상태 벡터: {next_state}") + print(f"상태 공간 크기: {env.observation_space.nvec}") + print(f"에러: {e}") + state_idx = 0 + + # 현재 상태의 Q값들과 액션 마스크 출력 + print(f"\n디버그 정보:") + print(f"현재 상태 벡터: {next_state}") + print(f"계산된 상태 인덱스: {state_idx}") + q_values = agent.q_table[state_idx] + + # 액션 마스크 가져오기 + action_mask = agent.episode_policy.get_action_mask() + + print("\n현재 상태의 Q값들과 선택 가능 여부:") + print("(O: 선택 가능, X: 이미 사용됨)") + for action_idx, (q_value, mask) in enumerate(zip(q_values, action_mask)): + available = "O" if mask == 1 else "X" + print(f"Action {action_idx}: {q_value:.4f} [{available}]") + + # 에이전트의 응답 생성 (epsilon=0이므로 항상 최대 Q값의 행동 선택) + agent_action = agent.get_action(state_idx) + masked_q_values = q_values * action_mask + max_q = np.max(masked_q_values) + print(f"\n선택된 행동: {agent_action} (Q값: {q_values[agent_action]:.4f})") + if np.allclose(masked_q_values[action_mask > 0], 0, atol=1e-10): + print("(순차적 선택: 모든 유효한 Q값이 0에 가까움)") + + # 에이전트의 응답 출력 + response = convert_action_to_response(agent_action, user_price) + print(f"\n에이전트의 응답: {response}") + + state = next_state + + # 다시 시작 여부 확인 + if ( + not input("\n새로운 협상을 시작하시겠습니까? (y/n): ") + .lower() + .startswith("y") + ): + break + + +if __name__ == "__main__": + run_interactive_negotiation() diff --git a/negotiation_agent/__pycache__/__init__.cpython-312.pyc b/negotiation_agent/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..7577c78 Binary files /dev/null and b/negotiation_agent/__pycache__/__init__.cpython-312.pyc differ diff --git a/negotiation_agent/__pycache__/action_space.cpython-312.pyc b/negotiation_agent/__pycache__/action_space.cpython-312.pyc new file mode 100644 index 0000000..f4647ce Binary files /dev/null and b/negotiation_agent/__pycache__/action_space.cpython-312.pyc differ diff --git a/negotiation_agent/__pycache__/environment.cpython-312.pyc b/negotiation_agent/__pycache__/environment.cpython-312.pyc new file mode 100644 index 0000000..d11a757 Binary files /dev/null and b/negotiation_agent/__pycache__/environment.cpython-312.pyc differ diff --git a/negotiation_agent/__pycache__/spaces.cpython-312.pyc b/negotiation_agent/__pycache__/spaces.cpython-312.pyc new file mode 100644 index 0000000..51cf000 Binary files /dev/null and b/negotiation_agent/__pycache__/spaces.cpython-312.pyc differ diff --git a/poetry.lock b/poetry.lock index cf91e6a..54db6b1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -12,6 +12,19 @@ files = [ {file = "cloudpickle-3.1.1.tar.gz", hash = "sha256:b216fa8ae4019d5482a8ac3c95d8f6346115d8835911fd4aefd1a445e4242c64"}, ] +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] +markers = "sys_platform == \"win32\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + [[package]] name = "farama-notifications" version = "0.0.4" @@ -146,6 +159,18 @@ files = [ [package.dependencies] numpy = ">=1.19.3" +[[package]] +name = "iniconfig" +version = "2.1.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -566,6 +591,71 @@ files = [ {file = "nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e"}, ] +[[package]] +name = "packaging" +version = "25.0" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["coverage", "pytest", "pytest-benchmark"] + +[[package]] +name = "pygments" +version = "2.19.2" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"}, + {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + +[[package]] +name = "pytest" +version = "8.4.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, + {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, +] + +[package.dependencies] +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +iniconfig = ">=1" +packaging = ">=20" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] + [[package]] name = "pyyaml" version = "6.0.2" @@ -772,4 +862,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.14" -content-hash = "46bdc65ebbf8732cbae2738e1da7875aea5a378314abe29e32cefcfe6474126a" +content-hash = "d077803cae14e91eee21b3756e24c72c5c514267d79a6aa130c4f69b90a794a8" diff --git a/pyproject.toml b/pyproject.toml index ff11596..39cf374 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,23 +1,21 @@ -[project] +[tool.poetry] name = "q-table" version = "0.1.0" description = "" -authors = [ - {name = "fbdeme",email = "90471819+fbdeme@users.noreply.github.com"} -] +authors = ["fbdeme <90471819+fbdeme@users.noreply.github.com>"] readme = "README.md" -requires-python = ">=3.11,<3.14" -dependencies = [ - "gymnasium (>=1.2.0,<2.0.0)", - "numpy (>=2.3.3,<3.0.0)", - "h5py (>=3.14.0,<4.0.0)", - "pyyaml (>=6.0.2,<7.0.0)", - "torch (>=2.8.0,<3.0.0)" -] +packages = [{include = "."}] +[tool.poetry.dependencies] +python = ">=3.11,<3.14" +gymnasium = ">=1.2.0,<2.0.0" +numpy = ">=2.3.3,<3.0.0" +h5py = ">=3.14.0,<4.0.0" +pyyaml = ">=6.0.2,<7.0.0" +torch = ">=2.8.0,<3.0.0" -[tool.poetry] -package-mode = false +[tool.poetry.group.dev.dependencies] +pytest = "^8.4.2" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] diff --git a/saved_models/q_table.json b/saved_models/q_table.json index 54e01e3..c5fbebf 100644 --- a/saved_models/q_table.json +++ b/saved_models/q_table.json @@ -1,2026 +1,112 @@ { - "metadata": { - "state_size": 36, - "action_size": 9, - "timestamp": "2025-09-22T16:20:10.539545", - "training_episodes": 10 - }, - "q_values": [ - { - "state_idx": 0, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격 이하, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.08795120152833774 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.07980060621013178 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.06578247676712926 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.05640440663220407 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.08343614486312115 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.07708021145254244 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.06649834752893999 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0483122563738206 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.07784918257350587 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.08795120152833774 - } - }, - { - "state_idx": 1, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격 이하, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.04181960568421511 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.01650653844752281 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.02224513478479088 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.04105294727331152 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.05931104531941931 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.04004415229683608 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.02881340637109566 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.033459877776815736 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.05507207930384824 - } - ], - "optimal_action": { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.05931104531941931 - } - }, - { - "state_idx": 2, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격 이하, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.07486255107754664 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.07980060621013178 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.05926445786066637 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.04275562307545951 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.04916661909531326 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.07053122078911914 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.04567119680331397 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.04118128186893584 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0647616644539918 - } - ], - "optimal_action": { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.07980060621013178 - } - }, - { - "state_idx": 3, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 4, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 5, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 6, - "state_desc": "State(scenario=높은 구매 의지, price_zone=임계가격 초과, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 7, - "state_desc": "State(scenario=높은 구매 의지, price_zone=임계가격 초과, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 8, - "state_desc": "State(scenario=높은 구매 의지, price_zone=임계가격 초과, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 9, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격 이하, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 10, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격 이하, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 11, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격 이하, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 12, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 13, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 14, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 15, - "state_desc": "State(scenario=중간 구매 의지, price_zone=임계가격 초과, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 16, - "state_desc": "State(scenario=중간 구매 의지, price_zone=임계가격 초과, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 17, - "state_desc": "State(scenario=중간 구매 의지, price_zone=임계가격 초과, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 18, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 19, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 20, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 21, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 22, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 23, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 24, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 25, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 26, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 27, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 28, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 29, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 30, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 31, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 32, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 33, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 34, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 35, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - } - ] + "0": [ + 0.07860207418303787, + 0.09669185947649497, + 0.00545838101358499, + 0.0634685349345901, + 0.04896882345490129, + 0.011071837685599328, + 0.02373142720866326, + 0.040710050390840025, + 0.057908788619546694 + ], + "1": [ + 0.03131144037823771, + 0.026904571593437822, + 0.0746202421515731, + 0.07668658840526418, + 0.0566929766904952, + 0.0928366837674772, + 0.09992295981895805, + 0.06804924626417622, + 0.03465275246415239 + ], + "2": [ + 0.0337775546511296, + 0.019233162710519693, + 0.09337842138095943, + 0.0418180529168455, + 0.0455664603958991, + 0.034035827299915423, + 0.05772592768996248, + 0.052621723571909175, + 0.04969945041515063 + ], + "3": [ + 0.014178139983042926, + 0.004869457147381262, + 0.013296329386477658, + 0.07159586757640321, + 0.04650155306866092, + 0.03908809426776613, + 0.09829371848543218, + 0.09551734097779814, + 0.09933597843126746 + ], + "4": [ + 0.01314656381661038, + 0.03772331883055289, + 0.010219538647158645, + 0.08742142563213022, + 0.01887893282227795, + 0.08690083610071421, + 0.00044717877475308754, + 0.06368163817427484, + 0.033458612940268084 + ], + "5": [ + 0.07889862014704474, + 0.057224439403031326, + 0.0939164792832018, + 0.041856396706002386, + 0.0048492850418650705, + 0.06579737247159666, + 0.08856882483528883, + 0.002164381756437062, + 0.06677124541980636 + ], + "6": [ + 0.07821060715082709, + 0.08834973107011755, + 0.020383487470968076, + 0.0949968242101048, + 0.09724403969134705, + 0.042860882296615936, + 0.08358102011182952, + 0.06585468804852727, + 0.045960969954383715 + ], + "7": [ + 0.037836904050297854, + 0.042865046541691376, + 0.04152604015726677, + 0.08352854281531587, + 0.09382152590451939, + 0.016753650711956194, + 0.020857806219263586, + 0.05572523796471475, + 0.013474646369689692 + ], + "8": [ + 0.023989831261016906, + 0.057352182089764586, + 0.03290126013901756, + 0.06726275608456586, + 0.005863722248365922, + 0.06197326609192151, + 0.00675663049407681, + 0.035809720335395055, + 0.06880322134415771 + ], + "9": [ + 0.0039004471963525636, + 0.07827699924944835, + 0.023437234009404506, + 0.07636098855864303, + 0.03187103806012823, + 0.0010322715404335826, + 0.0038868438826868304, + 0.0010063667863797088, + 0.015321259421990231 + ] } \ No newline at end of file diff --git a/saved_models/q_table.npy b/saved_models/q_table.npy index e13b6c1..e6eea98 100644 Binary files a/saved_models/q_table.npy and b/saved_models/q_table.npy differ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/__pycache__/__init__.cpython-312.pyc b/tests/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000..f2776c0 Binary files /dev/null and b/tests/__pycache__/__init__.cpython-312.pyc differ diff --git a/tests/__pycache__/test_episode_policy.cpython-312-pytest-8.4.2.pyc b/tests/__pycache__/test_episode_policy.cpython-312-pytest-8.4.2.pyc new file mode 100644 index 0000000..33d0411 Binary files /dev/null and b/tests/__pycache__/test_episode_policy.cpython-312-pytest-8.4.2.pyc differ diff --git a/tests/test_episode_policy.py b/tests/test_episode_policy.py new file mode 100644 index 0000000..fb36973 --- /dev/null +++ b/tests/test_episode_policy.py @@ -0,0 +1,86 @@ +import pytest +import numpy as np +from agents.policy import EpisodePolicy + +@pytest.fixture +def policy(): + return EpisodePolicy(epsilon=0.0) # epsilon=0 for deterministic testing + +@pytest.fixture +def q_values(): + return np.array([1.0, 2.0, 3.0, 0.5]) # Example Q-values + +def test_select_action_without_mask(policy, q_values): + """Test action selection without any mask""" + # First action should be the highest Q-value + action = policy.select_action(q_values) + assert action == 2 # index 2 has highest value (3.0) + + # Second action should exclude the previous one + action = policy.select_action(q_values) + assert action == 1 # index 1 has second highest value (2.0) + +def test_select_action_with_mask(policy, q_values): + """Test action selection with explicit action mask""" + action_mask = np.array([1, 1, 0, 1]) # Mask out action 2 + action = policy.select_action(q_values, action_mask) + assert action == 1 # index 1 has highest value among unmasked actions + +def test_episode_tracking(policy, q_values): + """Test if actions are properly tracked within an episode""" + # Take some actions + policy.select_action(q_values) + policy.select_action(q_values) + policy.select_action(q_values) + + # Check if actions were tracked + assert len(policy.episode_actions) == 3 + +def test_reset_episode(policy, q_values): + """Test episode reset functionality""" + # Take some actions + policy.select_action(q_values) + policy.select_action(q_values) + + # Reset episode + policy.reset_episode() + + # Check if actions were cleared + assert len(policy.episode_actions) == 0 + +def test_all_actions_taken(policy, q_values): + """Test behavior when all actions have been taken""" + # Take all possible actions + actions_taken = [] + for _ in range(len(q_values)): + action = policy.select_action(q_values) + assert action is not None + actions_taken.append(action) + + # Verify all actions were unique + assert len(set(actions_taken)) == len(q_values) + + # Try to take one more action + action = policy.select_action(q_values) + assert action is None # Should return None when no actions are available + + # Check if episode was automatically reset + assert len(policy.episode_actions) == 0 + +@pytest.mark.parametrize("epsilon,min_unique_actions", [ + (0.0, 1), # Deterministic - should always take best action first + (1.0, 3) # Random - should see multiple different actions +]) +def test_epsilon_greedy(q_values, epsilon, min_unique_actions): + """Test epsilon-greedy behavior with different epsilon values""" + policy = EpisodePolicy(epsilon=epsilon) + actions = set() + + # Take multiple actions and verify they're appropriate for the epsilon value + for _ in range(50): # Run multiple times to ensure statistical significance + action = policy.select_action(q_values) + if action is not None: + actions.add(action) + policy.reset_episode() + + assert len(actions) >= min_unique_actions diff --git a/tests/test_qlearning_agent.py b/tests/test_qlearning_agent.py new file mode 100644 index 0000000..4b717f4 --- /dev/null +++ b/tests/test_qlearning_agent.py @@ -0,0 +1,109 @@ +import pytest +import numpy as np +from agents.offline_agent import QLearningAgent + +@pytest.fixture +def agent_params(): + return { + 'learning_rate': 0.1, + 'discount_factor': 0.99, + 'epsilon': 0.0 # Deterministic for testing + } + +@pytest.fixture +def agent(agent_params): + return QLearningAgent(agent_params, state_size=4, action_size=3) + +def test_agent_initialization(agent): + """Test agent initialization""" + assert agent.state_size == 4 + assert agent.action_size == 3 + assert agent.lr == 0.1 + assert agent.gamma == 0.99 + assert agent.q_table.shape == (4, 3) + assert np.all(agent.q_table == 0) # Q-table should be initialized to zeros + +def test_get_action_with_mask(agent): + """Test action selection with action masking""" + # Set up known Q-values + agent.q_table[0] = np.array([1.0, 2.0, 3.0]) + + # Test without mask + action = agent.get_action(0) + assert action == 2 # Should choose highest Q-value + + # Test with mask + action_mask = np.array([1, 1, 0]) # Mask out the highest value + action = agent.get_action(0, action_mask) + assert action == 1 # Should choose second highest value + +def test_episode_tracking(agent): + """Test action tracking within an episode""" + agent.q_table[0] = np.array([1.0, 2.0, 3.0]) + + # Take all possible actions + actions = [] + for _ in range(agent.action_size): + action = agent.get_action(0) + assert action is not None + actions.append(action) + + # Verify all actions were unique + assert len(set(actions)) == agent.action_size + + # Next action should be None as all actions are taken + assert agent.get_action(0) is None + +def test_episode_reset(agent): + """Test episode reset functionality""" + agent.q_table[0] = np.array([1.0, 2.0, 3.0]) + + # Take some actions + agent.get_action(0) + agent.get_action(0) + + # Reset episode + agent.reset_episode() + + # Should be able to take the best action again + action = agent.get_action(0) + assert action == 2 # Highest Q-value action + +def test_learning(agent): + """Test Q-learning update""" + # Create a simple batch + batch = { + 'observations': np.array([0]), + 'actions': np.array([1]), + 'rewards': np.array([1.0]), + 'next_observations': np.array([1]), + 'terminals': np.array([False]) + } + + # Set up known Q-values + agent.q_table[1] = np.array([0.5, 0.8, 0.3]) # Next state Q-values + old_value = agent.q_table[0, 1] + + # Perform learning update + agent.learn(batch) + + # Check if Q-value was updated correctly + # Q(s,a) = Q(s,a) + lr * (R + gamma * max(Q(s')) - Q(s,a)) + expected_value = old_value + agent.lr * (1.0 + agent.gamma * 0.8 - old_value) + assert np.isclose(agent.q_table[0, 1], expected_value) + +def test_save_and_load(agent, tmp_path): + """Test model saving and loading""" + # Set some Q-values + agent.q_table[0] = np.array([1.0, 2.0, 3.0]) + + # Save model + save_path = tmp_path / "q_table.npy" + agent.save_model(save_path) + + # Create new agent and load model + new_agent = QLearningAgent(agent_params(), state_size=4, action_size=3) + new_agent.load_q_table(save_path) + + # Check if Q-values match + assert np.all(agent.q_table == new_agent.q_table) diff --git a/usecases/__pycache__/initialize_env_usecase.cpython-312.pyc b/usecases/__pycache__/initialize_env_usecase.cpython-312.pyc new file mode 100644 index 0000000..0fcf53b Binary files /dev/null and b/usecases/__pycache__/initialize_env_usecase.cpython-312.pyc differ