From 6de135680eca81353892abe4f6d34c2c04f7dd5d Mon Sep 17 00:00:00 2001 From: mgjeon Date: Wed, 24 Sep 2025 16:51:44 +0900 Subject: [PATCH] feat: Enhance action selection and Q-table initialization Key changes: - Add random Q-table initialization with small values (0-0.1) - Implement action masking mechanism to prevent repeated actions - Add debug information to show available actions and Q-values - Add epsilon-greedy selection with action masking - Add tests for policy and agent behavior --- .../__pycache__/offline_agent.cpython-312.pyc | Bin 3018 -> 3303 bytes .../__pycache__/offline_agent.cpython-39.pyc | Bin 2523 -> 1959 bytes agents/__pycache__/policy.cpython-312.pyc | Bin 0 -> 3383 bytes agents/offline_agent.py | 40 +- agents/policy.py | 75 + interactive_negotiation.py | 145 ++ .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 221 bytes .../__pycache__/action_space.cpython-312.pyc | Bin 0 -> 6036 bytes .../__pycache__/environment.cpython-312.pyc | Bin 0 -> 4662 bytes .../__pycache__/spaces.cpython-312.pyc | Bin 0 -> 7209 bytes poetry.lock | 92 +- pyproject.toml | 26 +- saved_models/q_table.json | 2134 +---------------- saved_models/q_table.npy | Bin 2720 -> 848 bytes tests/__init__.py | 0 tests/__pycache__/__init__.cpython-312.pyc | Bin 0 -> 209 bytes ...pisode_policy.cpython-312-pytest-8.4.2.pyc | Bin 0 -> 11315 bytes tests/test_episode_policy.py | 86 + tests/test_qlearning_agent.py | 109 + .../initialize_env_usecase.cpython-312.pyc | Bin 0 -> 1034 bytes 20 files changed, 657 insertions(+), 2050 deletions(-) create mode 100644 agents/__pycache__/policy.cpython-312.pyc create mode 100644 agents/policy.py create mode 100644 interactive_negotiation.py create mode 100644 negotiation_agent/__pycache__/__init__.cpython-312.pyc create mode 100644 negotiation_agent/__pycache__/action_space.cpython-312.pyc create mode 100644 negotiation_agent/__pycache__/environment.cpython-312.pyc create mode 100644 negotiation_agent/__pycache__/spaces.cpython-312.pyc create mode 100644 tests/__init__.py create mode 100644 tests/__pycache__/__init__.cpython-312.pyc create mode 100644 tests/__pycache__/test_episode_policy.cpython-312-pytest-8.4.2.pyc create mode 100644 tests/test_episode_policy.py create mode 100644 tests/test_qlearning_agent.py create mode 100644 usecases/__pycache__/initialize_env_usecase.cpython-312.pyc diff --git a/agents/__pycache__/offline_agent.cpython-312.pyc b/agents/__pycache__/offline_agent.cpython-312.pyc index b4aac63973e29da392a93f68837e2b1ba357f513..7365f1fec6e48145c3a141506170dbf5484461a6 100644 GIT binary patch delta 1359 zcmY*YUuauZ7(eI!%gs%bHcOVIS=T0-3AbAdD^aJWo#MtA8>LzW&19Er?zK(KznyzK zo08HZeTWYt<$!tcrS@r~dl>9NA4L&FK^rnjxhV8SWUt9UP#-+sO(NU(@Vnpr{(tBE z&X3VQ#{)n4{R(38;(mM8^6Um;TWR*!L|}uB16fL7>3zuo=iYQXjm3)gYMMwED#Hx3gowWBXLgil9$Fw|m>htEKuC;a#J88q1r1;pkJ(|`F9X_K#W+i|Lk5qi@b zRt^R!n;e+9f(I7cVll(J*%a!+Hm;$%u#E2FMO7dnmO{oDIHPPWP0!i+N?9xE)`yPY zGK*$zXj4TJhA`2xb=!1&Pe#hOwA*@d&9oe;Z?;4hngAgYS{9~nLI-P^!8SSyMVp!3 z%$~S^Yfo;4)P{WIiyZnUkK&V^_)IH4(}~Zw;`8nJ#m2Rx;K@!f)e5FM!MRp&?*4_Z zZ?uC~8`7~a*jP9YMUO{MHvA;^;IZ%p&cGq5lEZK0tNpct%;4}PeVzE3Mgrhh23x$Z z+qqS88te%M^k-=aN9dF%fhXvzp6_vtCcWcyO5Qx@RuL@yw&_-N#45>`ZI$a65~{kr zY5|GeU4M|L@pH5%&*0dDq*uXbo@X9MGS~B#ZIOPdVnsLl1H$uyLUc)qum^gu97WSg6=SVP|4~kCMIj^!0ENg4 z02u)2d>8dgxTCn>28=4r@OK!&i4YGnCd4(3L}hI zUMp2=?&T0TUa|m?f_LXuC9i|#P7Lk-mNV{<<^s7)zw|{y-rrI3FJTs^x`=@r_#cxp BI->vp delta 1025 zcmZ8eO-vI(7@gUl?rwh&ETuGBDJczZ1%lAV6rr4qq6v|}sgRi3Ez;V*Ocz552_86L zVq%(!7UDrXiNP3uE}V=PFL0sufLS?`7>*R8#>B*#ZC9d`?3=f<-}mOdZ@;;=oDJJH zn;DRGd}Vv)H(zXMUF;=qC7=KWra%PhI}>5_osF;zxC#_z2`KCaL;C=Ta0MuH2b7M+ zC>jga$&sIs5{T<+L1{`IKnN!q=c`* zb9hH;VUi7a8G3D^aYq5jJl=!>DB)w+Vb<6?aTU?8xR1GDk_bU&9AjR=aoosuJH9x* zyUxzCvvXZ~(_e93TIL>F_wgj_a>nXubYT-gmOMuqzAk#L3RD;)EzXK9op(tw@~TEa zkfaU&Ed?&%x00hzVdlwzjrMV%T8zS^)nF(40w4(=v!3aVy7)Md*-=Ez9nce5)J!Ov zQL<@Gn9U@nvnZ{{)dYH)$f$_^P1Ha@GeyyzbaW0i(mnUliKj#oMV z)G6|kA0u!d>{*%~g&%}h+E>OZmbSHy7hTW0K3jVB?5oLV>9S1IU)&SY9gD#KTS8G9ghee=)g(g6Tx2bNtYgy%SZ3Uby1Da zAu<3XDVK|?Gbcf-q93+lx74jSyRaf14)*DH#OD%uHD9yDrLs|_X7M8RRG*qOok+!R z{VP~B_dqZHXucshNad(3;_nvUVl&~$O`(TEKZOv1mjKZ+WT(Ixz!Q=`LtqJ18G*lC zJtX;dqu5OB}hTn8=}nd_mW@YU^=VJrs82V Wj6Yag?6$8!`oVeOP!$l+RsR4U-`@iO diff --git a/agents/__pycache__/offline_agent.cpython-39.pyc b/agents/__pycache__/offline_agent.cpython-39.pyc index e453d772232596680c7aabcd3bf60782386ad426..fd970487cef8c8ab517bac29a2bf97f83c8b3ce6 100644 GIT binary patch literal 1959 zcmZWpPj6f`5V!sQ?q-{|QBV_Af<*+xA~h=^B&4c>Kw9-c*{VvpJQY1TYwz}@@6X2e zrpapelyc_^2x)WVBk&pc2EKCYof8s*8NUtLB!2RE>^B~Je)IG28#kH++Fy_Uc==C2 z$e%b_ZXQf_pzBXS2qI`s66!{lFgJRM_m~JKyf;L6r!4UW-6I|UE!dM@ALVjmFxi2w zKLa71FYrSJdqWaWc*2L#7l8<23`8Vi7(>wzO&BAw0-jrT_1P22RhgB;uZFTTeIHz% zH`KB3Lf2n|n6fEJ$_cb{bYimQ|EZeESW9^4Q-bRSpMFqaQib|mi|yej%t!9U*_7Cg(h zLGONh(suv4Pd|Oy{pR^@_gVTJuhDg<>uxm|qIGBar!!YyaqwF8PFbI6iln2!VYil{AE@dRY!z!z+ToUuJ(f_Vt@ z2(k#A{DBQ1muJ8_mo2ZSDEN7cq7&Ot@lsSp5{=7jP^rS9okX~rmB!(y8enQet^kW; zwSxPY!f!OfbN zRCvHnDtsq{O>!DMB;OBS1T!}CX8tUgVf?ga>L!^*hRuQDuo5gIGK;4X;x_d`2Ge+* zh;SNikacp0Orv_7HKvUVUk}LPmP0Y>*;cg=v^wHQTp(o~FepmC=1ORLyOhT!y=2rd zQUL>buGMXbsuNmol8qdsHOL+!jCd&wztdd6yw6SlrCNt2NP~MRNmw6~G~FpK4@q<3 z-bi5^RW8yao{wb`I}`A18>M_*FJ8@A;2!WhaDv7+Lnx!~{=4R_(hasnC+kWDQ?)yOjf0OX4i_7Y z6cF*8asG&#msc;}MC-Jugv{TAqcvWH&{cYkZWngfE{{dV(d|Ac2B^GzFTZ^_i7@Stb0%m zpk$T2kZEe0DFQl%V^Z5n)59^(>p$ui_*2Lf^*)LZQFKrsah3$WeTBThTCDL&$24xl uVcd*;x84bCP>zeyiHlXOq5VfFaMO|_=hs!1_(6TAFR*yIx&kg~%>Dxgy4(H$ literal 2523 zcmZuzOOF#r5bo}Ide#QBSy&*6LJ|%*3F|-(L=lQWOTr?IjcPC0wwhZeWE{epz{OJbAIft4KH+D`LvCbmSC zET`itsN=!0Rd+#Dq@p!BVly@;lvLKtu3_0Zt4ZXb1zR+h+gLR-eI6&U4cmJ22dvTa zwWB6uAteJ#`%#n@X%$796-DSiN(&K7Z4HuJC~l+pre|qeiEN;Qv=~pSNTr8DuauK& zX}qSo6-7xFD-}hr$$xwOUzCulKN4k;r^WDK(tnih$XFiqH?wlWx2sadL(zX&mcvXu zkm;VdIeL~SWgg$TH_YQS+W@`)_~4r9>p!{ur2o^?&Hm5P(|9KneVAKRs$UKU8JssV z=EnFyI`Bm8{c-01oip&7!8LR0Wep9-zt)&fp z06g~8z=YRy3qt6?CndX~Ek!nw9xNj946+5H$0U+d(|vg!eMWTA;|)srdn|byjK!I| zv`Yhec;V#Ci!nVHV^zetfLH}ll$U&xq27+7=aV>VdJF_|4Hxe$B9e$Ki=tCockmFn zzhMA=xk8rtwCx&C0>+sK0lEjT`U=F7(uU6DgwEXIF3{TH9`~Vjc?&of==PH)NcS;h z3GTCeP{-H6{v#;Jj)WcRrbiK$;g_ya~L&(^n8VWukvRt^hAN- zxxJ-38IZUD;zcs9gg(zxm6VgBiUx54aq7{KWwN&-ufneKLlpRl+5t>Ork!$JrFm%I z=W(usaV!Dz3h1m`rUS??C9*8^9&i8fs8mXO8OUf*^z0_mZNtdOOjU9PH^2~|ih+S2 zq5lA0Woj3MJz#BmnI2v|joJG-YXSyS4R}vMF&qU>VriIJPzLCffzO_kKd({%XXZdL zpi}EtsB|{`jx0)w?k%V*UqdOu-)~q_$jq%RSjC$+B|}nJyXVVQXml`;WgZoie0-pt zK~~0oMVq`Y62qc8O;IBue}*0MGb`IE>sdW^RS(^Dl+nGpS#?9 z#khZkOoQ46LbvK*jlg*KGFc<*WV!+?1$C=lnXSypoK4#>rakLm>95sTH5Xx(bO)=<4!kV?ti0tLzf%vVi*ZaPwxkyo;I;`sUu-D2k>et~918 kl=5SksolrsqB0fM%zWQQ+P~Y}XWt_O7rV7-o2_>K2NwKOw*UYD diff --git a/agents/__pycache__/policy.cpython-312.pyc b/agents/__pycache__/policy.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..037abe4ee2634c9d24e5f27dedffed67fa6fe0b4 GIT binary patch literal 3383 zcmbsrOKcm*b@t1(Ns6>2QWB}pmX=e|a6}_*k-|SUwJRUo$g$-F2%BcH=5A7#X zo_>Y2z@n2#&zX_-J2HatN^p3F7{@96*lR6Ojwev!@ctBEGgc+bxX%qDzBTxLavyt zlvBUWoyRJ!q>dYASz9idSk3CGlct$9^b3u?}o2Rij?MW$=1 z(=VM)ojP|sbw)X-o;UOq>@Ak8)GAecezoERK`T8|Qe0)$M%^pT_sxo+oJHQ~DtaUZ zIzS*S0ZIyvq$owTpeu^ATTu$8RyGJ7P?U>h)o@oZF$5kW2;Lpx5J5qLh+K+@Y-<$I zI%*3ePgzC?b~q%Cen}{I1L4iUzXxoW>dXPL{ZyK$G3!j4JQo6w@o7Aa1P%NmEmpXs zyrj{aR&J}YdMOlmXsXy<{ z`^kThe4ZK3V7QOXJj#83vd#xi$_&v8Qg#AF@1b$6V73as}64#bqkLJ zSCVtD9RS)9T~G8P!o7bet9vd=<7I8C-wXrGVGv~f63|ujMRa^4doz1W{bRoFe zcq_W!j_!Yom|e5=#dczE{nRaSGcfang9%^=$L~ooYO4OYCO%4SwF!6wRoSayutLvb zu-XOBJ!;enYp}1zZ9C6ScB`_UWi@63q}2El*wKY5zrD5su8#M>sl=26;GN2#3Rt|# ze&bx4efu|6IZt*?cuqs|3&~mgFKR9m2GQ%KTRgSrzXsG&}Ao0cGMk$>N~yGjuLC) z&i2cX3>XxKRndD#2Iyr}ig-MEl1T;tUXQ=ijc+)E1mFNrtIMTc4#<<#JOFu;YzAQ4 zS8HPN&~qNOL#_ai5{N&0ns+_l_XAG#c`|;C&60wfu(N;vN9(_zS05ZrG7hhnOye;L z@#GMe;e)3DIl{XabgVm^p%xP-y~-iLVai23Lv->J zx|=yIQk$Wfpx{=bw@L{*#Y}qpjhDJ)CH0 ztrrg2@EbmKx71oVZ7-bOI{W&)H{NcZRW=utc6hWEmN&z4;}tu+?|xW*!l99|Z{ONn zP)M>LZG~p+(9B0aYh3=z(aq37DlTD%6IXGiuyr68A*?SR}0%-Vt3 z#v5CK1MQKC`m5v;!|xrtcI?(qZk=iRXBx8~ANcUVrhoonWNhQjn{T=T8ii)$=hqg$ zh)sSlcYCg}@R@G!J=}~PxqHTrE!0o-^R6@_FVz>J*J??Vwlvw2WLuIO(Jg7N9g5W5 zj;ODYlTf9_X%Pr0PACuGLC6!10oX1Zw1eq|JhaW+TI4KoZW&xvF6!^fT_>k~%@WjX zk~th9W9TZTo}Of=2U{dy+`a|B0OX+G^+gO+6l6^TqOGNO-GnVAwxrp<#wE#Q-@i@Z zZwqeo>BFSF;5`Hl6GVL24O7}|lCuXpPLjwlL8Ka5V}RCChvkH+r@JK~-Wi<}qMdUL zjNPV_a6-$-spm787>~BHj!-NYRx30Y;QeHl^rlqmz;RzmN8Fb982$yY5W84JD`+(s j=5w_70UCaQg3ox43D%dNA^1GoeUzEHvG{)kAF9IFP|%|A literal 0 HcmV?d00001 diff --git a/agents/offline_agent.py b/agents/offline_agent.py index 3b91715..aaf0c09 100644 --- a/agents/offline_agent.py +++ b/agents/offline_agent.py @@ -1,31 +1,45 @@ import numpy as np -import random import os +from .policy import EpisodePolicy + class QLearningAgent: def __init__(self, agent_params, state_size, action_size): self.state_size = state_size self.action_size = action_size - self.lr = agent_params['learning_rate'] - self.gamma = agent_params['discount_factor'] - self.epsilon = agent_params.get('epsilon', 0.1) # Add epsilon for exploration/evaluation + self.lr = agent_params["learning_rate"] + self.gamma = agent_params["discount_factor"] + # Initialize policy + self.episode_policy = EpisodePolicy(epsilon=agent_params.get("epsilon", 0.1)) self.q_table = np.zeros((state_size, action_size)) - def get_action(self, state): - if random.uniform(0, 1) < self.epsilon: - return random.randint(0, self.action_size - 1) - else: - return np.argmax(self.q_table[state, :]) + def get_action(self, state, action_mask=None): + q_values = self.q_table[state, :] + if action_mask is None: + action_mask = self.episode_policy.get_action_mask() + action = self.episode_policy.select_action(q_values, action_mask) + + if action is None: + # All actions have been taken in this episode + return None + + return action def learn(self, batch): for state, action, reward, next_state, terminated in zip( - batch['observations'], batch['actions'], batch['rewards'], batch['next_observations'], batch['terminals'] + batch["observations"], + batch["actions"], + batch["rewards"], + batch["next_observations"], + batch["terminals"], ): old_value = self.q_table[state, action] next_max = np.max(self.q_table[next_state, :]) - new_value = old_value + self.lr * (reward + self.gamma * next_max * (1 - terminated) - old_value) + new_value = old_value + self.lr * ( + reward + self.gamma * next_max * (1 - terminated) - old_value + ) self.q_table[state, action] = new_value def save_model(self, path): @@ -38,3 +52,7 @@ class QLearningAgent: print(f"Q-Table loaded from {file_path}") else: print(f"Error: No Q-Table found at {file_path}") + + def reset_episode(self): + """Reset agent for new episode""" + self.policy.reset_episode() diff --git a/agents/policy.py b/agents/policy.py new file mode 100644 index 0000000..597a818 --- /dev/null +++ b/agents/policy.py @@ -0,0 +1,75 @@ +from abc import ABC, abstractmethod +import numpy as np +import random + + +class Policy(ABC): + @abstractmethod + def select_action(self, q_values, action_mask=None): + pass + + +class EpisodePolicy(Policy): + def __init__(self, epsilon=0.1): + self.epsilon = epsilon + self.episode_actions = set() # Track actions taken in current episode + self.current_idx = 0 # For sequential action selection + + def get_action_mask(self): + # Create a mask with all actions available + action_mask = np.ones(9) # Assuming 9 actions + + # Mask already taken actions + for action in self.episode_actions: + action_mask[action] = 0 + + return action_mask + + def select_action(self, q_values, action_mask=None): + # Create default mask if none provided + if action_mask is None: + action_mask = self.get_action_mask() + + # Apply action mask + masked_q_values = q_values * action_mask + + # Check for available actions + valid_actions = np.where(action_mask)[0] + if len(valid_actions) == 0: + self.reset_episode() + return None + + # Get Q-values for valid actions + masked_q_values = q_values * action_mask + max_q = np.max(masked_q_values) + + # When all Q-values are effectively zero (very small), select actions sequentially + if np.allclose(masked_q_values[action_mask > 0], 0, atol=1e-10): + # Find the first available action in sequence + while self.current_idx in self.episode_actions and self.current_idx < len( + q_values + ): + self.current_idx += 1 + + if self.current_idx >= len(q_values): + self.reset_episode() + return None + + action = self.current_idx + self.episode_actions.add(action) + return action + + # Epsilon-greedy with masking for non-zero Q-values + if random.uniform(0, 1) < self.epsilon: + action = np.random.choice(valid_actions) + else: + max_actions = np.where(masked_q_values == max_q)[0] + action = np.random.choice(max_actions) + + self.episode_actions.add(action) + return action + + def reset_episode(self): + """Reset for new episode""" + self.episode_actions.clear() + self.current_idx = 0 # Reset sequential index diff --git a/interactive_negotiation.py b/interactive_negotiation.py new file mode 100644 index 0000000..b41749b --- /dev/null +++ b/interactive_negotiation.py @@ -0,0 +1,145 @@ +import random +import numpy as np +from negotiation_agent.environment import NegotiationEnv +from negotiation_agent.spaces import State, PriceZone, AcceptanceRate, Scenario +from agents.offline_agent import QLearningAgent +from usecases.initialize_env_usecase import initialize_environment_usecase + + +def convert_action_to_response(action_idx, proposed_price): + """에이전트의 행동을 상황에 맞는 응답 텍스트로 변환""" + action_responses = { + 0: [ + "강한 수락 (Action 0: STRONG_ACCEPT): 제안을 매우 흡족하게 수락하겠습니다." + ], + 1: ["중간 수락 (Action 1: MEDIUM_ACCEPT): 제안을 수락하겠습니다."], + 2: ["약한 수락 (Action 2: WEAK_ACCEPT): 고민 끝에 제안을 수락하겠습니다."], + 3: [ + f"강한 거절 (Action 3: STRONG_REJECT): {proposed_price}은(는) 너무 높은 가격입니다. 대폭 낮춰주셔야 합니다." + ], + 4: [ + f"중간 거절 (Action 4: MEDIUM_REJECT): {proposed_price}은(는) 높습니다. 더 낮은 가격을 제안해주세요." + ], + 5: [ + f"약한 거절 (Action 5: WEAK_REJECT): {proposed_price}은(는) 조금 높습니다. 더 조정이 필요합니다." + ], + 6: ["강한 가격 제안 (Action 6: STRONG_PROPOSE): 대폭 낮은 가격을 제안합니다."], + 7: ["중간 가격 제안 (Action 7: MEDIUM_PROPOSE): 조정된 가격을 제안합니다."], + 8: ["약한 가격 제안 (Action 8: WEAK_PROPOSE): 소폭 조정된 가격을 제안합니다."], + } + + response = random.choice( + action_responses.get( + action_idx, [f"Action {action_idx}: 가격 조정이 필요합니다."] + ) + ) + return f"{proposed_price}에 대한 응답 - {response}" + + +def run_interactive_negotiation(): + """대화형 협상 시뮬레이션 실행""" + # 환경 및 에이전트 초기화 + env = initialize_environment_usecase() + agent_params = { + "learning_rate": 0.001, + "discount_factor": 0.99, + "epsilon": 0.0, # 평가 모드에서는 탐험하지 않음 + } + # MultiDiscrete 공간의 크기 계산 + state_size = np.prod(env.observation_space.nvec) # 상태 공간의 전체 크기 + action_size = ( + env.action_space.n + if hasattr(env.action_space, "n") + else np.prod(env.action_space.nvec) + ) # 행동 공간의 전체 크기 + + agent = QLearningAgent(agent_params, state_size, action_size) + + # Q-table 로드 + agent.load_q_table("saved_models/q_table.npy") + + while True: + # 새로운 에피소드 시작 + state = env.reset() + target_price = env.target_price + threshold_price = env.threshold_price + episode_done = False + + print("\n=== 새로운 협상 시작 ===") + print(f"목표 가격: {target_price}") + print(f"임계 가격: {threshold_price}") + print("\n협상을 시작합니다. 가격을 제안해주세요.") + + while not episode_done: + # 사용자 입력 받기 + try: + user_price = float(input("\n당신의 제안 가격을 입력하세요: ")) + + # 목표가격 이하로 제안이 들어오면 즉시 수락 및 종료 + if user_price <= target_price: + print("\n=== 협상 성공! ===") + print( + f"제안된 가격 ({user_price})이 목표가격 ({target_price}) 이하입니다." + ) + print("에이전트: 즉시 수락 (특별 행동: 즉시 수락)") + print("\n시뮬레이션을 종료합니다.") + return # 전체 시뮬레이션 종료 + + except ValueError: + print("올바른 가격을 입력해주세요") + continue + + # 현재 가격 업데이트 및 상태 계산 + env.current_price = user_price + next_state = env._get_state() + + # 상태 인덱스 계산 + try: + state_idx = np.ravel_multi_index(next_state, env.observation_space.nvec) + except ValueError as e: + print(f"\n디버그 정보:") + print(f"현재 상태 벡터: {next_state}") + print(f"상태 공간 크기: {env.observation_space.nvec}") + print(f"에러: {e}") + state_idx = 0 + + # 현재 상태의 Q값들과 액션 마스크 출력 + print(f"\n디버그 정보:") + print(f"현재 상태 벡터: {next_state}") + print(f"계산된 상태 인덱스: {state_idx}") + q_values = agent.q_table[state_idx] + + # 액션 마스크 가져오기 + action_mask = agent.episode_policy.get_action_mask() + + print("\n현재 상태의 Q값들과 선택 가능 여부:") + print("(O: 선택 가능, X: 이미 사용됨)") + for action_idx, (q_value, mask) in enumerate(zip(q_values, action_mask)): + available = "O" if mask == 1 else "X" + print(f"Action {action_idx}: {q_value:.4f} [{available}]") + + # 에이전트의 응답 생성 (epsilon=0이므로 항상 최대 Q값의 행동 선택) + agent_action = agent.get_action(state_idx) + masked_q_values = q_values * action_mask + max_q = np.max(masked_q_values) + print(f"\n선택된 행동: {agent_action} (Q값: {q_values[agent_action]:.4f})") + if np.allclose(masked_q_values[action_mask > 0], 0, atol=1e-10): + print("(순차적 선택: 모든 유효한 Q값이 0에 가까움)") + + # 에이전트의 응답 출력 + response = convert_action_to_response(agent_action, user_price) + print(f"\n에이전트의 응답: {response}") + + state = next_state + + # 다시 시작 여부 확인 + if ( + not input("\n새로운 협상을 시작하시겠습니까? (y/n): ") + .lower() + .startswith("y") + ): + break + + +if __name__ == "__main__": + run_interactive_negotiation() diff --git a/negotiation_agent/__pycache__/__init__.cpython-312.pyc b/negotiation_agent/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7577c783c2a2f61e93df3364023ed7f7786ac28b GIT binary patch literal 221 zcmX@j%ge<81U1EnGC=fW5P=Rpvj9b=GgLBYGWxA#C}INgK7-W!s?iTEPAw|d&q~eD z%gxM7uPoK~$xJFrEUMIZ&dD!L2`5>wpb{iI?&2GD^AXTcmzbfq?yJ;0`O0|`?e@Yy!OW30FYtNa7A0cVG z|8^3ebMHBK?%X-&JLjG||8O|01g zq(h8{39%kFWbhai851;mjG$wJCXWd?8#IS39t%YTiY5mLZx|rF@fxkpdrh0(V^wq} z(3y3e#};Qe%l9~fqht&3i~9UQpCoaN%r*!7QJH-@AVp>Kk?v?967~h#U=;g|FM8as zO{y?P>&!n5jZq>H55*G?{X7vEf#s^cN3~fC|_lB0{ z?d~3XgL|L0V71q-Eqc5A9>sC2Xoq=3gp24v0&pQ$!R(R{?2zz%-1qt$pO*wtYU~ms z;ZPvl8ILtS9XKZX#CYSrU?j%3MVGlX0I3h?e)r5uQwFoV?k`&z24VizM!UI z0^z9a^m={aa3rd%S%NVPEKghp(_AHDB`~o9iA5)f1?fQ`Z;@}!w$lgFR>$d9C1EK3J$81i3m5&QJt?^cG(8UjoU4pI^q)2>&XO0-%`zh+UAdJOo4s z3lRzc2e6xX6UfZ)v%t@)>FvA?^k&`;dUd=5`WCe>SmFK>=-ZTgSivq>m8Y=06QmAx zmIA2@q$PZ*HaE+cLEkC3cvdjM%E|}WT!ttGnLZoMSHQTkIK@@UHr2`P=>0a?=8uFs z0-fG&%mamif{9ECuul|I^iutMqtH^X#X_n}Pks6z;8By_iK3H~>zWqET}l$wr7B-c z##20fP0LDsQ~*NVn+r||RA=!F*z2YAXHXlO5h*#K-Ghjuy+|4$%!nLhHHR3N~?yKsXSE zn5rl+Hw2X`MJ!7Pfk0H*DwDR=H&<3)uD@7+x#42NO;^P`FP$%$aXpZ7Juu^HNVys& z0#mE!8G2R8;Nh&1JiKL)9de}2_8IfalzC<1iLcD{DxT36MNUM3D^C0Z8lxnd%b>j! zn)N!P8+I${qobOU$)b6&m)4Ei#{iDuPv3Va@&>~j5Vg(Zm3l(@Sa3_u@gUZ&UY0lF zDBvcIn>B9HxPtxbwrd)cC^E#cB+k~6%|xP3(G)pKDEJ#{dYw5+PEnja_6SUPsQpNr zdv0hbGxkYlbT~6`$(^}yCG%Foof*F{`_ade;x{H3`IS7+ZIoPGQKO)@3k zGsL1D4?Paq*erzo5gt-607dh&nzC&$xthWA$=k4`ieqIq(k+AmN&qgIMWD!5AFrc@ zGvvV{QynskJG5-no`|pP#!F3Z9CUlCim+1%yc$qTzXj4yzH^igZ%jxNFMjFRnkAIQ zJym+MeEDe8NK@k3^E(Ec?=m0**eqX_Xc~#!CMHYC;QqATb?(TSBkA(0#JZ6k>1C@D z`$t|)SFau4nX2BLu39s0O;v5wdDcc8)&C(zN68Ni+N z)|-~JjXxK%a06mPLFMbvfV~^{XlO0GP7=|X$0Y3J6z(M%(~gw!YvC#)PLtF(N9P6lYW&uap6cU`j&cD8AR6 zzZLpDm4(jFqi7@Or2RnpN!Cp4CBqe?H6t}CNFim_i8m(134XFMS+jS#Y+us8@4M>t zX=iD=bXj^uU3ys!{NJ$|YpjFpIma2t@UbtkqVz7Z35CyzO_{(nlHCrY)#oX)yVOQ z6H{xFH7(O+hm!U~7)z~ZS`%E#zD_Y`kqudxf86bUs4$NGcoO^dDZ^xeUmheem5b&&eACSG>E$)JO#N9xO>}Ks0qPo$FfSH#+ ziT6AV8tYUV5=ozn3}z@hVY6<-PO>JyglU_&&NFD5bPc!8t1S zI7~Tr_6A_now@Px+*?DpJ{q6>!*zt{>>np)&yN=%b#^i_cQKK-u$~Z%VCI-0DMtX1 zDbQ4Q6r5bb_t5jcygYAE$$KpB)sI6xb%p6*i9wha=9_>(ZZMLy>p!fyQZrM#HC4NH zx^~--4DGU}m)A@^_UY~$yFWiZ{lp7r_uocgme3aKPqzs)=dny1t7u~X!Zxx0w?5ed zi`0B_Yx2VExq<)18)7rutsKO3SKy&c{AE|bAw)Tk{;VtXL3nx=(I4UgAhJa%DDNw> zaYs;+!y%~5;A_DWVg4(j4%u!=sMCv`Xdx`n-wQ5~;<^@GAk|Mt1sa~dWPs4wrG`#` z>SLn0f*fj0s1&ao3TjL?TB_gNRi?M{9CvWu$eXU4uVLDR3%-z8@GoC$#x42qTv!*z zEm~b@jctRQ=Mn>%_pav>;lTS4#JHgp)Qd`@9PiJ(_dz~tMNDGKjxJND#D}nh$#wtt zxF(L4?Lt_JiLi%A{y;!B2EsfX5!yJby8S5VT56{k5xFAPtWf>MLfv{vcnHOaKoQq1 zJ_ba>B&ZYx>WRWG`EvV~)U%>68x(4OgSUMn;ahGtf8Nv+;8z53A~ zOD!u(K)h^K)^6gUF*X+z z4|i6QlCohg(LS;5OUENv3GSSH?I!GkEoWMW`NW=yr>1JZvhTpcbEl&H>2?*|eRqZ4 zzbB#d-`hj&tuTJJdF9?J=I>R8y=zTHUwAMsl-Ov1M$y@2(JwlBRS26Q6+<%@d;iu) zf4w!SLxrfXVefWLZjNhR4b8`uO?n8&?EHJ6guHPHIEZu zs_!RvjKozw{OIV;k)6}d+N8NwEfaF5F&M!IUF@)}Z+=CVz*4ekpDH{Q z!8UgG{kgMK7zOw5U2uYd+O`1I3r%82y?zufhGd~8085Zo0)exNvwXODv~{F)reZ^? zV#9RBgVWB9N%Ka<9vs!im5a+^ptu4FmNp`m#$q*+2a({hNW{5Bd@=D!BzVXb_aecN z5uZZRg5+5svbkG?Lzx(ji_f9#MI?B7QXZ+Ewbo!43DgQi`ZtgP@&j!&Y#2W{(SD_M zorR6Te&T)U10Gr_Fk!9Br zV@Z~#4Qt0Afvw%JcAn7%Su<^DNbuv^Q>!=K0ab%)3$y@dhtIM4hhwN%5R$Sn8t;bF zv4|K|ZLD|&)0kL?15CKF1>vJ5p6pbAC^jv&^F8wp^{d}5@nsl?=$Cc_$ubm0eM4&h vN$OLi{%d0UnpnOeE1-g*ZFgxib%M$gr0@c?EvpFbR=!A44YvtWW%2(70*kbs literal 0 HcmV?d00001 diff --git a/negotiation_agent/__pycache__/environment.cpython-312.pyc b/negotiation_agent/__pycache__/environment.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d11a7574900bdd1f6f6b86269c5832bd48795d3d GIT binary patch literal 4662 zcmai2e{37|6~D86_SvzMIE~XLOI^J^LZ#J|NCP3*&li->YvD%g((xajsr1I36wy`sR#{U zTbxSRA~xEP*&_~++2c&Y8F40D5m$nZunBj>o$y3FG-acPDZw#I35-ZjwwT5%-%ECb zj(BJ2kn;vIqaj*z%2Rw)ltYZ>9~CE(N{m-xNohNqXfX5FwL`z#e< z1XhRg!ks1QS-AC>l=a0q3U}^UCn{UqsSKq#J z^X-Mw?1k>&&3Beom-3~H`QTdqZ0Xd`gKOvWH{be8aBoUhl8GQ#h>@g}5G5t*ST}K; zH~!sfqbcefJx?iCqhd4340XDWqArzLno?Y5iz&-coMkmbO|oWtWqwt1vEQkF@9WQO{>I@^J>Bo z-5eK<^RmowGVXT}%E_;O7JdpQD~BgVfJIE2m`R17h#ez5nF;TWCsV?hk|g|u7#>b0 zC*tBh5<4N@H~Cy5noRKhyC)KSEZz%r_|QyHZwnvpI~*Pv-y1%{=@1P|mAl1(9Z3m` z6%6)H%@Ew)^v18Fyw}{-Mrf1?R9~K?uCcxsrM#fB_g-atueWrsw1jew7wd|hcmIC# zYnuz5kExxHvBw5(5>iX zY3Wey-nB=H+NN#K!C0gL+AXM8?CE1ROW;MN^&?xruV})^jqB!S!GFC5j8R|VO{(_H z(lADC{WM&(tStlXP@lP9!2{OIBto>RWbIN|pffgorMQEPV650&Ri$@xRaao4cjG$y zH6laSuC$u{V7@Ryb;-~f`(?WsL9z~|!_=7EVp5fF#xdDh;Y`YOj;3P*x9&*ogx0nE zZ0VKD!P3GxaNw)IeK)we^zz!NbEOOKuD*65s7D|WEtTf4lrGK&!O@qVU;6Yn=dt{G z;8Vl=@<7N=HUh1AbU(nQF<3S5Rd z4if^UoJ+k(wI)Xp7OeIrXsp?^f}ANkNXqH7MQtpjrhuVbe^wY4vI@G~a`-Mm{= zuhmWU?kIX24cX_6ZR^%$b~C7Xw=Qh>pswp$S6`v)0Tq7U&Z4hzxw-qQuXnNQLtn4n zvPDUT63}D`ex~}9D#%!5v z0#3%2V8GNtM|y!vH&>xex80A*qfkM_y3YFMTywts&YbTm8-(~$=(u0)xPNJ{+R<-@ zoI*!f?FbuOpmm-*%dG^qL0t0`8#cU@o=fNZaz9;WJB-6zA6cwubP!_H`h=}k{Fy}U zsv`4!(K<$fEI_XoI6!KpUX`g^kg=O{ z&pN(Elj5vI6*Ox_F-OKOH~?Y{K#WN}1f#9}dT#Zl^E$+Gd0^JgEerJ zyxoh5H|E%@zxpF;LgEd`ocKxeo5TM|zdTSTt_(~Z*UP^L9uKi(8yMG|gqMV5LUX00 z*zqJuRJ^n1=c0T(nu>$h=7{)Xo(QA@Ps49&P9vK%UA{n)Q<_H+3FL3kO#t(H$7)aM zkQH$%y`c)VC>(%E$)}+@N!@l*Et}?_KKpbbuuTnYTTm9qR|0)YLqGpGgzLDwHB_`6kq_oCwu4R188 zY~S_HLo5Erb58JOHNgJB!b6LxrDMwt4=%G0e$#=Wlh7U5n?OJYMn3~w)}D1_8AP)H zrzkKN99gH}1ez9H7wlPA#+k8Wq{z534*aI!Phj;-gw3#k2;i%Wvfi~>POPFOxJ~q- z9YnI=&aiskVG<$-d)bq;<2aA$=_oJ%s(f9nScQ8j6{Y__tLQA*W(tY_mi$bAyx7xe= zz0Q@MeQL`-)j#yU|NSF>4SeL^Jhx}T|GC|EfcAV&*_u2#Cq(PUmYh$AVR<8O2r#7^ z(@=$MqoW}YzW*dXA#xn#AzUIUq~b_>IqpX(K3;Bdah#Bh!V?kQnQ@yjevk*T!tKy} z<(FAhjK?`HL>nOo&vUo2X}FR4gA+~2TcP@LmiowX_>QBFY5SsW6Vr2Nr<2+8MWdV9 z^`90u)1psW?-BB8?unU%#LKZ%LbC&Dyiyq{m4N4$?vFM})vvT(V7J$Q^90Tve^_dxZTou=uJsf~Z9+CQe6{%dp6wmTG5 Wx0@cP>6SBN^GDAfy+a|b+xkD}6HBuI literal 0 HcmV?d00001 diff --git a/negotiation_agent/__pycache__/spaces.cpython-312.pyc b/negotiation_agent/__pycache__/spaces.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51cf0001c7d9799596c18b55c7344e1d22f3d685 GIT binary patch literal 7209 zcmd5>U2Gf25xygj6#tL1ejF>dNSStHCX#5`i6sYi99v{9)i#}2vK=!>FEsD0(B+S^ zM+G2ZBt)SWF;c)z(86+)Gzx0KQXLd(lLCI|Lm&Fq7ZOB4;ou-C+Lzc4z*qtT1byht z-jO{1$VrQ$=m8qe&CKoX&U`aFd-{vhX=C8}^@GAIT~3DiJKj_;lg6z58JKH~$cSu$ zNwN`^Mc$AwB#jYcl8bOjQ^Z7N#)LW95$T{jm#`$Q5o^*GvC(@|!k%mM-ZHB1iMXiL0#d6^%2TNgq;{RuEp<@21LV$I26cp6+JqyWRObR6U)4o6 zW!?VHFVMozDrR{$8k1zdQQ?B|SVrNF$K{M-Je0iHWcx6ETQv#3&jfoM?=gL@r{M zIz*FXxnKZ4aJ89RvmJhmVws9bsVIr339jr57768WXvIvV%p6I@P+Yw%z@Tz10puEU ziMd$S=zc{OyToh(#T4T=h3C3~yZ6&~?!TY+-u=Tz z_io?m9+`sYm-Ie(^ihDo)ZR-<{;!rnl-2df@x1HY4DXxjhqZ7hpcsdlG zo;(>=+$Tc8$Oga;fB60*6CzT1P@Ee}ZDt1wlVTs7G14YL`!MC6C{Y6KA!Ml(0zM{id*m>xdieKL3nj8nou>Q3v3AnnDij& z1)^AHNqSZynXF=zGlcYkY?&b#I)b1eP)JmQsFX*5%rmP!z4`F!=0^%#_iA@f{>18* zz5>_#xye~Em`(15sRs=FtQi@z>njwfqv1g9Q)?JB?NduKi=5N}X=T!-l_hKTcPQ59 zNIWJ*(kY4N6+zDu9!V#@NS{xLdYVtu!2I{}DGnK>ffA{yx zkT#!u`HO`)w9w06zyGWGyC2W1M)WDYzJm!t}(c(A31q<3WW#eB|VrP+dSa!%NkCP=kn3&Gpngrbpy9G(7;MhRAJ2 z$F_p!Ns7FoqGPBK_yOg;MTfVreYDcS0SYaMLMw+Tl1JcAegX&}t*bkKeATz3zwjDyuPC;>dSN{8}c1J$=Eg->%8$eR) zjc}i>7oyXQWRy6O#pIAoz!@lK2F{2amL|!9`K=bHh*h$Q&P0UFIHw=QkQ!~^QOC~Ku35jNNF6xEh>iyGT zEeC+)nN@f9HRs#TvfEd5`xd8f3LgsPolh5cK7Bh_KJZ-ez;opTXNm{T6!^0R%UN0i z6eElo)AvQSZt7(i@)~rjmzYd#ceuogEJpB_CdpI_1_K&HgQkiS)EM=ekGN{rOFO8h z?Z|JW{-`?w1elz((orBoD{71-WHJg;8E5YG(yFc4E|7Fm^O0NiQC)Hnt@i@SGk@bZ z-Rar35L+C$excN}^Uh}PV%OsHANH0u@4B<~@uj^>uP*N^ZQZj@3pbQ&8}(Gf7TEy< zQ6#+s3>*)zxdg8HY8Ur+H*7gKu->2%-N2fAzk|9p$5RjNs;0`poO94y&z6I^O@rR1 zggN*P>(}6@Za5o*u|p7``w@hkQ}wfU8v+N&CxGOc&t2OV#1E3!ljZ)=V*hAi+gOER z_xG*#^?wk!9w_%cRqT7Juw@?zM?I?^U)i&_=-FHL94dMa75bn36I(GEoa1c8SapBc z?`qyH5f%zB2+>q34RryUNLdi-I3b8Y#TZXz$Ufxk^jMRWGOwgXf=Q!ptq6?ftrfG0 zAi>ftzX0SXu%O-&*SqT8QsTC(_V`MiufnmW7ubaz3-WdU8iPFasOHX!!DO;6OfUMb zpM#duWLp}2fIqnRs=A5`4Z7A*s_w)1su=v$u4?kEnIX~YC01nTnFtnHgJinss9Dz- zD3(TD0qxK%-8F+61D2f5(hA2h8;oj)5)4POhyxC%PPlhTHd?SbT7-ExoN5x#IlCJBPcPX9W`eA|| zL*<5~#9Ba;ayu~BbXBW*!o0)?wMv#{2nN@0AU60>oI;eIZUuTeX)V5j5KqN30QkO^ zISf~IVHilpsN|VD+jhPC(wp30Eq!W$TehNF0m*~Cey;7(I#Y!Mlc{YkwF+FllXa)X_`=2jR>yPDm@sqV2KMKL zEDS2+fq?qt${Uy8Sjdz;BSp_h*)v}BjF7 z{9utETsl|cM+=rwb&Vz&JHCeXHKq9__F^rm>kF!;d==a*-2))o>FlTQ?fTg}q!gV! zeOtR2>GeOvr+IDe-B(_}{CbJ^6f7Qf#&$Y}+J-dDyA~IOJfu8O^}6^SV?lQ%+_IW?Q*Xg3j;X? zZ&h0iS_AZv=&fT-(0vHX?P>*7drJ|Q)r71?KP;=vj}`f`f@Q3Aaacn!2y56V=&Lc5 zi-!iMHDSCk0y&4|1t1O4>A?xB=+Lk1vGyx#F`U8i5WX0%O;?{RckL*4?O1%V)HPD# z_Y^F9RQy?DkT5f8lKqzO!9dN|!`E65Ul1NtHrL%R3+J;!^|jR;qgkShEO{B8%K6(b zR;^?W<6u9%))dDwzrV=uzrCx(j~6WC+Ipc8l~OaASN_NAt-iLp-t#bDeYgn%@jI-y zI!+SI3Ff2Nqn|NH*3C}V{bpvJ yfuD7+opoR3)*1L&--`S+=0.4.6)"] + +[[package]] +name = "pytest" +version = "8.4.2" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"}, + {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"}, +] + +[package.dependencies] +colorama = {version = ">=0.4", markers = "sys_platform == \"win32\""} +iniconfig = ">=1" +packaging = ">=20" +pluggy = ">=1.5,<2" +pygments = ">=2.7.2" + +[package.extras] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] + [[package]] name = "pyyaml" version = "6.0.2" @@ -772,4 +862,4 @@ files = [ [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.14" -content-hash = "46bdc65ebbf8732cbae2738e1da7875aea5a378314abe29e32cefcfe6474126a" +content-hash = "d077803cae14e91eee21b3756e24c72c5c514267d79a6aa130c4f69b90a794a8" diff --git a/pyproject.toml b/pyproject.toml index ff11596..39cf374 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,23 +1,21 @@ -[project] +[tool.poetry] name = "q-table" version = "0.1.0" description = "" -authors = [ - {name = "fbdeme",email = "90471819+fbdeme@users.noreply.github.com"} -] +authors = ["fbdeme <90471819+fbdeme@users.noreply.github.com>"] readme = "README.md" -requires-python = ">=3.11,<3.14" -dependencies = [ - "gymnasium (>=1.2.0,<2.0.0)", - "numpy (>=2.3.3,<3.0.0)", - "h5py (>=3.14.0,<4.0.0)", - "pyyaml (>=6.0.2,<7.0.0)", - "torch (>=2.8.0,<3.0.0)" -] +packages = [{include = "."}] +[tool.poetry.dependencies] +python = ">=3.11,<3.14" +gymnasium = ">=1.2.0,<2.0.0" +numpy = ">=2.3.3,<3.0.0" +h5py = ">=3.14.0,<4.0.0" +pyyaml = ">=6.0.2,<7.0.0" +torch = ">=2.8.0,<3.0.0" -[tool.poetry] -package-mode = false +[tool.poetry.group.dev.dependencies] +pytest = "^8.4.2" [build-system] requires = ["poetry-core>=2.0.0,<3.0.0"] diff --git a/saved_models/q_table.json b/saved_models/q_table.json index 54e01e3..c5fbebf 100644 --- a/saved_models/q_table.json +++ b/saved_models/q_table.json @@ -1,2026 +1,112 @@ { - "metadata": { - "state_size": 36, - "action_size": 9, - "timestamp": "2025-09-22T16:20:10.539545", - "training_episodes": 10 - }, - "q_values": [ - { - "state_idx": 0, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격 이하, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.08795120152833774 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.07980060621013178 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.06578247676712926 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.05640440663220407 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.08343614486312115 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.07708021145254244 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.06649834752893999 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0483122563738206 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.07784918257350587 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.08795120152833774 - } - }, - { - "state_idx": 1, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격 이하, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.04181960568421511 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.01650653844752281 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.02224513478479088 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.04105294727331152 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.05931104531941931 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.04004415229683608 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.02881340637109566 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.033459877776815736 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.05507207930384824 - } - ], - "optimal_action": { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.05931104531941931 - } - }, - { - "state_idx": 2, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격 이하, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.07486255107754664 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.07980060621013178 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.05926445786066637 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.04275562307545951 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.04916661909531326 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.07053122078911914 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.04567119680331397 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.04118128186893584 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0647616644539918 - } - ], - "optimal_action": { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.07980060621013178 - } - }, - { - "state_idx": 3, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 4, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 5, - "state_desc": "State(scenario=높은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 6, - "state_desc": "State(scenario=높은 구매 의지, price_zone=임계가격 초과, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 7, - "state_desc": "State(scenario=높은 구매 의지, price_zone=임계가격 초과, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 8, - "state_desc": "State(scenario=높은 구매 의지, price_zone=임계가격 초과, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 9, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격 이하, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 10, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격 이하, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 11, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격 이하, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 12, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 13, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 14, - "state_desc": "State(scenario=중간 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 15, - "state_desc": "State(scenario=중간 구매 의지, price_zone=임계가격 초과, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 16, - "state_desc": "State(scenario=중간 구매 의지, price_zone=임계가격 초과, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 17, - "state_desc": "State(scenario=중간 구매 의지, price_zone=임계가격 초과, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 18, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 19, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 20, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 21, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 22, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 23, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 24, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 25, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 26, - "state_desc": "State(scenario=낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 27, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 28, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 29, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격 이하, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 30, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 31, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 32, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=목표가격~임계가격, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 33, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=낮음 (<10%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 34, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=중간 (10-25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - }, - { - "state_idx": 35, - "state_desc": "State(scenario=매우 낮은 구매 의지, price_zone=임계가격 초과, acceptance_rate=높음 (>25%))", - "actions": [ - { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - }, - { - "action_idx": 1, - "action_desc": "중간 수락", - "q_value": 0.0 - }, - { - "action_idx": 2, - "action_desc": "약한 수락", - "q_value": 0.0 - }, - { - "action_idx": 3, - "action_desc": "강한 거절", - "q_value": 0.0 - }, - { - "action_idx": 4, - "action_desc": "중간 거절", - "q_value": 0.0 - }, - { - "action_idx": 5, - "action_desc": "약한 거절", - "q_value": 0.0 - }, - { - "action_idx": 6, - "action_desc": "강한 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 7, - "action_desc": "중간 가격 제안", - "q_value": 0.0 - }, - { - "action_idx": 8, - "action_desc": "약한 가격 제안", - "q_value": 0.0 - } - ], - "optimal_action": { - "action_idx": 0, - "action_desc": "강한 수락", - "q_value": 0.0 - } - } - ] + "0": [ + 0.07860207418303787, + 0.09669185947649497, + 0.00545838101358499, + 0.0634685349345901, + 0.04896882345490129, + 0.011071837685599328, + 0.02373142720866326, + 0.040710050390840025, + 0.057908788619546694 + ], + "1": [ + 0.03131144037823771, + 0.026904571593437822, + 0.0746202421515731, + 0.07668658840526418, + 0.0566929766904952, + 0.0928366837674772, + 0.09992295981895805, + 0.06804924626417622, + 0.03465275246415239 + ], + "2": [ + 0.0337775546511296, + 0.019233162710519693, + 0.09337842138095943, + 0.0418180529168455, + 0.0455664603958991, + 0.034035827299915423, + 0.05772592768996248, + 0.052621723571909175, + 0.04969945041515063 + ], + "3": [ + 0.014178139983042926, + 0.004869457147381262, + 0.013296329386477658, + 0.07159586757640321, + 0.04650155306866092, + 0.03908809426776613, + 0.09829371848543218, + 0.09551734097779814, + 0.09933597843126746 + ], + "4": [ + 0.01314656381661038, + 0.03772331883055289, + 0.010219538647158645, + 0.08742142563213022, + 0.01887893282227795, + 0.08690083610071421, + 0.00044717877475308754, + 0.06368163817427484, + 0.033458612940268084 + ], + "5": [ + 0.07889862014704474, + 0.057224439403031326, + 0.0939164792832018, + 0.041856396706002386, + 0.0048492850418650705, + 0.06579737247159666, + 0.08856882483528883, + 0.002164381756437062, + 0.06677124541980636 + ], + "6": [ + 0.07821060715082709, + 0.08834973107011755, + 0.020383487470968076, + 0.0949968242101048, + 0.09724403969134705, + 0.042860882296615936, + 0.08358102011182952, + 0.06585468804852727, + 0.045960969954383715 + ], + "7": [ + 0.037836904050297854, + 0.042865046541691376, + 0.04152604015726677, + 0.08352854281531587, + 0.09382152590451939, + 0.016753650711956194, + 0.020857806219263586, + 0.05572523796471475, + 0.013474646369689692 + ], + "8": [ + 0.023989831261016906, + 0.057352182089764586, + 0.03290126013901756, + 0.06726275608456586, + 0.005863722248365922, + 0.06197326609192151, + 0.00675663049407681, + 0.035809720335395055, + 0.06880322134415771 + ], + "9": [ + 0.0039004471963525636, + 0.07827699924944835, + 0.023437234009404506, + 0.07636098855864303, + 0.03187103806012823, + 0.0010322715404335826, + 0.0038868438826868304, + 0.0010063667863797088, + 0.015321259421990231 + ] } \ No newline at end of file diff --git a/saved_models/q_table.npy b/saved_models/q_table.npy index e13b6c1e8033d91b7686f242e5cfed8d13089611..e6eea984e5aca94c16320a6fa15205c2b0f0f68a 100644 GIT binary patch delta 738 zcmV<80v-LJ70?EdJpwT>kv~Cy*N+4GLm#w1al`&E1%x+y<>bE`g>PCB(Ns2)&9Fjg+~6KeF05=d5e7 zKiG~LZ2`WaKRbgVsY*+sKPP`&v)i$gKViH3@ulatKOziL11)K#KXt`pvV2shKQ(ts z+9YkEKWB;_w?K}qKW*EY3q0|vKiUyf)HHIbKW)2Qyln}MKiUc?^ZxO3KNMj39veH0 zKTh|=RU1^YKUGCZGY!vwr$5XF7R_1%q(99PO!CGlxj%(fvk%jAxIa9vttu66xj%YL zE$x-;ia!ngSc!O0qdy0AC_?+~gg-M#4hLnwm$$Mi8g9Z zJwI=7P?>N|us@(FvcP;Hp+8V4J2A5|v_CB|iI0U$tv?B>6A$Ts3b;Q29QK_vZlyok z0(nGLl-;_Uy%``o>QMf+^j5I?0U2}u+ps^P zYOy^sh^If)vGme^TVJC;h<_7GX!4~$+|bal=0c@E5umJRAY!#Y&a`U*umre2SNWB9 z-Y1biVCqk zjs2&ywU$CLTkS75L`S#p-fUm@^}aGE+eZ6S7Ir~Ndsf)%&aCOOYGgUjej^b&b2pF z?KfPmxX@nvh(LIT(Hi?bdnOqeYHfzQr_gh`#mn$@_R_s8mI^#sYX7%X`B$vYO8cVw zi`qi@HreYWx(k{oEw_)dUfsG)bgBJHue*5`Q#RN$jDpb+7!85Z5Eu=C(GVC7fzc2c J4FR%3004Q&es}-? diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/__pycache__/__init__.cpython-312.pyc b/tests/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2776c0e6b1f3be33cb4de1d3e4b8f4127531179 GIT binary patch literal 209 zcmX@j%ge<81pX5)XMpI(AOanHW&w&!XQ*V*Wb|9fP{ah}eFmxdRiGbQoLW?@pOu=Q zmz$ZFURkQ|lbKYMSX8O+oReRg5?qpBl$f5X@1CEZo|EcQlv$Rlo0XlLoS&O$?vS3F zn3u{y2*kx8#z$sGM#ds$APWEh#W^_u literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_episode_policy.cpython-312-pytest-8.4.2.pyc b/tests/__pycache__/test_episode_policy.cpython-312-pytest-8.4.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..33d0411e409717bf6e4975ce610024aa034076d3 GIT binary patch literal 11315 zcmeHNTW=f372aJg$>mMd#ge7ii76*?#MqWB%NIGdoTRxuxKR?s2~f2KL2FksXg?l<(mcdQUy@o^;KPPR8#eqxxhLdhk7I zvP};GP3mEw?YaiELyrLMgc^2vwR0|?=A8G`qk2q_&nVXvmZm3st#nu9blWY=n7ArC zi5F*cW?460D;IK8l_{?dNeVy&|92h%azV0Wo~g^9D%afD8&Wzj@lT!u6^4yjGgl~= z2o}96-NCH8?+m|b8pIsV8|6|lSDLBJ4gV;2l4MC``1wM4PJhEHlkAK!{BpTGQ!rj2 zxiiMz{ORIUxtKlt>`XD6D+~cY{7NN7^M+sD|LXAZ6VDI7o;i^{Sulnz!?a9FGX{H~ z%q)GOq1lQPVl0V6Pw;b_2_WaCf5dt(cP+;Tmec`)^W&a(S0RDE`#xa&@1A#u6p}@l zfl2eR>>CkfBCWb(vi8i3EBCC+Xq@@#nY7|4rCBGKB_vxR5fG$3AuvfLM|(SSCR>;@ zOws{S%rrZJV8;_5jlDm1eam1ymR?fRtoQzYg+Y$?{wqkmAlXu0d^^Z-gDrYoRdl%; zuoXLSdQg(8s;!#8x3D}7c^r#%L04>bTGnZqY2}(Lp{o|QOhaDdocH95;f!Q<`|78Q z)00BZ7W0evCGiWc3!Y?2hW5Q)Hqlcvrd)g zAfr@C6|?5)TewE9%6FCV@vHKD=fL1>WnbD%jgP009Zj1~h}CgbaRQKsWb|%Z)&%Ub zsLVl-fy9Fow{StFBTj;~mSN?b=(DpG#G*`=I31a(Y@v`L#w-Esn#~f!i9>eg6;$vKvYurkXc_oR`)@Qrni5M1L*Oe|fN;*nR$`)$X1pwP)eb;_L7KU`6eL-whUJ z5kYI1%3^hoT~>P*Ul%fh0pb5qOV`z&O)&Ojm(pDtQ#DX9PcsFN*CG)cNQ-BfZFNL$V#nL^?)rBOy3882GyIfOjy>7qHdSw+2fjm}b z&I18uqI1iI!QTx2;b^^c@ci-BL>FJUSB}>cWB;Y4+i*eQi%_Ly3C6m)(jqriYg&|W z7aX)MLRGQE^7Ir8Sdy^8`4_AaYCSJmi!W1xVv_pBrb zYDq97cYip#lpLtXb^&{(AIMT{*OmTSZ1;+azng9XiwRo8ax7N&Sdo2Q-Mw%|WOH^I z>Pil9-!iMPT4gRyi2S1~9a3%ugFUhqhh`G|$(CjU`;PZM8?;oA<~T7z#?+ zrakc_DcWu>>!I!DazfE-(spyS?dAjLtEquc?Y>IhdbCYZv~|~diWsJ0rKaaf)G*8z za#n@lzE5$Lj37th3Ke8BisTTI!$2m|aRzV+S_$_rkuel_5R4;7phN>y>ZGW2Z{ip?5G{1HgMdIgTno#@7)_I+?pMSFBZX4zAqMSS-thE&keqqCCmlh@1_d( zh7#}v!9oAfdW6O&4K4R*LkZ{+92m6Rqh_;OY#&W$wUC4!#C>13(X19TrHNdWkAwc* z`|62@HpXIwG#z6h31ggSGIlXBc5Rn5Y8EB*q~2~PT;ynL89CZ`P5kL12YO_p@Q+tx z4}jL-Ep7p=J1AOrBEI|9e(T0|vw1BrxapV+Nf`6*23kvbwCC1^YS4$dV%Ml9^{#5% zPTFybyX|&+3yu!WZU@}mf~9<8|J_@G-4AUA`c?1E{h>kxaZ^#I^OPB`*IaeB7Bvkqcm8MF+699w4Nw^M` z41E!L?gAcM)Og4NAm}&QkK_Q7L*$^Fc7(c4M&|GFsNnGvdQ?W!vZLfocrS541w-LF zcbL&a?Ue?mO1jMnl5AEuQ1JlZztF3%NBUc#Qs8TJ(#c%l7H znNfe_Aalwv-_6KA@&l}h9)r1ln*I004v?p@{i8_6ksJeJ;sF46ar>Uk+ucLWCp>Ap z_hY;Th0u~SKLh=XdUd+Do`2#?1{elm4s@Q+1cmSQ_E7i%$a5aI8po4{g%*03y# z)jd{ZUxzf)%XV3R_Tsq6=Ik;w1+Pz=g$ke_q5xxPFGaCWrq*RcnJa@Lo3l{s<>ER? zh~7{~!ED`dXxO);_Q9dyiG^b;Y9IV=uqcZNTEkQpt9$IS+P8Q@$Os06zh?&W{3Gs# znmurFCNo#cy*+2}sh(B{dZg&N6!2)SkbOX={9ZWRny68|2dMv`(w09Js$n(YDFWOd zR#m-I5M3zhU}}SJ1kArcUTk{Mf*U+96Dn-)cGzlOc#yaZT<;LqC%`8d$fMc6F7Q5p z5jh4%r0|w`=4CZ(hi&EbPoXcGt$~8_xOaJ@kx>i>qZ%kEK`iA{P(t4Jmv3;W$ooNk zgW_^XquXAVI9%o1BN&EW31M=1FS@Y=s0VQP*>aD%ef2l;l?f#j)YVOU;*BcwAlyV& z9JA^^H$s!K^S9rqr($+B?WvH2o@#pJl1hz~2pZIO#8qjc79PdY;CmN5nOzrbVJh-( z1}Y7n*@UZFC6JEKKka%E>5aC%GsH0T3aBnzvw^dkl%AW$D;gyW4s57c%DuIPtK$|r z2vNaNj*Y{PzVAL~H2nAmR1VZdjIOk!yM%dlioX!RipnCd-cVaW|zqE3gU0OMu zP7?IKvn_rCIeKst$m%+I4xL(B0Ut6Ce|jBjX^)EZANoc-YFlDc={=+d^m zwbR5E48#Bj~6msA@TCF-uN6F}UXhHXlpTX(DR zu6DFo*5?YwF_MQ;=-f4b1!OHC%ktkO={@NeiN8vnUrJAYC5M0c*t?H?t;q7OI{`^) zUqcQyZ$;zjPl@!WZih5^>+LNnkhmuAxxFO{q_;!fv$hKhY= min_unique_actions diff --git a/tests/test_qlearning_agent.py b/tests/test_qlearning_agent.py new file mode 100644 index 0000000..4b717f4 --- /dev/null +++ b/tests/test_qlearning_agent.py @@ -0,0 +1,109 @@ +import pytest +import numpy as np +from agents.offline_agent import QLearningAgent + +@pytest.fixture +def agent_params(): + return { + 'learning_rate': 0.1, + 'discount_factor': 0.99, + 'epsilon': 0.0 # Deterministic for testing + } + +@pytest.fixture +def agent(agent_params): + return QLearningAgent(agent_params, state_size=4, action_size=3) + +def test_agent_initialization(agent): + """Test agent initialization""" + assert agent.state_size == 4 + assert agent.action_size == 3 + assert agent.lr == 0.1 + assert agent.gamma == 0.99 + assert agent.q_table.shape == (4, 3) + assert np.all(agent.q_table == 0) # Q-table should be initialized to zeros + +def test_get_action_with_mask(agent): + """Test action selection with action masking""" + # Set up known Q-values + agent.q_table[0] = np.array([1.0, 2.0, 3.0]) + + # Test without mask + action = agent.get_action(0) + assert action == 2 # Should choose highest Q-value + + # Test with mask + action_mask = np.array([1, 1, 0]) # Mask out the highest value + action = agent.get_action(0, action_mask) + assert action == 1 # Should choose second highest value + +def test_episode_tracking(agent): + """Test action tracking within an episode""" + agent.q_table[0] = np.array([1.0, 2.0, 3.0]) + + # Take all possible actions + actions = [] + for _ in range(agent.action_size): + action = agent.get_action(0) + assert action is not None + actions.append(action) + + # Verify all actions were unique + assert len(set(actions)) == agent.action_size + + # Next action should be None as all actions are taken + assert agent.get_action(0) is None + +def test_episode_reset(agent): + """Test episode reset functionality""" + agent.q_table[0] = np.array([1.0, 2.0, 3.0]) + + # Take some actions + agent.get_action(0) + agent.get_action(0) + + # Reset episode + agent.reset_episode() + + # Should be able to take the best action again + action = agent.get_action(0) + assert action == 2 # Highest Q-value action + +def test_learning(agent): + """Test Q-learning update""" + # Create a simple batch + batch = { + 'observations': np.array([0]), + 'actions': np.array([1]), + 'rewards': np.array([1.0]), + 'next_observations': np.array([1]), + 'terminals': np.array([False]) + } + + # Set up known Q-values + agent.q_table[1] = np.array([0.5, 0.8, 0.3]) # Next state Q-values + old_value = agent.q_table[0, 1] + + # Perform learning update + agent.learn(batch) + + # Check if Q-value was updated correctly + # Q(s,a) = Q(s,a) + lr * (R + gamma * max(Q(s')) - Q(s,a)) + expected_value = old_value + agent.lr * (1.0 + agent.gamma * 0.8 - old_value) + assert np.isclose(agent.q_table[0, 1], expected_value) + +def test_save_and_load(agent, tmp_path): + """Test model saving and loading""" + # Set some Q-values + agent.q_table[0] = np.array([1.0, 2.0, 3.0]) + + # Save model + save_path = tmp_path / "q_table.npy" + agent.save_model(save_path) + + # Create new agent and load model + new_agent = QLearningAgent(agent_params(), state_size=4, action_size=3) + new_agent.load_q_table(save_path) + + # Check if Q-values match + assert np.all(agent.q_table == new_agent.q_table) diff --git a/usecases/__pycache__/initialize_env_usecase.cpython-312.pyc b/usecases/__pycache__/initialize_env_usecase.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0fcf53bd33d48509815a46d75445ba0e6f69d5f8 GIT binary patch literal 1034 zcmZ8gPfXKL7=K;Yu?>Nkh=KSg&y)1P3Zf{47zrAU3PiyRF0 zc!3CEB*SnZMuYK>_EH{E~-wB$lv}7haAq&-{`v!;N?o zHGE*thVzVBKR}|IA&Z8rsdv;#)2~N}DpFZ9PZ=VO5+gi8Tl%nJWmj=&Yr34>#Vu{E9sEDwaaC~3Ger|^ zcGo9W`#+EMOF&5qKo_nG2$rkd{LKX&9i#_(igstlpx(rgZwvvby)DOV2% z&$HAE>QTD~)CY$5P+MV>gT4`i_x-nRvIN>)C#y1aUkU4mXb_VL>o(LD2;;*k;fYSD zE{qdRjmc^>5f>iHqg12`p-<7`(vYE1F-nBHnif^aZ7NTa*74`Dh!zvC--^aWSqVWb z^e3>hCk$R06dnxs36H|V;;2G|xK1LXPISQ~A-^PHAQg7kLlX(};>j(U0u!Pd)f8F} zZ0#!b5STPNIFD+Z^QW8hjhFIGmk(;ZwZ3#;C5W2NEhgp??_PcPo=Nx5wijya)A#oQ zfd)V}xNB%nvds%HJZD}|kN$};6sQ^@pjG|$UsYMSHYVq^-yyMvXak^W9hgIZPfU~I z3b{$o!qol?^lM-$EW! literal 0 HcmV?d00001