# q_table_demo/app/services/negotiation_env.py

"""
협상 환경 시뮬레이터 서비스
"""
import random
import numpy as np
from typing import Dict, Tuple, Optional
from app.models.schemas import ScenarioType, PriceZoneType, CardType


class NegotiationEnvironment:
    """Negotiation environment simulator.

    Holds the weight tables used by the reward function and offers helpers
    to classify prices into zones, simulate the counterpart's offers, judge
    success, and enumerate / parse state identifiers.
    """

    def __init__(self):
        # Scenario weights S_n (values follow the design document).
        self.scenario_weights = {
            ScenarioType.A: 1.0,    # S_1 = A
            ScenarioType.D: 0.75,   # S_2 = D
            ScenarioType.C: 0.5,    # S_3 = C
            ScenarioType.B: 0.25    # S_4 = B
        }

        # Price-zone weights PZ_n.
        self.price_zone_weights = {
            PriceZoneType.PZ1: 0.1,   # P < A (best zone)
            PriceZoneType.PZ2: 0.5,   # A < P < T (middle zone)
            PriceZoneType.PZ3: 1.0    # T < P (worst zone)
        }

        # Per-card negotiation effect (simulation only). Only
        # "price_multiplier" is read by the methods of this class;
        # "success_rate" is not used here.
        self.card_effects = {
            CardType.C1: {"price_multiplier": 1.2, "success_rate": 0.3},
            CardType.C2: {"price_multiplier": 1.1, "success_rate": 0.5},
            CardType.C3: {"price_multiplier": 1.0, "success_rate": 0.7},
            CardType.C4: {"price_multiplier": 0.9, "success_rate": 0.8}
        }

        # Per-scenario negotiation difficulty (multiplies the simulated price).
        self.scenario_difficulty = {
            ScenarioType.A: 1.3,    # hardest negotiation
            ScenarioType.B: 1.1,    # moderate difficulty
            ScenarioType.C: 0.95,   # easy negotiation
            ScenarioType.D: 0.85    # easiest negotiation
        }

    def calculate_reward(
        self,
        scenario: ScenarioType,
        price_zone: PriceZoneType,
        anchor_price: float,
        proposed_price: float,
        is_end: bool
    ) -> Tuple[float, float]:
        """
        Compute the reward R(s,a) = W * (A/P) + (1-W) * End.

        W is the mean of the scenario weight and the price-zone weight:
        W = (S_n + PZ_n) / 2.

        Args:
            scenario: Scenario type (key into ``scenario_weights``).
            price_zone: Price zone (key into ``price_zone_weights``).
            anchor_price: Target price (A).
            proposed_price: Proposed price (P).
            is_end: Whether the negotiation has ended (End term is 1 or 0).

        Returns:
            (reward, weight): the reward value and the weight W.

        Raises:
            KeyError: If ``scenario`` or ``price_zone`` has no weight entry.
        """
        weight = (
            self.scenario_weights[scenario]
            + self.price_zone_weights[price_zone]
        ) / 2

        # Guard against division by zero: a zero price yields an infinite
        # ratio instead of raising.
        # NOTE(review): the resulting reward is then infinite as well —
        # confirm that downstream consumers tolerate non-finite rewards.
        if proposed_price == 0:
            price_ratio = float('inf')
        else:
            price_ratio = anchor_price / proposed_price

        end_term = 1 if is_end else 0
        reward = weight * price_ratio + (1 - weight) * end_term

        return reward, weight

    def get_price_zone(
        self,
        price: float,
        anchor_price: float,
        threshold_multiplier: float = 1.2
    ) -> PriceZoneType:
        """
        Classify a price into a price zone.

        Args:
            price: Proposed price.
            anchor_price: Target price.
            threshold_multiplier: Factor applied to ``anchor_price`` to get
                the upper threshold of the middle zone.

        Returns:
            ``PZ1`` when ``price <= anchor_price``, ``PZ2`` when it is above
            the anchor but not above the threshold, ``PZ3`` otherwise.
        """
        if price <= anchor_price:
            return PriceZoneType.PZ1  # at or below target (good)

        if price <= anchor_price * threshold_multiplier:
            return PriceZoneType.PZ2  # between target and threshold (fair)

        return PriceZoneType.PZ3  # above threshold (bad)

    def simulate_opponent_response(
        self,
        current_card: CardType,
        scenario: ScenarioType,
        anchor_price: float,
        step: int = 0
    ) -> float:
        """
        Simulate the counterpart's price offer.

        The offer is ``anchor_price`` scaled by the card's price multiplier,
        the scenario difficulty, a step-based concession and uniform random
        noise, floored at 70% of ``anchor_price`` and rounded to 2 decimals.

        Args:
            current_card: Card just played (key into ``card_effects``).
            scenario: Current scenario (key into ``scenario_difficulty``).
            anchor_price: Target price.
            step: Current negotiation step.

        Returns:
            The counterpart's proposed price.

        Raises:
            KeyError: If ``current_card`` or ``scenario`` has no table entry.
        """
        card_multiplier = self.card_effects[current_card]["price_multiplier"]
        difficulty = self.scenario_difficulty[scenario]

        # Concession grows 5% per step; the discount is capped at 30%
        # (the factor never drops below 0.7).
        step_discount = max(1.0 - (step * 0.05), 0.7)

        base_multiplier = card_multiplier * difficulty * step_discount

        # Random noise drawn from the global NumPy RNG for variability.
        noise = np.random.uniform(0.85, 1.15)

        proposed_price = anchor_price * base_multiplier * noise

        # Price floor: never offer below 70% of the target price.
        price_floor = anchor_price * 0.7
        proposed_price = max(proposed_price, price_floor)

        return round(proposed_price, 2)

    def is_negotiation_successful(
        self,
        proposed_price: float,
        anchor_price: float,
        tolerance: float = 0.05
    ) -> bool:
        """
        Decide whether the negotiation succeeded.

        Args:
            proposed_price: Proposed price.
            anchor_price: Target price.
            tolerance: Allowed overshoot as a fraction of ``anchor_price``
                (default 5%).

        Returns:
            True when ``proposed_price <= anchor_price * (1 + tolerance)``.
        """
        return proposed_price <= anchor_price * (1 + tolerance)

    def get_all_states(self) -> list[str]:
        """Return every possible state ID, starting with the initial state.

        After the initial ``"C0S0P0"`` entry, IDs are the concatenated
        ``value`` of each (card, scenario, price zone) combination, iterated
        in enum definition order with the price zone varying fastest.
        """
        states = ["C0S0P0"]  # initial state
        states.extend(
            f"{card.value}{scenario.value}{price_zone.value}"
            for card in CardType
            for scenario in ScenarioType
            for price_zone in PriceZoneType
        )
        return states

    def get_all_actions(self) -> list[CardType]:
        """Return every possible action (all ``CardType`` members)."""
        return list(CardType)

    def parse_state(self, state_id: str) -> Optional[Dict[str, str]]:
        """
        Split a state ID into its components.

        A regular ID is 6 characters: 2 for the card, 1 for the scenario and
        3 for the price zone; each part must equal the ``value`` of a member
        of the corresponding enum.

        Args:
            state_id: State ID (e.g. "C1APZ1").

        Returns:
            Dict with "card", "scenario" and "price_zone" keys, or None when
            ``state_id`` is not a string or is not a valid state ID.
        """
        if state_id == "C0S0P0":
            return {"card": "C0", "scenario": "S0", "price_zone": "P0"}

        # Anything that is not a 6-character string cannot be a state ID;
        # checking the type first keeps non-string input from raising.
        if not isinstance(state_id, str) or len(state_id) != 6:
            return None

        card = state_id[:2]        # e.g. "C1"
        scenario = state_id[2]     # e.g. "A"
        price_zone = state_id[3:]  # e.g. "PZ1"

        is_valid = (
            any(card == member.value for member in CardType)
            and any(scenario == member.value for member in ScenarioType)
            and any(price_zone == member.value for member in PriceZoneType)
        )
        if not is_valid:
            return None

        return {
            "card": card,
            "scenario": scenario,
            "price_zone": price_zone
        }