Analizer Agent

2026-03-05 10:30:13 +03:00
parent 36a7d530c1
commit a28b520b8a
1 changed files with 285 additions and 164 deletions
--- a/agents_service/src/agents/analyze_agent.py
+++ b/agents_service/src/agents/analyze_agent.py
@@ -1,193 +1,314 @@
-from math import inf
+from __future__ import annotations
 import json
 import re
 from math import inf
 from typing import Any, Dict, List, Optional
-def safe_div(a, b):
+from src.llm.gigachat_client import GigaChatClient
    return (a / b) if b else None
 KPI_LABELS = {
    "ctr": "CTR",
    "cpc": "CPC",
    "cr": "CR",
    "cpl": "CPL",
    "cpa": "CPA",
 }
-def parse_policy_text(text: str) -> dict:
+def _extract_json(text: str) -> dict:
-    """Very small heuristic parser for free-form requests.
+    """Extract first JSON object from model output."""
    m = re.search(r"\{[\s\S]*\}", text)
    if not m:
        raise ValueError("LLM did not return JSON")
    return json.loads(m.group(0))
 KPI_LABELS = {"ctr": "CTR", "cpc": "CPC", "cr": "CR", "cpl": "CPL", "cpa": "CPA"}
 def _kpi_default_for_objective(objective: str) -> str:
    obj = (objective or "leads").lower()
    if obj in ("leads", "lead"):
        return "cpl"
    if obj in ("conversions", "conversion", "purchase"):
        return "cpa"
    if obj in ("clicks", "traffic"):
        return "cpc"
    return "cpl"
 def _direction_for_kpi(kpi: str) -> str:
    k = (kpi or "").lower()
    return "max" if k in ("ctr", "cr") else "min"
    Supports phrases like:
      'Оптимизируй по CPA, хороший до 800, средний до 1200, клики минимум 20'
    """
    if not text:
        return {}
    t = text.lower()
    kpi = None
    for k in ["cpa","cpl","cpc","ctr","cr"]:
        if k in t:
            kpi = k
            break
    direction = "max" if kpi in ("ctr","cr") else "min"
    nums = list(map(float, re.findall(r"(\d+[\.,]?\d*)", t)))
    good = nums[0] if len(nums) >= 1 else None
    ok = nums[1] if len(nums) >= 2 else None
    min_clicks = 0
    m = re.search(r"клик[аио]в?\s*(?:минимум|min)\s*(\d+)", t)
    if m: min_clicks = int(m.group(1))
    min_impr = 0
    m = re.search(r"показ[а-я]*\s*(?:минимум|min)\s*(\d+)", t)
    if m: min_impr = int(m.group(1))
    return {
        "mode": "text",
        "primary_kpi": kpi or "cpl",
        "direction": direction,
        "good_threshold": good,
        "ok_threshold": ok,
        "min_clicks": min_clicks,
        "min_impressions": min_impr,
        "query_text": text,
    }
 class AnalyzeAgent:
-    def analyze(self, req: dict) -> dict:
+    """
-        rows = req["rows"]
+    Agent #2 (LLM-first):
        objective = (req.get("objective") or "leads").lower()
        policy = req.get("policy") or {}
        if policy.get("mode") == "text" and policy.get("query_text"):
            policy = {**parse_policy_text(policy.get("query_text")), **policy}
-        primary_kpi = (policy.get("primary_kpi") or "").lower() or ("cpl" if objective=="leads" else "cpa" if objective=="conversions" else "cpc")
+    Input:
-        direction = (policy.get("direction") or ("max" if primary_kpi in ("ctr","cr") else "min")).lower()
+      - objective (goal)
-        good_th = policy.get("good_threshold", None)
+      - rows: list of segment-result rows per assignment (impressions/clicks/conversions/leads/spend)
        ok_th = policy.get("ok_threshold", None)
        min_impr = int(policy.get("min_impressions") or 0)
        min_clicks = int(policy.get("min_clicks") or 0)
-        # score per (variant, segment) first
+    Output:
-        by_variant = {}
+      - ranking of variants
      - per-segment metrics table (CTR/CPC/CR/CPL/CPA) for each variant
      - recommendations
    """
    def __init__(self):
        self.llm: Optional[Any] = None
        try:
            self.llm = GigaChatClient(temperature=0.2, max_tokens=1600).llm
        except Exception:
            self.llm = None
    def _fallback_compute(self, objective: str, rows: List[dict]) -> dict:
        """Deterministic fallback if LLM unavailable. Uses correct min/max logic."""
        def safe_div(a: float, b: float) -> Optional[float]:
            return (a / b) if b else None
        primary = _kpi_default_for_objective(objective)
        direction = _direction_for_kpi(primary)
        by_variant: Dict[Any, dict] = {}
        for r in rows:
            vid = r.get("variant_id")
            fmt = r.get("format")
            seg_id = r.get("segment_id")
-            seg_name = r.get("segment_name") or (f"Сегмент {seg_id}" if seg_id is not None else None)
+            seg_name = r.get("segment_name") or (f"Сегмент {seg_id}" if seg_id is not None else "Сегмент")
-            impressions = int(r.get("impressions") or 0)
+            imp = int(r.get("impressions") or 0)
-            clicks = int(r.get("clicks") or 0)
+            clk = int(r.get("clicks") or 0)
-            conversions = int(r.get("conversions") or 0)
+            conv = int(r.get("conversions") or 0)
            leads = int(r.get("leads") or 0)
            spend = float(r.get("spend") or 0.0)
-            ctr = safe_div(clicks, impressions)
+            metrics = {
-            cr = safe_div(conversions, clicks)
+                "ctr": safe_div(clk, imp),
-            cpc = safe_div(spend, clicks)
+                "cpc": safe_div(spend, clk),
-            cpa = safe_div(spend, conversions) if conversions else None
+                "cr": safe_div(conv, clk),
-            cpl = safe_div(spend, leads) if leads else None
+                "cpl": safe_div(spend, leads) if leads else None,
-
+                "cpa": safe_div(spend, conv) if conv else None,
            m = {
                "impressions": impressions, "clicks": clicks, "conversions": conversions, "leads": leads, "spend": spend,
                "ctr": ctr, "cr": cr, "cpc": cpc, "cpa": cpa, "cpl": cpl
            }
-            entry = by_variant.setdefault(vid, {"variant_id": vid, "format": fmt, "totals": {"impressions":0,"clicks":0,"conversions":0,"leads":0,"spend":0.0}, "segments": []})
+            entry = by_variant.setdefault(
                vid,
                {"variant_id": vid, "format": fmt, "segments": [], "totals": {"impressions": 0, "clicks": 0, "conversions": 0, "leads": 0, "spend": 0.0}},
            )
            entry["format"] = entry["format"] or fmt
-            entry["totals"]["impressions"] += impressions
+            entry["totals"]["impressions"] += imp
-            entry["totals"]["clicks"] += clicks
+            entry["totals"]["clicks"] += clk
-            entry["totals"]["conversions"] += conversions
+            entry["totals"]["conversions"] += conv
            entry["totals"]["leads"] += leads
            entry["totals"]["spend"] += spend
-            entry["segments"].append({"segment_id": seg_id, "segment_name": seg_name, "metrics": m})
+            entry["segments"].append({"segment_id": seg_id, "segment_name": seg_name, "metrics": metrics})
-        # compute aggregated metrics per variant
+        scored = []
-        scored=[]
+        for vid, v in by_variant.items():
-        for vid, data in by_variant.items():
+            t = v["totals"]
-            t = data["totals"]
+            agg = {
-            ctr = safe_div(t["clicks"], t["impressions"])
+                "ctr": safe_div(t["clicks"], t["impressions"]),
-            cr = safe_div(t["conversions"], t["clicks"])
+                "cpc": safe_div(t["spend"], t["clicks"]),
-            cpc = safe_div(t["spend"], t["clicks"])
+                "cr": safe_div(t["conversions"], t["clicks"]),
-            cpa = safe_div(t["spend"], t["conversions"]) if t["conversions"] else None
+                "cpl": safe_div(t["spend"], t["leads"]) if t["leads"] else None,
-            cpl = safe_div(t["spend"], t["leads"]) if t["leads"] else None
+                "cpa": safe_div(t["spend"], t["conversions"]) if t["conversions"] else None,
            }
            val = agg.get(primary)
            sort_val = (inf if direction == "min" else -inf) if val is None else (val if direction == "min" else -val)
            scored.append((sort_val, vid, v["format"], agg, v["segments"]))
-            agg = {**t, "ctr": ctr, "cr": cr, "cpc": cpc, "cpa": cpa, "cpl": cpl}
+        scored.sort(key=lambda x: x[0])
-            # choose KPI value
+        ranking = []
-            kpi_value = agg.get(primary_kpi)
+        for i, (_, vid, fmt, agg, segs) in enumerate(scored, start=1):
-            # low data rule
+            ranking.append(
-            low_data = (t["impressions"] < min_impr) or (t["clicks"] < min_clicks)
+                {
-            status = "low_data" if low_data else "unknown"
+                    "rank": i,
-            if not low_data and kpi_value is not None:
+                    "variant_id": vid,
-                if direction == "min":
+                    "format": fmt,
-                    if good_th is not None and kpi_value <= good_th:
+                    "status": "unknown",
-                        status = "good"
+                    "reason": None,
-                    elif ok_th is not None and kpi_value <= ok_th:
+                    "kpi": primary,
-                        status = "ok"
+                    "kpi_value": agg.get(primary),
-                    else:
+                    "metrics": agg,
-                        status = "bad"
+                    "segments": segs,
-                else:
+                }
-                    if good_th is not None and kpi_value >= good_th:
+            )
                        status = "good"
                    elif ok_th is not None and kpi_value >= ok_th:
                        status = "ok"
                    else:
                        status = "bad"
            # sort key
            if kpi_value is None:
                sort_k = inf if direction=="min" else -inf
            else:
                sort_k = kpi_value if direction=="min" else -kpi_value
            scored.append({
                "variant_id": vid,
                "format": data["format"],
                "status": status,
                "kpi": primary_kpi,
                "kpi_value": kpi_value,
                "metrics": agg,
                "segments": data["segments"],
                "_sort": (0 if status!="low_data" else 1, sort_k, -(ctr or 0)),
            })
        scored.sort(key=lambda x: x["_sort"])
        ranking=[]
        for i, s in enumerate(scored, start=1):
            ranking.append({
                "rank": i,
                "variant_id": s["variant_id"],
                "format": s["format"],
                "status": s["status"],
                "kpi": s["kpi"],
                "kpi_value": s["kpi_value"],
                "metrics": s["metrics"],
                "segments": s["segments"],
            })
        # recommendations (deterministic, but "agent-like")
        rec=[]
        if ranking:
            good = [x for x in ranking if x["status"]=="good"]
            bad = [x for x in ranking if x["status"]=="bad"]
            ok = [x for x in ranking if x["status"]=="ok"]
            ld = [x for x in ranking if x["status"]=="low_data"]
            if good:
                rec.append(f"Масштабируйте: лучший текст #{good[0]['variant_id']} (статус: хороший по {KPI_LABELS.get(primary_kpi, primary_kpi)}).")
            if ok:
                rec.append("Средние варианты можно улучшить: проверьте УТП/CTA и уточните аудиторию сегмента.")
            if bad:
                rec.append("Плохие варианты лучше переписать: попробуйте другой заголовок/обещание, проверьте ограничения и соответствие сегменту.")
            if ld:
                rec.append("Для части вариантов мало данных. Наберите больше показов/кликов, затем повторите анализ.")
        return {
-            "policy_used": {
+            "policy_used": {"primary_kpi": primary, "direction": direction},
                "mode": policy.get("mode") or "thresholds",
                "primary_kpi": primary_kpi,
                "direction": direction,
                "good_threshold": good_th,
                "ok_threshold": ok_th,
                "min_impressions": min_impr,
                "min_clicks": min_clicks,
                "query_text": policy.get("query_text"),
            },
            "ranking": ranking,
-            "recommendations": rec,
+            "recommendations": ["LLM недоступен — показан детерминированный рейтинг по цели теста."],
        }
    def _llm_analyze(self, objective: str, rows: List[dict], policy: dict) -> dict:
        """
        Ask GigaChat to:
          - compute metrics
          - choose KPI (or respect policy.primary_kpi)
          - rank and set statuses
          - return per-segment metrics table per variant
        """
        primary_hint = (policy.get("primary_kpi") or "").lower().strip() or _kpi_default_for_objective(objective)
        direction_hint = (policy.get("direction") or "").lower().strip() or _direction_for_kpi(primary_hint)
        system = (
            "Ты маркетинговый аналитик A/B тестов. "
            "Тебе дают сырые данные: показы, клики, конверсии, лиды, расход по сегментам и вариантам. "
            "Твоя задача — корректно вычислить метрики CTR/CPC/CR/CPL/CPA, выбрать основной KPI, "
            "ранжировать варианты и выставить статусы good/ok/bad/low_data. "
            "ВАЖНО: соблюдай направление оптимизации: для CPA/CPL/CPC меньше=лучше; для CTR/CR больше=лучше. "
            "Отвечай СТРОГО валидным JSON без лишнего текста."
        )
        user = f"""Цель теста: {objective}
 Подсказка по KPI (если не противоречит здравому смыслу):
 primary_kpi_hint: {primary_hint}
 direction_hint: {direction_hint}
 Политика (может быть частично задана, пороги могут отсутствовать):
 {json.dumps(policy, ensure_ascii=False)}
 Сырые данные rows:
 - variant_id (int)
 - format (str)
 - segment_id (int)
 - segment_name (str)
 - impressions (int)
 - clicks (int)
 - conversions (int)
 - leads (int)
 - spend (float)
 rows:
 {json.dumps(rows, ensure_ascii=False)}
 Нужно вернуть JSON строго формата:
 {{
  "primary_kpi": "cpl|cpa|cpc|ctr|cr",
  "direction": "min|max",
  "ranking": [
    {{
      "rank": 1,
      "variant_id": 1,
      "status": "good|ok|bad|low_data",
      "reason": "коротко почему (1 строка)",
      "kpi_value": 123.45
    }}
  ],
  "variants": [
    {{
      "variant_id": 1,
      "format": "search_ad|social_post|email",
      "segments": [
        {{
          "segment_name": "Группа A",
          "metrics": {{
            "ctr": 0.012,
            "cpc": 40.0,
            "cr": 0.0333,
            "cpl": 480.0,
            "cpa": 1200.0
          }}
        }}
      ],
      "totals": {{
        "ctr": 0.012,
        "cpc": 40.0,
        "cr": 0.0333,
        "cpl": 480.0,
        "cpa": 1200.0
      }}
    }}
  ],
  "recommendations": ["...", "..."]
 }}
 Требования:
 - ranking должен содержать ВСЕ variant_id ровно один раз.
 - rank должен быть 1..N.
 - variants должен содержать ВСЕ variant_id.
 - Метрики считай так:
  CTR = clicks / impressions
  CPC = spend / clicks
  CR  = conversions / clicks
  CPL = spend / leads
  CPA = spend / conversions
 - Если деление невозможно (0 в знаменателе) — ставь null.
 """
        raw = self.llm.invoke(f"{system}\n\n{user}")
        text = raw if isinstance(raw, str) else getattr(raw, "content", str(raw))
        return _extract_json(text)
    def analyze(self, req: dict) -> dict:
        objective = (req.get("objective") or "leads").lower()
        rows = req.get("rows") or []
        policy = req.get("policy") or {}
        if self.llm is None:
            return self._fallback_compute(objective, rows)
        try:
            out = self._llm_analyze(objective, rows, policy)
        except Exception:
            # if LLM fails — return safe deterministic
            return self._fallback_compute(objective, rows)
        # --- SAFETY: enforce correct ordering by returned KPI/direction using kpi_value ---
        primary = (out.get("primary_kpi") or _kpi_default_for_objective(objective)).lower()
        direction = (out.get("direction") or _direction_for_kpi(primary)).lower()
        ranking = out.get("ranking") or []
        # If LLM forgot ranks or order — fix it deterministically by kpi_value
        if isinstance(ranking, list) and ranking:
            # validate uniqueness
            seen = set()
            cleaned = []
            for r in ranking:
                vid = r.get("variant_id")
                if vid in seen:
                    continue
                seen.add(vid)
                cleaned.append(r)
            # sort by kpi_value using direction, None goes last
            def key_fn(r):
                v = r.get("kpi_value")
                if v is None:
                    return (1, inf)
                return (0, float(v) if direction == "min" else -float(v))
            cleaned.sort(key=key_fn)
            for i, r in enumerate(cleaned, start=1):
                r["rank"] = i
            out["ranking"] = cleaned
        # Build response compatible with your current backend/UI expectations:
        # UI сейчас ждёт "ranking": [{..., "segments": [...] }].
        # Мы склеим из out["variants"] + out["ranking"].
        variants_map = {v["variant_id"]: v for v in (out.get("variants") or []) if "variant_id" in v}
        final_ranking = []
        for r in out.get("ranking", []):
            vid = r.get("variant_id")
            v = variants_map.get(vid, {})
            final_ranking.append(
                {
                    "rank": r.get("rank"),
                    "variant_id": vid,
                    "format": v.get("format"),
                    "status": r.get("status"),
                    "reason": r.get("reason"),
                    "kpi": primary,
                    "kpi_value": r.get("kpi_value"),
                    "metrics": v.get("totals") or {},
                    "segments": [
                        {
                            "segment_name": s.get("segment_name"),
                            "metrics": s.get("metrics") or {},
                        }
                        for s in (v.get("segments") or [])
                    ],
                }
            )
        return {
            "policy_used": {"primary_kpi": primary, "direction": direction},
            "ranking": final_ranking,
            "recommendations": out.get("recommendations") or [],
        }