refactor: v2.0 完全解耦 — 阿里云内闭环

- 删除 VOC_DATA_DIR / get_voc_conn（不再跨云直读 SQLite）
- 案例 DB 自带 comments 表，自包含所有数据
- 新增 POST /import-voc：通过 VOC 公网 API 导入评论
- VOC_API_BASE 环境变量控制 API 地址
- 新增 httpx 依赖
2026-04-07 19:47:34 +08:00 · 2026-04-07 19:47:34 +08:00 · c5e2a58258
commit c5e2a58258
parent ec8eaa0b36
5 changed files with 194 additions and 191 deletions
--- a/backend/.env.example
+++ b/backend/.env.example
@ -1,13 +1,19 @@
-# LLM（通过 LiteLLM 网关）
+# 黑手党提案后端 v2.0 — 环境变量
+# 完全独立，阿里云内闭环
+
+# VOC 公网 API（跨云只读访问，用于 import-voc）
+VOC_API_BASE=https://brand.brainwork.club/voc/api/research
+
+# LLM 路由（走同机 LiteLLM）
 LITELLM_PROXY_URL=http://127.0.0.1:4000/v1
-LITELLM_MASTER_KEY=
+LITELLM_MASTER_KEY=sk-xxx
+
+# 模型
 MODEL_ID=qwen-plus
+TEMPERATURE=0.1

-# 向量化（DashScope text-embedding-v4）
-DASHSCOPE_API_KEY=
+# DashScope（向量化用）
+DASHSCOPE_API_KEY=sk-xxx

-# 共享 VOC 数据层
-VOC_DATA_DIR=/opt/apps/voc-researcher/data
-
-# 服务
+# 端口
 PORT=8093
--- a/backend/db.py
+++ b/backend/db.py
@ -1,9 +1,11 @@
 """
-黑手党提案 — 数据库管理
+黑手党提案 — 数据库管理（完全独立，阿里云内闭环）

-双库设计：
-  1. 案例 DB（读写）：每个提案案例一个 SQLite，存分析结果
-  2. VOC DB（只读）：读取共享 VOC 数据层的原始评论
+每个提案案例一个 SQLite 文件，自包含所有数据：
+  - case_card：案例元信息
+  - comments：从 VOC API 导入的评论（本地副本）
+  - ude_sentences / ude_clusters：UDE 分析结果
+  - conflicts / proposal_sections：后续阶段
 """
 import os
 import sqlite3
@ -17,10 +19,11 @@ load_dotenv()
 DATA_DIR = Path(__file__).parent / "data"
 DATA_DIR.mkdir(exist_ok=True)

-VOC_DATA_DIR = Path(os.getenv("VOC_DATA_DIR", ""))
+# VOC 公网 API（腾讯云，跨云只读访问）
+VOC_API_BASE = os.getenv("VOC_API_BASE", "https://brand.brainwork.club/voc/api/research")


-# ═══════════ 案例 DB（读写） ═══════════
+# ═══════════ Schema ═══════════

 CASE_SCHEMA = """
 CREATE TABLE IF NOT EXISTS case_card (
@ -29,13 +32,25 @@ CREATE TABLE IF NOT EXISTS case_card (
    focus_product TEXT,
    competitors TEXT,
    voc_research_id TEXT,
+    voc_api_base TEXT,
    created_at TEXT DEFAULT (datetime('now')),
    status TEXT DEFAULT 'draft'
 );

+CREATE TABLE IF NOT EXISTS comments (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    voc_id INTEGER,
+    platform TEXT,
+    text TEXT NOT NULL,
+    like_count INTEGER DEFAULT 0,
+    published_at TEXT,
+    imported_at TEXT DEFAULT (datetime('now')),
+    UNIQUE(voc_id)
+);
+
 CREATE TABLE IF NOT EXISTS ude_sentences (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
-    voc_comment_id INTEGER,
+    comment_id INTEGER REFERENCES comments(id),
    ude_text TEXT NOT NULL,
    confidence REAL DEFAULT 0.5,
    vector TEXT,
@ -73,8 +88,10 @@ CREATE TABLE IF NOT EXISTS proposal_sections (
 """


+# ═══════════ 案例 DB ═══════════
+
 def get_case_conn(case_id: str) -> sqlite3.Connection:
-    """获取案例 DB 连接（读写）"""
+    """获取案例 DB 连接"""
    path = DATA_DIR / f"{case_id}.db"
    if not path.exists():
        raise FileNotFoundError(f"案例 {case_id} 不存在")
@ -93,8 +110,8 @@ def init_case_db(brand_name: str, category: str = "", focus_product: str = "",
    conn.row_factory = sqlite3.Row
    conn.executescript(CASE_SCHEMA)
    conn.execute(
-        "INSERT INTO case_card (brand_name, category, focus_product, competitors, voc_research_id) VALUES (?,?,?,?,?)",
-        (brand_name, category, focus_product, competitors, voc_research_id)
+        "INSERT INTO case_card (brand_name, category, focus_product, competitors, voc_research_id, voc_api_base) VALUES (?,?,?,?,?,?)",
+        (brand_name, category, focus_product, competitors, voc_research_id, VOC_API_BASE)
    )
    conn.commit()
    conn.close()
@ -111,11 +128,13 @@ def list_cases() -> list[dict]:
            conn.row_factory = sqlite3.Row
            card = conn.execute("SELECT * FROM case_card LIMIT 1").fetchone()
            if card:
+                comment_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
                ude_count = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
                cluster_count = conn.execute("SELECT count(*) FROM ude_clusters").fetchone()[0]
                cases.append({
                    "case_id": case_id,
                    **dict(card),
+                    "comment_count": comment_count,
                    "ude_count": ude_count,
                    "cluster_count": cluster_count,
                })
@ -123,45 +142,3 @@ def list_cases() -> list[dict]:
        except Exception:
            pass
    return cases
-
-
-# ═══════════ VOC DB（只读） ═══════════
-
-def get_voc_conn(voc_research_id: str) -> sqlite3.Connection:
-    """只读访问共享 VOC 数据"""
-    if not VOC_DATA_DIR.exists():
-        raise FileNotFoundError(f"VOC 数据目录不存在: {VOC_DATA_DIR}")
-    path = VOC_DATA_DIR / f"{voc_research_id}.db"
-    if not path.exists():
-        raise FileNotFoundError(f"VOC 研究 {voc_research_id} 不存在")
-    conn = sqlite3.connect(f"file:{path}?mode=ro", uri=True)
-    conn.row_factory = sqlite3.Row
-    return conn
-
-
-def list_voc_researches() -> list[dict]:
-    """列出共享 VOC 数据层中的所有研究"""
-    if not VOC_DATA_DIR.exists():
-        return []
-    researches = []
-    for db_file in sorted(VOC_DATA_DIR.glob("*.db")):
-        if db_file.name in ("global_cache.db", "agent_sessions.db"):
-            continue
-        rid = db_file.stem
-        try:
-            conn = sqlite3.connect(f"file:{db_file}?mode=ro", uri=True)
-            conn.row_factory = sqlite3.Row
-            card = conn.execute("SELECT brand_name FROM research_card LIMIT 1").fetchone()
-            comment_count = conn.execute(
-                "SELECT count(*) FROM comments WHERE length(text) > 10"
-            ).fetchone()[0]
-            conn.close()
-            if card and comment_count > 0:
-                researches.append({
-                    "research_id": rid,
-                    "brand_name": card["brand_name"],
-                    "comment_count": comment_count,
-                })
-        except Exception:
-            pass
-    return researches
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@ -5,3 +5,4 @@ python-dotenv>=1.0.0
 numpy>=1.24.0
 scikit-learn>=1.3.0
 gunicorn>=21.2.0
+httpx>=0.27.0
--- a/backend/server.py
+++ b/backend/server.py
@ -1,12 +1,12 @@
 """
-黑手党提案 — 独立后端
+黑手党提案 — 独立后端（阿里云内闭环）

 FastAPI 服务，端口 8093。
-数据来源：只读访问共享 VOC 数据层。
-分析结果：存自己的案例 DB。
+VOC 数据通过公网 API 导入，不直读 VOC DB。
 """
 import os
 import logging
+import httpx

 from fastapi import FastAPI, Header, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
@ -15,15 +15,12 @@ from dotenv import load_dotenv

 load_dotenv()

-from db import (
-    get_case_conn, get_voc_conn, init_case_db,
-    list_cases as _list_cases, list_voc_researches as _list_voc_researches,
-)
+from db import get_case_conn, init_case_db, list_cases as _list_cases, DATA_DIR, VOC_API_BASE

 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
 logger = logging.getLogger("mafia")

-app = FastAPI(title="黑手党提案后端", version="1.0.0", description="独立后端：共享 VOC 数据层 + 自有分析存储")
+app = FastAPI(title="黑手党提案后端", version="2.0.0", description="独立后端：阿里云内闭环，VOC 通过 API 导入")
 app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
@ -70,18 +67,23 @@ async def get_case(case_id: str):
    try:
        with get_case_conn(case_id) as conn:
            card = conn.execute("SELECT * FROM case_card LIMIT 1").fetchone()
+            comment_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
            ude_count = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
            cluster_count = conn.execute("SELECT count(*) FROM ude_clusters").fetchone()[0]
        if not card:
            raise HTTPException(404, "案例不存在")
-        return {"caseId": case_id, **dict(card), "udeCount": ude_count, "clusterCount": cluster_count}
+        return {
+            "caseId": case_id, **dict(card),
+            "commentCount": comment_count,
+            "udeCount": ude_count,
+            "clusterCount": cluster_count,
+        }
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")


@app.delete("/api/cases/{case_id}")
 async def delete_case(case_id: str):
-    from db import DATA_DIR
    path = DATA_DIR / f"{case_id}.db"
    if path.exists():
        path.unlink()
@ -89,60 +91,107 @@ async def delete_case(case_id: str):
    raise HTTPException(404, "案例不存在")


-# ═══════════ VOC 关联 ═══════════
+# ═══════════ VOC 导入（跨云 API） ═══════════

@app.post("/api/cases/{case_id}/link-voc")
 async def link_voc(case_id: str, req: LinkVocRequest):
-    """关联 VOC 研究 ID（验证 VOC 研究存在后再写入）"""
-    try:
-        with get_voc_conn(req.vocResearchId) as voc:
-            count = voc.execute(
-                "SELECT count(*) FROM comments WHERE length(text) > 10 "
-            ).fetchone()[0]
-    except FileNotFoundError as e:
-        raise HTTPException(404, str(e))
-
+    """关联 VOC 研究 ID"""
    try:
        with get_case_conn(case_id) as conn:
            conn.execute("UPDATE case_card SET voc_research_id = ?", (req.vocResearchId,))
            conn.commit()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")
-
-    return {"linked": True, "vocCommentCount": count}
+    return {"linked": True, "vocResearchId": req.vocResearchId}


-@app.get("/api/voc/researches")
-async def get_voc_researches():
-    return _list_voc_researches()
-
-
-@app.get("/api/cases/{case_id}/voc-comments")
-async def get_voc_comments(case_id: str, page: int = 1, pageSize: int = 50):
-    """从共享 VOC 数据层只读获取原始评论"""
+@app.post("/api/cases/{case_id}/import-voc")
+async def import_voc(case_id: str, page: int = Query(1), pageSize: int = Query(100)):
+    """从 VOC 公网 API 拉取评论数据，存入本地案例 DB"""
    try:
        with get_case_conn(case_id) as conn:
-            card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
+            card = conn.execute("SELECT voc_research_id, voc_api_base FROM case_card LIMIT 1").fetchone()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")

    if not card or not card["voc_research_id"]:
-        raise HTTPException(400, "未关联 VOC 研究")
+        raise HTTPException(400, "未关联 VOC 研究，请先调用 link-voc")

+    voc_rid = card["voc_research_id"]
+    api_base = card["voc_api_base"] or VOC_API_BASE
+
+    # 从 VOC API 拉取（只读，不需要 TikHub Key）
+    total_imported = 0
+    current_page = page
+
+    async with httpx.AsyncClient(timeout=30) as client:
+        while True:
+            url = f"{api_base}/{voc_rid}/voc-list?page={current_page}&page_size={pageSize}"
+            try:
+                resp = await client.get(url)
+                if resp.status_code != 200:
+                    logger.warning(f"[Import] VOC API 返回 {resp.status_code}: {resp.text[:100]}")
+                    break
+                data = resp.json()
+            except Exception as e:
+                logger.error(f"[Import] VOC API 请求失败: {e}")
+                break
+
+            items = data.get("items") or data.get("data") or []
+            if not items:
+                break
+
+            with get_case_conn(case_id) as conn:
+                for item in items:
+                    text = item.get("text", "")
+                    if len(text) < 10:
+                        continue
+                    try:
+                        conn.execute(
+                            "INSERT OR IGNORE INTO comments (voc_id, platform, text, like_count, published_at) VALUES (?,?,?,?,?)",
+                            (
+                                item.get("id"),
+                                item.get("platform", ""),
+                                text,
+                                item.get("like_count", 0),
+                                item.get("published_at", ""),
+                            )
+                        )
+                        total_imported += 1
+                    except Exception:
+                        pass
+                conn.commit()
+
+            total = data.get("total", 0)
+            if current_page * pageSize >= total:
+                break
+            current_page += 1
+
+    # 更新统计
+    with get_case_conn(case_id) as conn:
+        local_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
+
+    return {
+        "imported": total_imported,
+        "totalLocal": local_count,
+        "vocResearchId": voc_rid,
+        "pagesProcessed": current_page - page + 1,
+    }
+
+
+@app.get("/api/cases/{case_id}/comments")
+async def get_comments(case_id: str, page: int = 1, pageSize: int = 50):
+    """查看本地导入的评论"""
    try:
-        with get_voc_conn(card["voc_research_id"]) as voc:
-            total = voc.execute(
-                "SELECT count(*) FROM comments WHERE length(text) > 10 "
-            ).fetchone()[0]
-            rows = voc.execute("""
-                SELECT id, platform, text, like_count, published_at
-                FROM comments WHERE length(text) > 10 
-                ORDER BY like_count DESC
+        with get_case_conn(case_id) as conn:
+            total = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
+            rows = conn.execute("""
+                SELECT id, voc_id, platform, text, like_count, published_at
+                FROM comments ORDER BY like_count DESC
                LIMIT ? OFFSET ?
            """, (pageSize, (page - 1) * pageSize)).fetchall()
-    except FileNotFoundError as e:
-        raise HTTPException(404, str(e))
-
+    except FileNotFoundError:
+        raise HTTPException(404, "案例不存在")
    return {"total": total, "page": page, "items": [dict(r) for r in rows]}


@ -200,11 +249,11 @@ async def get_coverage(case_id: str):

@app.get("/api/health")
 async def health():
-    from db import VOC_DATA_DIR, DATA_DIR
    return {
        "status": "ok",
-        "vocDataDir": str(VOC_DATA_DIR),
-        "vocDataExists": VOC_DATA_DIR.exists(),
+        "version": "2.0.0",
+        "architecture": "independent (Aliyun self-contained)",
+        "vocApiBase": VOC_API_BASE,
        "caseDataDir": str(DATA_DIR),
    }

--- a/backend/tools/ude_extract.py
+++ b/backend/tools/ude_extract.py
@ -1,10 +1,9 @@
 """
-黑手党提案 — UDE 提取工具
+黑手党提案 — UDE 提取工具（阿里云内闭环）

-流程：VOC 原始评论 → LLM 转写 UDE → DashScope 向量化 → DBSCAN 聚类 → 覆盖扫描
+流程：本地 comments → LLM 转写 UDE → DashScope 向量化 → DBSCAN 聚类

-数据来源：只读访问共享 VOC 数据层
-分析结果：写入本项目的案例 DB
+所有数据读写都在案例 DB 内，不跨云。
 """
 from __future__ import annotations

@ -47,7 +46,7 @@ def _get_embed_client(key: str) -> OpenAI:
    )


-# ═══════════ Step 1: VOC → UDE 转写 ═══════════
+# ═══════════ Step 1: 本地评论 → UDE 转写 ═══════════

 async def _call_ude_llm(prompt: str, comments: list[dict]) -> list[dict]:
    """单批 LLM 转写"""
@ -87,40 +86,33 @@ async def _process_ude_batch(comments, prompt, semaphore):


 async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
-    """从共享 VOC 数据读取原始评论，转写为 UDE，存入案例 DB"""
-    from db import get_case_conn, get_voc_conn
+    """从本地 comments 表读取评论，转写为 UDE，存入 ude_sentences"""
+    from db import get_case_conn

    prompt = PROMPT_PATH.read_text("utf-8") if PROMPT_PATH.exists() else ""
    if not prompt:
        return {"error": "UDE 转写 prompt 未找到 (prompts/voc_to_ude.txt)"}

-    with get_case_conn(case_id) as case_conn:
-        card = case_conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
-        if not card or not card["voc_research_id"]:
-            return {"error": "未关联 VOC 研究。请先调用 link-voc。"}
-
-        voc_research_id = card["voc_research_id"]
-
-        # 获取已转写的 voc_comment_ids
-        done_ids = {r[0] for r in case_conn.execute(
-            "SELECT voc_comment_id FROM ude_sentences"
+    with get_case_conn(case_id) as conn:
+        # 获取已转写的 comment_ids
+        done_ids = {r[0] for r in conn.execute(
+            "SELECT comment_id FROM ude_sentences"
        ).fetchall()}

-    # 从 VOC DB 只读获取原始评论
-    with get_voc_conn(voc_research_id) as voc_conn:
-        rows = voc_conn.execute("""
+        # 从本地 comments 表读取
+        rows = conn.execute("""
            SELECT id, platform, text
-            FROM comments
-            WHERE length(text) > 10 
+            FROM comments WHERE length(text) > 10
            ORDER BY id
        """).fetchall()

-    # 过滤已完成的
+    total_comments = len(rows)
    pending = [r for r in rows if r["id"] not in done_ids]
+
    if not pending:
        with get_case_conn(case_id) as conn:
            total = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
-        return {"message": "全部已转写完成", "total_udes": total, "new": 0}
+        return {"message": "全部已转写完成", "totalUdes": total, "new": 0}

    if limit > 0:
        pending = pending[:limit]
@ -137,7 +129,7 @@ async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:

    # 写入案例 DB
    ok = 0
-    with get_case_conn(case_id) as case_conn:
+    with get_case_conn(case_id) as conn:
        for results in all_results:
            for r in (results or []):
                if not isinstance(r, dict):
@ -149,21 +141,21 @@ async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
                if not cid:
                    continue
                try:
-                    case_conn.execute(
-                        "INSERT OR IGNORE INTO ude_sentences (voc_comment_id, ude_text, confidence) VALUES (?, ?, ?)",
+                    conn.execute(
+                        "INSERT OR IGNORE INTO ude_sentences (comment_id, ude_text, confidence) VALUES (?, ?, ?)",
                        (int(cid), ude_text, r.get("confidence", 0.5))
                    )
                    ok += 1
                except Exception as e:
                    logger.warning(f"[UDE] 写入失败 id={cid}: {e}")
-        case_conn.commit()
-        total = case_conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
+        conn.commit()
+        total = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]

    return {
-        "new_udes": ok,
-        "total_udes": total,
-        "total_voc_comments": len(rows),
-        "remaining": len(rows) - total,
+        "newUdes": ok,
+        "totalUdes": total,
+        "totalComments": total_comments,
+        "remaining": total_comments - total,
        "batches": len(batches),
    }

@ -181,10 +173,10 @@ def _embed_texts(client: OpenAI, texts: list[str]) -> list[list[float]]:

 def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
                   dashscope_key: str = None) -> dict:
-    """向量化 + DBSCAN 聚类"""
+    """向量化 + DBSCAN 聚类（全部在本地案例 DB 内）"""
    from sklearn.cluster import DBSCAN
    from sklearn.metrics.pairwise import cosine_distances
-    from db import get_case_conn, get_voc_conn
+    from db import get_case_conn

    key = dashscope_key or os.getenv("DASHSCOPE_API_KEY", "")
    if not key:
@ -193,13 +185,13 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
    embed_client = _get_embed_client(key)

    with get_case_conn(case_id) as conn:
-        rows = conn.execute("SELECT id, voc_comment_id, ude_text FROM ude_sentences ORDER BY id").fetchall()
+        rows = conn.execute("SELECT id, comment_id, ude_text FROM ude_sentences ORDER BY id").fetchall()
        if len(rows) < min_samples:
            return {"error": f"UDE 不足 ({len(rows)} 条)，至少需要 {min_samples} 条。"}

        ude_texts = [r["ude_text"] for r in rows]
        ude_ids = [r["id"] for r in rows]
-        comment_ids = [r["voc_comment_id"] for r in rows]
+        comment_ids = [r["comment_id"] for r in rows]

        # 向量化
        vectors = _embed_texts(embed_client, ude_texts)
@ -223,10 +215,6 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
        # 清空旧聚类，写入新聚类
        conn.execute("DELETE FROM ude_clusters")

-        # 获取关联的 VOC research_id 用于读取原声
-        card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
-        voc_rid = card["voc_research_id"] if card else None
-
        clusters = []
        unique_labels = sorted(set(labels) - {-1})

@ -241,30 +229,24 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
            dists = cosine_distances([centroid], member_vectors)[0]
            representative = member_texts[dists.argmin()]

-            # 取原声
+            # 原声采样（从本地 comments 表）
            sample_voices = []
-            if voc_rid:
-                try:
-                    voc_conn = get_voc_conn(voc_rid)
-                    for cid in member_cids[:5]:
-                        voice = voc_conn.execute(
-                            "SELECT text, platform FROM comments WHERE id = ?", (cid,)
-                        ).fetchone()
-                        if voice:
-                            sample_voices.append({"text": voice["text"][:200], "platform": voice["platform"]})
-                    voc_conn.close()
-                except Exception:
-                    pass
+            for cid in member_cids[:5]:
+                voice = conn.execute(
+                    "SELECT text, platform FROM comments WHERE id = ?", (cid,)
+                ).fetchone()
+                if voice:
+                    sample_voices.append({"text": voice["text"][:200], "platform": voice["platform"]})

            conn.execute(
                "INSERT INTO ude_clusters (representative_ude, coverage, sample_voices) VALUES (?, ?, ?)",
                (representative, len(member_indices), json.dumps(sample_voices, ensure_ascii=False))
            )
            clusters.append({
-                "cluster_id": int(cluster_id),
-                "representative_ude": representative,
+                "clusterId": int(cluster_id),
+                "representativeUde": representative,
                "coverage": len(member_indices),
-                "sample_voices": sample_voices,
+                "sampleVoices": sample_voices,
            })

        conn.commit()
@ -272,24 +254,22 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
        noise_count = int((labels == -1).sum())

    return {
-        "total_udes": len(labels),
-        "num_clusters": len(clusters),
-        "noise_count": noise_count,
-        "noise_pct": round(noise_count / len(labels) * 100, 1) if len(labels) else 0,
+        "totalUdes": len(labels),
+        "numClusters": len(clusters),
+        "noiseCount": noise_count,
+        "noisePct": round(noise_count / len(labels) * 100, 1) if len(labels) else 0,
        "clusters": clusters,
-        "params": {"eps": eps, "min_samples": min_samples},
+        "params": {"eps": eps, "minSamples": min_samples},
    }


-# ═══════════ Step 5: 覆盖扫描 ═══════════
+# ═══════════ 覆盖扫描 ═══════════

 def run_coverage_scan(case_id: str) -> dict:
-    from db import get_case_conn, get_voc_conn
+    from db import get_case_conn

    with get_case_conn(case_id) as conn:
-        card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
-        voc_rid = card["voc_research_id"] if card else None
-
+        total_comments = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
        total_udes = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
        clustered = conn.execute("SELECT count(*) FROM ude_sentences WHERE cluster_id >= 0").fetchone()[0]
        noise = conn.execute("SELECT count(*) FROM ude_sentences WHERE cluster_id = -1").fetchone()[0]
@ -299,27 +279,17 @@ def run_coverage_scan(case_id: str) -> dict:
        ).fetchall()]

        noise_samples = [dict(r) for r in conn.execute(
-            "SELECT ude_text, voc_comment_id, confidence FROM ude_sentences WHERE cluster_id = -1 ORDER BY confidence DESC LIMIT 10"
+            "SELECT ude_text, comment_id, confidence FROM ude_sentences WHERE cluster_id = -1 ORDER BY confidence DESC LIMIT 10"
        ).fetchall()]

-    total_voc = 0
-    if voc_rid:
-        try:
-            with get_voc_conn(voc_rid) as voc:
-                total_voc = voc.execute(
-                    "SELECT count(*) FROM comments WHERE length(text) > 10 "
-                ).fetchone()[0]
-        except Exception:
-            pass
-
    return {
-        "total_voc_comments": total_voc,
-        "total_udes": total_udes,
-        "udes_clustered": clustered,
-        "udes_noise": noise,
-        "coverage_rate": round(clustered / total_voc * 100, 1) if total_voc else 0,
-        "cluster_distribution": cluster_stats,
-        "noise_samples": noise_samples,
+        "totalComments": total_comments,
+        "totalUdes": total_udes,
+        "udesClustered": clustered,
+        "udesNoise": noise,
+        "coverageRate": round(clustered / total_comments * 100, 1) if total_comments else 0,
+        "clusterDistribution": cluster_stats,
+        "noiseSamples": noise_samples,
        "verdict": "充分" if (total_udes > 0 and noise / total_udes < 0.1) else
                   ("需关注" if (total_udes > 0 and noise / total_udes < 0.2) else "需调参"),
    }