From c5e2a58258c330ba4b1f1cc263bafc0f2ff0ec1c Mon Sep 17 00:00:00 2001
From: lidf
Date: Tue, 7 Apr 2026 19:47:34 +0800
Subject: [PATCH] refactor: v2.0 fully decoupled, self-contained on Aliyun
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove VOC_DATA_DIR / get_voc_conn (no more cross-cloud direct reads of SQLite)
- Case DB carries its own comments table; each case is fully self-contained
- Add POST /import-voc: imports comments through the public VOC API
- VOC_API_BASE environment variable controls the API address
- Add httpx dependency
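Smoke test, as a minimal sketch of the new flow (the host assumes PORT=8093 from .env; the case id and research id are placeholders, and the case itself is assumed to already exist):

    import httpx

    BASE = "http://127.0.0.1:8093/api"
    cid = "case_xxx"  # placeholder: an existing case id

    # 1) point the case at a VOC research (placeholder id)
    httpx.post(f"{BASE}/cases/{cid}/link-voc", json={"vocResearchId": "r_xxx"})
    # 2) pull comments from the public VOC API into the local case DB
    print(httpx.post(f"{BASE}/cases/{cid}/import-voc", timeout=300).json())
    # 3) read back the local copy
    print(httpx.get(f"{BASE}/cases/{cid}/comments").json()["total"])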
---
 backend/.env.example         |  22 ++++--
 backend/db.py                |  81 +++++++-------
 backend/requirements.txt     |   1 +
 backend/server.py            | 141 +++++++++++++++++++++++------------
 backend/tools/ude_extract.py | 140 ++++++++++++++--------------------
 5 files changed, 194 insertions(+), 191 deletions(-)

diff --git a/backend/.env.example b/backend/.env.example
index e972ce0..bcbc865 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -1,13 +1,19 @@
-# LLM(通过 LiteLLM 网关)
+# 黑手党提案后端 v2.0 — 环境变量
+# 完全独立,阿里云内闭环
+
+# VOC 公网 API(跨云只读访问,用于 import-voc)
+VOC_API_BASE=https://brand.brainwork.club/voc/api/research
+
+# LLM 路由(走同机 LiteLLM)
 LITELLM_PROXY_URL=http://127.0.0.1:4000/v1
-LITELLM_MASTER_KEY=
+LITELLM_MASTER_KEY=sk-xxx
+
+# 模型
 MODEL_ID=qwen-plus
+TEMPERATURE=0.1
 
-# 向量化(DashScope text-embedding-v4)
-DASHSCOPE_API_KEY=
+# DashScope(向量化用)
+DASHSCOPE_API_KEY=sk-xxx
 
-# 共享 VOC 数据层
-VOC_DATA_DIR=/opt/apps/voc-researcher/data
-
-# 服务
+# 端口
 PORT=8093

diff --git a/backend/db.py b/backend/db.py
index ce4a8ee..1f04293 100644
--- a/backend/db.py
+++ b/backend/db.py
@@ -1,9 +1,11 @@
 """
-黑手党提案 — 数据库管理
+黑手党提案 — 数据库管理(完全独立,阿里云内闭环)
 
-双库设计:
-  1. 案例 DB(读写):每个提案案例一个 SQLite,存分析结果
-  2. VOC DB(只读):读取共享 VOC 数据层的原始评论
+每个提案案例一个 SQLite 文件,自包含所有数据:
+  - case_card:案例元信息
+  - comments:从 VOC API 导入的评论(本地副本)
+  - ude_sentences / ude_clusters:UDE 分析结果
+  - conflicts / proposal_sections:后续阶段
 """
 import os
 import sqlite3
@@ -17,10 +19,11 @@ load_dotenv()
 
 DATA_DIR = Path(__file__).parent / "data"
 DATA_DIR.mkdir(exist_ok=True)
 
-VOC_DATA_DIR = Path(os.getenv("VOC_DATA_DIR", ""))
+# VOC 公网 API(腾讯云,跨云只读访问)
+VOC_API_BASE = os.getenv("VOC_API_BASE", "https://brand.brainwork.club/voc/api/research")
 
-# ═══════════ 案例 DB(读写) ═══════════
+# ═══════════ Schema ═══════════
 
 CASE_SCHEMA = """
 CREATE TABLE IF NOT EXISTS case_card (
@@ -29,13 +32,25 @@ CREATE TABLE IF NOT EXISTS case_card (
     focus_product TEXT,
     competitors TEXT,
     voc_research_id TEXT,
+    voc_api_base TEXT,
     created_at TEXT DEFAULT (datetime('now')),
     status TEXT DEFAULT 'draft'
 );
 
+CREATE TABLE IF NOT EXISTS comments (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    voc_id INTEGER,
+    platform TEXT,
+    text TEXT NOT NULL,
+    like_count INTEGER DEFAULT 0,
+    published_at TEXT,
+    imported_at TEXT DEFAULT (datetime('now')),
+    UNIQUE(voc_id)
+);
+
 CREATE TABLE IF NOT EXISTS ude_sentences (
     id INTEGER PRIMARY KEY AUTOINCREMENT,
-    voc_comment_id INTEGER,
+    comment_id INTEGER REFERENCES comments(id),
     ude_text TEXT NOT NULL,
     confidence REAL DEFAULT 0.5,
     vector TEXT,
@@ -73,8 +88,10 @@ CREATE TABLE IF NOT EXISTS proposal_sections (
 """
 
 
+# ═══════════ 案例 DB ═══════════
+
 def get_case_conn(case_id: str) -> sqlite3.Connection:
-    """获取案例 DB 连接(读写)"""
+    """获取案例 DB 连接"""
     path = DATA_DIR / f"{case_id}.db"
     if not path.exists():
         raise FileNotFoundError(f"案例 {case_id} 不存在")
@@ -93,8 +110,8 @@ def init_case_db(brand_name: str, category: str = "", focus_product: str = "",
     conn.row_factory = sqlite3.Row
     conn.executescript(CASE_SCHEMA)
     conn.execute(
-        "INSERT INTO case_card (brand_name, category, focus_product, competitors, voc_research_id) VALUES (?,?,?,?,?)",
-        (brand_name, category, focus_product, competitors, voc_research_id)
+        "INSERT INTO case_card (brand_name, category, focus_product, competitors, voc_research_id, voc_api_base) VALUES (?,?,?,?,?,?)",
+        (brand_name, category, focus_product, competitors, voc_research_id, VOC_API_BASE)
     )
     conn.commit()
     conn.close()
@@ -111,11 +128,13 @@ def list_cases() -> list[dict]:
             conn.row_factory = sqlite3.Row
             card = conn.execute("SELECT * FROM case_card LIMIT 1").fetchone()
             if card:
+                comment_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
                 ude_count = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
                 cluster_count = conn.execute("SELECT count(*) FROM ude_clusters").fetchone()[0]
                 cases.append({
                     "case_id": case_id,
                     **dict(card),
+                    "comment_count": comment_count,
                     "ude_count": ude_count,
                     "cluster_count": cluster_count,
                 })
@@ -123,45 +142,3 @@ def list_cases() -> list[dict]:
         except Exception:
             pass
     return cases
-
-
-# ═══════════ VOC DB(只读) ═══════════
-
-def get_voc_conn(voc_research_id: str) -> sqlite3.Connection:
-    """只读访问共享 VOC 数据"""
-    if not VOC_DATA_DIR.exists():
-        raise FileNotFoundError(f"VOC 数据目录不存在: {VOC_DATA_DIR}")
-    path = VOC_DATA_DIR / f"{voc_research_id}.db"
-    if not path.exists():
-        raise FileNotFoundError(f"VOC 研究 {voc_research_id} 不存在")
-    conn = sqlite3.connect(f"file:{path}?mode=ro", uri=True)
-    conn.row_factory = sqlite3.Row
-    return conn
-
-
-def list_voc_researches() -> list[dict]:
-    """列出共享 VOC 数据层中的所有研究"""
-    if not VOC_DATA_DIR.exists():
-        return []
-    researches = []
-    for db_file in sorted(VOC_DATA_DIR.glob("*.db")):
-        if db_file.name in ("global_cache.db", "agent_sessions.db"):
-            continue
-        rid = db_file.stem
-        try:
-            conn = sqlite3.connect(f"file:{db_file}?mode=ro", uri=True)
-            conn.row_factory = sqlite3.Row
-            card = conn.execute("SELECT brand_name FROM research_card LIMIT 1").fetchone()
-            comment_count = conn.execute(
-                "SELECT count(*) FROM comments WHERE length(text) > 10"
-            ).fetchone()[0]
-            conn.close()
-            if card and comment_count > 0:
-                researches.append({
-                    "research_id": rid,
-                    "brand_name": card["brand_name"],
-                    "comment_count": comment_count,
-                })
-        except Exception:
-            pass
-    return researches
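A note on the new comments table: UNIQUE(voc_id) is what makes repeated /import-voc runs idempotent, because the endpoint writes with INSERT OR IGNORE and duplicate voc_ids are silently skipped. One caveat: SQLite treats NULLs as distinct in UNIQUE constraints, so items arriving without an id would bypass the guard. A self-contained sketch of the behaviour, with made-up rows:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE TABLE comments (id INTEGER PRIMARY KEY AUTOINCREMENT,"
        " voc_id INTEGER, text TEXT NOT NULL, UNIQUE(voc_id))"
    )
    for _ in range(2):  # simulate importing the same page twice
        conn.execute("INSERT OR IGNORE INTO comments (voc_id, text) VALUES (?, ?)", (101, "demo"))
    print(conn.execute("SELECT count(*) FROM comments").fetchone()[0])  # 1, not 2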
diff --git a/backend/requirements.txt b/backend/requirements.txt
index a4e8c28..6bd9960 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -5,3 +5,4 @@ python-dotenv>=1.0.0
 numpy>=1.24.0
 scikit-learn>=1.3.0
 gunicorn>=21.2.0
+httpx>=0.27.0

diff --git a/backend/server.py b/backend/server.py
index 88bc463..d5296ab 100644
--- a/backend/server.py
+++ b/backend/server.py
@@ -1,12 +1,12 @@
 """
-黑手党提案 — 独立后端
+黑手党提案 — 独立后端(阿里云内闭环)
 
 FastAPI 服务,端口 8093。
-数据来源:只读访问共享 VOC 数据层。
-分析结果:存自己的案例 DB。
+VOC 数据通过公网 API 导入,不直读 VOC DB。
 """
 import os
 import logging
+import httpx
 
 from fastapi import FastAPI, Header, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
@@ -15,15 +12,12 @@ from dotenv import load_dotenv
 load_dotenv()
 
-from db import (
-    get_case_conn, get_voc_conn, init_case_db,
-    list_cases as _list_cases, list_voc_researches as _list_voc_researches,
-)
+from db import get_case_conn, init_case_db, list_cases as _list_cases, DATA_DIR, VOC_API_BASE
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
 logger = logging.getLogger("mafia")
 
-app = FastAPI(title="黑手党提案后端", version="1.0.0", description="独立后端:共享 VOC 数据层 + 自有分析存储")
+app = FastAPI(title="黑手党提案后端", version="2.0.0", description="独立后端:阿里云内闭环,VOC 通过 API 导入")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -70,18 +67,23 @@ async def get_case(case_id: str):
     try:
         with get_case_conn(case_id) as conn:
             card = conn.execute("SELECT * FROM case_card LIMIT 1").fetchone()
+            comment_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
             ude_count = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
             cluster_count = conn.execute("SELECT count(*) FROM ude_clusters").fetchone()[0]
         if not card:
             raise HTTPException(404, "案例不存在")
-        return {"caseId": case_id, **dict(card), "udeCount": ude_count, "clusterCount": cluster_count}
+        return {
+            "caseId": case_id, **dict(card),
+            "commentCount": comment_count,
+            "udeCount": ude_count,
+            "clusterCount": cluster_count,
+        }
     except FileNotFoundError:
         raise HTTPException(404, "案例不存在")
 
 
 @app.delete("/api/cases/{case_id}")
 async def delete_case(case_id: str):
-    from db import DATA_DIR
     path = DATA_DIR / f"{case_id}.db"
     if path.exists():
         path.unlink()
@@ -89,60 +91,107 @@
     raise HTTPException(404, "案例不存在")
 
 
-# ═══════════ VOC 关联 ═══════════
+# ═══════════ VOC 导入(跨云 API) ═══════════
 
 @app.post("/api/cases/{case_id}/link-voc")
 async def link_voc(case_id: str, req: LinkVocRequest):
-    """关联 VOC 研究 ID(验证 VOC 研究存在后再写入)"""
-    try:
-        with get_voc_conn(req.vocResearchId) as voc:
-            count = voc.execute(
-                "SELECT count(*) FROM comments WHERE length(text) > 10 "
-            ).fetchone()[0]
-    except FileNotFoundError as e:
-        raise HTTPException(404, str(e))
-
+    """关联 VOC 研究 ID"""
     try:
         with get_case_conn(case_id) as conn:
             conn.execute("UPDATE case_card SET voc_research_id = ?", (req.vocResearchId,))
             conn.commit()
     except FileNotFoundError:
         raise HTTPException(404, "案例不存在")
-
-    return {"linked": True, "vocCommentCount": count}
+    return {"linked": True, "vocResearchId": req.vocResearchId}
 
 
-@app.get("/api/voc/researches")
-async def get_voc_researches():
-    return _list_voc_researches()
-
-
-@app.get("/api/cases/{case_id}/voc-comments")
-async def get_voc_comments(case_id: str, page: int = 1, pageSize: int = 50):
-    """从共享 VOC 数据层只读获取原始评论"""
+@app.post("/api/cases/{case_id}/import-voc")
+async def import_voc(case_id: str, page: int = Query(1), pageSize: int = Query(100)):
+    """从 VOC 公网 API 拉取评论数据,存入本地案例 DB"""
     try:
         with get_case_conn(case_id) as conn:
-            card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
+            card = conn.execute("SELECT voc_research_id, voc_api_base FROM case_card LIMIT 1").fetchone()
     except FileNotFoundError:
         raise HTTPException(404, "案例不存在")
 
     if not card or not card["voc_research_id"]:
-        raise HTTPException(400, "未关联 VOC 研究")
+        raise HTTPException(400, "未关联 VOC 研究,请先调用 link-voc")
 
+    voc_rid = card["voc_research_id"]
+    api_base = card["voc_api_base"] or VOC_API_BASE
+
+    # 从 VOC API 拉取(只读,不需要 TikHub Key)
+    total_imported = 0
+    current_page = page
+
+    async with httpx.AsyncClient(timeout=30) as client:
+        while True:
+            url = f"{api_base}/{voc_rid}/voc-list?page={current_page}&page_size={pageSize}"
+            try:
+                resp = await client.get(url)
+                if resp.status_code != 200:
+                    logger.warning(f"[Import] VOC API 返回 {resp.status_code}: {resp.text[:100]}")
+                    break
+                data = resp.json()
+            except Exception as e:
+                logger.error(f"[Import] VOC API 请求失败: {e}")
+                break
+
+            items = data.get("items") or data.get("data") or []
+            if not items:
+                break
+
+            with get_case_conn(case_id) as conn:
+                for item in items:
+                    text = item.get("text", "")
+                    if len(text) < 10:
+                        continue
+                    try:
+                        conn.execute(
+                            "INSERT OR IGNORE INTO comments (voc_id, platform, text, like_count, published_at) VALUES (?,?,?,?,?)",
+                            (
+                                item.get("id"),
+                                item.get("platform", ""),
+                                text,
+                                item.get("like_count", 0),
+                                item.get("published_at", ""),
+                            )
+                        )
+                        total_imported += 1
+                    except Exception:
+                        pass
+                conn.commit()
+
+            total = data.get("total", 0)
+            if current_page * pageSize >= total:
+                break
+            current_page += 1
+
+    # 更新统计
+    with get_case_conn(case_id) as conn:
+        local_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
+
+    return {
+        "imported": total_imported,
+        "totalLocal": local_count,
+        "vocResearchId": voc_rid,
+        "pagesProcessed": current_page - page + 1,
+    }
+
+
+@app.get("/api/cases/{case_id}/comments")
+async def get_comments(case_id: str, page: int = 1, pageSize: int = 50):
+    """查看本地导入的评论"""
     try:
-        with get_voc_conn(card["voc_research_id"]) as voc:
-            total = voc.execute(
-                "SELECT count(*) FROM comments WHERE length(text) > 10 "
-            ).fetchone()[0]
-            rows = voc.execute("""
-                SELECT id, platform, text, like_count, published_at
-                FROM comments WHERE length(text) > 10
-                ORDER BY like_count DESC
+        with get_case_conn(case_id) as conn:
+            total = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
+            rows = conn.execute("""
+                SELECT id, voc_id, platform, text, like_count, published_at
+                FROM comments ORDER BY like_count DESC
                 LIMIT ? OFFSET ?
             """, (pageSize, (page - 1) * pageSize)).fetchall()
-    except FileNotFoundError as e:
-        raise HTTPException(404, str(e))
-
+    except FileNotFoundError:
+        raise HTTPException(404, "案例不存在")
     return {"total": total, "page": page, "items": [dict(r) for r in rows]}
@@ -200,11 +249,11 @@ async def get_coverage(case_id: str):
 
 
 @app.get("/api/health")
 async def health():
-    from db import VOC_DATA_DIR, DATA_DIR
     return {
         "status": "ok",
-        "vocDataDir": str(VOC_DATA_DIR),
-        "vocDataExists": VOC_DATA_DIR.exists(),
+        "version": "2.0.0",
+        "architecture": "independent (Aliyun self-contained)",
+        "vocApiBase": VOC_API_BASE,
         "caseDataDir": str(DATA_DIR),
     }
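The import loop above stops on any of three conditions: a non-200 response, an empty items array, or current_page * pageSize >= total. Note that if the upstream response ever omits total, data.get("total", 0) evaluates to 0 and the size check fires right after the first page, so only page one gets imported; the empty-items check is the fallback path. The paging logic in isolation, with the HTTP call replaced by a stub (fetch is a placeholder):

    def pages(fetch, page=1, page_size=100):
        """Yield item batches until the API signals the end (mirrors import_voc's loop)."""
        while True:
            data = fetch(page, page_size)  # stand-in for the VOC API request
            items = data.get("items") or data.get("data") or []
            if not items:
                break
            yield items
            if page * page_size >= data.get("total", 0):
                break  # also fires immediately when "total" is missing
            page += 1

Also worth knowing when reading the response: imported counts attempted inserts, so rows skipped by INSERT OR IGNORE on a re-run are still counted; totalLocal is the authoritative number.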
diff --git a/backend/tools/ude_extract.py b/backend/tools/ude_extract.py
index d55c639..7622a89 100644
--- a/backend/tools/ude_extract.py
+++ b/backend/tools/ude_extract.py
@@ -1,10 +1,9 @@
 """
-黑手党提案 — UDE 提取工具
+黑手党提案 — UDE 提取工具(阿里云内闭环)
 
-流程:VOC 原始评论 → LLM 转写 UDE → DashScope 向量化 → DBSCAN 聚类 → 覆盖扫描
+流程:本地 comments → LLM 转写 UDE → DashScope 向量化 → DBSCAN 聚类
 
-数据来源:只读访问共享 VOC 数据层
-分析结果:写入本项目的案例 DB
+所有数据读写都在案例 DB 内,不跨云。
 """
 from __future__ import annotations
@@ -47,7 +46,7 @@ def _get_embed_client(key: str) -> OpenAI:
     )
 
 
-# ═══════════ Step 1: VOC → UDE 转写 ═══════════
+# ═══════════ Step 1: 本地评论 → UDE 转写 ═══════════
 
 async def _call_ude_llm(prompt: str, comments: list[dict]) -> list[dict]:
     """单批 LLM 转写"""
@@ -87,40 +86,33 @@ async def _process_ude_batch(comments, prompt, semaphore):
 
 async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
-    """从共享 VOC 数据读取原始评论,转写为 UDE,存入案例 DB"""
-    from db import get_case_conn, get_voc_conn
+    """从本地 comments 表读取评论,转写为 UDE,存入 ude_sentences"""
+    from db import get_case_conn
 
     prompt = PROMPT_PATH.read_text("utf-8") if PROMPT_PATH.exists() else ""
     if not prompt:
         return {"error": "UDE 转写 prompt 未找到 (prompts/voc_to_ude.txt)"}
 
-    with get_case_conn(case_id) as case_conn:
-        card = case_conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
-        if not card or not card["voc_research_id"]:
-            return {"error": "未关联 VOC 研究。请先调用 link-voc。"}
-
-        voc_research_id = card["voc_research_id"]
-
-        # 获取已转写的 voc_comment_ids
-        done_ids = {r[0] for r in case_conn.execute(
-            "SELECT voc_comment_id FROM ude_sentences"
+    with get_case_conn(case_id) as conn:
+        # 获取已转写的 comment_ids
+        done_ids = {r[0] for r in conn.execute(
+            "SELECT comment_id FROM ude_sentences"
         ).fetchall()}
 
-    # 从 VOC DB 只读获取原始评论
-    with get_voc_conn(voc_research_id) as voc_conn:
-        rows = voc_conn.execute("""
+        # 从本地 comments 表读取
+        rows = conn.execute("""
             SELECT id, platform, text
-            FROM comments
-            WHERE length(text) > 10
+            FROM comments WHERE length(text) > 10
             ORDER BY id
         """).fetchall()
 
-    # 过滤已完成的
+    total_comments = len(rows)
     pending = [r for r in rows if r["id"] not in done_ids]
+
     if not pending:
         with get_case_conn(case_id) as conn:
             total = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
-        return {"message": "全部已转写完成", "total_udes": total, "new": 0}
+        return {"message": "全部已转写完成", "totalUdes": total, "new": 0}
 
     if limit > 0:
         pending = pending[:limit]
@@ -137,7 +129,7 @@ async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
 
     # 写入案例 DB
     ok = 0
-    with get_case_conn(case_id) as case_conn:
+    with get_case_conn(case_id) as conn:
         for results in all_results:
             for r in (results or []):
                 if not isinstance(r, dict):
@@ -149,21 +141,21 @@ async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
                 if not cid:
                     continue
                 try:
-                    case_conn.execute(
-                        "INSERT OR IGNORE INTO ude_sentences (voc_comment_id, ude_text, confidence) VALUES (?, ?, ?)",
+                    conn.execute(
+                        "INSERT OR IGNORE INTO ude_sentences (comment_id, ude_text, confidence) VALUES (?, ?, ?)",
                         (int(cid), ude_text, r.get("confidence", 0.5))
                     )
                     ok += 1
                 except Exception as e:
                     logger.warning(f"[UDE] 写入失败 id={cid}: {e}")
-        case_conn.commit()
-        total = case_conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
+        conn.commit()
+        total = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
 
     return {
-        "new_udes": ok,
-        "total_udes": total,
-        "total_voc_comments": len(rows),
-        "remaining": len(rows) - total,
+        "newUdes": ok,
+        "totalUdes": total,
+        "totalComments": total_comments,
+        "remaining": total_comments - total,
         "batches": len(batches),
     }
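Extraction is resumable by construction: done_ids holds every comment_id that already has a UDE row, pending filters those comments out, and limit lets a large backlog be processed in slices, so re-running the step never re-transcribes a comment. The remaining counter reads as "comments left" only while each comment yields at most one stored UDE row. A toy illustration of the resume filter, with made-up ids:

    done_ids = {1, 2}                                   # already transcribed
    rows = [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}]
    pending = [r for r in rows if r["id"] not in done_ids]
    print([r["id"] for r in pending])                   # [3, 4]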
@@ -181,10 +173,10 @@ def _embed_texts(client: OpenAI, texts: list[str]) -> list[list[float]]:
 
 def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
                    dashscope_key: str = None) -> dict:
-    """向量化 + DBSCAN 聚类"""
+    """向量化 + DBSCAN 聚类(全部在本地案例 DB 内)"""
     from sklearn.cluster import DBSCAN
     from sklearn.metrics.pairwise import cosine_distances
-    from db import get_case_conn, get_voc_conn
+    from db import get_case_conn
 
     key = dashscope_key or os.getenv("DASHSCOPE_API_KEY", "")
     if not key:
@@ -193,13 +185,13 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
     embed_client = _get_embed_client(key)
 
     with get_case_conn(case_id) as conn:
-        rows = conn.execute("SELECT id, voc_comment_id, ude_text FROM ude_sentences ORDER BY id").fetchall()
+        rows = conn.execute("SELECT id, comment_id, ude_text FROM ude_sentences ORDER BY id").fetchall()
         if len(rows) < min_samples:
            return {"error": f"UDE 不足 ({len(rows)} 条),至少需要 {min_samples} 条。"}
 
         ude_texts = [r["ude_text"] for r in rows]
         ude_ids = [r["id"] for r in rows]
-        comment_ids = [r["voc_comment_id"] for r in rows]
+        comment_ids = [r["comment_id"] for r in rows]
 
         # 向量化
         vectors = _embed_texts(embed_client, ude_texts)
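The clustering call itself is unchanged by this patch, so the diff skips over it; from the imports and the eps/min_samples parameters, the shape of that step is DBSCAN over a precomputed cosine-distance matrix, roughly as sketched below (vectors stands for the embeddings returned by _embed_texts):

    import numpy as np
    from sklearn.cluster import DBSCAN
    from sklearn.metrics.pairwise import cosine_distances

    X = np.asarray(vectors)   # one embedding row per UDE sentence
    D = cosine_distances(X)   # pairwise cosine distances
    labels = DBSCAN(eps=0.25, min_samples=3, metric="precomputed").fit_predict(D)
    # labels[i] is the cluster index of UDE i; -1 marks noise

With metric="precomputed", eps acts as a cosine-distance threshold (0.25 means "fairly similar"), which matches the later treatment of labels == -1 as noise.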
@@ -223,10 +215,6 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
         # 清空旧聚类,写入新聚类
         conn.execute("DELETE FROM ude_clusters")
 
-        # 获取关联的 VOC research_id 用于读取原声
-        card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
-        voc_rid = card["voc_research_id"] if card else None
-
         clusters = []
         unique_labels = sorted(set(labels) - {-1})
@@ -241,30 +229,24 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
             dists = cosine_distances([centroid], member_vectors)[0]
             representative = member_texts[dists.argmin()]
 
-            # 取原声
+            # 原声采样(从本地 comments 表)
             sample_voices = []
-            if voc_rid:
-                try:
-                    voc_conn = get_voc_conn(voc_rid)
-                    for cid in member_cids[:5]:
-                        voice = voc_conn.execute(
-                            "SELECT text, platform FROM comments WHERE id = ?", (cid,)
-                        ).fetchone()
-                        if voice:
-                            sample_voices.append({"text": voice["text"][:200], "platform": voice["platform"]})
-                    voc_conn.close()
-                except Exception:
-                    pass
+            for cid in member_cids[:5]:
+                voice = conn.execute(
+                    "SELECT text, platform FROM comments WHERE id = ?", (cid,)
+                ).fetchone()
+                if voice:
+                    sample_voices.append({"text": voice["text"][:200], "platform": voice["platform"]})
 
             conn.execute(
                 "INSERT INTO ude_clusters (representative_ude, coverage, sample_voices) VALUES (?, ?, ?)",
                 (representative, len(member_indices), json.dumps(sample_voices, ensure_ascii=False))
             )
             clusters.append({
-                "cluster_id": int(cluster_id),
-                "representative_ude": representative,
+                "clusterId": int(cluster_id),
+                "representativeUde": representative,
                 "coverage": len(member_indices),
-                "sample_voices": sample_voices,
+                "sampleVoices": sample_voices,
             })
 
         conn.commit()
@@ -272,24 +254,22 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
     noise_count = int((labels == -1).sum())
 
     return {
-        "total_udes": len(labels),
-        "num_clusters": len(clusters),
-        "noise_count": noise_count,
-        "noise_pct": round(noise_count / len(labels) * 100, 1) if len(labels) else 0,
+        "totalUdes": len(labels),
+        "numClusters": len(clusters),
+        "noiseCount": noise_count,
+        "noisePct": round(noise_count / len(labels) * 100, 1) if len(labels) else 0,
         "clusters": clusters,
-        "params": {"eps": eps, "min_samples": min_samples},
+        "params": {"eps": eps, "minSamples": min_samples},
     }
 
 
-# ═══════════ Step 5: 覆盖扫描 ═══════════
+# ═══════════ 覆盖扫描 ═══════════
 
 def run_coverage_scan(case_id: str) -> dict:
-    from db import get_case_conn, get_voc_conn
+    from db import get_case_conn
 
     with get_case_conn(case_id) as conn:
-        card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
-        voc_rid = card["voc_research_id"] if card else None
-
+        total_comments = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
         total_udes = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
         clustered = conn.execute("SELECT count(*) FROM ude_sentences WHERE cluster_id >= 0").fetchone()[0]
         noise = conn.execute("SELECT count(*) FROM ude_sentences WHERE cluster_id = -1").fetchone()[0]
@@ -299,27 +279,17 @@ def run_coverage_scan(case_id: str) -> dict:
         cluster_stats = [dict(r) for r in conn.execute(
             "SELECT representative_ude, coverage FROM ude_clusters ORDER BY coverage DESC"
         ).fetchall()]
 
         noise_samples = [dict(r) for r in conn.execute(
-            "SELECT ude_text, voc_comment_id, confidence FROM ude_sentences WHERE cluster_id = -1 ORDER BY confidence DESC LIMIT 10"
+            "SELECT ude_text, comment_id, confidence FROM ude_sentences WHERE cluster_id = -1 ORDER BY confidence DESC LIMIT 10"
         ).fetchall()]
 
-    total_voc = 0
-    if voc_rid:
-        try:
-            with get_voc_conn(voc_rid) as voc:
-                total_voc = voc.execute(
-                    "SELECT count(*) FROM comments WHERE length(text) > 10 "
-                ).fetchone()[0]
-        except Exception:
-            pass
-
     return {
-        "total_voc_comments": total_voc,
-        "total_udes": total_udes,
-        "udes_clustered": clustered,
-        "udes_noise": noise,
-        "coverage_rate": round(clustered / total_voc * 100, 1) if total_voc else 0,
-        "cluster_distribution": cluster_stats,
-        "noise_samples": noise_samples,
+        "totalComments": total_comments,
+        "totalUdes": total_udes,
+        "udesClustered": clustered,
+        "udesNoise": noise,
+        "coverageRate": round(clustered / total_comments * 100, 1) if total_comments else 0,
+        "clusterDistribution": cluster_stats,
+        "noiseSamples": noise_samples,
         "verdict": "充分" if (total_udes > 0 and noise / total_udes < 0.1) else ("需关注" if (total_udes > 0 and noise / total_udes < 0.2) else "需调参"),
     }
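Two reading notes on the coverage report. The verdict is a pure noise-ratio bucket; the same rule in isolation:

    def verdict(total_udes: int, noise: int) -> str:
        if total_udes > 0 and noise / total_udes < 0.1:
            return "充分"    # sufficient: under 10% of UDEs are noise
        if total_udes > 0 and noise / total_udes < 0.2:
            return "需关注"  # borderline: 10-20% noise
        return "需调参"      # retune eps / min_samples

And coverageRate divides clustered UDE sentences by total comments, so it mixes two units (sentences vs. comments); treat it as a rough proxy rather than a strict per-comment coverage figure.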