From c5e2a58258c330ba4b1f1cc263bafc0f2ff0ec1c Mon Sep 17 00:00:00 2001
From: lidf
Date: Tue, 7 Apr 2026 19:47:34 +0800
Subject: [PATCH] refactor: v2.0 fully decoupled, self-contained on Aliyun
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove VOC_DATA_DIR / get_voc_conn (no more cross-cloud direct reads of SQLite)
- Case DB carries its own comments table; each case is fully self-contained
- Add POST /import-voc: imports comments through the public VOC API
- VOC_API_BASE environment variable controls the API address
- Add httpx dependency
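Smoke test, as a minimal sketch of the new flow (the host assumes PORT=8093 from .env; the case id and research id are placeholders, and the case itself is assumed to already exist):

    import httpx

    BASE = "http://127.0.0.1:8093/api"
    cid = "case_xxx"  # placeholder: an existing case id

    # 1) point the case at a VOC research (placeholder id)
    httpx.post(f"{BASE}/cases/{cid}/link-voc", json={"vocResearchId": "r_xxx"})
    # 2) pull comments from the public VOC API into the local case DB
    print(httpx.post(f"{BASE}/cases/{cid}/import-voc", timeout=300).json())
    # 3) read back the local copy
    print(httpx.get(f"{BASE}/cases/{cid}/comments").json()["total"])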
---
 backend/.env.example         |  22 ++++--
 backend/db.py                |  81 +++++++-------
 backend/requirements.txt     |   1 +
 backend/server.py            | 141 +++++++++++++++++++++++------------
 backend/tools/ude_extract.py | 140 ++++++++++++++--------------------
 5 files changed, 194 insertions(+), 191 deletions(-)

diff --git a/backend/.env.example b/backend/.env.example
index e972ce0..bcbc865 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -1,13 +1,19 @@
-# LLM(通过 LiteLLM 网关)
+# 黑手党提案后端 v2.0 — 环境变量
+# 完全独立,阿里云内闭环
+
+# VOC 公网 API(跨云只读访问,用于 import-voc)
+VOC_API_BASE=https://brand.brainwork.club/voc/api/research
+
+# LLM 路由(走同机 LiteLLM)
 LITELLM_PROXY_URL=http://127.0.0.1:4000/v1
-LITELLM_MASTER_KEY=
+LITELLM_MASTER_KEY=sk-xxx
+
+# 模型
 MODEL_ID=qwen-plus
+TEMPERATURE=0.1
 
-# 向量化(DashScope text-embedding-v4)
-DASHSCOPE_API_KEY=
+# DashScope(向量化用)
+DASHSCOPE_API_KEY=sk-xxx
 
-# 共享 VOC 数据层
-VOC_DATA_DIR=/opt/apps/voc-researcher/data
-
-# 服务
+# 端口
 PORT=8093

diff --git a/backend/db.py b/backend/db.py
index ce4a8ee..1f04293 100644
--- a/backend/db.py
+++ b/backend/db.py
@@ -1,9 +1,11 @@
 """
-黑手党提案 — 数据库管理
+黑手党提案 — 数据库管理(完全独立,阿里云内闭环)
 
-双库设计:
-  1. 案例 DB(读写):每个提案案例一个 SQLite,存分析结果
-  2. VOC DB(只读):读取共享 VOC 数据层的原始评论
+每个提案案例一个 SQLite 文件,自包含所有数据:
+  - case_card:案例元信息
+  - comments:从 VOC API 导入的评论(本地副本)
+  - ude_sentences / ude_clusters:UDE 分析结果
+  - conflicts / proposal_sections:后续阶段
 """
 import os
 import sqlite3
@@ -17,10 +19,11 @@ load_dotenv()
 
 DATA_DIR = Path(__file__).parent / "data"
 DATA_DIR.mkdir(exist_ok=True)
 
-VOC_DATA_DIR = Path(os.getenv("VOC_DATA_DIR", ""))
+# VOC 公网 API(腾讯云,跨云只读访问)
+VOC_API_BASE = os.getenv("VOC_API_BASE", "https://brand.brainwork.club/voc/api/research")
 
-# ═══════════ 案例 DB(读写) ═══════════
+# ═══════════ Schema ═══════════
 
 CASE_SCHEMA = """
 CREATE TABLE IF NOT EXISTS case_card (
@@ -29,13 +32,25 @@ CREATE TABLE IF NOT EXISTS case_card (
     focus_product TEXT,
     competitors TEXT,
     voc_research_id TEXT,
+    voc_api_base TEXT,
     created_at TEXT DEFAULT (datetime('now')),
     status TEXT DEFAULT 'draft'
 );
 
+CREATE TABLE IF NOT EXISTS comments (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    voc_id INTEGER,
+    platform TEXT,
+    text TEXT NOT NULL,
+    like_count INTEGER DEFAULT 0,
+    published_at TEXT,
+    imported_at TEXT DEFAULT (datetime('now')),
+    UNIQUE(voc_id)
+);
+
 CREATE TABLE IF NOT EXISTS ude_sentences (
     id INTEGER PRIMARY KEY AUTOINCREMENT,
-    voc_comment_id INTEGER,
+    comment_id INTEGER REFERENCES comments(id),
     ude_text TEXT NOT NULL,
     confidence REAL DEFAULT 0.5,
     vector TEXT,
@@ -73,8 +88,10 @@ CREATE TABLE IF NOT EXISTS proposal_sections (
 """
 
 
+# ═══════════ 案例 DB ═══════════
+
 def get_case_conn(case_id: str) -> sqlite3.Connection:
-    """获取案例 DB 连接(读写)"""
+    """获取案例 DB 连接"""
     path = DATA_DIR / f"{case_id}.db"
     if not path.exists():
         raise FileNotFoundError(f"案例 {case_id} 不存在")
@@ -93,8 +110,8 @@ def init_case_db(brand_name: str, category: str = "", focus_product: str = "",
     conn.row_factory = sqlite3.Row
     conn.executescript(CASE_SCHEMA)
     conn.execute(
-        "INSERT INTO case_card (brand_name, category, focus_product, competitors, voc_research_id) VALUES (?,?,?,?,?)",
-        (brand_name, category, focus_product, competitors, voc_research_id)
+        "INSERT INTO case_card (brand_name, category, focus_product, competitors, voc_research_id, voc_api_base) VALUES (?,?,?,?,?,?)",
+        (brand_name, category, focus_product, competitors, voc_research_id, VOC_API_BASE)
     )
     conn.commit()
     conn.close()
@@ -111,11 +128,13 @@ def list_cases() -> list[dict]:
             conn.row_factory = sqlite3.Row
             card = conn.execute("SELECT * FROM case_card LIMIT 1").fetchone()
             if card:
+                comment_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
                 ude_count = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
                 cluster_count = conn.execute("SELECT count(*) FROM ude_clusters").fetchone()[0]
                 cases.append({
                     "case_id": case_id,
                     **dict(card),
+                    "comment_count": comment_count,
                     "ude_count": ude_count,
                     "cluster_count": cluster_count,
                 })
@@ -123,45 +142,3 @@ def list_cases() -> list[dict]:
         except Exception:
             pass
     return cases
-
-
-# ═══════════ VOC DB(只读) ═══════════
-
-def get_voc_conn(voc_research_id: str) -> sqlite3.Connection:
-    """只读访问共享 VOC 数据"""
-    if not VOC_DATA_DIR.exists():
-        raise FileNotFoundError(f"VOC 数据目录不存在: {VOC_DATA_DIR}")
-    path = VOC_DATA_DIR / f"{voc_research_id}.db"
-    if not path.exists():
-        raise FileNotFoundError(f"VOC 研究 {voc_research_id} 不存在")
-    conn = sqlite3.connect(f"file:{path}?mode=ro", uri=True)
-    conn.row_factory = sqlite3.Row
-    return conn
-
-
-def list_voc_researches() -> list[dict]:
-    """列出共享 VOC 数据层中的所有研究"""
-    if not VOC_DATA_DIR.exists():
-        return []
-    researches = []
-    for db_file in sorted(VOC_DATA_DIR.glob("*.db")):
-        if db_file.name in ("global_cache.db", "agent_sessions.db"):
-            continue
-        rid = db_file.stem
-        try:
-            conn = sqlite3.connect(f"file:{db_file}?mode=ro", uri=True)
-            conn.row_factory = sqlite3.Row
-            card = conn.execute("SELECT brand_name FROM research_card LIMIT 1").fetchone()
-            comment_count = conn.execute(
-                "SELECT count(*) FROM comments WHERE length(text) > 10"
-            ).fetchone()[0]
-            conn.close()
-            if card and comment_count > 0:
-                researches.append({
-                    "research_id": rid,
-                    "brand_name": card["brand_name"],
-                    "comment_count": comment_count,
-                })
-        except Exception:
-            pass
-    return researches
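A note on the new comments table: UNIQUE(voc_id) is what makes repeated /import-voc runs idempotent, because the endpoint writes with INSERT OR IGNORE and duplicate voc_ids are silently skipped. One caveat: SQLite treats NULLs as distinct in UNIQUE constraints, so items arriving without an id would bypass the guard. A self-contained sketch of the behaviour, with made-up rows:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE TABLE comments (id INTEGER PRIMARY KEY AUTOINCREMENT,"
        " voc_id INTEGER, text TEXT NOT NULL, UNIQUE(voc_id))"
    )
    for _ in range(2):  # simulate importing the same page twice
        conn.execute("INSERT OR IGNORE INTO comments (voc_id, text) VALUES (?, ?)", (101, "demo"))
    print(conn.execute("SELECT count(*) FROM comments").fetchone()[0])  # 1, not 2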
diff --git a/backend/requirements.txt b/backend/requirements.txt
index a4e8c28..6bd9960 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -5,3 +5,4 @@ python-dotenv>=1.0.0
 numpy>=1.24.0
 scikit-learn>=1.3.0
 gunicorn>=21.2.0
+httpx>=0.27.0

diff --git a/backend/server.py b/backend/server.py
index 88bc463..d5296ab 100644
--- a/backend/server.py
+++ b/backend/server.py
@@ -1,12 +1,12 @@
 """
-黑手党提案 — 独立后端
+黑手党提案 — 独立后端(阿里云内闭环)
 
 FastAPI 服务,端口 8093。
-数据来源:只读访问共享 VOC 数据层。
-分析结果:存自己的案例 DB。
+VOC 数据通过公网 API 导入,不直读 VOC DB。
 """
 import os
 import logging
+import httpx
 
 from fastapi import FastAPI, Header, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
@@ -15,15 +12,12 @@ from dotenv import load_dotenv
 load_dotenv()
 
-from db import (
-    get_case_conn, get_voc_conn, init_case_db,
-    list_cases as _list_cases, list_voc_researches as _list_voc_researches,
-)
+from db import get_case_conn, init_case_db, list_cases as _list_cases, DATA_DIR, VOC_API_BASE
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
 logger = logging.getLogger("mafia")
 
-app = FastAPI(title="黑手党提案后端", version="1.0.0", description="独立后端:共享 VOC 数据层 + 自有分析存储")
+app = FastAPI(title="黑手党提案后端", version="2.0.0", description="独立后端:阿里云内闭环,VOC 通过 API 导入")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -70,18 +67,23 @@ async def get_case(case_id: str):
     try:
         with get_case_conn(case_id) as conn:
             card = conn.execute("SELECT * FROM case_card LIMIT 1").fetchone()
+            comment_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
             ude_count = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
             cluster_count = conn.execute("SELECT count(*) FROM ude_clusters").fetchone()[0]
         if not card:
             raise HTTPException(404, "案例不存在")
-        return {"caseId": case_id, **dict(card), "udeCount": ude_count, "clusterCount": cluster_count}
+        return {
+            "caseId": case_id, **dict(card),
+            "commentCount": comment_count,
+            "udeCount": ude_count,
+            "clusterCount": cluster_count,
+        }
     except FileNotFoundError:
         raise HTTPException(404, "案例不存在")
 
 
 @app.delete("/api/cases/{case_id}")
 async def delete_case(case_id: str):
-    from db import DATA_DIR
     path = DATA_DIR / f"{case_id}.db"
     if path.exists():
         path.unlink()
@@ -89,60 +91,107 @@
     raise HTTPException(404, "案例不存在")
 
 
-# ═══════════ VOC 关联 ═══════════
+# ═══════════ VOC 导入(跨云 API) ═══════════
 
 @app.post("/api/cases/{case_id}/link-voc")
 async def link_voc(case_id: str, req: LinkVocRequest):
-    """关联 VOC 研究 ID(验证 VOC 研究存在后再写入)"""
-    try:
-        with get_voc_conn(req.vocResearchId) as voc:
-            count = voc.execute(
-                "SELECT count(*) FROM comments WHERE length(text) > 10 "
-            ).fetchone()[0]
-    except FileNotFoundError as e:
-        raise HTTPException(404, str(e))
-
+    """关联 VOC 研究 ID"""
     try:
         with get_case_conn(case_id) as conn:
             conn.execute("UPDATE case_card SET voc_research_id = ?", (req.vocResearchId,))
             conn.commit()
     except FileNotFoundError:
         raise HTTPException(404, "案例不存在")
-
-    return {"linked": True, "vocCommentCount": count}
+    return {"linked": True, "vocResearchId": req.vocResearchId}
 
 
-@app.get("/api/voc/researches")
-async def get_voc_researches():
-    return _list_voc_researches()
-
-
-@app.get("/api/cases/{case_id}/voc-comments")
-async def get_voc_comments(case_id: str, page: int = 1, pageSize: int = 50):
-    """从共享 VOC 数据层只读获取原始评论"""
+@app.post("/api/cases/{case_id}/import-voc")
+async def import_voc(case_id: str, page: int = Query(1), pageSize: int = Query(100)):
+    """从 VOC 公网 API 拉取评论数据,存入本地案例 DB"""
     try:
         with get_case_conn(case_id) as conn:
-            card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
+            card = conn.execute("SELECT voc_research_id, voc_api_base FROM case_card LIMIT 1").fetchone()
     except FileNotFoundError:
         raise HTTPException(404, "案例不存在")
 
     if not card or not card["voc_research_id"]:
-        raise HTTPException(400, "未关联 VOC 研究")
+        raise HTTPException(400, "未关联 VOC 研究,请先调用 link-voc")
 
+    voc_rid = card["voc_research_id"]
+    api_base = card["voc_api_base"] or VOC_API_BASE
+
+    # 从 VOC API 拉取(只读,不需要 TikHub Key)
+    total_imported = 0
+    current_page = page
+
+    async with httpx.AsyncClient(timeout=30) as client:
+        while True:
+            url = f"{api_base}/{voc_rid}/voc-list?page={current_page}&page_size={pageSize}"
+            try:
+                resp = await client.get(url)
+                if resp.status_code != 200:
+                    logger.warning(f"[Import] VOC API 返回 {resp.status_code}: {resp.text[:100]}")
+                    break
+                data = resp.json()
+            except Exception as e:
+                logger.error(f"[Import] VOC API 请求失败: {e}")
+                break
+
+            items = data.get("items") or data.get("data") or []
+            if not items:
+                break
+
+            with get_case_conn(case_id) as conn:
+                for item in items:
+                    text = item.get("text", "")
+                    if len(text) < 10:
+                        continue
+                    try:
+                        conn.execute(
+                            "INSERT OR IGNORE INTO comments (voc_id, platform, text, like_count, published_at) VALUES (?,?,?,?,?)",
+                            (
+                                item.get("id"),
+                                item.get("platform", ""),
+                                text,
+                                item.get("like_count", 0),
+                                item.get("published_at", ""),
+                            )
+                        )
+                        total_imported += 1
+                    except Exception:
+                        pass
+                conn.commit()
+
+            total = data.get("total", 0)
+            if current_page * pageSize >= total:
+                break
+            current_page += 1
+
+    # 更新统计
+    with get_case_conn(case_id) as conn:
+        local_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
+
+    return {
+        "imported": total_imported,
+        "totalLocal": local_count,
+        "vocResearchId": voc_rid,
+        "pagesProcessed": current_page - page + 1,
+    }
+
+
+@app.get("/api/cases/{case_id}/comments")
+async def get_comments(case_id: str, page: int = 1, pageSize: int = 50):
+    """查看本地导入的评论"""
     try:
-        with get_voc_conn(card["voc_research_id"]) as voc:
-            total = voc.execute(
-                "SELECT count(*) FROM comments WHERE length(text) > 10 "
-            ).fetchone()[0]
-            rows = voc.execute("""
-                SELECT id, platform, text, like_count, published_at
-                FROM comments WHERE length(text) > 10
-                ORDER BY like_count DESC
+        with get_case_conn(case_id) as conn:
+            total = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
+            rows = conn.execute("""
+                SELECT id, voc_id, platform, text, like_count, published_at
+                FROM comments ORDER BY like_count DESC
                 LIMIT ? OFFSET ?
             """, (pageSize, (page - 1) * pageSize)).fetchall()
-    except FileNotFoundError as e:
-        raise HTTPException(404, str(e))
-
+    except FileNotFoundError:
+        raise HTTPException(404, "案例不存在")
     return {"total": total, "page": page, "items": [dict(r) for r in rows]}
@@ -200,11 +249,11 @@ async def get_coverage(case_id: str):
 
 
 @app.get("/api/health")
 async def health():
-    from db import VOC_DATA_DIR, DATA_DIR
     return {
         "status": "ok",
-        "vocDataDir": str(VOC_DATA_DIR),
-        "vocDataExists": VOC_DATA_DIR.exists(),
+        "version": "2.0.0",
+        "architecture": "independent (Aliyun self-contained)",
+        "vocApiBase": VOC_API_BASE,
         "caseDataDir": str(DATA_DIR),
     }
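The import loop above stops on any of three conditions: a non-200 response, an empty items array, or current_page * pageSize >= total. Note that if the upstream response ever omits total, data.get("total", 0) evaluates to 0 and the size check fires right after the first page, so only page one gets imported; the empty-items check is the fallback path. The paging logic in isolation, with the HTTP call replaced by a stub (fetch is a placeholder):

    def pages(fetch, page=1, page_size=100):
        """Yield item batches until the API signals the end (mirrors import_voc's loop)."""
        while True:
            data = fetch(page, page_size)  # stand-in for the VOC API request
            items = data.get("items") or data.get("data") or []
            if not items:
                break
            yield items
            if page * page_size >= data.get("total", 0):
                break  # also fires immediately when "total" is missing
            page += 1

Also worth knowing when reading the response: imported counts attempted inserts, so rows skipped by INSERT OR IGNORE on a re-run are still counted; totalLocal is the authoritative number.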
diff --git a/backend/tools/ude_extract.py b/backend/tools/ude_extract.py
index d55c639..7622a89 100644
--- a/backend/tools/ude_extract.py
+++ b/backend/tools/ude_extract.py
@@ -1,10 +1,9 @@
 """
-黑手党提案 — UDE 提取工具
+黑手党提案 — UDE 提取工具(阿里云内闭环)
 
-流程:VOC 原始评论 → LLM 转写 UDE → DashScope 向量化 → DBSCAN 聚类 → 覆盖扫描
+流程:本地 comments → LLM 转写 UDE → DashScope 向量化 → DBSCAN 聚类
 
-数据来源:只读访问共享 VOC 数据层
-分析结果:写入本项目的案例 DB
+所有数据读写都在案例 DB 内,不跨云。
 """
 from __future__ import annotations
@@ -47,7 +46,7 @@ def _get_embed_client(key: str) -> OpenAI:
     )
 
 
-# ═══════════ Step 1: VOC → UDE 转写 ═══════════
+# ═══════════ Step 1: 本地评论 → UDE 转写 ═══════════
 
 async def _call_ude_llm(prompt: str, comments: list[dict]) -> list[dict]:
     """单批 LLM 转写"""
@@ -87,40 +86,33 @@ async def _process_ude_batch(comments, prompt, semaphore):
 
 async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
-    """从共享 VOC 数据读取原始评论,转写为 UDE,存入案例 DB"""
-    from db import get_case_conn, get_voc_conn
+    """从本地 comments 表读取评论,转写为 UDE,存入 ude_sentences"""
+    from db import get_case_conn
 
     prompt = PROMPT_PATH.read_text("utf-8") if PROMPT_PATH.exists() else ""
     if not prompt:
         return {"error": "UDE 转写 prompt 未找到 (prompts/voc_to_ude.txt)"}
 
-    with get_case_conn(case_id) as case_conn:
-        card = case_conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
-        if not card or not card["voc_research_id"]:
-            return {"error": "未关联 VOC 研究。请先调用 link-voc。"}
-
-        voc_research_id = card["voc_research_id"]
-
-        # 获取已转写的 voc_comment_ids
-        done_ids = {r[0] for r in case_conn.execute(
-            "SELECT voc_comment_id FROM ude_sentences"
+    with get_case_conn(case_id) as conn:
+        # 获取已转写的 comment_ids
+        done_ids = {r[0] for r in conn.execute(
+            "SELECT comment_id FROM ude_sentences"
         ).fetchall()}
 
-    # 从 VOC DB 只读获取原始评论
-    with get_voc_conn(voc_research_id) as voc_conn:
-        rows = voc_conn.execute("""
+        # 从本地 comments 表读取
+        rows = conn.execute("""
             SELECT id, platform, text
-            FROM comments
-            WHERE length(text) > 10
+            FROM comments WHERE length(text) > 10
             ORDER BY id
         """).fetchall()
 
-    # 过滤已完成的
+    total_comments = len(rows)
     pending = [r for r in rows if r["id"] not in done_ids]
+
     if not pending:
         with get_case_conn(case_id) as conn:
             total = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
-        return {"message": "全部已转写完成", "total_udes": total, "new": 0}
+        return {"message": "全部已转写完成", "totalUdes": total, "new": 0}
 
     if limit > 0:
         pending = pending[:limit]
@@ -137,7 +129,7 @@ async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
 
     # 写入案例 DB
     ok = 0
-    with get_case_conn(case_id) as case_conn:
+    with get_case_conn(case_id) as conn:
         for results in all_results:
             for r in (results or []):
                 if not isinstance(r, dict):
@@ -149,21 +141,21 @@ async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
                 if not cid:
                     continue
                 try:
-                    case_conn.execute(
-                        "INSERT OR IGNORE INTO ude_sentences (voc_comment_id, ude_text, confidence) VALUES (?, ?, ?)",
+                    conn.execute(
+                        "INSERT OR IGNORE INTO ude_sentences (comment_id, ude_text, confidence) VALUES (?, ?, ?)",
                         (int(cid), ude_text, r.get("confidence", 0.5))
                     )
                     ok += 1
                 except Exception as e:
                     logger.warning(f"[UDE] 写入失败 id={cid}: {e}")
-        case_conn.commit()
-        total = case_conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
+        conn.commit()
+        total = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
 
     return {
-        "new_udes": ok,
-        "total_udes": total,
-        "total_voc_comments": len(rows),
-        "remaining": len(rows) - total,
+        "newUdes": ok,
+        "totalUdes": total,
+        "totalComments": total_comments,
+        "remaining": total_comments - total,
         "batches": len(batches),
     }
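Extraction is resumable by construction: done_ids holds every comment_id that already has a UDE row, pending filters those comments out, and limit lets a large backlog be processed in slices, so re-running the step never re-transcribes a comment. The remaining counter reads as "comments left" only while each comment yields at most one stored UDE row. A toy illustration of the resume filter, with made-up ids:

    done_ids = {1, 2}                                   # already transcribed
    rows = [{"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}]
    pending = [r for r in rows if r["id"] not in done_ids]
    print([r["id"] for r in pending])                   # [3, 4]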
@@ -181,10 +173,10 @@ def _embed_texts(client: OpenAI, texts: list[str]) -> list[list[float]]:
 
 def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
                    dashscope_key: str = None) -> dict:
-    """向量化 + DBSCAN 聚类"""
+    """向量化 + DBSCAN 聚类(全部在本地案例 DB 内)"""
     from sklearn.cluster import DBSCAN
     from sklearn.metrics.pairwise import cosine_distances
-    from db import get_case_conn, get_voc_conn
+    from db import get_case_conn
 
     key = dashscope_key or os.getenv("DASHSCOPE_API_KEY", "")
     if not key:
@@ -193,13 +185,13 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
     embed_client = _get_embed_client(key)
 
     with get_case_conn(case_id) as conn:
-        rows = conn.execute("SELECT id, voc_comment_id, ude_text FROM ude_sentences ORDER BY id").fetchall()
+        rows = conn.execute("SELECT id, comment_id, ude_text FROM ude_sentences ORDER BY id").fetchall()
         if len(rows) < min_samples:
            return {"error": f"UDE 不足 ({len(rows)} 条),至少需要 {min_samples} 条。"}
 
         ude_texts = [r["ude_text"] for r in rows]
         ude_ids = [r["id"] for r in rows]
-        comment_ids = [r["voc_comment_id"] for r in rows]
+        comment_ids = [r["comment_id"] for r in rows]
 
         # 向量化
         vectors = _embed_texts(embed_client, ude_texts)
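The clustering call itself is unchanged by this patch, so the diff skips over it; from the imports and the eps/min_samples parameters, the shape of that step is DBSCAN over a precomputed cosine-distance matrix, roughly as sketched below (vectors stands for the embeddings returned by _embed_texts):

    import numpy as np
    from sklearn.cluster import DBSCAN
    from sklearn.metrics.pairwise import cosine_distances

    X = np.asarray(vectors)   # one embedding row per UDE sentence
    D = cosine_distances(X)   # pairwise cosine distances
    labels = DBSCAN(eps=0.25, min_samples=3, metric="precomputed").fit_predict(D)
    # labels[i] is the cluster index of UDE i; -1 marks noise

With metric="precomputed", eps acts as a cosine-distance threshold (0.25 means "fairly similar"), which matches the later treatment of labels == -1 as noise.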
@@ -223,10 +215,6 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
         # 清空旧聚类,写入新聚类
         conn.execute("DELETE FROM ude_clusters")
 
-        # 获取关联的 VOC research_id 用于读取原声
-        card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
-        voc_rid = card["voc_research_id"] if card else None
-
         clusters = []
         unique_labels = sorted(set(labels) - {-1})
@@ -241,30 +229,24 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
             dists = cosine_distances([centroid], member_vectors)[0]
             representative = member_texts[dists.argmin()]
 
-            # 取原声
+            # 原声采样(从本地 comments 表)
             sample_voices = []
-            if voc_rid:
-                try:
-                    voc_conn = get_voc_conn(voc_rid)
-                    for cid in member_cids[:5]:
-                        voice = voc_conn.execute(
-                            "SELECT text, platform FROM comments WHERE id = ?", (cid,)
-                        ).fetchone()
-                        if voice:
-                            sample_voices.append({"text": voice["text"][:200], "platform": voice["platform"]})
-                    voc_conn.close()
-                except Exception:
-                    pass
+            for cid in member_cids[:5]:
+                voice = conn.execute(
+                    "SELECT text, platform FROM comments WHERE id = ?", (cid,)
+                ).fetchone()
+                if voice:
+                    sample_voices.append({"text": voice["text"][:200], "platform": voice["platform"]})
 
             conn.execute(
                 "INSERT INTO ude_clusters (representative_ude, coverage, sample_voices) VALUES (?, ?, ?)",
                 (representative, len(member_indices), json.dumps(sample_voices, ensure_ascii=False))
             )
             clusters.append({
-                "cluster_id": int(cluster_id),
-                "representative_ude": representative,
+                "clusterId": int(cluster_id),
+                "representativeUde": representative,
                 "coverage": len(member_indices),
-                "sample_voices": sample_voices,
+                "sampleVoices": sample_voices,
             })
 
         conn.commit()
@@ -272,24 +254,22 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
     noise_count = int((labels == -1).sum())
 
     return {
-        "total_udes": len(labels),
-        "num_clusters": len(clusters),
-        "noise_count": noise_count,
-        "noise_pct": round(noise_count / len(labels) * 100, 1) if len(labels) else 0,
+        "totalUdes": len(labels),
+        "numClusters": len(clusters),
+        "noiseCount": noise_count,
+        "noisePct": round(noise_count / len(labels) * 100, 1) if len(labels) else 0,
         "clusters": clusters,
-        "params": {"eps": eps, "min_samples": min_samples},
+        "params": {"eps": eps, "minSamples": min_samples},
     }
 
 
-# ═══════════ Step 5: 覆盖扫描 ═══════════
+# ═══════════ 覆盖扫描 ═══════════
 
 def run_coverage_scan(case_id: str) -> dict:
-    from db import get_case_conn, get_voc_conn
+    from db import get_case_conn
 
     with get_case_conn(case_id) as conn:
-        card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
-        voc_rid = card["voc_research_id"] if card else None
-
+        total_comments = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
         total_udes = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
         clustered = conn.execute("SELECT count(*) FROM ude_sentences WHERE cluster_id >= 0").fetchone()[0]
         noise = conn.execute("SELECT count(*) FROM ude_sentences WHERE cluster_id = -1").fetchone()[0]
@@ -299,27 +279,17 @@ def run_coverage_scan(case_id: str) -> dict:
         cluster_stats = [dict(r) for r in conn.execute(
             "SELECT representative_ude, coverage FROM ude_clusters ORDER BY coverage DESC"
         ).fetchall()]
 
         noise_samples = [dict(r) for r in conn.execute(
-            "SELECT ude_text, voc_comment_id, confidence FROM ude_sentences WHERE cluster_id = -1 ORDER BY confidence DESC LIMIT 10"
+            "SELECT ude_text, comment_id, confidence FROM ude_sentences WHERE cluster_id = -1 ORDER BY confidence DESC LIMIT 10"
         ).fetchall()]
 
-    total_voc = 0
-    if voc_rid:
-        try:
-            with get_voc_conn(voc_rid) as voc:
-                total_voc = voc.execute(
-                    "SELECT count(*) FROM comments WHERE length(text) > 10 "
-                ).fetchone()[0]
-        except Exception:
-            pass
-
     return {
-        "total_voc_comments": total_voc,
-        "total_udes": total_udes,
-        "udes_clustered": clustered,
-        "udes_noise": noise,
-        "coverage_rate": round(clustered / total_voc * 100, 1) if total_voc else 0,
-        "cluster_distribution": cluster_stats,
-        "noise_samples": noise_samples,
+        "totalComments": total_comments,
+        "totalUdes": total_udes,
+        "udesClustered": clustered,
+        "udesNoise": noise,
+        "coverageRate": round(clustered / total_comments * 100, 1) if total_comments else 0,
+        "clusterDistribution": cluster_stats,
+        "noiseSamples": noise_samples,
         "verdict": "充分" if (total_udes > 0 and noise / total_udes < 0.1) else ("需关注" if (total_udes > 0 and noise / total_udes < 0.2) else "需调参"),
     }
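Two reading notes on the coverage report. The verdict is a pure noise-ratio bucket; the same rule in isolation:

    def verdict(total_udes: int, noise: int) -> str:
        if total_udes > 0 and noise / total_udes < 0.1:
            return "充分"    # sufficient: under 10% of UDEs are noise
        if total_udes > 0 and noise / total_udes < 0.2:
            return "需关注"  # borderline: 10-20% noise
        return "需调参"      # retune eps / min_samples

And coverageRate divides clustered UDE sentences by total comments, so it mixes two units (sentences vs. comments); treat it as a rough proxy rather than a strict per-comment coverage figure.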