refactor: v2.0 完全解耦 — 阿里云内闭环

- 删除 VOC_DATA_DIR / get_voc_conn(不再跨云直读 SQLite)
- 案例 DB 自带 comments 表,自包含所有数据
- 新增 POST /import-voc:通过 VOC 公网 API 导入评论
- VOC_API_BASE 环境变量控制 API 地址
- 新增 httpx 依赖
This commit is contained in:
lidf 2026-04-07 19:47:34 +08:00
parent ec8eaa0b36
commit c5e2a58258
5 changed files with 194 additions and 191 deletions

View File

@ -1,13 +1,19 @@
# LLM(通过 LiteLLM 网关)
# 黑手党提案后端 v2.0 — 环境变量
# 完全独立,阿里云内闭环
# VOC 公网 API(跨云只读访问,用于 import-voc)
VOC_API_BASE=https://brand.brainwork.club/voc/api/research
# LLM 路由(走同机 LiteLLM)
LITELLM_PROXY_URL=http://127.0.0.1:4000/v1
LITELLM_MASTER_KEY=
LITELLM_MASTER_KEY=sk-xxx
# 模型
MODEL_ID=qwen-plus
TEMPERATURE=0.1
# 向量化(DashScope text-embedding-v4)
DASHSCOPE_API_KEY=
# DashScope(向量化用)
DASHSCOPE_API_KEY=sk-xxx
# 共享 VOC 数据层
VOC_DATA_DIR=/opt/apps/voc-researcher/data
# 服务
# 端口
PORT=8093

View File

@ -1,9 +1,11 @@
"""
黑手党提案 数据库管理
黑手党提案 数据库管理(完全独立,阿里云内闭环)
双库设计
1. 案例 DB读写每个提案案例一个 SQLite存分析结果
2. VOC DB只读读取共享 VOC 数据层的原始评论
每个提案案例一个 SQLite 文件自包含所有数据
- case_card案例元信息
- comments VOC API 导入的评论本地副本
- ude_sentences / ude_clustersUDE 分析结果
- conflicts / proposal_sections后续阶段
"""
import os
import sqlite3
@ -17,10 +19,11 @@ load_dotenv()
DATA_DIR = Path(__file__).parent / "data"
DATA_DIR.mkdir(exist_ok=True)
VOC_DATA_DIR = Path(os.getenv("VOC_DATA_DIR", ""))
# VOC 公网 API腾讯云跨云只读访问
VOC_API_BASE = os.getenv("VOC_API_BASE", "https://brand.brainwork.club/voc/api/research")
# ═══════════ 案例 DB读写 ═══════════
# ═══════════ Schema ═══════════
CASE_SCHEMA = """
CREATE TABLE IF NOT EXISTS case_card (
@ -29,13 +32,25 @@ CREATE TABLE IF NOT EXISTS case_card (
focus_product TEXT,
competitors TEXT,
voc_research_id TEXT,
voc_api_base TEXT,
created_at TEXT DEFAULT (datetime('now')),
status TEXT DEFAULT 'draft'
);
CREATE TABLE IF NOT EXISTS comments (
id INTEGER PRIMARY KEY AUTOINCREMENT,
voc_id INTEGER,
platform TEXT,
text TEXT NOT NULL,
like_count INTEGER DEFAULT 0,
published_at TEXT,
imported_at TEXT DEFAULT (datetime('now')),
UNIQUE(voc_id)
);
CREATE TABLE IF NOT EXISTS ude_sentences (
id INTEGER PRIMARY KEY AUTOINCREMENT,
voc_comment_id INTEGER,
comment_id INTEGER REFERENCES comments(id),
ude_text TEXT NOT NULL,
confidence REAL DEFAULT 0.5,
vector TEXT,
@ -73,8 +88,10 @@ CREATE TABLE IF NOT EXISTS proposal_sections (
"""
# ═══════════ 案例 DB ═══════════
def get_case_conn(case_id: str) -> sqlite3.Connection:
"""获取案例 DB 连接(读写)"""
"""获取案例 DB 连接"""
path = DATA_DIR / f"{case_id}.db"
if not path.exists():
raise FileNotFoundError(f"案例 {case_id} 不存在")
@ -93,8 +110,8 @@ def init_case_db(brand_name: str, category: str = "", focus_product: str = "",
conn.row_factory = sqlite3.Row
conn.executescript(CASE_SCHEMA)
conn.execute(
"INSERT INTO case_card (brand_name, category, focus_product, competitors, voc_research_id) VALUES (?,?,?,?,?)",
(brand_name, category, focus_product, competitors, voc_research_id)
"INSERT INTO case_card (brand_name, category, focus_product, competitors, voc_research_id, voc_api_base) VALUES (?,?,?,?,?,?)",
(brand_name, category, focus_product, competitors, voc_research_id, VOC_API_BASE)
)
conn.commit()
conn.close()
@ -111,11 +128,13 @@ def list_cases() -> list[dict]:
conn.row_factory = sqlite3.Row
card = conn.execute("SELECT * FROM case_card LIMIT 1").fetchone()
if card:
comment_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
ude_count = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
cluster_count = conn.execute("SELECT count(*) FROM ude_clusters").fetchone()[0]
cases.append({
"case_id": case_id,
**dict(card),
"comment_count": comment_count,
"ude_count": ude_count,
"cluster_count": cluster_count,
})
@ -123,45 +142,3 @@ def list_cases() -> list[dict]:
except Exception:
pass
return cases
# ═══════════ VOC DB只读 ═══════════
def get_voc_conn(voc_research_id: str) -> sqlite3.Connection:
    """Open a read-only connection to a shared VOC research database.

    Args:
        voc_research_id: stem of the per-research SQLite file inside
            VOC_DATA_DIR.

    Returns:
        A sqlite3.Connection with Row factory, opened read-only.

    Raises:
        FileNotFoundError: when the VOC data directory or the requested
            research database file does not exist.
    """
    if not VOC_DATA_DIR.exists():
        raise FileNotFoundError(f"VOC 数据目录不存在: {VOC_DATA_DIR}")
    db_path = VOC_DATA_DIR / f"{voc_research_id}.db"
    if not db_path.exists():
        raise FileNotFoundError(f"VOC 研究 {voc_research_id} 不存在")
    # SQLite URI with mode=ro keeps the shared data layer strictly read-only.
    readonly_conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
    readonly_conn.row_factory = sqlite3.Row
    return readonly_conn
def list_voc_researches() -> list[dict]:
    """List all researches available in the shared VOC data layer.

    Scans VOC_DATA_DIR for per-research SQLite files (skipping the
    internal bookkeeping databases) and returns one summary dict per
    usable research with keys ``research_id``, ``brand_name`` and
    ``comment_count`` (comments whose text is longer than 10 chars).
    Databases that are unreadable, lack a ``research_card`` row, or
    have no usable comments are skipped silently (best-effort listing).

    Returns:
        list[dict]: summaries sorted by database filename; empty when
        VOC_DATA_DIR does not exist.
    """
    if not VOC_DATA_DIR.exists():
        return []
    # Non-research databases that live in the same directory.
    skip_names = {"global_cache.db", "agent_sessions.db"}
    researches = []
    for db_file in sorted(VOC_DATA_DIR.glob("*.db")):
        if db_file.name in skip_names:
            continue
        rid = db_file.stem
        try:
            conn = sqlite3.connect(f"file:{db_file}?mode=ro", uri=True)
            try:
                conn.row_factory = sqlite3.Row
                card = conn.execute(
                    "SELECT brand_name FROM research_card LIMIT 1"
                ).fetchone()
                comment_count = conn.execute(
                    "SELECT count(*) FROM comments WHERE length(text) > 10"
                ).fetchone()[0]
            finally:
                # BUGFIX: the original closed the connection only on the
                # success path, leaking the handle whenever a query raised
                # (e.g. a db without a research_card table).
                conn.close()
            if card and comment_count > 0:
                researches.append({
                    "research_id": rid,
                    "brand_name": card["brand_name"],
                    "comment_count": comment_count,
                })
        except Exception:
            # Best-effort: one corrupt/foreign DB must not break the listing.
            continue
    return researches

View File

@ -5,3 +5,4 @@ python-dotenv>=1.0.0
numpy>=1.24.0
scikit-learn>=1.3.0
gunicorn>=21.2.0
httpx>=0.27.0

View File

@ -1,12 +1,12 @@
"""
黑手党提案 独立后端
黑手党提案 独立后端阿里云内闭环
FastAPI 服务端口 8093
数据来源只读访问共享 VOC 数据层
分析结果存自己的案例 DB
VOC 数据通过公网 API 导入不直读 VOC DB
"""
import os
import logging
import httpx
from fastapi import FastAPI, Header, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
@ -15,15 +15,12 @@ from dotenv import load_dotenv
load_dotenv()
from db import (
get_case_conn, get_voc_conn, init_case_db,
list_cases as _list_cases, list_voc_researches as _list_voc_researches,
)
from db import get_case_conn, init_case_db, list_cases as _list_cases, DATA_DIR, VOC_API_BASE
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(levelname)s %(message)s")
logger = logging.getLogger("mafia")
app = FastAPI(title="黑手党提案后端", version="1.0.0", description="独立后端:共享 VOC 数据层 + 自有分析存储")
app = FastAPI(title="黑手党提案后端", version="2.0.0", description="独立后端阿里云内闭环VOC 通过 API 导入")
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
@ -70,18 +67,23 @@ async def get_case(case_id: str):
try:
with get_case_conn(case_id) as conn:
card = conn.execute("SELECT * FROM case_card LIMIT 1").fetchone()
comment_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
ude_count = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
cluster_count = conn.execute("SELECT count(*) FROM ude_clusters").fetchone()[0]
if not card:
raise HTTPException(404, "案例不存在")
return {"caseId": case_id, **dict(card), "udeCount": ude_count, "clusterCount": cluster_count}
return {
"caseId": case_id, **dict(card),
"commentCount": comment_count,
"udeCount": ude_count,
"clusterCount": cluster_count,
}
except FileNotFoundError:
raise HTTPException(404, "案例不存在")
@app.delete("/api/cases/{case_id}")
async def delete_case(case_id: str):
from db import DATA_DIR
path = DATA_DIR / f"{case_id}.db"
if path.exists():
path.unlink()
@ -89,60 +91,107 @@ async def delete_case(case_id: str):
raise HTTPException(404, "案例不存在")
# ═══════════ VOC 关联 ═══════════
# ═══════════ VOC 导入(跨云 API ═══════════
@app.post("/api/cases/{case_id}/link-voc")
async def link_voc(case_id: str, req: LinkVocRequest):
"""关联 VOC 研究 ID验证 VOC 研究存在后再写入)"""
try:
with get_voc_conn(req.vocResearchId) as voc:
count = voc.execute(
"SELECT count(*) FROM comments WHERE length(text) > 10 "
).fetchone()[0]
except FileNotFoundError as e:
raise HTTPException(404, str(e))
"""关联 VOC 研究 ID"""
try:
with get_case_conn(case_id) as conn:
conn.execute("UPDATE case_card SET voc_research_id = ?", (req.vocResearchId,))
conn.commit()
except FileNotFoundError:
raise HTTPException(404, "案例不存在")
return {"linked": True, "vocCommentCount": count}
return {"linked": True, "vocResearchId": req.vocResearchId}
@app.get("/api/voc/researches")
async def get_voc_researches():
return _list_voc_researches()
@app.get("/api/cases/{case_id}/voc-comments")
async def get_voc_comments(case_id: str, page: int = 1, pageSize: int = 50):
"""从共享 VOC 数据层只读获取原始评论"""
@app.post("/api/cases/{case_id}/import-voc")
async def import_voc(case_id: str, page: int = Query(1), pageSize: int = Query(100)):
"""从 VOC 公网 API 拉取评论数据,存入本地案例 DB"""
try:
with get_case_conn(case_id) as conn:
card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
card = conn.execute("SELECT voc_research_id, voc_api_base FROM case_card LIMIT 1").fetchone()
except FileNotFoundError:
raise HTTPException(404, "案例不存在")
if not card or not card["voc_research_id"]:
raise HTTPException(400, "未关联 VOC 研究")
raise HTTPException(400, "未关联 VOC 研究,请先调用 link-voc")
voc_rid = card["voc_research_id"]
api_base = card["voc_api_base"] or VOC_API_BASE
# 从 VOC API 拉取(只读,不需要 TikHub Key
total_imported = 0
current_page = page
async with httpx.AsyncClient(timeout=30) as client:
while True:
url = f"{api_base}/{voc_rid}/voc-list?page={current_page}&page_size={pageSize}"
try:
resp = await client.get(url)
if resp.status_code != 200:
logger.warning(f"[Import] VOC API 返回 {resp.status_code}: {resp.text[:100]}")
break
data = resp.json()
except Exception as e:
logger.error(f"[Import] VOC API 请求失败: {e}")
break
items = data.get("items") or data.get("data") or []
if not items:
break
with get_case_conn(case_id) as conn:
for item in items:
text = item.get("text", "")
if len(text) < 10:
continue
try:
conn.execute(
"INSERT OR IGNORE INTO comments (voc_id, platform, text, like_count, published_at) VALUES (?,?,?,?,?)",
(
item.get("id"),
item.get("platform", ""),
text,
item.get("like_count", 0),
item.get("published_at", ""),
)
)
total_imported += 1
except Exception:
pass
conn.commit()
total = data.get("total", 0)
if current_page * pageSize >= total:
break
current_page += 1
# 更新统计
with get_case_conn(case_id) as conn:
local_count = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
return {
"imported": total_imported,
"totalLocal": local_count,
"vocResearchId": voc_rid,
"pagesProcessed": current_page - page + 1,
}
@app.get("/api/cases/{case_id}/comments")
async def get_comments(case_id: str, page: int = 1, pageSize: int = 50):
"""查看本地导入的评论"""
try:
with get_voc_conn(card["voc_research_id"]) as voc:
total = voc.execute(
"SELECT count(*) FROM comments WHERE length(text) > 10 "
).fetchone()[0]
rows = voc.execute("""
SELECT id, platform, text, like_count, published_at
FROM comments WHERE length(text) > 10
ORDER BY like_count DESC
with get_case_conn(case_id) as conn:
total = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
rows = conn.execute("""
SELECT id, voc_id, platform, text, like_count, published_at
FROM comments ORDER BY like_count DESC
LIMIT ? OFFSET ?
""", (pageSize, (page - 1) * pageSize)).fetchall()
except FileNotFoundError as e:
raise HTTPException(404, str(e))
except FileNotFoundError:
raise HTTPException(404, "案例不存在")
return {"total": total, "page": page, "items": [dict(r) for r in rows]}
@ -200,11 +249,11 @@ async def get_coverage(case_id: str):
@app.get("/api/health")
async def health():
from db import VOC_DATA_DIR, DATA_DIR
return {
"status": "ok",
"vocDataDir": str(VOC_DATA_DIR),
"vocDataExists": VOC_DATA_DIR.exists(),
"version": "2.0.0",
"architecture": "independent (Aliyun self-contained)",
"vocApiBase": VOC_API_BASE,
"caseDataDir": str(DATA_DIR),
}

View File

@ -1,10 +1,9 @@
"""
黑手党提案 UDE 提取工具
黑手党提案 UDE 提取工具阿里云内闭环
流程VOC 原始评论 LLM 转写 UDE DashScope 向量化 DBSCAN 聚类 覆盖扫描
流程本地 comments LLM 转写 UDE DashScope 向量化 DBSCAN 聚类
数据来源只读访问共享 VOC 数据层
分析结果写入本项目的案例 DB
所有数据读写都在案例 DB 不跨云
"""
from __future__ import annotations
@ -47,7 +46,7 @@ def _get_embed_client(key: str) -> OpenAI:
)
# ═══════════ Step 1: VOC → UDE 转写 ═══════════
# ═══════════ Step 1: 本地评论 → UDE 转写 ═══════════
async def _call_ude_llm(prompt: str, comments: list[dict]) -> list[dict]:
"""单批 LLM 转写"""
@ -87,40 +86,33 @@ async def _process_ude_batch(comments, prompt, semaphore):
async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
"""共享 VOC 数据读取原始评论,转写为 UDE存入案例 DB"""
from db import get_case_conn, get_voc_conn
"""本地 comments 表读取评论,转写为 UDE存入 ude_sentences"""
from db import get_case_conn
prompt = PROMPT_PATH.read_text("utf-8") if PROMPT_PATH.exists() else ""
if not prompt:
return {"error": "UDE 转写 prompt 未找到 (prompts/voc_to_ude.txt)"}
with get_case_conn(case_id) as case_conn:
card = case_conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
if not card or not card["voc_research_id"]:
return {"error": "未关联 VOC 研究。请先调用 link-voc。"}
voc_research_id = card["voc_research_id"]
# 获取已转写的 voc_comment_ids
done_ids = {r[0] for r in case_conn.execute(
"SELECT voc_comment_id FROM ude_sentences"
with get_case_conn(case_id) as conn:
# 获取已转写的 comment_ids
done_ids = {r[0] for r in conn.execute(
"SELECT comment_id FROM ude_sentences"
).fetchall()}
# 从 VOC DB 只读获取原始评论
with get_voc_conn(voc_research_id) as voc_conn:
rows = voc_conn.execute("""
# 从本地 comments 表读取
rows = conn.execute("""
SELECT id, platform, text
FROM comments
WHERE length(text) > 10
FROM comments WHERE length(text) > 10
ORDER BY id
""").fetchall()
# 过滤已完成的
total_comments = len(rows)
pending = [r for r in rows if r["id"] not in done_ids]
if not pending:
with get_case_conn(case_id) as conn:
total = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
return {"message": "全部已转写完成", "total_udes": total, "new": 0}
return {"message": "全部已转写完成", "totalUdes": total, "new": 0}
if limit > 0:
pending = pending[:limit]
@ -137,7 +129,7 @@ async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
# 写入案例 DB
ok = 0
with get_case_conn(case_id) as case_conn:
with get_case_conn(case_id) as conn:
for results in all_results:
for r in (results or []):
if not isinstance(r, dict):
@ -149,21 +141,21 @@ async def run_ude_extraction(case_id: str, limit: int = 0) -> dict:
if not cid:
continue
try:
case_conn.execute(
"INSERT OR IGNORE INTO ude_sentences (voc_comment_id, ude_text, confidence) VALUES (?, ?, ?)",
conn.execute(
"INSERT OR IGNORE INTO ude_sentences (comment_id, ude_text, confidence) VALUES (?, ?, ?)",
(int(cid), ude_text, r.get("confidence", 0.5))
)
ok += 1
except Exception as e:
logger.warning(f"[UDE] 写入失败 id={cid}: {e}")
case_conn.commit()
total = case_conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
conn.commit()
total = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
return {
"new_udes": ok,
"total_udes": total,
"total_voc_comments": len(rows),
"remaining": len(rows) - total,
"newUdes": ok,
"totalUdes": total,
"totalComments": total_comments,
"remaining": total_comments - total,
"batches": len(batches),
}
@ -181,10 +173,10 @@ def _embed_texts(client: OpenAI, texts: list[str]) -> list[list[float]]:
def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
dashscope_key: str = None) -> dict:
"""向量化 + DBSCAN 聚类"""
"""向量化 + DBSCAN 聚类(全部在本地案例 DB 内)"""
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import cosine_distances
from db import get_case_conn, get_voc_conn
from db import get_case_conn
key = dashscope_key or os.getenv("DASHSCOPE_API_KEY", "")
if not key:
@ -193,13 +185,13 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
embed_client = _get_embed_client(key)
with get_case_conn(case_id) as conn:
rows = conn.execute("SELECT id, voc_comment_id, ude_text FROM ude_sentences ORDER BY id").fetchall()
rows = conn.execute("SELECT id, comment_id, ude_text FROM ude_sentences ORDER BY id").fetchall()
if len(rows) < min_samples:
return {"error": f"UDE 不足 ({len(rows)} 条),至少需要 {min_samples} 条。"}
ude_texts = [r["ude_text"] for r in rows]
ude_ids = [r["id"] for r in rows]
comment_ids = [r["voc_comment_id"] for r in rows]
comment_ids = [r["comment_id"] for r in rows]
# 向量化
vectors = _embed_texts(embed_client, ude_texts)
@ -223,10 +215,6 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
# 清空旧聚类,写入新聚类
conn.execute("DELETE FROM ude_clusters")
# 获取关联的 VOC research_id 用于读取原声
card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
voc_rid = card["voc_research_id"] if card else None
clusters = []
unique_labels = sorted(set(labels) - {-1})
@ -241,30 +229,24 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
dists = cosine_distances([centroid], member_vectors)[0]
representative = member_texts[dists.argmin()]
# 原声
# 原声采样(从本地 comments 表)
sample_voices = []
if voc_rid:
try:
voc_conn = get_voc_conn(voc_rid)
for cid in member_cids[:5]:
voice = voc_conn.execute(
"SELECT text, platform FROM comments WHERE id = ?", (cid,)
).fetchone()
if voice:
sample_voices.append({"text": voice["text"][:200], "platform": voice["platform"]})
voc_conn.close()
except Exception:
pass
for cid in member_cids[:5]:
voice = conn.execute(
"SELECT text, platform FROM comments WHERE id = ?", (cid,)
).fetchone()
if voice:
sample_voices.append({"text": voice["text"][:200], "platform": voice["platform"]})
conn.execute(
"INSERT INTO ude_clusters (representative_ude, coverage, sample_voices) VALUES (?, ?, ?)",
(representative, len(member_indices), json.dumps(sample_voices, ensure_ascii=False))
)
clusters.append({
"cluster_id": int(cluster_id),
"representative_ude": representative,
"clusterId": int(cluster_id),
"representativeUde": representative,
"coverage": len(member_indices),
"sample_voices": sample_voices,
"sampleVoices": sample_voices,
})
conn.commit()
@ -272,24 +254,22 @@ def run_clustering(case_id: str, eps: float = 0.25, min_samples: int = 3,
noise_count = int((labels == -1).sum())
return {
"total_udes": len(labels),
"num_clusters": len(clusters),
"noise_count": noise_count,
"noise_pct": round(noise_count / len(labels) * 100, 1) if len(labels) else 0,
"totalUdes": len(labels),
"numClusters": len(clusters),
"noiseCount": noise_count,
"noisePct": round(noise_count / len(labels) * 100, 1) if len(labels) else 0,
"clusters": clusters,
"params": {"eps": eps, "min_samples": min_samples},
"params": {"eps": eps, "minSamples": min_samples},
}
# ═══════════ Step 5: 覆盖扫描 ═══════════
# ═══════════ 覆盖扫描 ═══════════
def run_coverage_scan(case_id: str) -> dict:
from db import get_case_conn, get_voc_conn
from db import get_case_conn
with get_case_conn(case_id) as conn:
card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
voc_rid = card["voc_research_id"] if card else None
total_comments = conn.execute("SELECT count(*) FROM comments").fetchone()[0]
total_udes = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
clustered = conn.execute("SELECT count(*) FROM ude_sentences WHERE cluster_id >= 0").fetchone()[0]
noise = conn.execute("SELECT count(*) FROM ude_sentences WHERE cluster_id = -1").fetchone()[0]
@ -299,27 +279,17 @@ def run_coverage_scan(case_id: str) -> dict:
).fetchall()]
noise_samples = [dict(r) for r in conn.execute(
"SELECT ude_text, voc_comment_id, confidence FROM ude_sentences WHERE cluster_id = -1 ORDER BY confidence DESC LIMIT 10"
"SELECT ude_text, comment_id, confidence FROM ude_sentences WHERE cluster_id = -1 ORDER BY confidence DESC LIMIT 10"
).fetchall()]
total_voc = 0
if voc_rid:
try:
with get_voc_conn(voc_rid) as voc:
total_voc = voc.execute(
"SELECT count(*) FROM comments WHERE length(text) > 10 "
).fetchone()[0]
except Exception:
pass
return {
"total_voc_comments": total_voc,
"total_udes": total_udes,
"udes_clustered": clustered,
"udes_noise": noise,
"coverage_rate": round(clustered / total_voc * 100, 1) if total_voc else 0,
"cluster_distribution": cluster_stats,
"noise_samples": noise_samples,
"totalComments": total_comments,
"totalUdes": total_udes,
"udesClustered": clustered,
"udesNoise": noise,
"coverageRate": round(clustered / total_comments * 100, 1) if total_comments else 0,
"clusterDistribution": cluster_stats,
"noiseSamples": noise_samples,
"verdict": "充分" if (total_udes > 0 and noise / total_udes < 0.1) else
("需关注" if (total_udes > 0 and noise / total_udes < 0.2) else "需调参"),
}