- backend/server.py: FastAPI 端口 8093 - backend/db.py: 双库设计(案例 DB 读写 + VOC DB 只读) - backend/tools/ude_extract.py: UDE 转写 + 向量聚类 - backend/prompts/voc_to_ude.txt: TOC 7条规范约束 - 已部署至 /opt/apps/mafia-proposal/ (systemd) - Nginx /copaw/mafia/api/ 代理已配置
218 lines
6.7 KiB
Python
218 lines
6.7 KiB
Python
"""
|
||
黑手党提案 — 独立后端
|
||
|
||
FastAPI 服务,端口 8093。
|
||
数据来源:只读访问共享 VOC 数据层。
|
||
分析结果:存自己的案例 DB。
|
||
"""
|
||
import logging
import os
from typing import Optional

from dotenv import load_dotenv
from fastapi import FastAPI, Header, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
|
||
|
||
load_dotenv()
|
||
|
||
from db import (
|
||
get_case_conn, get_voc_conn, init_case_db,
|
||
list_cases as _list_cases, list_voc_researches as _list_voc_researches,
|
||
)
|
||
|
||
# Root logging configuration: timestamp, logger name, level, then the message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s %(message)s",
)
logger = logging.getLogger("mafia")

# The ASGI application object; every route below registers against it.
app = FastAPI(
    title="黑手党提案后端",
    version="1.0.0",
    description="独立后端:共享 VOC 数据层 + 自有分析存储",
)

# CORS is fully open here: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||
|
||
|
||
# ═══════════ Models ═══════════
|
||
|
||
class CreateCaseRequest(BaseModel):
    """Request body for ``POST /api/cases``.

    Only ``brandName`` is required; every other field has a default.
    """

    brandName: str
    category: str = ""
    focusProduct: str = ""
    # Carried as a plain string and forwarded unchanged. The "[]" default
    # suggests a JSON-encoded list -- TODO confirm against db.init_case_db.
    competitors: str = "[]"
    # Declared Optional so that both an omitted field and an explicit JSON
    # null validate; a bare ``str`` annotation with a None default does not
    # accept null under Pydantic v2.
    vocResearchId: Optional[str] = None
|
||
|
||
|
||
class LinkVocRequest(BaseModel):
    """Request body for ``POST /api/cases/{case_id}/link-voc``."""

    # Identifier of the VOC research to attach to the case (required).
    vocResearchId: str
|
||
|
||
|
||
# ═══════════ 案例管理 ═══════════
|
||
|
||
@app.post("/api/cases")
async def create_case(req: CreateCaseRequest):
    """Create a case from the request body and report its identifier.

    The camelCase request fields are mapped onto the snake_case keyword
    arguments of ``init_case_db``; its return value is sent back as
    ``caseId``.
    """
    init_kwargs = {
        "brand_name": req.brandName,
        "category": req.category,
        "focus_product": req.focusProduct,
        "competitors": req.competitors,
        "voc_research_id": req.vocResearchId,
    }
    new_case_id = init_case_db(**init_kwargs)
    return {"caseId": new_case_id}
|
||
|
||
|
||
@app.get("/api/cases")
async def get_cases():
    """Return whatever ``db.list_cases`` reports, unmodified."""
    all_cases = _list_cases()
    return all_cases
|
||
|
||
|
||
@app.get("/api/cases/{case_id}")
async def get_case(case_id: str):
    """Return a case's card row together with its UDE and cluster counts.

    Raises:
        HTTPException: 404 when ``get_case_conn`` raises
            ``FileNotFoundError`` or when ``case_card`` holds no row.
    """
    try:
        with get_case_conn(case_id) as db:
            card_row = db.execute("SELECT * FROM case_card LIMIT 1").fetchone()
            sentence_total = db.execute(
                "SELECT count(*) FROM ude_sentences"
            ).fetchone()[0]
            cluster_total = db.execute(
                "SELECT count(*) FROM ude_clusters"
            ).fetchone()[0]
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")

    if not card_row:
        raise HTTPException(404, "案例不存在")

    # Build the payload in the same key order as a single dict literal would:
    # caseId first, then the card columns, then the two counters.
    payload = {"caseId": case_id}
    payload.update(dict(card_row))
    payload["udeCount"] = sentence_total
    payload["clusterCount"] = cluster_total
    return payload
|
||
|
||
|
||
@app.delete("/api/cases/{case_id}")
async def delete_case(case_id: str):
    """Delete the ``<case_id>.db`` file stored under ``DATA_DIR``.

    Returns:
        ``{"deleted": True}`` once the file has been removed.

    Raises:
        HTTPException: 404 when the file does not exist, or when
            ``case_id`` would place the target outside ``DATA_DIR``.
    """
    from db import DATA_DIR

    path = DATA_DIR / f"{case_id}.db"

    # Lexical containment check: a case_id containing path separators, ".."
    # segments or an absolute path yields a parent other than DATA_DIR.
    # Refuse those so this endpoint can only remove files directly inside the
    # case directory. (Assumes DATA_DIR is a pathlib.Path, as its use with
    # "/" and .unlink() suggests.)
    if path.parent != DATA_DIR:
        raise HTTPException(404, "案例不存在")

    # EAFP instead of exists()+unlink(): avoids the window in which a
    # concurrent delete removes the file between the check and the unlink,
    # which would otherwise surface as an unhandled error.
    try:
        path.unlink()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在") from None
    return {"deleted": True}
|
||
|
||
|
||
# ═══════════ VOC 关联 ═══════════
|
||
|
||
@app.post("/api/cases/{case_id}/link-voc")
async def link_voc(case_id: str, req: LinkVocRequest):
    """Attach a VOC research ID to a case, after checking the research opens.

    The VOC database is opened first and its comments longer than 10
    characters are counted; only then is the ID written to ``case_card``.

    Raises:
        HTTPException: 404 when either ``get_voc_conn`` or ``get_case_conn``
            raises ``FileNotFoundError``.
    """
    research_id = req.vocResearchId

    # Step 1: make sure the VOC research can be opened, and count its comments.
    try:
        with get_voc_conn(research_id) as voc_db:
            cursor = voc_db.execute(
                "SELECT count(*) FROM comments WHERE length(text) > 10 "
            )
            comment_total = cursor.fetchone()[0]
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))

    # Step 2: record the link in the case database.
    try:
        with get_case_conn(case_id) as case_db:
            case_db.execute(
                "UPDATE case_card SET voc_research_id = ?",
                (research_id,),
            )
            case_db.commit()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")

    return {"linked": True, "vocCommentCount": comment_total}
|
||
|
||
|
||
@app.get("/api/voc/researches")
async def get_voc_researches():
    """Return whatever ``db.list_voc_researches`` reports, unmodified."""
    researches = _list_voc_researches()
    return researches
|
||
|
||
|
||
@app.get("/api/cases/{case_id}/voc-comments")
async def get_voc_comments(case_id: str, page: int = 1, pageSize: int = 50):
    """Return one page of raw comments from the VOC research linked to a case.

    Only comments whose text is longer than 10 characters are counted and
    returned, ordered by ``like_count`` descending.

    Args:
        case_id: Case whose linked VOC research is read.
        page: 1-based page number; values below 1 are treated as 1.
        pageSize: Rows per page; negative values are treated as 0.

    Returns:
        A dict with ``total`` (matching comment count), ``page`` (the
        effective page number) and ``items`` (the rows as dicts).

    Raises:
        HTTPException: 404 when the case or VOC database cannot be opened
            (``FileNotFoundError``); 400 when the case has no linked VOC
            research.
    """
    # SQLite treats a negative LIMIT as "no upper bound", so an unchecked
    # negative pageSize would return the entire table; a page below 1 would
    # likewise produce a negative OFFSET. Clamp both before building the query.
    page = max(page, 1)
    pageSize = max(pageSize, 0)

    try:
        with get_case_conn(case_id) as conn:
            card = conn.execute(
                "SELECT voc_research_id FROM case_card LIMIT 1"
            ).fetchone()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")

    if not card or not card["voc_research_id"]:
        raise HTTPException(400, "未关联 VOC 研究")

    try:
        with get_voc_conn(card["voc_research_id"]) as voc:
            total = voc.execute(
                "SELECT count(*) FROM comments WHERE length(text) > 10 "
            ).fetchone()[0]
            # The secondary sort on id gives rows with equal like_count a
            # stable order, so consecutive pages neither repeat nor skip rows.
            # (Assumes id is unique per comment -- TODO confirm VOC schema.)
            rows = voc.execute("""
                SELECT id, platform, text, like_count, published_at
                FROM comments WHERE length(text) > 10
                ORDER BY like_count DESC, id
                LIMIT ? OFFSET ?
            """, (pageSize, (page - 1) * pageSize)).fetchall()
    except FileNotFoundError as e:
        raise HTTPException(404, str(e))

    return {"total": total, "page": page, "items": [dict(r) for r in rows]}
|
||
|
||
|
||
# ═══════════ UDE 分析 ═══════════
|
||
|
||
@app.post("/api/cases/{case_id}/ude/extract")
async def extract_ude(case_id: str, limit: int = Query(0)):
    """Run UDE extraction for a case and return its result as-is.

    ``limit`` is passed straight through to ``run_ude_extraction``.

    Raises:
        HTTPException: 404, carrying the error's message, when the
            extraction raises ``FileNotFoundError``.
    """
    from tools.ude_extract import run_ude_extraction

    try:
        return await run_ude_extraction(case_id, limit)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
|
||
|
||
|
||
@app.post("/api/cases/{case_id}/ude/cluster")
async def cluster_ude(
    case_id: str,
    eps: float = Query(0.25),
    minSamples: int = Query(3),
    x_dashscope_key: str = Header(None),
):
    """Cluster a case's UDE sentences and return the clustering result as-is.

    The DashScope key comes from the ``x_dashscope_key`` header parameter
    when it is non-empty, otherwise from the ``DASHSCOPE_API_KEY``
    environment variable (empty string if unset).

    Raises:
        HTTPException: 404, carrying the error's message, when clustering
            raises ``FileNotFoundError``.
    """
    from tools.ude_extract import run_clustering

    api_key = x_dashscope_key
    if not api_key:
        api_key = os.getenv("DASHSCOPE_API_KEY", "")

    # NOTE(review): run_clustering is called synchronously inside an async
    # handler; if it is slow it will hold up the event loop -- confirm
    # whether that is acceptable.
    try:
        outcome = run_clustering(case_id, eps, minSamples, dashscope_key=api_key)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
    return outcome
|
||
|
||
|
||
@app.get("/api/cases/{case_id}/ude/clusters")
async def get_clusters(case_id: str):
    """List every row of ``ude_clusters`` for a case, highest coverage first.

    Raises:
        HTTPException: 404 when ``get_case_conn`` raises
            ``FileNotFoundError``.
    """
    try:
        with get_case_conn(case_id) as db:
            cursor = db.execute(
                "SELECT * FROM ude_clusters ORDER BY coverage DESC"
            )
            cluster_rows = cursor.fetchall()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")
    return list(map(dict, cluster_rows))
|
||
|
||
|
||
@app.get("/api/cases/{case_id}/ude/coverage")
async def get_coverage(case_id: str):
    """Run the coverage scan for a case and return its result as-is.

    Raises:
        HTTPException: 404, carrying the error's message, when the scan
            raises ``FileNotFoundError``.
    """
    from tools.ude_extract import run_coverage_scan

    try:
        return run_coverage_scan(case_id)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))
|
||
|
||
|
||
# ═══════════ 健康检查 ═══════════
|
||
|
||
@app.get("/api/health")
async def health():
    """Report service status plus the two data directories in use.

    Includes whether the VOC data directory currently exists on disk.
    """
    from db import VOC_DATA_DIR, DATA_DIR

    report = {"status": "ok"}
    report["vocDataDir"] = str(VOC_DATA_DIR)
    report["vocDataExists"] = VOC_DATA_DIR.exists()
    report["caseDataDir"] = str(DATA_DIR)
    return report
|
||
|
||
|
||
# ═══════════ 启动 ═══════════
|
||
|
||
if __name__ == "__main__":
    # Imported here so that uvicorn is only needed when run as a script.
    import uvicorn

    # PORT may override the default listening port of 8093.
    port_setting = os.getenv("PORT", "8093")
    uvicorn.run(app, host="0.0.0.0", port=int(port_setting))
|