# chanpinhsd/backend/server.py

"""
黑手党提案 — 独立后端

FastAPI 服务，端口 8093。
数据来源：只读访问共享 VOC 数据层。
分析结果：存自己的案例 DB。
"""
import logging
import os
from typing import Optional

from dotenv import load_dotenv
from fastapi import FastAPI, Header, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

load_dotenv()

from db import (
    get_case_conn, get_voc_conn, init_case_db,
    list_cases as _list_cases, list_voc_researches as _list_voc_researches,
)

# Root logging configuration: INFO and above, each record prefixed with the
# timestamp, logger name and level.
logging.basicConfig(
    format="%(asctime)s [%(name)s] %(levelname)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("mafia")

# The ASGI application; the route decorators further down register on it.
app = FastAPI(
    title="黑手党提案后端",
    description="独立后端：共享 VOC 数据层 + 自有分析存储",
    version="1.0.0",
)
# CORS is left fully open: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)


# ═══════════ Models ═══════════

class CreateCaseRequest(BaseModel):
    """Request body for ``POST /api/cases``.

    Attribute names are camelCase because they double as the JSON keys of
    the request payload.
    """

    # The only field without a default, so the only one a client must send.
    brandName: str
    category: str = ""
    focusProduct: str = ""
    # Kept as a plain string; the default is the text "[]".
    # NOTE(review): presumably a JSON-encoded list - confirm against the db layer.
    competitors: str = "[]"
    # Declared Optional so that an explicit JSON ``null`` validates as well as
    # an omitted key; a bare ``str`` annotation with a ``None`` default only
    # covers the omitted-key case under Pydantic v2.
    vocResearchId: Optional[str] = None


class LinkVocRequest(BaseModel):
    """Request body for ``POST /api/cases/{case_id}/link-voc``."""

    # Identifier of the VOC research to attach to the case (required).
    vocResearchId: str


# ═══════════ 案例管理 ═══════════

@app.post("/api/cases")
async def create_case(req: CreateCaseRequest):
    """Create a case from the request body and return its identifier.

    Args:
        req: Validated request payload.

    Returns:
        ``{"caseId": ...}`` holding the value returned by ``init_case_db``.
    """
    # Map the camelCase request fields onto the snake_case keyword arguments
    # that the db helper takes.
    card_fields = {
        "brand_name": req.brandName,
        "category": req.category,
        "focus_product": req.focusProduct,
        "competitors": req.competitors,
        "voc_research_id": req.vocResearchId,
    }
    return {"caseId": init_case_db(**card_fields)}


@app.get("/api/cases")
async def get_cases():
    """Return the case listing produced by ``db.list_cases``, unmodified."""
    cases = _list_cases()
    return cases


@app.get("/api/cases/{case_id}")
async def get_case(case_id: str):
    """Return one case's card together with its UDE sentence and cluster counts.

    Args:
        case_id: Identifier of the case whose database is opened.

    Returns:
        A dict starting with ``caseId``, followed by every column of the
        first ``case_card`` row, then ``udeCount`` and ``clusterCount``.

    Raises:
        HTTPException: 404 when ``get_case_conn`` raises
            ``FileNotFoundError`` or the ``case_card`` table has no row.
    """
    try:
        with get_case_conn(case_id) as conn:
            card_row = conn.execute("SELECT * FROM case_card LIMIT 1").fetchone()
            sentence_total = conn.execute("SELECT count(*) FROM ude_sentences").fetchone()[0]
            cluster_total = conn.execute("SELECT count(*) FROM ude_clusters").fetchone()[0]
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")

    if not card_row:
        raise HTTPException(404, "案例不存在")

    # Built step by step: card columns may overwrite "caseId", and the two
    # counters overwrite any same-named card column.
    payload = {"caseId": case_id}
    payload.update(dict(card_row))
    payload["udeCount"] = sentence_total
    payload["clusterCount"] = cluster_total
    return payload


@app.delete("/api/cases/{case_id}")
async def delete_case(case_id: str):
    """Delete a case by removing its ``<case_id>.db`` file under ``DATA_DIR``.

    Args:
        case_id: Identifier of the case; used as the database file's stem.

    Returns:
        ``{"deleted": True}`` once the file has been removed.

    Raises:
        HTTPException: 404 when ``case_id`` contains a path separator or no
            matching database file exists.
    """
    from db import DATA_DIR

    # The ID comes straight from the URL and ends up in a path that is
    # unlinked, so refuse anything that could name a file outside DATA_DIR.
    if any(separator in case_id for separator in ("/", "\\")):
        raise HTTPException(404, "案例不存在")

    db_path = DATA_DIR / f"{case_id}.db"
    # Attempt the removal directly instead of exists()-then-unlink(): a
    # concurrent delete between the two calls would otherwise surface as an
    # unhandled FileNotFoundError.
    try:
        db_path.unlink()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")
    return {"deleted": True}


# ═══════════ VOC 关联 ═══════════

@app.post("/api/cases/{case_id}/link-voc")
async def link_voc(case_id: str, req: LinkVocRequest):
    """Link a VOC research ID to a case, after verifying the research opens.

    Args:
        case_id: Identifier of the case to update.
        req: Payload carrying the VOC research ID.

    Returns:
        ``{"linked": True, "vocCommentCount": n}`` where ``n`` is the number
        of VOC comments whose text is longer than 10 characters.

    Raises:
        HTTPException: 404 when the VOC research or the case database cannot
            be opened, or when the case holds no ``case_card`` row to update.
    """
    # Validate the VOC side first so nothing is written for an unknown research.
    try:
        with get_voc_conn(req.vocResearchId) as voc:
            count = voc.execute(
                "SELECT count(*) FROM comments WHERE length(text) > 10 "
            ).fetchone()[0]
    except FileNotFoundError as e:
        raise HTTPException(404, str(e))

    try:
        with get_case_conn(case_id) as conn:
            updated_rows = conn.execute(
                "UPDATE case_card SET voc_research_id = ?", (req.vocResearchId,)
            ).rowcount
            conn.commit()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")

    # An empty case_card means nothing was linked; report it the same way
    # get_case reports a missing card rather than claiming success.
    if updated_rows == 0:
        raise HTTPException(404, "案例不存在")

    return {"linked": True, "vocCommentCount": count}


@app.get("/api/voc/researches")
async def get_voc_researches():
    """Return the VOC research listing from ``db.list_voc_researches``, unmodified."""
    researches = _list_voc_researches()
    return researches


@app.get("/api/cases/{case_id}/voc-comments")
async def get_voc_comments(
    case_id: str,
    page: int = Query(1, ge=1),
    pageSize: int = Query(50, ge=0),
):
    """Read one page of raw comments from the shared VOC data layer (read-only).

    Only comments whose text is longer than 10 characters are counted and
    returned, ordered by ``like_count`` descending.

    Args:
        case_id: Identifier of the case whose linked VOC research is read.
        page: 1-based page number; values below 1 are rejected by validation.
        pageSize: Rows per page; negative values are rejected because a
            negative SQL ``LIMIT`` would lift the bound and return every row.

    Returns:
        ``{"total": ..., "page": ..., "items": [...]}`` where each item holds
        ``id``, ``platform``, ``text``, ``like_count`` and ``published_at``.

    Raises:
        HTTPException: 404 when the case or the VOC research cannot be
            opened; 400 when the case has no linked VOC research.
    """
    try:
        with get_case_conn(case_id) as conn:
            card = conn.execute("SELECT voc_research_id FROM case_card LIMIT 1").fetchone()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")

    if not card or not card["voc_research_id"]:
        raise HTTPException(400, "未关联 VOC 研究")

    offset = (page - 1) * pageSize
    try:
        with get_voc_conn(card["voc_research_id"]) as voc:
            total = voc.execute(
                "SELECT count(*) FROM comments WHERE length(text) > 10 "
            ).fetchone()[0]
            rows = voc.execute("""
                SELECT id, platform, text, like_count, published_at
                FROM comments WHERE length(text) > 10
                ORDER BY like_count DESC
                LIMIT ? OFFSET ?
            """, (pageSize, offset)).fetchall()
    except FileNotFoundError as e:
        raise HTTPException(404, str(e))

    return {"total": total, "page": page, "items": [dict(r) for r in rows]}


# ═══════════ UDE 分析 ═══════════

@app.post("/api/cases/{case_id}/ude/extract")
async def extract_ude(case_id: str, limit: int = Query(0)):
    """Run UDE extraction for a case and return its result as-is.

    Args:
        case_id: Identifier of the case to process.
        limit: Forwarded unchanged to ``run_ude_extraction``.
            NOTE(review): 0 presumably means "no limit" - confirm in
            ``tools.ude_extract``.

    Raises:
        HTTPException: 404 carrying the message of a ``FileNotFoundError``
            raised by the extraction.
    """
    from tools.ude_extract import run_ude_extraction

    try:
        return await run_ude_extraction(case_id, limit)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))


@app.post("/api/cases/{case_id}/ude/cluster")
async def cluster_ude(
    case_id: str,
    eps: float = Query(0.25),
    minSamples: int = Query(3),
    x_dashscope_key: str = Header(None),
):
    """Cluster a case's UDE sentences and return the clustering result as-is.

    Args:
        case_id: Identifier of the case to cluster.
        eps: Forwarded unchanged to ``run_clustering``.
        minSamples: Forwarded unchanged to ``run_clustering``.
        x_dashscope_key: Optional API key supplied as a request header.

    Raises:
        HTTPException: 404 carrying the message of a ``FileNotFoundError``
            raised by the clustering.
    """
    from tools.ude_extract import run_clustering

    # A key sent with the request wins; otherwise fall back to the
    # DASHSCOPE_API_KEY environment variable, and finally to "".
    if x_dashscope_key:
        api_key = x_dashscope_key
    else:
        api_key = os.getenv("DASHSCOPE_API_KEY", "")

    try:
        return run_clustering(case_id, eps, minSamples, dashscope_key=api_key)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))


@app.get("/api/cases/{case_id}/ude/clusters")
async def get_clusters(case_id: str):
    """Return every stored UDE cluster of a case, highest coverage first.

    Raises:
        HTTPException: 404 when ``get_case_conn`` raises ``FileNotFoundError``.
    """
    try:
        with get_case_conn(case_id) as conn:
            cursor = conn.execute("SELECT * FROM ude_clusters ORDER BY coverage DESC")
            cluster_rows = cursor.fetchall()
    except FileNotFoundError:
        raise HTTPException(404, "案例不存在")
    # Rows are converted to plain dicts for the response.
    return list(map(dict, cluster_rows))


@app.get("/api/cases/{case_id}/ude/coverage")
async def get_coverage(case_id: str):
    """Run the coverage scan for a case and return its result as-is.

    Raises:
        HTTPException: 404 carrying the message of a ``FileNotFoundError``
            raised by the scan.
    """
    from tools.ude_extract import run_coverage_scan

    try:
        return run_coverage_scan(case_id)
    except FileNotFoundError as missing:
        raise HTTPException(404, str(missing))


# ═══════════ 健康检查 ═══════════

@app.get("/api/health")
async def health():
    """Report service status and the data directories in use.

    Returns:
        A dict with a fixed ``"ok"`` status, both directory paths as strings,
        and whether the VOC data directory currently exists.
    """
    from db import DATA_DIR, VOC_DATA_DIR

    voc_dir_present = VOC_DATA_DIR.exists()
    report = {
        "status": "ok",
        "vocDataDir": str(VOC_DATA_DIR),
        "vocDataExists": voc_dir_present,
        "caseDataDir": str(DATA_DIR),
    }
    return report


# ═══════════ 启动 ═══════════

if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run as a script.
    import uvicorn

    # PORT from the environment overrides the default of 8093.
    port = int(os.environ.get("PORT", "8093"))
    # Listen on all interfaces.
    uvicorn.run(app, port=port, host="0.0.0.0")