Spaces:

wxy01giser
/

egisinsight

Running

File size: 31,099 Bytes

# main.py
import gradio as gr
from analyzer import analyze_teacher_dashboard
from cluster_insight import cluster_and_visualize
from qwen_api import call_qwen
import pandas as pd
import re
import tempfile  
import os
import base64
import threading
import plotly.graph_objects as go


# Environment detection: DEBUG is True when the HF_SPACE_REPO_ID variable is absent.
# NOTE(review): presumably this variable is set automatically on Hugging Face Spaces — confirm the exact name.
DEBUG = os.getenv("HF_SPACE_REPO_ID") is None
print(f"DEBUG是{DEBUG}")

# ================== Global configuration (single place for URLs and paths) ==================
PLUGIN_DOMAIN = "https://egisinsight.top"  # Public domain, written without a port
PLUGIN_PORT = 7860  # Port the service listens on (used by uvicorn at the bottom of the file)
IMAGE_SAVE_DIR = "./temp_images"  # Directory for generated images; relative to the working directory
# Public URL prefix for saved images; the port is deliberately not part of it.
IMAGE_BASE_URL = f"{PLUGIN_DOMAIN}/temp_images/"

# Create the image directory up front (recursive; no error if it already exists).
os.makedirs(IMAGE_SAVE_DIR, exist_ok=True)
print(f"✅ 图片保存目录：{os.path.abspath(IMAGE_SAVE_DIR)}")
print(f"✅ 图片访问前缀：{IMAGE_BASE_URL}")


# ================== LLM 教学建议 ==================
def generate_teaching_advice1(sankey_fig, cluster_stats):
    """Ask the Qwen model for a teaching-improvement plan and return it as HTML.

    Args:
        sankey_fig: Accepted for signature compatibility; not read here.
        cluster_stats: Sequence of dicts with the keys ``cluster_id``,
            ``keyword``, ``size``, ``ratio`` and ``rep_sentence``. Only the
            first three entries are quoted in the prompt.

    Returns:
        The model's answer wrapped in a styled ``<pre>`` element.
    """
    intro = """
你是一名GIS实验教学专家，基于以下分析结果，生成教学优化方案：
【桑基图分析】
- 学生反馈从 s1→s4 的主要流向：核密度 → 参数设置 → 应用场景
- 最粗路径：核密度分析 → 搜索半径选择 → 城市规划应用
【聚类分析】
"""
    # One two-line bullet per cluster; representative sentences are capped at 100 chars.
    cluster_section = "".join(
        f"- 聚类 {item['cluster_id']}：{item['keyword']}（{item['size']}条，占{item['ratio']:.1%}）\n"
        f"  代表句：{item['rep_sentence'][:100]}\n"
        for item in cluster_stats[:3]
    )
    requirements = """
【要求】
1. 诊断核心教学痛点（3条）
2. 提出针对性优化措施（微课/演示/作业）
3. 设计 1 个 2 分钟微课脚本（标题+3步演示）
4. 建议 1 个课后作业（验证学生掌握）

【输出格式】
# 教学优化方案
## 1. 核心痛点
## 2. 优化措施
## 3. 微课脚本
## 4. 课后作业
"""
    advice = call_qwen(intro + cluster_section + requirements)
    pre_style = (
        "background:#f8f9fa; padding:20px; border-radius:12px; "
        "white-space: pre-wrap; font-family: Microsoft YaHei; line-height:1.6;"
    )
    return f"<pre style='{pre_style}'>{advice}</pre>"

# ================== LLM 教学建议---测试版 ==================
def generate_teaching_advice(sankey_fig, cluster_stats):
    """Build the teaching-advice prompt without calling the LLM (test variant).

    Args:
        sankey_fig: Accepted for signature compatibility; not read here.
        cluster_stats: Sequence of dicts with the keys ``cluster_id``,
            ``keyword``, ``size``, ``ratio`` and ``rep_sentence``. Only the
            first three entries are quoted in the prompt.

    Returns:
        A ``(prompt, html)`` tuple where ``html`` is the same prompt wrapped
        in a styled ``<pre>`` element.
    """
    intro = """
你是一名GIS实验教学专家，基于以下分析结果，生成教学优化方案：
【桑基图分析】
- 学生反馈从 s1→s4 的主要流向：核密度 → 参数设置 → 应用场景
- 最粗路径：核密度分析 → 搜索半径选择 → 城市规划应用
【聚类分析】
"""
    # One two-line bullet per cluster; representative sentences are capped at 100 chars.
    cluster_section = "".join(
        f"- 聚类 {item['cluster_id']}：{item['keyword']}（{item['size']}条，占{item['ratio']:.1%}）\n"
        f"  代表句：{item['rep_sentence'][:100]}\n"
        for item in cluster_stats[:3]
    )
    requirements = """
【要求】
1. 诊断核心教学痛点（3条）
2. 提出针对性优化措施（微课/演示/作业）
3. 设计 1 个 2 分钟微课脚本（标题+3步演示）
4. 建议 1 个课后作业（验证学生掌握）
【输出格式】
# 教学优化方案
## 1. 核心痛点
## 2. 优化措施
## 3. 微课脚本
## 4. 课后作业
"""
    prompt = intro + cluster_section + requirements
    pre_style = (
        "background:#f8f9fa; padding:20px; border-radius:12px; "
        "white-space: pre-wrap; font-family: Microsoft YaHei; line-height:1.6;"
    )
    return prompt, f"<pre style='{pre_style}'>{prompt}</pre>"

    # ================== 主分析函数 ==================
def analyze_report(excel_path):
    """Run the full analysis for one Excel file and build the HTML report.

    Args:
        excel_path: Path of the uploaded Excel file; falsy means "nothing
            uploaded".

    Returns:
        A 7-tuple ``(prompt, html_report, sankey_fig, cluster_fig,
        cluster_stats, sb64, cb64)``. When no file is given or any step
        fails, the tuple keeps the same length: ``prompt``/``sb64``/``cb64``
        are empty strings, the figures and stats are ``None`` and
        ``html_report`` carries the message to display.
    """
    import html  # stdlib; imported locally so this block stays self-contained

    if not excel_path:
        return "", "请上传 Excel 文件", None, None, None, "", ""

    try:
        sankey_fig, sb64 = analyze_teacher_dashboard(excel_path=excel_path)
        cluster_fig, cb64, cluster_stats = cluster_and_visualize(excel_path=excel_path)
        prompt, msgs = generate_teaching_advice(sankey_fig, cluster_stats)
        # Log sizes only: dumping the figure repr and the raw base64 payload
        # floods the log with hundreds of kilobytes per request.
        print(f"桑基图生成成功！（base64 长度：{len(sb64) if sb64 else 0}）")
        print(f"聚类图生成成功！（base64 长度：{len(cb64) if cb64 else 0}）")

        # Statistics table. Cell values come from user-supplied feedback, so
        # they are HTML-escaped. The rows are plain f-strings: a trailing
        # ``.format()`` would re-interpret any "{" / "}" in the feedback text
        # and raise.
        table_parts = ["""
            <h3 style="color:#1976d2; text-align:center; margin:40px 0 15px;">聚类主题统计</h3>
            <table border="1" style="width:100%; max-width:900px; margin:0 auto; border-collapse: collapse; text-align:center; font-size:14px;">
                <tr style="background:#f0f0f0;"><th>聚类</th><th>主题关键词</th><th>反馈数</th><th>占比</th><th>代表句</th></tr>
        """]
        for s in cluster_stats:
            table_parts.append(f"""
                <tr>
                    <td>{html.escape(str(s['cluster_id']))}</td>
                    <td><strong>{html.escape(str(s['keyword']))}</strong></td>
                    <td>{html.escape(str(s['size']))}</td>
                    <td>{s['ratio']:.1%}</td>
                    <td style="text-align:left; max-width:400px;">{html.escape(str(s['rep_sentence'])[:80])}...</td>
                </tr>
            """)
        table_parts.append("</table>")
        stats_table = "".join(table_parts)

        # Final report (text part only; the figures are returned separately).
        html_report = f"""
            <div style="font-family:'Microsoft YaHei',sans-serif; max-width:1000px; margin:40px auto; padding:20px;">
                <h1 style="text-align:center; color:#1e88e5;">EGISInsight</h1>
                <p style="text-align:center; color:#555; font-size:17px;">GIS 教学智能体 · 循证教学优化</p>
                <hr style="border:1px solid #eee; margin:30px 0;">
                {stats_table}
                <div style="padding:25px; background:#f8f9fa; border-radius:12px; margin-top:30px;">
                    {msgs}
                </div>
                <p style="text-align:center; color:#999; margin-top:50px; font-size:13px;">
                    EGISInsight © 2025 | 从数据到教学内容改革
                </p>
            </div>
        """
        return prompt, html_report, sankey_fig, cluster_fig, cluster_stats, sb64, cb64

    except Exception as e:
        # The failure path returns the same 7 slots as the success path so
        # that callers unpacking the result keep working.
        error_html = f"<p style='color:red; text-align:center;'>分析失败：{html.escape(str(e))}</p>"
        return "", error_html, None, None, None, "", ""

def analyze_report_plugin(excel_path):
    """Run the analysis for one Excel file and return the figure-free result.

    Args:
        excel_path: Path of the Excel file; falsy means "nothing uploaded".

    Returns:
        A 5-tuple ``(prompt, html_report, cluster_stats, sb64, cb64)``. When
        no file is given or any step fails, the tuple keeps the same length:
        ``prompt``/``sb64``/``cb64`` are empty strings, ``cluster_stats`` is
        ``None`` and ``html_report`` carries the message to display.
    """
    import html  # stdlib; imported locally so this block stays self-contained

    if not excel_path:
        return "", "请上传 Excel 文件", None, "", ""

    try:
        sankey_fig, sb64 = analyze_teacher_dashboard(excel_path=excel_path)
        cluster_fig, cb64, cluster_stats = cluster_and_visualize(excel_path=excel_path)
        prompt, msgs = generate_teaching_advice(sankey_fig, cluster_stats)
        # Log sizes only: dumping the figure repr and the raw base64 payload
        # floods the log with hundreds of kilobytes per request.
        print(f"桑基图生成成功！（base64 长度：{len(sb64) if sb64 else 0}）")
        print(f"聚类图生成成功！（base64 长度：{len(cb64) if cb64 else 0}）")

        # Statistics table. Cell values come from user-supplied feedback, so
        # they are HTML-escaped. The rows are plain f-strings: a trailing
        # ``.format()`` would re-interpret any "{" / "}" in the feedback text
        # and raise.
        table_parts = ["""
            <h3 style="color:#1976d2; text-align:center; margin:40px 0 15px;">聚类主题统计</h3>
            <table border="1" style="width:100%; max-width:900px; margin:0 auto; border-collapse: collapse; text-align:center; font-size:14px;">
                <tr style="background:#f0f0f0;"><th>聚类</th><th>主题关键词</th><th>反馈数</th><th>占比</th><th>代表句</th></tr>
        """]
        for s in cluster_stats:
            table_parts.append(f"""
                <tr>
                    <td>{html.escape(str(s['cluster_id']))}</td>
                    <td><strong>{html.escape(str(s['keyword']))}</strong></td>
                    <td>{html.escape(str(s['size']))}</td>
                    <td>{s['ratio']:.1%}</td>
                    <td style="text-align:left; max-width:400px;">{html.escape(str(s['rep_sentence'])[:80])}...</td>
                </tr>
            """)
        table_parts.append("</table>")
        stats_table = "".join(table_parts)

        # Final report (text part only).
        html_report = f"""
            <div style="font-family:'Microsoft YaHei',sans-serif; max-width:1000px; margin:40px auto; padding:20px;">
                <h1 style="text-align:center; color:#1e88e5;">EGISInsight</h1>
                <p style="text-align:center; color:#555; font-size:17px;">GIS 教学智能体 · 循证教学优化</p>
                <hr style="border:1px solid #eee; margin:30px 0;">
                {stats_table}
                <div style="padding:25px; background:#f8f9fa; border-radius:12px; margin-top:30px;">
                    {msgs}
                </div>
                <p style="text-align:center; color:#999; margin-top:50px; font-size:13px;">
                    EGISInsight © 2025 | 从数据到教学内容改革
                </p>
            </div>
        """
        return prompt, html_report, cluster_stats, sb64, cb64

    except Exception as e:
        # The failure path returns the same 5 slots as the success path so
        # that callers unpacking the result keep working.
        error_html = f"<p style='color:red; text-align:center;'>分析失败：{html.escape(str(e))}</p>"
        return "", error_html, None, "", ""


# 核心工具函数：把平台传递的纯文本 → DataFrame（不变）
def parse_text_to_df(raw_text: str):
    """Turn the platform's tab-separated plain text into a feedback DataFrame.

    Steps:
        1. Normalise the text: strip it, remove newlines, collapse runs of
           tabs into one and trim tabs at either end.
        2. Split on tabs; whitespace-only cells become ``None``.
        3. Drop the first cell (the sheet name, e.g. ``ex01``) and, when the
           next cell is ``no``, the five original header cells.
        4. Regroup the remaining cells into rows of five, padding a short
           last row with ``None``.
        5. Build the DataFrame with the fixed header and drop rows whose
           ``no`` is missing or whose ``s1``-``s4`` are all missing.

    Args:
        raw_text: The raw text sent by the platform.

    Returns:
        A DataFrame with the columns ``no, s1, s2, s3, s4``.

    Raises:
        ValueError: If ``raw_text`` contains no data at all.
    """
    header = ['no', 's1', 's2', 's3', 's4']  # fixed output header
    width = len(header)

    # 1. Normalise the raw text.
    cleaned_text = re.sub(r'\n+', '', raw_text.strip())
    cleaned_text = re.sub(r'\t+', '\t', cleaned_text)
    cleaned_text = re.sub(r'^\t|\t$', '', cleaned_text)

    # str.split never returns an empty list, so emptiness has to be checked
    # on the text itself for this guard to be reachable.
    if not cleaned_text:
        raise ValueError("分割后无有效数据，无法解析")

    # 2. Split into cells; "0" is a non-empty string and is therefore kept.
    cells = [cell.strip() or None for cell in cleaned_text.split('\t')]

    # 3. Drop the sheet name, then the original header row if present.
    cells = cells[1:]
    print(f"已去掉sheet名，剩余元素数量：{len(cells)}")
    # ">=" (not ">") so that a header-only input yields an empty frame
    # instead of a data row made of the header names.
    if len(cells) >= width and cells[0] == 'no':
        cells = cells[width:]

    # 4. Regroup into rows of `width`, padding the last one if it is short.
    data_rows = []
    for start in range(0, len(cells), width):
        row = cells[start:start + width]
        row.extend([None] * (width - len(row)))
        data_rows.append(row)

    # 5. Build the frame and discard rows without an id or without feedback.
    df = pd.DataFrame(data_rows, columns=header)
    df = df.dropna(subset=['no'], how='any')
    df = df.dropna(subset=['s1', 's2', 's3', 's4'], how='all')

    return df

# ================== 完美兼容版界面（老版本也能居中 + 控制高度）==================
def create_demo():
    """Build the full Gradio UI: upload, two plots, prompt box and report.

    The file upload's ``change`` event calls ``analyze_report``; its seven
    return values are routed, in order, to the prompt box, the HTML report,
    the Sankey plot, the cluster plot, a hidden JSON component and two hidden
    base64 text boxes.

    Returns:
        The configured ``gr.Blocks`` instance (not launched).
    """
    sankey_heading = '<h2 style="text-align:center; color:#1976d2; margin:40px 0 10px;">1. 学生反馈流向分析（交互桑基图）</h2>'
    cluster_heading = '<h2 style="text-align:center; color:#388e3c; margin:50px 0 10px;">2. 学生反馈主题聚类可视化</h2>'

    # JavaScript run on page load: injects a <style> element that fixes the
    # plot height, centres the plots and styles the prompt text box.
    inject_style_js = """
            () => {
                const style = document.createElement('style');
                style.innerHTML = `
                    #sankey-plot, #cluster-plot {
                        height: 560px !important;
                        width: 100% !important;
                        max-width: 1100px !important;
                        margin: 0 auto !important;
                        display: block !important;
                    }
                    #sankey-plot > div, #cluster-plot > div {
                        height: 100% !important;
                    }
                     /* 新增：控制提示词文本框宽度和样式 */
                    /* 提示词文本框样式（完全用 CSS 控制，兼容低版本） */
                    #prompt-textbox {
                    width: 100% !important;
                    max-width: 1100px !important;  /* 和图表同宽，居中显示 */
                    margin: 0 auto 30px !important; /* 居中 + 底部间距 */
                    font-size: 17px !important;    /* 字体大小（核心需求） */
                    line-height: 1.6 !important;   /* 行间距，更易读 */
                    padding: 15px !important;      /* 内边距，不拥挤 */
                    border-radius: 8px !important; /* 圆角，视觉更友好 */
                    border: 1px solid #eee !important; /* 边框，区分区域 */
                    }

                    /* 文本框标签样式（可选，让标签也变大） */
                    #prompt-textbox + label {
                    font-size: 18px !important;
                    font-weight: 600 !important;
                    color: #1976d2 !important;
                    margin-bottom: 10px !important;
                    display: block !important;
                    text-align: center !important;
                    }
                `;
                document.head.appendChild(style);
            }
            """

    with gr.Blocks(theme=gr.themes.Soft(), title="EGISInsight") as demo:
        gr.Markdown("# GIS实验报告智能分析系统")
        gr.Markdown("**上传学生反馈 Excel → 1秒生成教学决策图 + AI教案**")

        upload = gr.File(label="上传 ex02.xlsx（需含 s1-s4 列）", file_types=[".xlsx"])

        # Section 1: Sankey diagram.
        gr.HTML(sankey_heading)
        sankey_view = gr.Plot(elem_id="sankey-plot")

        # Section 2: cluster visualisation.
        gr.HTML(cluster_heading)
        cluster_view = gr.Plot(elem_id="cluster-plot")

        # Read-only box showing the generated prompt (1st return value).
        prompt_view = gr.Textbox(
            label="提示词",
            visible=True,
            interactive=False,
            lines=5,
            max_lines=10,
            elem_id="prompt-textbox",
        )

        # Text report (2nd return value).
        report_view = gr.HTML()

        # Hidden holders for the cluster statistics and both base64 images
        # (5th, 6th and 7th return values).
        stats_view = gr.JSON(visible=False)
        sankey_b64_view = gr.Textbox(visible=False, label="桑基图base64")
        cluster_b64_view = gr.Textbox(visible=False, label="聚类图base64")

        demo.load(None, None, None, js=inject_style_js)

        # Order must match the tuple returned by analyze_report.
        result_targets = [
            prompt_view,
            report_view,
            sankey_view,
            cluster_view,
            stats_view,
            sankey_b64_view,
            cluster_b64_view,
        ]
        upload.change(fn=analyze_report, inputs=upload, outputs=result_targets)

        gr.Markdown("---")
        gr.Markdown("<p style='text-align:center; color:#666;'>通义千问大模型实时生成教学优化方案</p>")
    return demo


def create_demo_plugin():
    """Build the reduced Gradio UI (upload, two plots, report) for mounting.

    ``analyze_report`` returns seven values (prompt, report, two figures,
    statistics, two base64 strings) while this UI only has four output
    components. A small adapter therefore picks the report, the two figures
    and the statistics, in the order the outputs are declared; wiring
    ``analyze_report`` directly would send the prompt to the HTML component
    and an HTML string to a plot.

    Returns:
        The configured ``gr.Blocks`` instance (not launched).
    """

    def _report_for_ui(excel_path):
        """Map analyze_report's 7-tuple onto this UI's four outputs."""
        _prompt, report_html, sankey_fig, cluster_fig, stats, _sb64, _cb64 = analyze_report(excel_path)
        return report_html, sankey_fig, cluster_fig, stats

    with gr.Blocks(theme=gr.themes.Soft(), title="EGISInsight") as demo:
        gr.Markdown("# GIS实验报告智能分析系统")
        gr.Markdown("**上传学生反馈 Excel → 1秒生成教学决策图 + AI教案**")

        file_input = gr.File(label="上传 ex02.xlsx（需含 s1-s4 列）", file_types=[".xlsx"])

        # Heading + Sankey diagram (sized and centred via the injected CSS).
        gr.HTML('<h2 style="text-align:center; color:#1976d2; margin:40px 0 10px;">1. 学生反馈流向分析（交互桑基图）</h2>')
        sankey_plot = gr.Plot(elem_id="sankey-plot")

        # Heading + cluster plot.
        gr.HTML('<h2 style="text-align:center; color:#388e3c; margin:50px 0 10px;">2. 学生反馈主题聚类可视化</h2>')
        cluster_plot = gr.Plot(elem_id="cluster-plot")

        # Text report.
        html_report = gr.HTML()

        # Hidden holder for the cluster statistics.
        stats_json = gr.JSON(visible=False)

        # On page load, inject CSS that fixes the plot height and centres them.
        demo.load(
            None,
            None,
            None,
            js="""
            () => {
                const style = document.createElement('style');
                style.innerHTML = `
                    #sankey-plot, #cluster-plot {
                        height: 560px !important;
                        width: 100% !important;
                        max-width: 1100px !important;
                        margin: 0 auto !important;
                        display: block !important;
                    }
                    #sankey-plot > div, #cluster-plot > div {
                        height: 100% !important;
                    }
                `;
                document.head.appendChild(style);
            }
            """
        )

        # Outputs are listed in the order returned by _report_for_ui.
        file_input.change(
            fn=_report_for_ui,
            inputs=file_input,
            outputs=[html_report, sankey_plot, cluster_plot, stats_json]
        )

        gr.Markdown("---")
        gr.Markdown("<p style='text-align:center; color:#666;'>通义千问大模型实时生成教学优化方案</p>")
    return demo

# if __name__ == "__main__":
#    demo.launch(server_name="0.0.0.0", share=True)

# ================== 大赛平台专用 API（最终修正版）==================
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import FileResponse
import uvicorn
from io import BytesIO

app = FastAPI()
# Temporary image directory; created here if it does not exist yet.
# NOTE(review): the handlers visible in this file read IMAGE_SAVE_DIR, not IMAGE_DIR — confirm whether this duplicate constant is still needed.
IMAGE_DIR = "./temp_images"
os.makedirs(IMAGE_DIR, exist_ok=True)

# ================== 1. 图片访问接口（修复路径匹配）==================
@app.get("/temp_images/{img_name}")
async def get_temp_image(img_name: str):
    """Serve a generated image from IMAGE_SAVE_DIR by its file name.

    Args:
        img_name: Bare file name taken from the URL path.

    Returns:
        A ``FileResponse`` with the image, a 403 response when the name is
        not a plain ``.png``/``.jpg``/``.jpeg`` file name, or a 404 response
        when no such file exists.
    """
    # Imported locally: the module-level imports only bring in FileResponse,
    # so a bare `Response` in the error branches would be an undefined name.
    from fastapi.responses import Response

    media_types = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg"}

    # Validate the name before touching the filesystem: it must be a bare
    # file name (no directory parts) with an allowed image extension.
    extension = os.path.splitext(img_name)[1].lower()
    is_bare_name = os.path.basename(img_name) == img_name and "\\" not in img_name
    if extension not in media_types or not is_bare_name:
        return Response(status_code=403, content="无效的图片格式")

    # Resolve against the image directory using an absolute path.
    img_path = os.path.abspath(os.path.join(IMAGE_SAVE_DIR, img_name))
    print(f"🔍 正在查找图片：{img_path}")

    if not os.path.isfile(img_path):
        print(f"❌ 图片不存在：{img_path}")
        return Response(status_code=404, content="图片不存在")

    # Report the media type that matches the extension actually served.
    return FileResponse(img_path, media_type=media_types[extension])

# ================== 2. 辅助函数：base64转图片并生成URL（修复URL拼接）==================
def base64_to_public_url(base64_str: str, img_name: str) -> str:
    """Decode a base64 image, save it under IMAGE_SAVE_DIR and return its URL.

    Args:
        base64_str: Base64 text, optionally prefixed with
            ``data:image/png;base64,``. URL-safe characters and missing
            padding are tolerated.
        img_name: File name to save as; also appended to IMAGE_BASE_URL.

    Returns:
        The public URL of the saved image, or ``""`` when the input is empty
        or anything goes wrong while decoding or saving.
    """
    if not base64_str:
        print("⚠️  空的base64字符串，跳过图片生成")
        return ""

    data_uri_prefix = "data:image/png;base64,"
    try:
        # Strip the data-URI prefix when present.
        payload = base64_str.split(",")[1] if base64_str.startswith(data_uri_prefix) else base64_str

        # Remove whitespace and map the URL-safe alphabet back to the standard one.
        normalized = payload.strip()
        for old, new in (("\n", ""), ("\r", ""), (" ", ""), ("-", "+"), ("_", "/")):
            normalized = normalized.replace(old, new)

        # Pad to a multiple of four characters to avoid "Incorrect padding".
        normalized += "=" * (-len(normalized) % 4)
        image_bytes = base64.b64decode(normalized)

        # Write the image using an absolute path.
        target_path = os.path.abspath(os.path.join(IMAGE_SAVE_DIR, img_name))
        with open(target_path, "wb") as out:
            out.write(image_bytes)
        print(f"✅ 图片保存成功：{target_path}（大小：{len(image_bytes)/1024:.1f}KB）")

        url = f"{IMAGE_BASE_URL}{img_name}"
        print(f"✅ 图片公网URL：{url}")
        return url

    except Exception as e:
        print(f"❌ 图片生成失败：{str(e)}")
        return ""


# ================== 桑基图转公网URL函数（无需传参，读取全局配置）==================
def plotly_sankey_to_url(fig: go.Figure, IMG_N: str) -> str:
    """Render a Plotly figure to a PNG under IMAGE_SAVE_DIR and return its URL.

    The figure is modified in place before export: a CJK-capable font is
    applied to the title and the global layout, and the size is fixed.

    Args:
        fig: The Plotly figure to export.
        IMG_N: File-name prefix, e.g. ``"sankey"`` or ``"cluster"``.

    Returns:
        The public URL of the saved image, or ``None`` when ``fig`` is not a
        ``go.Figure`` or the export fails.
    """
    import time
    import uuid

    # 1. Validate the input.
    if not isinstance(fig, go.Figure):
        print(f"❌ 输入不是Plotly Figure对象（类型：{type(fig)}）")
        return None

    try:
        # 2. Unique file name: timestamp plus a short random suffix, so two
        #    exports with the same prefix in the same second cannot overwrite
        #    each other.
        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
        img_name = f"{IMG_N}_{timestamp}_{uuid.uuid4().hex[:8]}.png"

        # 3. Force a font that can render Chinese text. The layout's title
        #    object always exists, so the fallback has to test its text.
        existing_title = fig.layout.title.text
        fig.update_layout(
            title=dict(
                text=existing_title or "GIS实践教学改革方向捕捉",
                font=dict(family="SimHei", size=22)
            ),
            font=dict(family="SimHei", size=18),  # global font
            autosize=False,
            width=1186,
            height=798
        )
        # Sankey node labels are styled through the trace-level `textfont`;
        # the `node` object has no `font` property and assigning one raises.
        for trace in fig.data:
            if isinstance(trace, go.Sankey):
                trace.textfont = dict(family="SimHei", size=18)

        # 4. Write the image into the configured directory (absolute path).
        img_path = os.path.abspath(os.path.join(IMAGE_SAVE_DIR, img_name))
        fig.write_image(img_path, engine="kaleido")
        print(f"✅ 图片保存成功：{img_path}（大小：{os.path.getsize(img_path)/1024:.1f}KB）")

        # 5. Build the public URL, normalising the "/" between prefix and name.
        public_url = f"{IMAGE_BASE_URL.rstrip('/')}/{img_name.lstrip('/')}"
        print(f"✅ 图片公网URL：{public_url}")
        return public_url

    except Exception as e:
        print(f"❌ 桑基图转URL失败：{str(e)}")
        # Hints for the most common failure causes.
        if "font" in str(e).lower():
            print(f"📌 建议：尝试更换中文字体为 'WenQuanYi Zen Hei'（适配Linux环境）")
        elif "engine" in str(e).lower():
            print(f"📌 建议：将 engine 改为 'orca'，并执行 conda install -c plotly plotly-orca 安装")
        return None
        
@app.post("/api/plugin")
async def plugin_api(
    file: str = Form(...),
    token: str = Form(...),
    timestamp: str = Form(...),
    signature: str = Form(...)
):
    """Platform plugin endpoint: analyse tab-separated feedback text.

    The text is parsed into a DataFrame, written to a temporary Excel file,
    analysed with ``analyze_report`` and both figures are exported to public
    image URLs.

    Args:
        file: The feedback table as tab-separated plain text.
        token: Form field sent by the platform; not read here.
        timestamp: Form field sent by the platform; not read here.
        signature: Form field sent by the platform; not read here.

    Returns:
        A dict with the string fields ``code`` and ``message`` plus a
        ``data`` object (``html_report``, ``sankey_fig``, ``cluster_fig``,
        ``stats``, ``prompt``). On failure ``code`` is ``"500"`` and ``data``
        carries the error HTML with empty values for the other fields.
    """
    # NOTE(review): token / timestamp / signature are accepted but never
    # verified — confirm whether the platform expects signature validation.

    def _text_or_placeholder(value, limit=None):
        """Stringify a field; missing values and "无" become "无数据"."""
        text = "无" if value is None else str(value)
        if limit is not None:
            text = text[:limit]
        return "无数据" if text == "无" else text

    temp_file_path = None
    try:
        df = parse_text_to_df(file)
        print(f"解析出 {len(df)} 条有效反馈")

        # The analysis functions take a path, so the frame goes through a
        # real temporary Excel file (removed in the `finally` below).
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
            temp_file_path = temp_file.name
            df.to_excel(temp_file, index=False)

        prompt, html_report, sankey_fig, cluster_fig, stats, _sb64, _cb64 = analyze_report(temp_file_path)

        # Export both figures; a failed export yields "" so the fields stay
        # strings as the plugin definition requires.
        sankey_url = plotly_sankey_to_url(sankey_fig, "sankey") or ""
        cluster_url = plotly_sankey_to_url(cluster_fig, "cluster") or ""
        print(f"✅ 图片URL生成：桑基图={sankey_url}，聚类图={cluster_url}")

        # Format `stats` to the five fields the plugin defines:
        # cluster_id, keyword, size, ratio, rep_sentence.
        formatted_stats = []
        if stats and isinstance(stats, list):
            for idx, s in enumerate(stats):
                ratio = s.get("ratio")
                formatted_stats.append({
                    "cluster_id": _text_or_placeholder(s.get("cluster_id", idx)),  # String
                    "keyword": _text_or_placeholder(s.get("keyword")),              # String
                    "size": int(s.get("size") or 0),                                # Number
                    "ratio": f"{ratio:.1%}" if ratio is not None else "0.0%",       # String, e.g. "30.5%"
                    "rep_sentence": _text_or_placeholder(s.get("rep_sentence"), limit=200),  # String, max 200 chars
                })

        return {
            "code": "200",  # String, as the plugin definition requires
            "message": "success",
            "data": {
                "html_report": html_report.strip() if html_report else "",
                "sankey_fig": sankey_url,    # image URL
                "cluster_fig": cluster_url,  # image URL
                "stats": formatted_stats,
                "prompt": prompt
            }
        }
    except Exception as e:
        error_msg = f"分析失败：{str(e)}"
        print(error_msg)
        return {
            "code": "500",  # String, same type as the success response
            "message": error_msg,
            "data": {
                "images": [],
                "html_report": f"<p style='color:red; text-align:center; font-size:18px;'>{error_msg}</p>",
                "sankey_fig": "",
                "cluster_fig": "",
                "stats": [],
                "prompt": "",
                "teaching_advice": ""
            }
        }
    finally:
        # Always remove the temporary file, including when writing or
        # analysing it raised.
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.remove(temp_file_path)
                print(f"临时文件已删除：{temp_file_path}")
            except OSError as cleanup_error:
                print(f"临时文件删除失败：{cleanup_error}")

# ================== 以下是你的原有函数（不变，确保正常调用）==================
# LLM教学建议函数（generate_teaching_advice1 / generate_teaching_advice）
# 主分析函数（analyze_report）
# 数据解析函数（parse_text_to_df）
# 桑基图生成函数（plot_sankey_from_df）
# Gradio界面配置（demo）

# if DEBUG:
#     # 本地模式：仅启动 Gradio 服务（不用 uvicorn）
#     if __name__ == "__main__":
#         demo = create_demo()
#         demo.launch(server_name="0.0.0.0", share=True)
# else:
#     # 线上 HF 模式：启动 FastAPI+Gradio 整合服务（用 uvicorn）
#     demo = create_demo_plugin()
#     app = gr.mount_gradio_app(app, demo, path="/gradio")
#     if __name__ == "__main__":
        
#         print(f"🚀 服务启动：{PLUGIN_DOMAIN}:{PLUGIN_PORT}")
#         print(f"📌 API路径：{PLUGIN_DOMAIN}:{PLUGIN_PORT}/api/plugin")
#         print(f"🖼️  图片访问路径：{PLUGIN_DOMAIN}:{PLUGIN_PORT}/temp_images/")
#         uvicorn.run(app, host="0.0.0.0", port=PLUGIN_PORT)


# Keep the original UI: build the reduced Gradio interface and mount it on the FastAPI app.
demo = create_demo_plugin()
app = gr.mount_gradio_app(app, demo, path="/hidden-gradio")
if __name__ == "__main__":
    
    # Earlier variant (kept for reference): start the Gradio prototype on its own,
    # either directly or in a daemon thread next to the API server.
    # demo = create_demo()
    # demo.launch(server_name="0.0.0.0", share=True)
    # gradio_thread = threading.Thread(
    #     target=lambda: demo.launch(
    #         server_name="0.0.0.0",
    #         server_port=7860,  # port for the standalone Gradio prototype
    #         share=False  # set to True for a public share link (creates a separate URL)
    #     ),
    #     daemon=True  # stops automatically when the main program exits
    # )
    # gradio_thread.start()
    
    # Print the service, API and image URLs, then serve the combined app with uvicorn.
    print(f"🚀 服务启动：{PLUGIN_DOMAIN}:{PLUGIN_PORT}")
    print(f"📌 API路径：{PLUGIN_DOMAIN}:{PLUGIN_PORT}/api/plugin")
    print(f"🖼️  图片访问路径：{PLUGIN_DOMAIN}:{PLUGIN_PORT}/temp_images/")
    uvicorn.run(app, host="0.0.0.0", port=PLUGIN_PORT)