Spaces:

wxy01giser
/

egisinsight

Running

File size: 13,136 Bytes

38ebcd8
 
 
 
 
8a7699c
 
84a332c
bb70ab8
dfa50d5
9ebd152
73dea88
38ebcd8
 
 
 
 
 
dfa50d5
6c4626c
38ebcd8
6c4626c
38ebcd8
73dea88
 
 
 
 
 
 
 
 
 
 
 
 
38ebcd8
63d7752
38ebcd8
9ebd152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84a332c
 
bd56ab7
38ebcd8
 
16bdc7c
1c242d9
e324c0f
19d4c11
 
 
dfa50d5
38ebcd8
63d7752
 
 
73dea88
38ebcd8
 
73dea88
 
 
 
 
dfa50d5
73dea88
63d7752
38ebcd8
 
63d7752
 
 
73dea88
63d7752
 
 
 
73dea88
38ebcd8
63d7752
73dea88
 
 
 
16bdc7c
38ebcd8
73dea88
2a62501
 
 
38ebcd8
8a7699c
 
62d447f
 
 
 
 
 
 
 
 
 
 
 
8a7699c
62d447f
8a7699c
62d447f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a7699c
 
de3f546
63d7752
38ebcd8
63d7752
38ebcd8
63d7752
 
de3f546
63d7752
de3f546
63d7752
de3f546
63d7752
de3f546
63d7752
de3f546
63d7752
38ebcd8
de3f546
aabffe4
73dea88
de3f546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73dea88
 
 
e4d7d00
73dea88
38ebcd8
 
de3f546
38ebcd8
7048c23
 
 
71b16cf
 
7048c23
074add4
7048c23
 
 
9fdc42d
56a0087
84a332c
9fdc42d
 
 
56a0087
074add4
491e0be
62d447f
 
84a332c
 
 
 
 
d10ba8f
84a332c
16bdc7c
84a332c
db02b31
 
 
84a332c
 
 
8a7699c
 
16bdc7c
491e0be
 
074add4
 
 
 
8a7699c
16bdc7c
 
 
19d4c11
074add4
 
 
8a7699c
 
 
 
 
 
 
 
 
 
 
 
7048c23
 
 
 
38ebcd8
7048c23

# main.py
import html
import os
import re
import tempfile

import gradio as gr
import pandas as pd

from analyzer import analyze_teacher_dashboard
from cluster_insight import cluster_and_visualize
from qwen_api import call_qwen
# ================== LLM 教学建议 ==================
def generate_teaching_advice1(sankey_fig, cluster_stats):
    """Build a prompt from the cluster statistics, send it to Qwen, and wrap the reply in HTML.

    Args:
        sankey_fig: Accepted for signature compatibility; not read by this function.
        cluster_stats: Sequence of dicts providing ``cluster_id``, ``keyword``,
            ``size``, ``ratio`` and ``rep_sentence``. Only the first three
            entries are put into the prompt.

    Returns:
        The text returned by ``call_qwen`` wrapped in a styled ``<pre>`` element.
    """
    # Fixed opening of the prompt: role description plus the Sankey summary.
    intro = """
你是一名GIS实验教学专家，基于以下分析结果，生成教学优化方案：
【桑基图分析】
- 学生反馈从 s1→s4 的主要流向：核密度 → 参数设置 → 应用场景
- 最粗路径：核密度分析 → 搜索半径选择 → 城市规划应用
【聚类分析】
"""
    # Fixed closing of the prompt: task list and required output layout.
    requirements = """
【要求】
1. 诊断核心教学痛点（3条）
2. 提出针对性优化措施（微课/演示/作业）
3. 设计 1 个 2 分钟微课脚本（标题+3步演示）
4. 建议 1 个课后作业（验证学生掌握）

【输出格式】
# 教学优化方案
## 1. 核心痛点
## 2. 优化措施
## 3. 微课脚本
## 4. 课后作业
"""
    # Two lines per cluster: a summary line and the (truncated) representative sentence.
    cluster_lines = [
        f"- 聚类 {c['cluster_id']}：{c['keyword']}（{c['size']}条，占{c['ratio']:.1%}）\n"
        f"  代表句：{c['rep_sentence'][:100]}\n"
        for c in cluster_stats[:3]
    ]
    prompt = intro + "".join(cluster_lines) + requirements

    advice = call_qwen(prompt)
    pre_style = (
        "background:#f8f9fa; padding:20px; border-radius:12px; "
        "white-space: pre-wrap; font-family: Microsoft YaHei; line-height:1.6;"
    )
    return f"<pre style='{pre_style}'>{advice}</pre>"

# ================== LLM 教学建议---测试版 ==================
def generate_teaching_advice(sankey_fig, cluster_stats):
    """Return a fixed, pre-written teaching plan wrapped in a styled ``<pre>`` element.

    This is the test variant of the advice generator: it makes no LLM call and
    reads neither argument, so the result is the same for every input.

    Args:
        sankey_fig: Unused; kept so the signature matches the LLM-backed variant.
        cluster_stats: Unused; kept for the same reason.

    Returns:
        An HTML string containing the canned plan.
    """
    pre_open = (
        "<pre style='background:#f8f9fa; padding:20px; border-radius:12px; "
        "white-space: pre-wrap; font-family: Microsoft YaHei; line-height:1.6;'>"
    )
    # Temporary stand-in for the LLM call so the request finishes quickly.
    canned_plan = """
    # 教学优化方案
    ## 1. 核心痛点
    1. 核密度分析的搜索半径参数设置缺乏实操指导
    2. 空间连接功能的应用场景与实操步骤脱节
    3. 栅格计算器的公式编写逻辑讲解不清晰
    
    ## 2. 优化措施
    1. 制作参数设置微课，结合案例演示不同场景下的取值标准
    2. 增加空间连接功能的分步实操视频，配套场景化习题
    3. 提供栅格计算器常用公式模板，附详细注释
    
    ## 3. 微课脚本
    ### 标题：3分钟掌握核密度分析搜索半径设置
    1. （0-30秒）明确搜索半径的核心作用：影响密度场平滑度
    2. （30-90秒）演示城市POI数据的半径设置（500米）：工具位置→参数面板→取值依据
    3. （90-180秒）对比不同半径效果（300米/500米/1000米），总结取值规律
    
    ## 4. 课后作业
    基于提供的城市餐饮POI数据，分别设置300米、500米、1000米搜索半径，生成3张核密度图，分析不同半径对结果的影响并提交报告
    """
    return pre_open + canned_plan + "</pre>"

    # ================== 主分析函数 ==================
def _render_stats_table(cluster_stats):
    """Render the per-cluster statistics as an HTML table with escaped text cells."""
    table_head = """
            <h3 style="color:#1976d2; text-align:center; margin:40px 0 15px;">聚类主题统计</h3>
            <table border="1" style="width:100%; max-width:900px; margin:0 auto; border-collapse: collapse; text-align:center; font-size:14px;">
                <tr style="background:#f0f0f0;"><th>聚类</th><th>主题关键词</th><th>反馈数</th><th>占比</th><th>代表句</th></tr>
        """
    # Keyword and representative sentence come from the uploaded feedback, so
    # they are HTML-escaped. The sentence is truncated BEFORE escaping so an
    # entity such as "&amp;" is never cut in half.
    table_rows = [
        f"""
                <tr>
                    <td>{html.escape(str(s['cluster_id']))}</td>
                    <td><strong>{html.escape(str(s['keyword']))}</strong></td>
                    <td>{html.escape(str(s['size']))}</td>
                    <td>{s['ratio']:.1%}</td>
                    <td style="text-align:left; max-width:400px;">{html.escape(str(s['rep_sentence'][:80]))}...</td>
                </tr>
            """
        for s in cluster_stats
    ]
    return table_head + "".join(table_rows) + "</table>"


def analyze_report(excel_path):
    """Run the Sankey and clustering analyses on an Excel file and build the HTML report.

    Args:
        excel_path: Path of the workbook to analyse. A falsy value (``None``,
            ``""``) short-circuits with a "please upload" message.

    Returns:
        A 6-tuple ``(html_report, sankey_fig, cluster_fig, cluster_stats, sb64, cb64)``.
        The last two are whatever ``analyze_teacher_dashboard`` /
        ``cluster_and_visualize`` return alongside their figures (presumably
        base64-encoded images -- confirm against those modules). On any failure
        the tuple is ``(error_html, None, None, None, "", "")``; this function
        never raises for analysis errors.
    """
    if not excel_path:
        return "请上传 Excel 文件", None, None, None, "", ""

    try:
        sankey_fig, sb64 = analyze_teacher_dashboard(excel_path=excel_path)
        cluster_fig, cb64, cluster_stats = cluster_and_visualize(excel_path=excel_path)
        advice = generate_teaching_advice(sankey_fig, cluster_stats)
        print(f"{sankey_fig}和{sb64}生成成功！")
        print(f"{cluster_fig}和{cb64}生成成功！")

        # Statistics table. Previously each row was an f-string followed by
        # .format(s=s); the second pass re-interpreted any "{" / "}" in the
        # feedback text and could raise. The rows are now formatted once.
        stats_table = _render_stats_table(cluster_stats)

        # Final report: text sections only, the figures are returned separately.
        # `advice` is already an HTML fragment and is embedded as-is.
        html_report = f"""
            <div style="font-family:'Microsoft YaHei',sans-serif; max-width:1000px; margin:40px auto; padding:20px;">
                <h1 style="text-align:center; color:#1e88e5;">EGISInsight</h1>
                <p style="text-align:center; color:#555; font-size:17px;">GIS 教学智能体 · 循证教学优化</p>
                <hr style="border:1px solid #eee; margin:30px 0;">
                {stats_table}
                <div style="padding:25px; background:#f8f9fa; border-radius:12px; margin-top:30px;">
                    {advice}
                </div>
                <p style="text-align:center; color:#999; margin-top:50px; font-size:13px;">
                    EGISInsight © 2025 | 从数据到教学内容改革
                </p>
            </div>
        """
        return html_report, sankey_fig, cluster_fig, cluster_stats, sb64, cb64

    except Exception as e:
        # The failure path returns the same number of values as the success
        # path so callers can always unpack six items. The exception text is
        # escaped because it may echo file content or paths.
        error_html = f"<p style='color:red; text-align:center;'>分析失败：{html.escape(str(e))}</p>"
        return error_html, None, None, None, "", ""


# 核心工具函数：把平台传递的纯文本 → DataFrame（不变）
def parse_text_to_df(raw_text: str):
    """Turn the platform's tab-separated text payload into a 5-column DataFrame.

    Steps:
        1. Strip the text, delete newlines and collapse runs of tabs.
        2. Split on tabs; whitespace-only cells become ``None``.
        3. Drop the first cell (the sheet name, e.g. ``ex01``) and, when
           present, the original header row ``no/s1/s2/s3/s4``.
        4. Regroup the remaining cells five per row, padding a short final row
           with ``None``, and label the columns with the fixed header.
        5. Drop rows whose ``no`` is missing or whose ``s1``-``s4`` are all missing.

    Args:
        raw_text: The raw text as received from the caller.

    Returns:
        A ``pandas.DataFrame`` with columns ``no, s1, s2, s3, s4``.

    Raises:
        ValueError: If no data cells remain after removing the sheet name and
            header. (The original guard for this case could never fire because
            ``str.split`` always returns at least one element.)
    """
    header = ['no', 's1', 's2', 's3', 's4']  # fixed output header
    width = len(header)

    # 1. Normalise the text. Newlines are deleted (not turned into separators),
    #    so this assumes cells are delimited by tabs only -- TODO confirm with
    #    the platform's payload format. strip() already removes leading and
    #    trailing tabs, so no separate edge-trimming pass is needed.
    cleaned_text = re.sub(r'\n+', '', raw_text.strip())
    cleaned_text = re.sub(r'\t+', '\t', cleaned_text)  # collapse repeated tabs

    # 2. Split into cells. "0" is a non-empty string and therefore kept.
    cells = [cell.strip() or None for cell in cleaned_text.split('\t')]

    # 3a. The first cell is the sheet name; discard it.
    cells = cells[1:]
    print(f"已去掉sheet名，剩余元素数量：{len(cells)}")

    # 3b. Discard the original header row when present. ">=" (not ">") so a
    #     payload containing only the header is not mistaken for a data row.
    if len(cells) >= width and cells[0] == 'no':
        cells = cells[width:]

    if not cells:
        raise ValueError("分割后无有效数据，无法解析")

    # 4. Regroup five cells per row. Stepping over the full cell list keeps the
    #    row count and the slice offsets in agreement.
    data_rows = []
    for start in range(0, len(cells), width):
        row = cells[start:start + width]
        row.extend([None] * (width - len(row)))  # pad a short trailing row
        data_rows.append(row)

    # 5. Build the frame and drop unusable rows.
    df = pd.DataFrame(data_rows, columns=header)
    df = df.dropna(subset=['no'], how='any')
    df = df.dropna(subset=['s1', 's2', 's3', 's4'], how='all')

    return df

# ================== 完美兼容版界面（老版本也能居中 + 控制高度）==================
with gr.Blocks(theme=gr.themes.Soft(), title="EGISInsight") as demo:
    gr.Markdown("# GIS实验报告智能分析系统")
    gr.Markdown("**上传学生反馈 Excel → 1秒生成教学决策图 + AI教案**")

    file_input = gr.File(label="上传 ex02.xlsx（需含 s1-s4 列）", file_types=[".xlsx"])

    # Section heading + Sankey plot (sized and centred via the CSS injected below).
    gr.HTML('<h2 style="text-align:center; color:#1976d2; margin:40px 0 10px;">1. 学生反馈流向分析（交互桑基图）</h2>')
    sankey_plot = gr.Plot(elem_id="sankey-plot")          # no height argument; targeted by elem_id

    # Section heading + cluster plot.
    gr.HTML('<h2 style="text-align:center; color:#388e3c; margin:50px 0 10px;">2. 学生反馈主题聚类可视化</h2>')
    cluster_plot = gr.Plot(elem_id="cluster-plot")        # no height argument; targeted by elem_id

    # Text report.
    html_report = gr.HTML()

    # Hidden holder for the raw cluster statistics.
    stats_json = gr.JSON(visible=False)

    # Inject a stylesheet on page load to force plot height and centring.
    demo.load(
        None,
        None,
        None,
        js="""
        () => {
            const style = document.createElement('style');
            style.innerHTML = `
                #sankey-plot, #cluster-plot {
                    height: 560px !important;
                    width: 100% !important;
                    max-width: 1100px !important;
                    margin: 0 auto !important;
                    display: block !important;
                }
                #sankey-plot > div, #cluster-plot > div {
                    height: 100% !important;
                }
            `;
            document.head.appendChild(style);
        }
        """
    )

    def _analyze_for_ui(uploaded_file):
        """Adapt analyze_report's 6-value result to the 4 UI output components."""
        # analyze_report also returns two extra strings used by the API route;
        # passing all six to a 4-component event does not match the outputs list.
        return analyze_report(uploaded_file)[:4]

    file_input.change(
        fn=_analyze_for_ui,
        inputs=file_input,
        outputs=[html_report, sankey_plot, cluster_plot, stats_json]
    )

    gr.Markdown("---")
    gr.Markdown("<p style='text-align:center; color:#666;'>通义千问大模型实时生成教学优化方案</p>")

# if __name__ == "__main__":
#    demo.launch(server_name="0.0.0.0", share=True)

# ================== 大赛平台专用 API（最终修正版）==================
from fastapi import FastAPI, File, UploadFile, Form
import uvicorn
from io import BytesIO

# FastAPI application that hosts the /api/plugin route defined below.
app = FastAPI()

@app.post("/api/plugin")
async def plugin_api(
    file: str = Form(...),
    token: str = Form(...),
    timestamp: str = Form(...),
    signature: str = Form(...)
):
    """Analyse feedback text posted as form data and return the report as JSON.

    The ``file`` field carries the sheet content as tab-separated text. It is
    parsed into a DataFrame, written to a temporary ``.xlsx`` file, and that
    path is handed to ``analyze_report``. The temporary file is removed in a
    ``finally`` clause so it is cleaned up on both success and failure.

    Args:
        file: Tab-separated text of the sheet.
        token: Required form field; not read by this function.
        timestamp: Required form field; not read by this function.
        signature: Required form field; not read by this function.
            NOTE(review): nothing here verifies the signature -- confirm
            whether authentication happens elsewhere.

    Returns:
        A dict with ``code``, ``message`` and ``data``. ``code`` is 200 when
        this function completes without raising and 500 otherwise.
        NOTE(review): ``analyze_report`` reports its own failures by returning
        an error HTML string rather than raising, so such failures still come
        back with code 200.
    """
    temp_file_path = None
    try:
        df = parse_text_to_df(file)
        print(f"解析出 {len(df)} 条有效反馈")

        # Write the frame to a real file because analyze_report takes a path.
        # delete=False keeps the file after the handle closes; removal happens
        # in the finally clause below.
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as temp_file:
            temp_file_path = temp_file.name  # record first so cleanup runs even if the write fails
            df.to_excel(temp_file, index=False)
        print(df)

        html_report, sankey_fig, cluster_fig, stats, sb64, cb64 = analyze_report(temp_file_path)

        print(f"{sankey_fig}生成成功！")
        print(f"{cluster_fig}生成成功！")
        print(f"{sb64}生成成功！")
        print(f"{cb64}生成成功！")

        # The images are returned using the strings analyze_report already
        # produced; the figures themselves are not serialised here.
        return {
            "code": 200,
            "message": "success",
            "data": {
                "html_report": html_report if html_report else "",
                "sankey_image": sb64 if sb64 else "",
                "cluster_image": cb64 if cb64 else "",
                "statistics": stats if stats else []
            }
        }
    except Exception as e:
        error_msg = f"分析失败：{str(e)}"
        print(error_msg)
        return {
            "code": 500,
            "message": error_msg,
            "data": {
                # Same keys as the success payload so clients see one schema.
                "html_report": "",
                "sankey_image": "",
                "cluster_image": "",
                "statistics": [],
                # Legacy key names, kept for clients that still read them.
                "sankey_fig": "",
                "cluster_fig": "",
                "stats": []
            }
        }
    finally:
        # Always remove the temporary workbook; a failed removal is logged
        # instead of turning an otherwise successful response into an error.
        if temp_file_path is not None:
            try:
                os.remove(temp_file_path)
                print(f"临时文件已删除：{temp_file_path}")
            except OSError as cleanup_error:
                print(f"临时文件删除失败：{temp_file_path}（{cleanup_error}）")

# Keep the original Gradio UI: mount it on the FastAPI app at the root path.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Serve the combined app (API route + Gradio UI) on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)