Spaces:
Running
Running
Upload 8 files
Browse files- .gitattributes +1 -0
- figures/alpha_diff_target.svg +2554 -0
- figures/entropy_diff_target.svg +2981 -0
- figures/entropy_quadrant.svg +0 -0
- figures/exp_entropy.svg +0 -0
- figures/exp_reward.svg +0 -0
- figures/exp_val.svg +1797 -0
- figures/overview.png +3 -0
- index.html +573 -19
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
figures/overview.png filter=lfs diff=lfs merge=lfs -text
|
figures/alpha_diff_target.svg
ADDED
|
|
figures/entropy_diff_target.svg
ADDED
|
|
figures/entropy_quadrant.svg
ADDED
|
|
figures/exp_entropy.svg
ADDED
|
|
figures/exp_reward.svg
ADDED
|
|
figures/exp_val.svg
ADDED
|
|
figures/overview.png
ADDED
|
Git LFS Details
|
index.html
CHANGED
|
@@ -1,19 +1,573 @@
|
|
| 1 |
-
<!
|
| 2 |
-
<html>
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>EntroPIC: Stable Long-Term Training of LLMs</title>
|
| 7 |
+
|
| 8 |
+
<!-- Tailwind CSS -->
|
| 9 |
+
<script src="https://cdn.tailwindcss.com"></script>
|
| 10 |
+
|
| 11 |
+
<!-- MathJax -->
|
| 12 |
+
<script>
|
| 13 |
+
MathJax = {
|
| 14 |
+
tex: {
|
| 15 |
+
inlineMath: [['$', '$'], ['\\(', '\\)']],
|
| 16 |
+
displayMath: [['$$', '$$'], ['\\[', '\\]']],
|
| 17 |
+
processEscapes: true
|
| 18 |
+
},
|
| 19 |
+
options: {
|
| 20 |
+
ignoreHtmlClass: 'tex2jax_ignore',
|
| 21 |
+
processHtmlClass: 'tex2jax_process'
|
| 22 |
+
}
|
| 23 |
+
};
|
| 24 |
+
</script>
|
| 25 |
+
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
|
| 26 |
+
|
| 27 |
+
<!-- Plotly.js -->
|
| 28 |
+
<script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
|
| 29 |
+
|
| 30 |
+
<!-- Fonts & Icons -->
|
| 31 |
+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
|
| 32 |
+
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
|
| 33 |
+
|
| 34 |
+
<script>
|
| 35 |
+
tailwind.config = {
|
| 36 |
+
theme: {
|
| 37 |
+
extend: {
|
| 38 |
+
fontFamily: {
|
| 39 |
+
sans: ['Inter', 'sans-serif'],
|
| 40 |
+
mono: ['JetBrains Mono', 'monospace'],
|
| 41 |
+
},
|
| 42 |
+
colors: {
|
| 43 |
+
tencent: '#0052D9',
|
| 44 |
+
primary: '#2c3e50',
|
| 45 |
+
}
|
| 46 |
+
}
|
| 47 |
+
}
|
| 48 |
+
}
|
| 49 |
+
</script>
|
| 50 |
+
<style>
|
| 51 |
+
body {
|
| 52 |
+
/* Soft background texture */
|
| 53 |
+
background-color: #f8fafc;
|
| 54 |
+
background-image: radial-gradient(#e2e8f0 1px, transparent 1px);
|
| 55 |
+
background-size: 24px 24px;
|
| 56 |
+
}
|
| 57 |
+
.gradient-text {
|
| 58 |
+
background: linear-gradient(135deg, #0052D9 0%, #0ea5e9 100%);
|
| 59 |
+
-webkit-background-clip: text;
|
| 60 |
+
-webkit-text-fill-color: transparent;
|
| 61 |
+
}
|
| 62 |
+
/* Highlight color for the emphasized letters in the title */
|
| 63 |
+
.highlight-acronym {
|
| 64 |
+
color: #0052D9;
|
| 65 |
+
font-weight: 700;
|
| 66 |
+
}
|
| 67 |
+
.math-scroll {
|
| 68 |
+
overflow-x: auto;
|
| 69 |
+
overflow-y: hidden;
|
| 70 |
+
max-width: 100%;
|
| 71 |
+
padding: 8px 0;
|
| 72 |
+
}
|
| 73 |
+
.math-scroll::-webkit-scrollbar {
|
| 74 |
+
height: 6px;
|
| 75 |
+
}
|
| 76 |
+
.math-scroll::-webkit-scrollbar-thumb {
|
| 77 |
+
background: #cbd5e1;
|
| 78 |
+
border-radius: 3px;
|
| 79 |
+
}
|
| 80 |
+
.reflection-highlight {
|
| 81 |
+
background-color: #f0fdf4; border-left: 4px solid #22c55e; padding: 0.75rem; margin: 0.5rem 0; border-radius: 0 0.5rem 0.5rem 0;
|
| 82 |
+
}
|
| 83 |
+
.correction-highlight {
|
| 84 |
+
background-color: #f5f3ff; border-left: 4px solid #8b5cf6; padding: 0.75rem; margin: 0.5rem 0; border-radius: 0 0.5rem 0.5rem 0;
|
| 85 |
+
}
|
| 86 |
+
/* Author link styles */
|
| 87 |
+
.author-link {
|
| 88 |
+
color: #0052D9;
|
| 89 |
+
text-decoration: none;
|
| 90 |
+
transition: color 0.2s;
|
| 91 |
+
}
|
| 92 |
+
.author-link:hover {
|
| 93 |
+
color: #00308F;
|
| 94 |
+
text-decoration: underline;
|
| 95 |
+
}
|
| 96 |
+
</style>
|
| 97 |
+
</head>
|
| 98 |
+
<body class="text-slate-800 antialiased">
|
| 99 |
+
|
| 100 |
+
<!-- Decorative Background Blur -->
|
| 101 |
+
<div class="fixed top-0 left-0 right-0 h-96 bg-gradient-to-b from-blue-50/80 to-transparent -z-10 pointer-events-none"></div>
|
| 102 |
+
|
| 103 |
+
<!-- Header -->
|
| 104 |
+
<header class="pt-24 pb-12">
|
| 105 |
+
<div class="max-w-6xl mx-auto px-4 text-center">
|
| 106 |
+
<!-- Title -->
|
| 107 |
+
<h1 class="text-4xl md:text-5xl lg:text-6xl font-bold tracking-tight mb-6 leading-tight">
|
| 108 |
+
<span class="gradient-text">EntroPIC</span>: Towards Stable Long-Term Training of LLMs via
|
| 109 |
+
<span class="highlight-acronym">Entro</span>py Stabilization with <br class="hidden lg:block">
|
| 110 |
+
<span class="highlight-acronym">P</span>roportional-<span class="highlight-acronym">I</span>ntegral <span class="highlight-acronym">C</span>ontrol
|
| 111 |
+
</h1>
|
| 112 |
+
|
| 113 |
+
<!-- Authors -->
|
| 114 |
+
<div class="text-lg mb-6 text-slate-700 font-medium leading-relaxed">
|
| 115 |
+
<div class="flex flex-wrap justify-center items-center gap-x-4 gap-y-2">
|
| 116 |
+
<span><a href="https://yk7333.github.io/" target="_blank" class="author-link">Kai Yang</a><sup>1</sup>,</span>
|
| 117 |
+
<span><a href="https://xinxu-ustc.github.io/" target="_blank" class="author-link">Xin Xu</a><sup>1,2</sup>,</span>
|
| 118 |
+
<span><a href="" target="_blank" class="author-link">Yangkun Chen</a><sup>1</sup>,</span>
|
| 119 |
+
<span><a href="https://github.com/autoliuweijie" target="_blank" class="author-link">Weijie Liu</a><sup>1</sup>,</span>
|
| 120 |
+
<span><a href="https://dmksjfl.github.io/" target="_blank" class="author-link">Jiafei Lyu</a><sup>1</sup>,</span><br class="hidden md:block">
|
| 121 |
+
<span><a href="" target="_blank" class="author-link">Zichuan Lin</a><sup>1</sup>,</span>
|
| 122 |
+
<span><a href="" target="_blank" class="author-link">Deheng Ye</a><sup>1</sup>,</span>
|
| 123 |
+
<span><a href="https://github.com/yangsaiyong" target="_blank" class="author-link">Saiyong Yang</a><sup>1†</sup></span>
|
| 124 |
+
</div>
|
| 125 |
+
<div class="mt-4 text-base text-slate-500">
|
| 126 |
+
<span class="mr-4">Tencent Hunyuan</span>
|
| 127 |
+
<span>HKUST</span>
|
| 128 |
+
</div>
|
| 129 |
+
<div class="mt-1 text-xs text-slate-400">† Corresponding Author</div>
|
| 130 |
+
</div>
|
| 131 |
+
|
| 132 |
+
<!-- Action Buttons -->
|
| 133 |
+
<div class="flex justify-center gap-4 mt-8">
|
| 134 |
+
<a href="#" class="flex items-center gap-2 px-8 py-3 bg-slate-900 text-white rounded-full hover:bg-slate-800 transition shadow-xl hover:shadow-2xl transform hover:-translate-y-0.5 duration-200">
|
| 135 |
+
<i class="fas fa-file-pdf"></i> Paper
|
| 136 |
+
</a>
|
| 137 |
+
<a href="#" class="flex items-center gap-2 px-8 py-3 bg-white border border-slate-200 text-slate-800 rounded-full hover:bg-slate-50 transition shadow-md hover:shadow-lg transform hover:-translate-y-0.5 duration-200">
|
| 138 |
+
<i class="fab fa-github text-xl"></i> Code
|
| 139 |
+
</a>
|
| 140 |
+
</div>
|
| 141 |
+
</div>
|
| 142 |
+
</header>
|
| 143 |
+
|
| 144 |
+
<main class="max-w-6xl mx-auto px-4 py-10 space-y-24">
|
| 145 |
+
|
| 146 |
+
<!-- Abstract -->
|
| 147 |
+
<section>
|
| 148 |
+
<div class="bg-white/80 backdrop-blur-sm p-8 md:p-10 rounded-3xl shadow-lg border border-white/50 text-justify text-slate-600 leading-relaxed">
|
| 149 |
+
<h2 class="text-2xl font-bold mb-4 text-slate-800">Abstract</h2>
|
| 150 |
+
Long-term training of large language models (LLMs) requires maintaining stable exploration to prevent the model from collapsing into sub-optimal behaviors. Entropy is crucial in this context. However, existing RL methods struggle to maintain an appropriate level of entropy as positive samples reduce it while negative samples increase it.
|
| 151 |
+
<br><br>
|
| 152 |
+
We propose <strong>EntroPIC</strong>, a novel method that uses Proportional-Integral (PI) control to adaptively adjust the loss coefficients of positive and negative samples. This stabilizes entropy throughout training, ensuring efficient exploration and steady progress.
|
| 153 |
+
</div>
|
| 154 |
+
|
| 155 |
+
<div class="mt-10 transform hover:scale-[1.01] transition duration-500">
|
| 156 |
+
<!-- The overview figure is committed as figures/overview.png; onerror is cleared before swapping to the placeholder so a failing placeholder cannot retrigger the handler. -->
<img src="figures/overview.png" alt="EntroPIC Overview" class="w-full rounded-2xl shadow-xl border border-slate-200/60" onerror="this.onerror=null;this.src='https://placehold.co/1200x400?text=Overview+Figure+(Ensure+figures/overview.png+exists)'">
|
| 157 |
+
<p class="text-center text-sm text-slate-400 mt-3">Figure 1: EntroPIC uses PI control to dynamically adjust sample weights based on entropy error.</p>
|
| 158 |
+
</div>
|
| 159 |
+
</section>
|
| 160 |
+
|
| 161 |
+
<!-- Method Section -->
|
| 162 |
+
<section id="method">
|
| 163 |
+
<h2 class="text-3xl font-bold mb-12 text-center text-slate-800">Methodology</h2>
|
| 164 |
+
|
| 165 |
+
<!-- 1. Quadrant Analysis -->
|
| 166 |
+
<div class="mb-16 bg-white rounded-3xl shadow-sm border border-slate-100 p-8">
|
| 167 |
+
<div class="grid grid-cols-1 md:grid-cols-12 gap-12 items-center">
|
| 168 |
+
<div class="md:col-span-5">
|
| 169 |
+
<h3 class="text-xl font-bold mb-4 text-slate-800 flex items-center gap-2">
|
| 170 |
+
<span class="w-8 h-8 bg-blue-100 text-blue-600 rounded-lg flex items-center justify-center text-sm">01</span>
|
| 171 |
+
High-Probability Tokens Matter
|
| 172 |
+
</h3>
|
| 173 |
+
<p class="text-slate-600 leading-relaxed mb-6">
|
| 174 |
+
Not all tokens affect entropy equally. Our analysis reveals distinct impacts based on token probability and advantage. EntroPIC focuses control where it matters most:
|
| 175 |
+
</p>
|
| 176 |
+
<ul class="space-y-3">
|
| 177 |
+
<li class="flex items-start gap-3 p-3 bg-green-50 rounded-xl border border-green-100">
|
| 178 |
+
<i class="fas fa-arrow-trend-down text-green-600 mt-1"></i>
|
| 179 |
+
<span class="text-sm text-slate-700"><strong class="text-green-700">Positive High-Prob:</strong> Standard RL drops entropy too fast. We reduce their weight to maintain exploration.</span>
|
| 180 |
+
</li>
|
| 181 |
+
<li class="flex items-start gap-3 p-3 bg-red-50 rounded-xl border border-red-100">
|
| 182 |
+
<i class="fas fa-shield-alt text-red-500 mt-1"></i>
|
| 183 |
+
<span class="text-sm text-slate-700"><strong class="text-red-700">Negative Low-Prob:</strong> We avoid suppressing rare explorations to prevent degradation.</span>
|
| 184 |
+
</li>
|
| 185 |
+
</ul>
|
| 186 |
+
</div>
|
| 187 |
+
<div class="md:col-span-7">
|
| 188 |
+
<div class="rounded-xl overflow-hidden">
|
| 189 |
+
<img src="figures/entropy_quadrant.svg" alt="Entropy Quadrant Analysis" class="w-full" onerror="this.src='https://placehold.co/600x400?text=Entropy+Quadrant+Analysis'">
|
| 190 |
+
</div>
|
| 191 |
+
</div>
|
| 192 |
+
</div>
|
| 193 |
+
</div>
|
| 194 |
+
|
| 195 |
+
<!-- 2. PI Control Dynamics -->
|
| 196 |
+
<div class="bg-white rounded-3xl shadow-sm border border-slate-100 p-8">
|
| 197 |
+
<div class="text-center max-w-3xl mx-auto mb-10">
|
| 198 |
+
<h3 class="text-xl font-bold mb-4 flex items-center justify-center gap-2">
|
| 199 |
+
<span class="w-8 h-8 bg-blue-100 text-blue-600 rounded-lg flex items-center justify-center text-sm">02</span>
|
| 200 |
+
Precise Control at Any Target
|
| 201 |
+
</h3>
|
| 202 |
+
<p class="text-slate-600">
|
| 203 |
+
Unlike static coefficients, EntroPIC's PI controller dynamically adjusts $\alpha$ to lock entropy to <strong>any arbitrary target value</strong>.
|
| 204 |
+
</p>
|
| 205 |
+
</div>
|
| 206 |
+
<div class="grid grid-cols-1 md:grid-cols-2 gap-8">
|
| 207 |
+
<div class="bg-slate-50 p-4 rounded-2xl border border-slate-100">
|
| 208 |
+
<img src="figures/entropy_diff_target.svg" alt="Entropy at Different Targets" class="w-full" onerror="this.src='https://placehold.co/600x400?text=Entropy+Convergence'">
|
| 209 |
+
<p class="text-center text-sm font-bold text-slate-700 mt-3">Entropy Convergence</p>
|
| 210 |
+
</div>
|
| 211 |
+
<div class="bg-slate-50 p-4 rounded-2xl border border-slate-100">
|
| 212 |
+
<img src="figures/alpha_diff_target.svg" alt="Alpha Dynamics" class="w-full" onerror="this.src='https://placehold.co/600x400?text=Alpha+Coefficient+Dynamics'">
|
| 213 |
+
<p class="text-center text-sm font-bold text-slate-700 mt-3">Adaptive Coefficient ($\alpha$)</p>
|
| 214 |
+
</div>
|
| 215 |
+
</div>
|
| 216 |
+
</div>
|
| 217 |
+
</section>
|
| 218 |
+
|
| 219 |
+
<!-- Experimental Results -->
|
| 220 |
+
<section id="results">
|
| 221 |
+
<h2 class="text-3xl font-bold mb-12 text-center text-slate-800">Experimental Results</h2>
|
| 222 |
+
|
| 223 |
+
<!-- Training Curves -->
|
| 224 |
+
<div class="grid grid-cols-1 md:grid-cols-3 gap-6 mb-12">
|
| 225 |
+
<div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition">
|
| 226 |
+
<img src="figures/exp_entropy.svg" class="w-full rounded-xl" onerror="this.src='https://placehold.co/400x300?text=Entropy+Curve'">
|
| 227 |
+
<p class="text-center text-sm font-bold mt-3 text-slate-600">Entropy Stability</p>
|
| 228 |
+
</div>
|
| 229 |
+
<div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition">
|
| 230 |
+
<img src="figures/exp_reward.svg" class="w-full rounded-xl" onerror="this.src='https://placehold.co/400x300?text=Reward+Curve'">
|
| 231 |
+
<p class="text-center text-sm font-bold mt-3 text-slate-600">Training Reward</p>
|
| 232 |
+
</div>
|
| 233 |
+
<div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition">
|
| 234 |
+
<img src="figures/exp_val.svg" class="w-full rounded-xl" onerror="this.src='https://placehold.co/400x300?text=Validation+Accuracy'">
|
| 235 |
+
<p class="text-center text-sm font-bold mt-3 text-slate-600">Validation Accuracy</p>
|
| 236 |
+
</div>
|
| 237 |
+
</div>
|
| 238 |
+
|
| 239 |
+
<!-- Main Results Bar Chart -->
|
| 240 |
+
<div class="bg-white p-8 rounded-3xl shadow-lg border border-slate-100 mb-12">
|
| 241 |
+
<div class="flex flex-col md:flex-row justify-between items-start md:items-end mb-8 gap-4">
|
| 242 |
+
<div>
|
| 243 |
+
<h3 class="text-2xl font-bold text-slate-800">Main Performance Comparison (On-Policy)</h3>
|
| 244 |
+
<p class="text-slate-500 mt-1">Comparing Pass@N rates across mathematical datasets. EntroPIC achieves SOTA.</p>
|
| 245 |
+
</div>
|
| 246 |
+
<div class="px-3 py-1 bg-blue-50 text-blue-700 rounded-lg text-sm font-medium border border-blue-100">
|
| 247 |
+
Metric: Pass@N
|
| 248 |
+
</div>
|
| 249 |
+
</div>
|
| 250 |
+
<div id="chart-main-results" class="w-full h-[500px]"></div>
|
| 251 |
+
</div>
|
| 252 |
+
|
| 253 |
+
<!-- Smaller Charts -->
|
| 254 |
+
<div class="grid grid-cols-1 md:grid-cols-2 gap-8">
|
| 255 |
+
<div class="bg-white p-6 rounded-3xl shadow-sm border border-slate-100">
|
| 256 |
+
<h3 class="font-bold text-slate-700 mb-6 text-center">Off-Policy Training</h3>
|
| 257 |
+
<div id="chart-off-policy" class="w-full h-72"></div>
|
| 258 |
+
</div>
|
| 259 |
+
<div class="bg-white p-6 rounded-3xl shadow-sm border border-slate-100">
|
| 260 |
+
<h3 class="font-bold text-slate-700 mb-6 text-center">High Temperature (T=1.0)</h3>
|
| 261 |
+
<div id="chart-temp" class="w-full h-72"></div>
|
| 262 |
+
</div>
|
| 263 |
+
</div>
|
| 264 |
+
</section>
|
| 265 |
+
|
| 266 |
+
<!-- Interactive Case Study -->
|
| 267 |
+
<section id="case-study" class="scroll-mt-24">
|
| 268 |
+
<div class="bg-slate-900 text-white rounded-3xl p-1 overflow-hidden shadow-2xl ring-1 ring-white/10">
|
| 269 |
+
<div class="p-10 text-center">
|
| 270 |
+
<h2 class="text-3xl font-bold mb-3">Case Study: Reasoning Dynamics</h2>
|
| 271 |
+
<p class="text-slate-400">Witness how high entropy enables <span class="text-green-400 font-semibold">Reflection</span> and <span class="text-purple-400 font-semibold">Self-Correction</span>.</p>
|
| 272 |
+
</div>
|
| 273 |
+
|
| 274 |
+
<div class="bg-white text-slate-800 rounded-2xl m-1 grid grid-cols-1 lg:grid-cols-12 overflow-hidden h-[800px]">
|
| 275 |
+
<!-- Sidebar -->
|
| 276 |
+
<div class="lg:col-span-4 bg-slate-50 border-r border-slate-200 p-6 flex flex-col gap-4 overflow-y-auto">
|
| 277 |
+
<div class="text-sm font-mono bg-white p-5 rounded-xl border border-slate-200 shadow-sm mb-4">
|
| 278 |
+
<strong class="text-slate-400 text-xs uppercase block mb-2 tracking-wider">Problem</strong>
|
| 279 |
+
Let
|
| 280 |
+
$$f(x)=\frac{(x-18)(x-72)(x-98)(x-k)}{x}.$$
|
| 281 |
+
There exist exactly three positive real values of $k$ such that $f$ has a minimum at exactly two real values of $x$. Find the sum of these three values of $k$.
|
| 282 |
+
</div>
|
| 283 |
+
|
| 284 |
+
<div class="space-y-4">
|
| 285 |
+
<button onclick="setCase('entropic')" id="btn-entropic" class="group w-full p-5 rounded-xl border-2 text-left transition-all shadow-md bg-white border-tencent transform scale-[1.02]">
|
| 286 |
+
<div class="font-bold flex justify-between items-center text-lg">
|
| 287 |
+
<span>EntroPIC</span>
|
| 288 |
+
<i class="fas fa-check-circle text-green-500 text-xl"></i>
|
| 289 |
+
</div>
|
| 290 |
+
<div class="text-xs font-bold text-green-600 mt-1 mb-3 uppercase tracking-wide">Correct Answer (240)</div>
|
| 291 |
+
<div class="text-xs text-slate-500 flex flex-wrap gap-2">
|
| 292 |
+
<span class="px-2 py-1 bg-green-100 rounded text-green-700 font-medium">Reflection</span>
|
| 293 |
+
<span class="px-2 py-1 bg-purple-100 rounded text-purple-700 font-medium">Self-Correction</span>
|
| 294 |
+
</div>
|
| 295 |
+
</button>
|
| 296 |
+
|
| 297 |
+
<button onclick="setCase('grpo')" id="btn-grpo" class="group w-full p-5 rounded-xl border-2 text-left transition-all bg-white border-transparent hover:border-slate-200 opacity-60 hover:opacity-100">
|
| 298 |
+
<div class="font-bold flex justify-between items-center text-lg">
|
| 299 |
+
<span>GRPO (Baseline)</span>
|
| 300 |
+
<i class="fas fa-times-circle text-red-400 text-xl"></i>
|
| 301 |
+
</div>
|
| 302 |
+
<div class="text-xs font-bold text-red-500 mt-1 mb-3 uppercase tracking-wide">Incorrect Answer (188)</div>
|
| 303 |
+
<div class="text-xs text-slate-400">
|
| 304 |
+
Linear reasoning, missing edge cases.
|
| 305 |
+
</div>
|
| 306 |
+
</button>
|
| 307 |
+
</div>
|
| 308 |
+
</div>
|
| 309 |
+
|
| 310 |
+
<!-- Content Area -->
|
| 311 |
+
<div class="lg:col-span-8 p-10 bg-white overflow-y-auto relative">
|
| 312 |
+
<div id="case-content" class="prose prose-slate prose-lg max-w-none pb-12">
|
| 313 |
+
<!-- JS Injected -->
|
| 314 |
+
</div>
|
| 315 |
+
<!-- Fade overlay -->
|
| 316 |
+
<div class="absolute bottom-0 left-0 w-full h-24 bg-gradient-to-t from-white to-transparent pointer-events-none"></div>
|
| 317 |
+
</div>
|
| 318 |
+
</div>
|
| 319 |
+
</div>
|
| 320 |
+
</section>
|
| 321 |
+
|
| 322 |
+
<!-- Citation -->
|
| 323 |
+
<section class="max-w-4xl mx-auto">
|
| 324 |
+
<div class="bg-slate-100 text-slate-700 p-8 rounded-3xl border border-slate-200/60">
|
| 325 |
+
<div class="flex justify-between items-center mb-4">
|
| 326 |
+
<h2 class="text-xl font-bold text-slate-900">Citation</h2>
|
| 327 |
+
<!-- writeText() is asynchronous: report success only once the promise resolves, and tell the user when the copy was rejected. -->
<button type="button" onclick="navigator.clipboard.writeText(document.querySelector('pre').innerText).then(() => alert('Copied!')).catch(() => alert('Copy failed. Please select the BibTeX text and copy it manually.'))" class="text-xs bg-white border border-slate-300 px-3 py-1 rounded hover:bg-slate-50 transition">
|
| 328 |
+
Copy BibTeX
|
| 329 |
+
</button>
|
| 330 |
+
</div>
|
| 331 |
+
<pre class="font-mono text-xs md:text-sm overflow-x-auto p-4 bg-white border border-slate-200 rounded-xl text-slate-600 shadow-sm selection:bg-blue-100">
|
| 332 |
+
@article{yang2025entropic,
|
| 333 |
+
title={EntroPIC: Towards Stable Long-Term Training of LLMs via Entropy Stabilization with Proportional-Integral Control},
|
| 334 |
+
author={Yang, Kai and Xu, Xin and Chen, Yangkun and Liu, Weijie and Lyu, Jiafei and Lin, Zichuan and Ye, Deheng and Yang, Saiyong},
|
| 335 |
+
journal={arXiv preprint},
|
| 336 |
+
year={2025}
|
| 337 |
+
}</pre>
|
| 338 |
+
</div>
|
| 339 |
+
</section>
|
| 340 |
+
|
| 341 |
+
</main>
|
| 342 |
+
|
| 343 |
+
<footer class="bg-white border-t border-slate-200 py-12 mt-12">
|
| 344 |
+
<div class="max-w-6xl mx-auto px-4 text-center text-slate-500 text-sm">
|
| 345 |
+
<p class="mb-2">Project page based on EntroPIC.</p>
|
| 346 |
+
<p>© 2025 Tencent Hunyuan. All rights reserved.</p>
|
| 347 |
+
</div>
|
| 348 |
+
</footer>
|
| 349 |
+
|
| 350 |
+
<script>
|
| 351 |
+
// --- Data for Charts ---
|
| 352 |
+
|
| 353 |
+
// 1. Main Results (5 Models)
|
| 354 |
+
const datasets = ['Math', 'AMC', 'AIME24', 'AIME25', 'Olympic', 'Omni-math', 'Overall'];
|
| 355 |
+
|
| 356 |
+
const dataInitial = [97.0, 81.6, 60.0, 53.0, 68.7, 49.3, 68.3];
|
| 357 |
+
const dataGRPO = [97.4, 88.0, 70.0, 53.3, 72.7, 57.6, 73.2];
|
| 358 |
+
const dataNSR = [96.4, 89.2, 63.3, 46.7, 71.3, 56.2, 70.5];
|
| 359 |
+
const dataAEC = [97.8, 89.2, 73.3, 60.0, 72.5, 58.5, 75.2];
|
| 360 |
+
const dataEntroPIC =[97.2, 91.6, 76.7, 66.7, 71.3, 58.4, 77.0];
|
| 361 |
+
|
| 362 |
+
const traceInitial = { x: datasets, y: dataInitial, name: 'Initial', type: 'bar', marker: { color: '#e2e8f0' } };
|
| 363 |
+
const traceGRPO = { x: datasets, y: dataGRPO, name: 'GRPO', type: 'bar', marker: { color: '#94a3b8' } };
|
| 364 |
+
const traceNSR = { x: datasets, y: dataNSR, name: 'NSR', type: 'bar', marker: { color: '#facc15' } };
|
| 365 |
+
const traceAEC = { x: datasets, y: dataAEC, name: 'AEC', type: 'bar', marker: { color: '#4ade80' } };
|
| 366 |
+
const traceEntroPIC = { x: datasets, y: dataEntroPIC, name: 'EntroPIC', type: 'bar', marker: { color: '#0052D9' } };
|
| 367 |
+
|
| 368 |
+
const mainLayout = {
|
| 369 |
+
barmode: 'group',
|
| 370 |
+
bargap: 0.15,
|
| 371 |
+
bargroupgap: 0.05,
|
| 372 |
+
margin: { t: 20, b: 40, l: 50, r: 20 },
|
| 373 |
+
legend: { orientation: 'h', y: 1.1, x: 0.5, xanchor: 'center' },
|
| 374 |
+
yaxis: { title: 'Pass@N (%)', range: [40, 100], gridcolor: '#f1f5f9' },
|
| 375 |
+
xaxis: { title: '', tickfont: { size: 13 } },
|
| 376 |
+
plot_bgcolor: 'rgba(0,0,0,0)',
|
| 377 |
+
paper_bgcolor: 'rgba(0,0,0,0)'
|
| 378 |
+
};
|
| 379 |
+
|
| 380 |
+
// 2. Off-Policy
|
| 381 |
+
const offModels = ['GRPO', 'EntroPIC(P)', 'EntroPIC(PI)'];
|
| 382 |
+
const offAvg = [69.4, 72.2, 73.2];
|
| 383 |
+
|
| 384 |
+
const offTrace = {
|
| 385 |
+
x: offModels, y: offAvg, type: 'bar',
|
| 386 |
+
marker: { color: ['#cbd5e1', '#60a5fa', '#2563eb'] },
|
| 387 |
+
text: offAvg.map(String), textposition: 'auto'
|
| 388 |
+
};
|
| 389 |
+
|
| 390 |
+
// 3. High Temp
|
| 391 |
+
const tempModels = ['GRPO (T=1)', 'EntroPIC (T=1)'];
|
| 392 |
+
const tempAvg = [71.0, 74.7];
|
| 393 |
+
|
| 394 |
+
const tempTrace = {
|
| 395 |
+
x: tempModels, y: tempAvg, type: 'bar',
|
| 396 |
+
marker: { color: ['#f87171', '#10b981'] },
|
| 397 |
+
width: [0.4, 0.4],
|
| 398 |
+
text: tempAvg.map(String), textposition: 'auto'
|
| 399 |
+
};
|
| 400 |
+
|
| 401 |
+
const subLayout = {
|
| 402 |
+
margin: { t: 10, b: 30, l: 40, r: 10 },
|
| 403 |
+
yaxis: { range: [60, 80], title: 'Pass@N (%)', gridcolor: '#f1f5f9' },
|
| 404 |
+
plot_bgcolor: 'rgba(0,0,0,0)',
|
| 405 |
+
paper_bgcolor: 'rgba(0,0,0,0)'
|
| 406 |
+
};
|
| 407 |
+
|
| 408 |
+
// --- Case Study Content ---
|
| 409 |
+
const caseData = {
|
| 410 |
+
grpo: {
|
| 411 |
+
btnId: 'btn-grpo',
|
| 412 |
+
content: `
|
| 413 |
+
<h3 class="font-bold text-2xl mb-6 text-slate-900 border-b pb-2">GRPO Response</h3>
|
| 414 |
+
<div class="space-y-8 text-slate-600">
|
| 415 |
+
<div>
|
| 416 |
+
<p class="mb-2">Alright, I have this problem:</p>
|
| 417 |
+
<div class="bg-slate-50 p-4 rounded-lg border border-slate-200">
|
| 418 |
+
<strong class="text-slate-800 block mb-2">Problem Statement:</strong>
|
| 419 |
+
Let $$f(x) = \\frac{(x-18)(x-72)(x-98)(x-k)}{x}$$
|
| 420 |
+
There exist exactly three positive real values of $k$ such that $f$ has a minimum at exactly two real values of $x$. Find the sum of these three values of $k$.
|
| 421 |
+
</div>
|
| 422 |
+
</div>
|
| 423 |
+
|
| 424 |
+
<div>
|
| 425 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 1: Critical Points</h4>
|
| 426 |
+
<p>Let $N(x) = (x-18)(x-72)(x-98)(x-k)$. Derivative:</p>
|
| 427 |
+
<div class="math-scroll">$$f'(x) = \\frac{N'(x) \\cdot x - N(x)}{x^2}$$</div>
|
| 428 |
+
<p>Set numerator to zero: $N'(x) \\cdot x = N(x)$.</p>
|
| 429 |
+
</div>
|
| 430 |
+
|
| 431 |
+
<div>
|
| 432 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 2: Roots of N(x)</h4>
|
| 433 |
+
<p>If $x$ is a root of $N(x)$ (i.e., $x \\in \\{18,72,98,k\\}$), then $N(x) = 0$. Equation reduces to $N'(x) \\cdot x = 0 \\implies N'(x) = 0$.</p>
|
| 434 |
+
<ul class="list-disc list-inside ml-4 font-mono text-sm bg-slate-50 p-4 rounded border border-slate-100">
|
| 435 |
+
<li>$x = 18 \\implies k = 18$</li>
|
| 436 |
+
<li>$x = 72 \\implies k = 72$</li>
|
| 437 |
+
<li>$x = 98 \\implies k = 98$</li>
|
| 438 |
+
</ul>
|
| 439 |
+
<p class="text-sm italic text-slate-400 mt-2">Note: The model assumes these are valid candidates without verifying the "exactly two minima" condition.</p>
|
| 440 |
+
</div>
|
| 441 |
+
|
| 442 |
+
<div>
|
| 443 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 3: Alternative Form</h4>
|
| 444 |
+
<p>Let $g(x) = (x-18)(x-72)(x-98)$:</p>
|
| 445 |
+
<div class="math-scroll">$$f(x) = g(x) \\left(1 - \\frac{k}{x}\\right)$$</div>
|
| 446 |
+
<p>Critical points satisfy:</p>
|
| 447 |
+
<div class="math-scroll">$$g'(x) \\left(1 - \\frac{k}{x}\\right) + g(x) \\frac{k}{x^2} = 0 \\quad \\Rightarrow \\quad g'(x)x(x-k) + g(x)k = 0$$</div>
|
| 448 |
+
</div>
|
| 449 |
+
|
| 450 |
+
<div>
|
| 451 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 4-5: Valid k Values</h4>
|
| 452 |
+
<p>The condition "exactly two minima" implies the derivative must have a double root. Observing the roots 18, 72, 98, the polynomial structure suggests the valid $k$ are related to these numbers.</p>
|
| 453 |
+
<p>Through analysis:</p>
|
| 454 |
+
<div class="math-scroll">$$k_1 = 18, \\quad k_2 = 72, \\quad k_3 = 98$$</div>
|
| 455 |
+
</div>
|
| 456 |
+
|
| 457 |
+
<div>
|
| 458 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 6: Sum of k</h4>
|
| 459 |
+
<div class="math-scroll text-xl">$$18 + 72 + 98 = 188$$</div>
|
| 460 |
+
</div>
|
| 461 |
+
|
| 462 |
+
<div class="p-6 bg-red-50 border border-red-200 rounded-xl text-center">
|
| 463 |
+
<p class="text-red-500 font-bold uppercase text-xs tracking-wider mb-2">Final Answer</p>
|
| 464 |
+
<span class="font-mono text-xl font-bold text-red-700">188</span>
|
| 465 |
+
</div>
|
| 466 |
+
</div>`
|
| 467 |
+
},
|
| 468 |
+
entropic: {
|
| 469 |
+
btnId: 'btn-entropic',
|
| 470 |
+
content: `
|
| 471 |
+
<h3 class="font-bold text-2xl mb-6 text-slate-900 border-b pb-2">EntroPIC Response</h3>
|
| 472 |
+
<div class="space-y-8 text-slate-600">
|
| 473 |
+
<p>Alright, let's tackle this problem step by step.</p>
|
| 474 |
+
|
| 475 |
+
<div>
|
| 476 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Finding Critical Points $f'(x)=0$</h4>
|
| 477 |
+
<p>Let $N(x) = (x - 18)(x - 72)(x - 98)(x - k)$. The critical points are where $f'(x) = 0$. Using the quotient rule, we set the numerator to zero:</p>
|
| 478 |
+
<div class="math-scroll">$$N'(x) \\cdot x = N(x)$$</div>
|
| 479 |
+
</div>
|
| 480 |
+
|
| 481 |
+
<div>
|
| 482 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Checking Roots of N(x)</h4>
|
| 483 |
+
<p>If $x$ is a root of $N(x)$ (i.e., $x \\in \\{18, 72, 98, k\\}$), then $N(x) = 0$. The condition $N'(x) \\cdot x = N(x)$ becomes $N'(x) \\cdot x = 0$. Since $x \\neq 0$, we require $N'(x) = 0$.</p>
|
| 484 |
+
|
| 485 |
+
<!-- REFLECTION POINT -->
|
| 486 |
+
<div class="reflection-highlight shadow-sm">
|
| 487 |
+
<div class="flex items-center gap-2 mb-2">
|
| 488 |
+
<span class="w-2 h-2 bg-green-500 rounded-full"></span>
|
| 489 |
+
<p class="font-bold text-green-700 text-xs uppercase">Reflection</p>
|
| 490 |
+
</div>
|
| 491 |
+
<p class="text-slate-800 italic">Wait, so $x = 18$ is a critical point of $f$ if and only if $N'(18) = 0$. Let's compute $N'(x)$ and check:</p>
|
| 492 |
+
<div class="math-scroll text-slate-800 mt-2">$$N'(x) = \\frac{d}{dx} \\left[ (x-18)(x-72)(x-98)(x-k) \\right]$$</div>
|
| 493 |
+
<p class="text-sm mt-1">Set $x=18$: $N'(18) = (18-72)(18-98)(18-k)$. $N'(18)=0 \\implies k=18$. Similarly for $x=72$ and $x=98$.</p>
|
| 494 |
+
</div>
|
| 495 |
+
</div>
|
| 496 |
+
|
| 497 |
+
<div>
|
| 498 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Alternative Approach: Rewriting f(x)</h4>
|
| 499 |
+
<p>Alternatively, let $g(x) = (x-18)(x-72)(x-98)$.</p>
|
| 500 |
+
<div class="math-scroll">$$f(x) = g(x) \\left(1 - \\frac{k}{x}\\right)$$</div>
|
| 501 |
+
<p>The critical points satisfy $f'(x) = 0$, which rearranges to: $g'(x) x (x - k) + g(x) k = 0$.</p>
|
| 502 |
+
</div>
|
| 503 |
+
|
| 504 |
+
<div>
|
| 505 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Exploring Symmetry</h4>
|
| 506 |
+
<p>The condition "exactly two real values of $x$ where $f$ has a minimum" implies the quartic $N'(x) \\cdot x - N(x) = 0$ must have a specific root structure. This usually means that two critical points must coincide, leading to a double root.</p>
|
| 507 |
+
|
| 508 |
+
<!-- SELF-CORRECTION POINT -->
|
| 509 |
+
<div class="correction-highlight shadow-sm mt-4">
|
| 510 |
+
<div class="flex items-center gap-2 mb-2">
|
| 511 |
+
<span class="w-2 h-2 bg-purple-500 rounded-full"></span>
|
| 512 |
+
<p class="font-bold text-purple-700 text-xs uppercase">Self-Correction / Deep Check</p>
|
| 513 |
+
</div>
|
| 514 |
+
<p class="text-slate-800">Testing $k = 72$: If $k = 72$, $N(x)$ has a repeated root. This is a potential candidate.</p>
|
| 515 |
+
<p class="text-slate-800 mt-1">The analysis shows that for exactly two minima, the critical point equation must possess a double root for certain $k$.</p>
|
| 516 |
+
</div>
|
| 517 |
+
</div>
|
| 518 |
+
|
| 519 |
+
<div>
|
| 520 |
+
<h4 class="font-bold text-slate-900 text-lg mb-2">Correct Insight: Valid k Values</h4>
|
| 521 |
+
<p>Through systematic polynomial analysis, the three positive real values of $k$ are:</p>
|
| 522 |
+
<div class="math-scroll text-lg">$$k_1 = 46, \\quad k_2 = 72, \\quad k_3 = 122$$</div>
|
| 523 |
+
<p class="text-sm bg-slate-50 p-2 rounded mt-2 border border-slate-100">
|
| 524 |
+
These values are consistent with symmetry properties:
|
| 525 |
+
<br>• $k = 72$ is the middle root.
|
| 526 |
+
<br>• $k = 46$ and $k = 122$ are symmetric counterparts (average of 18 and 98 is 58; $72 - (72-46) = 46$).
|
| 527 |
+
</p>
|
| 528 |
+
</div>
|
| 529 |
+
|
| 530 |
+
<div>
|
| 531 |
+
<p class="font-bold text-lg">The sum of these three values of $k$ is:</p>
|
| 532 |
+
<div class="math-scroll text-xl">$$46 + 72 + 122 = 240$$</div>
|
| 533 |
+
|
| 534 |
+
<div class="p-6 bg-green-50 border border-green-200 rounded-xl text-center mt-4">
|
| 535 |
+
<p class="text-green-600 font-bold uppercase text-xs tracking-wider mb-2">Final Answer</p>
|
| 536 |
+
<span class="font-mono text-xl font-bold text-green-700">240</span>
|
| 537 |
+
</div>
|
| 538 |
+
</div>
|
| 539 |
+
</div>`
|
| 540 |
+
}
|
| 541 |
+
};
|
| 542 |
+
|
| 543 |
+
/**
 * Switch the case-study panel to the response identified by `key`.
 *
 * Resets both selector buttons to the inactive look, highlights the button
 * named by the entry's `btnId`, injects the entry's HTML into `#case-content`,
 * and asks MathJax to typeset the injected markup when the library is ready.
 *
 * @param {string} key - Property name in `caseData` (e.g. 'entropic' or 'grpo').
 *   An unknown key leaves the current selection untouched.
 * @returns {void}
 */
function setCase(key) {
    const data = caseData[key];
    // Guard against an unknown key instead of throwing on `data.btnId`.
    if (!data) {
        return;
    }

    // Reset Buttons
    const inactiveClass = "w-full p-5 rounded-xl border-2 text-left transition-all shadow-sm bg-white border-transparent hover:border-slate-200 opacity-60";
    document.getElementById('btn-entropic').className = inactiveClass;
    document.getElementById('btn-grpo').className = inactiveClass;

    // Activate Button
    const activeBtn = document.getElementById(data.btnId);
    activeBtn.className = "w-full p-5 rounded-xl border-2 text-left transition-all shadow-lg bg-white border-tencent transform scale-[1.02] opacity-100 ring-4 ring-blue-50";

    // Inject Content
    const container = document.getElementById('case-content');
    container.innerHTML = data.content;

    // Rerender Math.
    // `window.MathJax` is already truthy once the inline configuration object has been
    // assigned, which happens before the async library script has finished loading.
    // Check for the function itself so an early call does not throw a TypeError.
    // NOTE(review): MathJax is expected to typeset the whole page on startup, which
    // would cover content injected before it loaded — confirm against the MathJax docs.
    if (window.MathJax && typeof window.MathJax.typesetPromise === 'function') {
        window.MathJax.typesetPromise([container]).catch(function (err) {
            console.error('MathJax typesetting failed', err);
        });
    }
}
|
| 562 |
+
|
| 563 |
+
// Init
|
| 564 |
+
// Once the DOM is parsed, show the default case study and draw the three result charts.
document.addEventListener("DOMContentLoaded", function() {
    // Render the case study first so it does not depend on chart rendering succeeding.
    setCase('entropic');

    // Plotly is loaded from a CDN <script>; if that script did not load, skip the
    // charts instead of throwing a ReferenceError.
    if (typeof Plotly === 'undefined') {
        console.error('Plotly failed to load; charts were not rendered.');
        return;
    }

    // Same display options for every chart: resize with the container, hide the mode bar.
    const plotConfig = {responsive: true, displayModeBar: false};
    Plotly.newPlot('chart-main-results', [traceInitial, traceGRPO, traceNSR, traceAEC, traceEntroPIC], mainLayout, plotConfig);
    Plotly.newPlot('chart-off-policy', [offTrace], subLayout, plotConfig);
    Plotly.newPlot('chart-temp', [tempTrace], subLayout, plotConfig);
});
|
| 571 |
+
</script>
|
| 572 |
+
</body>
|
| 573 |
+
</html>
|