yangkaiSIGS committed on
Commit
ac10c68
·
verified ·
1 Parent(s): 55776ea

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ figures/overview.png filter=lfs diff=lfs merge=lfs -text
figures/alpha_diff_target.svg ADDED
figures/entropy_diff_target.svg ADDED
figures/entropy_quadrant.svg ADDED
figures/exp_entropy.svg ADDED
figures/exp_reward.svg ADDED
figures/exp_val.svg ADDED
figures/overview.png ADDED

Git LFS Details

  • SHA256: 3670e1d080106effbaa03b08b41d36b966c2bad48ecae32a838f37e8af72b0a1
  • Pointer size: 131 Bytes
  • Size of remote file: 321 kB
index.html CHANGED
@@ -1,19 +1,573 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>EntroPIC: Stable Long-Term Training of LLMs</title>
7
+
8
+ <!-- Tailwind CSS -->
9
+ <script src="https://cdn.tailwindcss.com"></script>
10
+
11
+ <!-- MathJax -->
12
+ <script>
13
+ MathJax = {
14
+ tex: {
15
+ inlineMath: [['$', '$'], ['\\(', '\\)']],
16
+ displayMath: [['$$', '$$'], ['\\[', '\\]']],
17
+ processEscapes: true
18
+ },
19
+ options: {
20
+ ignoreHtmlClass: 'tex2jax_ignore',
21
+ processHtmlClass: 'tex2jax_process'
22
+ }
23
+ };
24
+ </script>
25
+ <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
26
+
27
+ <!-- Plotly.js -->
28
+ <script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
29
+
30
+ <!-- Fonts & Icons -->
31
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
32
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
33
+
34
+ <script>
35
+ tailwind.config = {
36
+ theme: {
37
+ extend: {
38
+ fontFamily: {
39
+ sans: ['Inter', 'sans-serif'],
40
+ mono: ['JetBrains Mono', 'monospace'],
41
+ },
42
+ colors: {
43
+ tencent: '#0052D9',
44
+ primary: '#2c3e50',
45
+ }
46
+ }
47
+ }
48
+ }
49
+ </script>
50
+ <style>
51
+ body {
52
+ /* Soft background texture */
53
+ background-color: #f8fafc;
54
+ background-image: radial-gradient(#e2e8f0 1px, transparent 1px);
55
+ background-size: 24px 24px;
56
+ }
57
+ .gradient-text {
58
+ background: linear-gradient(135deg, #0052D9 0%, #0ea5e9 100%);
59
+ -webkit-background-clip: text;
60
+ -webkit-text-fill-color: transparent;
61
+ }
62
+ /* Highlight color for the emphasized letters of the title acronym */
63
+ .highlight-acronym {
64
+ color: #0052D9;
65
+ font-weight: 700;
66
+ }
67
+ .math-scroll {
68
+ overflow-x: auto;
69
+ overflow-y: hidden;
70
+ max-width: 100%;
71
+ padding: 8px 0;
72
+ }
73
+ .math-scroll::-webkit-scrollbar {
74
+ height: 6px;
75
+ }
76
+ .math-scroll::-webkit-scrollbar-thumb {
77
+ background: #cbd5e1;
78
+ border-radius: 3px;
79
+ }
80
+ .reflection-highlight {
81
+ background-color: #f0fdf4; border-left: 4px solid #22c55e; padding: 0.75rem; margin: 0.5rem 0; border-radius: 0 0.5rem 0.5rem 0;
82
+ }
83
+ .correction-highlight {
84
+ background-color: #f5f3ff; border-left: 4px solid #8b5cf6; padding: 0.75rem; margin: 0.5rem 0; border-radius: 0 0.5rem 0.5rem 0;
85
+ }
86
+ /* Author link styles */
87
+ .author-link {
88
+ color: #0052D9;
89
+ text-decoration: none;
90
+ transition: color 0.2s;
91
+ }
92
+ .author-link:hover {
93
+ color: #00308F;
94
+ text-decoration: underline;
95
+ }
96
+ </style>
97
+ </head>
98
+ <body class="text-slate-800 antialiased">
99
+
100
+ <!-- Decorative Background Blur -->
101
+ <div class="fixed top-0 left-0 right-0 h-96 bg-gradient-to-b from-blue-50/80 to-transparent -z-10 pointer-events-none"></div>
102
+
103
+ <!-- Header -->
104
+ <header class="pt-24 pb-12">
105
+ <div class="max-w-6xl mx-auto px-4 text-center">
106
+ <!-- Title -->
107
+ <h1 class="text-4xl md:text-5xl lg:text-6xl font-bold tracking-tight mb-6 leading-tight">
108
+ <span class="gradient-text">EntroPIC</span>: Towards Stable Long-Term Training of LLMs via
109
+ <span class="highlight-acronym">Entro</span>py Stabilization with <br class="hidden lg:block">
110
+ <span class="highlight-acronym">P</span>roportional-<span class="highlight-acronym">I</span>ntegral <span class="highlight-acronym">C</span>ontrol
111
+ </h1>
112
+
113
+ <!-- Authors -->
114
+ <div class="text-lg mb-6 text-slate-700 font-medium leading-relaxed">
115
+ <div class="flex flex-wrap justify-center items-center gap-x-4 gap-y-2">
116
+ <span><a href="https://yk7333.github.io/" target="_blank" class="author-link">Kai Yang</a><sup>1</sup>,</span>
117
+ <span><a href="https://xinxu-ustc.github.io/" target="_blank" class="author-link">Xin Xu</a><sup>1,2</sup>,</span>
118
+ <span><a href="" target="_blank" class="author-link">Yangkun Chen</a><sup>1</sup>,</span>
119
+ <span><a href="https://github.com/autoliuweijie" target="_blank" class="author-link">Weijie Liu</a><sup>1</sup>,</span>
120
+ <span><a href="https://dmksjfl.github.io/" target="_blank" class="author-link">Jiafei Lyu</a><sup>1</sup>,</span><br class="hidden md:block">
121
+ <span><a href="" target="_blank" class="author-link">Zichuan Lin</a><sup>1</sup>,</span>
122
+ <span><a href="" target="_blank" class="author-link">Deheng Ye</a><sup>1</sup>,</span>
123
+ <span><a href="https://github.com/yangsaiyong" target="_blank" class="author-link">Saiyong Yang</a><sup>1†</sup></span>
124
+ </div>
125
+ <div class="mt-4 text-base text-slate-500">
126
+ <span class="mr-4">Tencent Hunyuan</span>
127
+ <span>HKUST</span>
128
+ </div>
129
+ <div class="mt-1 text-xs text-slate-400">† Corresponding Author</div>
130
+ </div>
131
+
132
+ <!-- Action Buttons -->
133
+ <div class="flex justify-center gap-4 mt-8">
134
+ <a href="#" class="flex items-center gap-2 px-8 py-3 bg-slate-900 text-white rounded-full hover:bg-slate-800 transition shadow-xl hover:shadow-2xl transform hover:-translate-y-0.5 duration-200">
135
+ <i class="fas fa-file-pdf"></i> Paper
136
+ </a>
137
+ <a href="#" class="flex items-center gap-2 px-8 py-3 bg-white border border-slate-200 text-slate-800 rounded-full hover:bg-slate-50 transition shadow-md hover:shadow-lg transform hover:-translate-y-0.5 duration-200">
138
+ <i class="fab fa-github text-xl"></i> Code
139
+ </a>
140
+ </div>
141
+ </div>
142
+ </header>
143
+
144
+ <main class="max-w-6xl mx-auto px-4 py-10 space-y-24">
145
+
146
+ <!-- Abstract -->
147
+ <section>
148
+ <div class="bg-white/80 backdrop-blur-sm p-8 md:p-10 rounded-3xl shadow-lg border border-white/50 text-justify text-slate-600 leading-relaxed">
149
+ <h2 class="text-2xl font-bold mb-4 text-slate-800">Abstract</h2>
150
+ Long-term training of large language models (LLMs) requires maintaining stable exploration to prevent the model from collapsing into sub-optimal behaviors. Entropy is crucial in this context. However, existing RL methods struggle to maintain an appropriate level of entropy as positive samples reduce it while negative samples increase it.
151
+ <br><br>
152
+ We propose <strong>EntroPIC</strong>, a novel method that uses Proportional-Integral (PI) control to adaptively adjust the loss coefficients of positive and negative samples. This stabilizes entropy throughout training, ensuring efficient exploration and steady progress.
153
+ </div>
154
+
155
+ <div class="mt-10 transform hover:scale-[1.01] transition duration-500">
156
+ <img src="figures/overview.png" alt="EntroPIC Overview" class="w-full rounded-2xl shadow-xl border border-slate-200/60" onerror="this.src='https://placehold.co/1200x400?text=Overview+Figure+(Ensure+figures/overview.png+exists)'">
157
+ <p class="text-center text-sm text-slate-400 mt-3">Figure 1: EntroPIC uses PI control to dynamically adjust sample weights based on entropy error.</p>
158
+ </div>
159
+ </section>
160
+
161
+ <!-- Method Section -->
162
+ <section id="method">
163
+ <h2 class="text-3xl font-bold mb-12 text-center text-slate-800">Methodology</h2>
164
+
165
+ <!-- 1. Quadrant Analysis -->
166
+ <div class="mb-16 bg-white rounded-3xl shadow-sm border border-slate-100 p-8">
167
+ <div class="grid grid-cols-1 md:grid-cols-12 gap-12 items-center">
168
+ <div class="md:col-span-5">
169
+ <h3 class="text-xl font-bold mb-4 text-slate-800 flex items-center gap-2">
170
+ <span class="w-8 h-8 bg-blue-100 text-blue-600 rounded-lg flex items-center justify-center text-sm">01</span>
171
+ High-Probability Tokens Matter
172
+ </h3>
173
+ <p class="text-slate-600 leading-relaxed mb-6">
174
+ Not all tokens affect entropy equally. Our analysis reveals distinct impacts based on token probability and advantage. EntroPIC focuses control where it matters most:
175
+ </p>
176
+ <ul class="space-y-3">
177
+ <li class="flex items-start gap-3 p-3 bg-green-50 rounded-xl border border-green-100">
178
+ <i class="fas fa-arrow-trend-down text-green-600 mt-1"></i>
179
+ <span class="text-sm text-slate-700"><strong class="text-green-700">Positive High-Prob:</strong> Standard RL drops entropy too fast. We reduce their weight to maintain exploration.</span>
180
+ </li>
181
+ <li class="flex items-start gap-3 p-3 bg-red-50 rounded-xl border border-red-100">
182
+ <i class="fas fa-shield-alt text-red-500 mt-1"></i>
183
+ <span class="text-sm text-slate-700"><strong class="text-red-700">Negative Low-Prob:</strong> We avoid suppressing rare explorations to prevent degradation.</span>
184
+ </li>
185
+ </ul>
186
+ </div>
187
+ <div class="md:col-span-7">
188
+ <div class="rounded-xl overflow-hidden">
189
+ <img src="figures/entropy_quadrant.svg" alt="Entropy Quadrant Analysis" class="w-full" onerror="this.src='https://placehold.co/600x400?text=Entropy+Quadrant+Analysis'">
190
+ </div>
191
+ </div>
192
+ </div>
193
+ </div>
194
+
195
+ <!-- 2. PI Control Dynamics -->
196
+ <div class="bg-white rounded-3xl shadow-sm border border-slate-100 p-8">
197
+ <div class="text-center max-w-3xl mx-auto mb-10">
198
+ <h3 class="text-xl font-bold mb-4 flex items-center justify-center gap-2">
199
+ <span class="w-8 h-8 bg-blue-100 text-blue-600 rounded-lg flex items-center justify-center text-sm">02</span>
200
+ Precise Control at Any Target
201
+ </h3>
202
+ <p class="text-slate-600">
203
+ Unlike static coefficients, EntroPIC's PI controller dynamically adjusts $\alpha$ to lock entropy to <strong>any arbitrary target value</strong>.
204
+ </p>
205
+ </div>
206
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-8">
207
+ <div class="bg-slate-50 p-4 rounded-2xl border border-slate-100">
208
+ <img src="figures/entropy_diff_target.svg" alt="Entropy at Different Targets" class="w-full" onerror="this.src='https://placehold.co/600x400?text=Entropy+Convergence'">
209
+ <p class="text-center text-sm font-bold text-slate-700 mt-3">Entropy Convergence</p>
210
+ </div>
211
+ <div class="bg-slate-50 p-4 rounded-2xl border border-slate-100">
212
+ <img src="figures/alpha_diff_target.svg" alt="Alpha Dynamics" class="w-full" onerror="this.src='https://placehold.co/600x400?text=Alpha+Coefficient+Dynamics'">
213
+ <p class="text-center text-sm font-bold text-slate-700 mt-3">Adaptive Coefficient ($\alpha$)</p>
214
+ </div>
215
+ </div>
216
+ </div>
217
+ </section>
218
+
219
+ <!-- Experimental Results -->
220
+ <section id="results">
221
+ <h2 class="text-3xl font-bold mb-12 text-center text-slate-800">Experimental Results</h2>
222
+
223
+ <!-- Training Curves -->
224
+ <div class="grid grid-cols-1 md:grid-cols-3 gap-6 mb-12">
225
+ <div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition">
226
+ <img src="figures/exp_entropy.svg" class="w-full rounded-xl" onerror="this.src='https://placehold.co/400x300?text=Entropy+Curve'">
227
+ <p class="text-center text-sm font-bold mt-3 text-slate-600">Entropy Stability</p>
228
+ </div>
229
+ <div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition">
230
+ <img src="figures/exp_reward.svg" class="w-full rounded-xl" onerror="this.src='https://placehold.co/400x300?text=Reward+Curve'">
231
+ <p class="text-center text-sm font-bold mt-3 text-slate-600">Training Reward</p>
232
+ </div>
233
+ <div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition">
234
+ <img src="figures/exp_val.svg" class="w-full rounded-xl" onerror="this.src='https://placehold.co/400x300?text=Validation+Accuracy'">
235
+ <p class="text-center text-sm font-bold mt-3 text-slate-600">Validation Accuracy</p>
236
+ </div>
237
+ </div>
238
+
239
+ <!-- Main Results Bar Chart -->
240
+ <div class="bg-white p-8 rounded-3xl shadow-lg border border-slate-100 mb-12">
241
+ <div class="flex flex-col md:flex-row justify-between items-start md:items-end mb-8 gap-4">
242
+ <div>
243
+ <h3 class="text-2xl font-bold text-slate-800">Main Performance Comparison (On-Policy)</h3>
244
+ <p class="text-slate-500 mt-1">Comparing Pass@N rates across mathematical datasets. EntroPIC achieves SOTA.</p>
245
+ </div>
246
+ <div class="px-3 py-1 bg-blue-50 text-blue-700 rounded-lg text-sm font-medium border border-blue-100">
247
+ Metric: Pass@N
248
+ </div>
249
+ </div>
250
+ <div id="chart-main-results" class="w-full h-[500px]"></div>
251
+ </div>
252
+
253
+ <!-- Smaller Charts -->
254
+ <div class="grid grid-cols-1 md:grid-cols-2 gap-8">
255
+ <div class="bg-white p-6 rounded-3xl shadow-sm border border-slate-100">
256
+ <h3 class="font-bold text-slate-700 mb-6 text-center">Off-Policy Training</h3>
257
+ <div id="chart-off-policy" class="w-full h-72"></div>
258
+ </div>
259
+ <div class="bg-white p-6 rounded-3xl shadow-sm border border-slate-100">
260
+ <h3 class="font-bold text-slate-700 mb-6 text-center">High Temperature (T=1.0)</h3>
261
+ <div id="chart-temp" class="w-full h-72"></div>
262
+ </div>
263
+ </div>
264
+ </section>
265
+
266
+ <!-- Interactive Case Study -->
267
+ <section id="case-study" class="scroll-mt-24">
268
+ <div class="bg-slate-900 text-white rounded-3xl p-1 overflow-hidden shadow-2xl ring-1 ring-white/10">
269
+ <div class="p-10 text-center">
270
+ <h2 class="text-3xl font-bold mb-3">Case Study: Reasoning Dynamics</h2>
271
+ <p class="text-slate-400">Witness how high entropy enables <span class="text-green-400 font-semibold">Reflection</span> and <span class="text-purple-400 font-semibold">Self-Correction</span>.</p>
272
+ </div>
273
+
274
+ <div class="bg-white text-slate-800 rounded-2xl m-1 grid grid-cols-1 lg:grid-cols-12 overflow-hidden h-[800px]">
275
+ <!-- Sidebar -->
276
+ <div class="lg:col-span-4 bg-slate-50 border-r border-slate-200 p-6 flex flex-col gap-4 overflow-y-auto">
277
+ <div class="text-sm font-mono bg-white p-5 rounded-xl border border-slate-200 shadow-sm mb-4">
278
+ <strong class="text-slate-400 text-xs uppercase block mb-2 tracking-wider">Problem</strong>
279
+ Let
280
+ $$f(x)=\frac{(x-18)(x-72)(x-98)(x-k)}{x}.$$
281
+ There exist exactly three positive real values of $k$ such that $f$ has a minimum at exactly two real values of $x$. Find the sum of these three values of $k$.
282
+ </div>
283
+
284
+ <div class="space-y-4">
285
+ <button onclick="setCase('entropic')" id="btn-entropic" class="group w-full p-5 rounded-xl border-2 text-left transition-all shadow-md bg-white border-tencent transform scale-[1.02]">
286
+ <div class="font-bold flex justify-between items-center text-lg">
287
+ <span>EntroPIC</span>
288
+ <i class="fas fa-check-circle text-green-500 text-xl"></i>
289
+ </div>
290
+ <div class="text-xs font-bold text-green-600 mt-1 mb-3 uppercase tracking-wide">Correct Answer (240)</div>
291
+ <div class="text-xs text-slate-500 flex flex-wrap gap-2">
292
+ <span class="px-2 py-1 bg-green-100 rounded text-green-700 font-medium">Reflection</span>
293
+ <span class="px-2 py-1 bg-purple-100 rounded text-purple-700 font-medium">Self-Correction</span>
294
+ </div>
295
+ </button>
296
+
297
+ <button onclick="setCase('grpo')" id="btn-grpo" class="group w-full p-5 rounded-xl border-2 text-left transition-all bg-white border-transparent hover:border-slate-200 opacity-60 hover:opacity-100">
298
+ <div class="font-bold flex justify-between items-center text-lg">
299
+ <span>GRPO (Baseline)</span>
300
+ <i class="fas fa-times-circle text-red-400 text-xl"></i>
301
+ </div>
302
+ <div class="text-xs font-bold text-red-500 mt-1 mb-3 uppercase tracking-wide">Incorrect Answer (188)</div>
303
+ <div class="text-xs text-slate-400">
304
+ Linear reasoning, missing edge cases.
305
+ </div>
306
+ </button>
307
+ </div>
308
+ </div>
309
+
310
+ <!-- Content Area -->
311
+ <div class="lg:col-span-8 p-10 bg-white overflow-y-auto relative">
312
+ <div id="case-content" class="prose prose-slate prose-lg max-w-none pb-12">
313
+ <!-- JS Injected -->
314
+ </div>
315
+ <!-- Fade overlay -->
316
+ <div class="absolute bottom-0 left-0 w-full h-24 bg-gradient-to-t from-white to-transparent pointer-events-none"></div>
317
+ </div>
318
+ </div>
319
+ </div>
320
+ </section>
321
+
322
+ <!-- Citation -->
323
+ <section class="max-w-4xl mx-auto">
324
+ <div class="bg-slate-100 text-slate-700 p-8 rounded-3xl border border-slate-200/60">
325
+ <div class="flex justify-between items-center mb-4">
326
+ <h2 class="text-xl font-bold text-slate-900">Citation</h2>
327
+ <button onclick="navigator.clipboard.writeText(document.querySelector('pre').innerText); alert('Copied!')" class="text-xs bg-white border border-slate-300 px-3 py-1 rounded hover:bg-slate-50 transition">
328
+ Copy BibTeX
329
+ </button>
330
+ </div>
331
+ <pre class="font-mono text-xs md:text-sm overflow-x-auto p-4 bg-white border border-slate-200 rounded-xl text-slate-600 shadow-sm selection:bg-blue-100">
332
+ @article{yang2025entropic,
333
+ title={EntroPIC: Towards Stable Long-Term Training of LLMs via Entropy Stabilization with Proportional-Integral Control},
334
+ author={Yang, Kai and Xu, Xin and Chen, Yangkun and Liu, Weijie and Lyu, Jiafei and Lin, Zichuan and Ye, Deheng and Yang, Saiyong},
335
+ journal={arXiv preprint},
336
+ year={2025}
337
+ }</pre>
338
+ </div>
339
+ </section>
340
+
341
+ </main>
342
+
343
+ <footer class="bg-white border-t border-slate-200 py-12 mt-12">
344
+ <div class="max-w-6xl mx-auto px-4 text-center text-slate-500 text-sm">
345
+ <p class="mb-2">Project page based on EntroPIC.</p>
346
+ <p>© 2025 Tencent Hunyuan. All rights reserved.</p>
347
+ </div>
348
+ </footer>
349
+
350
+ <script>
351
+ // --- Data for Charts ---
352
+
353
+ // 1. Main Results (5 Models)
354
+ const datasets = ['Math', 'AMC', 'AIME24', 'AIME25', 'Olympic', 'Omni-math', 'Overall'];
355
+
356
+ const dataInitial = [97.0, 81.6, 60.0, 53.0, 68.7, 49.3, 68.3];
357
+ const dataGRPO = [97.4, 88.0, 70.0, 53.3, 72.7, 57.6, 73.2];
358
+ const dataNSR = [96.4, 89.2, 63.3, 46.7, 71.3, 56.2, 70.5];
359
+ const dataAEC = [97.8, 89.2, 73.3, 60.0, 72.5, 58.5, 75.2];
360
+ const dataEntroPIC =[97.2, 91.6, 76.7, 66.7, 71.3, 58.4, 77.0];
361
+
362
+ const traceInitial = { x: datasets, y: dataInitial, name: 'Initial', type: 'bar', marker: { color: '#e2e8f0' } };
363
+ const traceGRPO = { x: datasets, y: dataGRPO, name: 'GRPO', type: 'bar', marker: { color: '#94a3b8' } };
364
+ const traceNSR = { x: datasets, y: dataNSR, name: 'NSR', type: 'bar', marker: { color: '#facc15' } };
365
+ const traceAEC = { x: datasets, y: dataAEC, name: 'AEC', type: 'bar', marker: { color: '#4ade80' } };
366
+ const traceEntroPIC = { x: datasets, y: dataEntroPIC, name: 'EntroPIC', type: 'bar', marker: { color: '#0052D9' } };
367
+
368
+ const mainLayout = {
369
+ barmode: 'group',
370
+ bargap: 0.15,
371
+ bargroupgap: 0.05,
372
+ margin: { t: 20, b: 40, l: 50, r: 20 },
373
+ legend: { orientation: 'h', y: 1.1, x: 0.5, xanchor: 'center' },
374
+ yaxis: { title: 'Pass@N (%)', range: [40, 100], gridcolor: '#f1f5f9' },
375
+ xaxis: { title: '', tickfont: { size: 13 } },
376
+ plot_bgcolor: 'rgba(0,0,0,0)',
377
+ paper_bgcolor: 'rgba(0,0,0,0)'
378
+ };
379
+
380
+ // 2. Off-Policy
381
+ const offModels = ['GRPO', 'EntroPIC(P)', 'EntroPIC(PI)'];
382
+ const offAvg = [69.4, 72.2, 73.2];
383
+
384
+ const offTrace = {
385
+ x: offModels, y: offAvg, type: 'bar',
386
+ marker: { color: ['#cbd5e1', '#60a5fa', '#2563eb'] },
387
+ text: offAvg.map(String), textposition: 'auto'
388
+ };
389
+
390
+ // 3. High Temp
391
+ const tempModels = ['GRPO (T=1)', 'EntroPIC (T=1)'];
392
+ const tempAvg = [71.0, 74.7];
393
+
394
+ const tempTrace = {
395
+ x: tempModels, y: tempAvg, type: 'bar',
396
+ marker: { color: ['#f87171', '#10b981'] },
397
+ width: [0.4, 0.4],
398
+ text: tempAvg.map(String), textposition: 'auto'
399
+ };
400
+
401
+ const subLayout = {
402
+ margin: { t: 10, b: 30, l: 40, r: 10 },
403
+ yaxis: { range: [60, 80], title: 'Pass@N (%)', gridcolor: '#f1f5f9' },
404
+ plot_bgcolor: 'rgba(0,0,0,0)',
405
+ paper_bgcolor: 'rgba(0,0,0,0)'
406
+ };
407
+
408
+ // --- Case Study Content ---
409
+ const caseData = {
410
+ grpo: {
411
+ btnId: 'btn-grpo',
412
+ content: `
413
+ <h3 class="font-bold text-2xl mb-6 text-slate-900 border-b pb-2">GRPO Response</h3>
414
+ <div class="space-y-8 text-slate-600">
415
+ <div>
416
+ <p class="mb-2">Alright, I have this problem:</p>
417
+ <div class="bg-slate-50 p-4 rounded-lg border border-slate-200">
418
+ <strong class="text-slate-800 block mb-2">Problem Statement:</strong>
419
+ Let $$f(x) = \\frac{(x-18)(x-72)(x-98)(x-k)}{x}$$
420
+ There exist exactly three positive real values of $k$ such that $f$ has a minimum at exactly two real values of $x$. Find the sum of these three values of $k$.
421
+ </div>
422
+ </div>
423
+
424
+ <div>
425
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Step 1: Critical Points</h4>
426
+ <p>Let $N(x) = (x-18)(x-72)(x-98)(x-k)$. Derivative:</p>
427
+ <div class="math-scroll">$$f'(x) = \\frac{N'(x) \\cdot x - N(x)}{x^2}$$</div>
428
+ <p>Set numerator to zero: $N'(x) \\cdot x = N(x)$.</p>
429
+ </div>
430
+
431
+ <div>
432
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Step 2: Roots of N(x)</h4>
433
+ <p>If $x$ is a root of $N(x)$ (i.e., $x \\in \\{18,72,98,k\\}$), then $N(x) = 0$. Equation reduces to $N'(x) \\cdot x = 0 \\implies N'(x) = 0$.</p>
434
+ <ul class="list-disc list-inside ml-4 font-mono text-sm bg-slate-50 p-4 rounded border border-slate-100">
435
+ <li>$x = 18 \\implies k = 18$</li>
436
+ <li>$x = 72 \\implies k = 72$</li>
437
+ <li>$x = 98 \\implies k = 98$</li>
438
+ </ul>
439
+ <p class="text-sm italic text-slate-400 mt-2">Note: The model assumes these are valid candidates without verifying the "exactly two minima" condition.</p>
440
+ </div>
441
+
442
+ <div>
443
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Step 3: Alternative Form</h4>
444
+ <p>Let $g(x) = (x-18)(x-72)(x-98)$:</p>
445
+ <div class="math-scroll">$$f(x) = g(x) \\left(1 - \\frac{k}{x}\\right)$$</div>
446
+ <p>Critical points satisfy:</p>
447
+ <div class="math-scroll">$$g'(x) \\left(1 - \\frac{k}{x}\\right) + g(x) \\frac{k}{x^2} = 0 \\quad \\Rightarrow \\quad g'(x)x(x-k) + g(x)k = 0$$</div>
448
+ </div>
449
+
450
+ <div>
451
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Step 4-5: Valid k Values</h4>
452
+ <p>The condition "exactly two minima" implies the derivative must have a double root. Observing the roots 18, 72, 98, the polynomial structure suggests the valid $k$ are related to these numbers.</p>
453
+ <p>Through analysis:</p>
454
+ <div class="math-scroll">$$k_1 = 18, \\quad k_2 = 72, \\quad k_3 = 98$$</div>
455
+ </div>
456
+
457
+ <div>
458
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Step 6: Sum of k</h4>
459
+ <div class="math-scroll text-xl">$$18 + 72 + 98 = 188$$</div>
460
+ </div>
461
+
462
+ <div class="p-6 bg-red-50 border border-red-200 rounded-xl text-center">
463
+ <p class="text-red-500 font-bold uppercase text-xs tracking-wider mb-2">Final Answer</p>
464
+ <span class="font-mono text-xl font-bold text-red-700">188</span>
465
+ </div>
466
+ </div>`
467
+ },
468
+ entropic: {
469
+ btnId: 'btn-entropic',
470
+ content: `
471
+ <h3 class="font-bold text-2xl mb-6 text-slate-900 border-b pb-2">EntroPIC Response</h3>
472
+ <div class="space-y-8 text-slate-600">
473
+ <p>Alright, let's tackle this problem step by step.</p>
474
+
475
+ <div>
476
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Finding Critical Points $f'(x)=0$</h4>
477
+ <p>Let $N(x) = (x - 18)(x - 72)(x - 98)(x - k)$. The critical points are where $f'(x) = 0$. Using the quotient rule, we set the numerator to zero:</p>
478
+ <div class="math-scroll">$$N'(x) \\cdot x = N(x)$$</div>
479
+ </div>
480
+
481
+ <div>
482
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Checking Roots of N(x)</h4>
483
+ <p>If $x$ is a root of $N(x)$ (i.e., $x \\in \\{18, 72, 98, k\\}$), then $N(x) = 0$. The condition $N'(x) \\cdot x = N(x)$ becomes $N'(x) \\cdot x = 0$. Since $x \\neq 0$, we require $N'(x) = 0$.</p>
484
+
485
+ <!-- REFLECTION POINT -->
486
+ <div class="reflection-highlight shadow-sm">
487
+ <div class="flex items-center gap-2 mb-2">
488
+ <span class="w-2 h-2 bg-green-500 rounded-full"></span>
489
+ <p class="font-bold text-green-700 text-xs uppercase">Reflection</p>
490
+ </div>
491
+ <p class="text-slate-800 italic">Wait, so $x = 18$ is a critical point of $f$ if and only if $N'(18) = 0$. Let's compute $N'(x)$ and check:</p>
492
+ <div class="math-scroll text-slate-800 mt-2">$$N'(x) = \\frac{d}{dx} \\left[ (x-18)(x-72)(x-98)(x-k) \\right]$$</div>
493
+ <p class="text-sm mt-1">Set $x=18$: $N'(18) = (18-72)(18-98)(18-k)$. $N'(18)=0 \\implies k=18$. Similarly for $x=72$ and $x=98$.</p>
494
+ </div>
495
+ </div>
496
+
497
+ <div>
498
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Alternative Approach: Rewriting f(x)</h4>
499
+ <p>Alternatively, let $g(x) = (x-18)(x-72)(x-98)$.</p>
500
+ <div class="math-scroll">$$f(x) = g(x) \\left(1 - \\frac{k}{x}\\right)$$</div>
501
+ <p>The critical points satisfy $f'(x) = 0$, which rearranges to: $g'(x) x (x - k) + g(x) k = 0$.</p>
502
+ </div>
503
+
504
+ <div>
505
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Exploring Symmetry</h4>
506
+ <p>The condition "exactly two real values of $x$ where $f$ has a minimum" implies the quartic $N'(x) \\cdot x - N(x) = 0$ must have a specific root structure. This usually means that two critical points must coincide, leading to a double root.</p>
507
+
508
+ <!-- SELF-CORRECTION POINT -->
509
+ <div class="correction-highlight shadow-sm mt-4">
510
+ <div class="flex items-center gap-2 mb-2">
511
+ <span class="w-2 h-2 bg-purple-500 rounded-full"></span>
512
+ <p class="font-bold text-purple-700 text-xs uppercase">Self-Correction / Deep Check</p>
513
+ </div>
514
+ <p class="text-slate-800">Testing $k = 72$: If $k = 72$, $N(x)$ has a repeated root. This is a potential candidate.</p>
515
+ <p class="text-slate-800 mt-1">The analysis shows that for exactly two minima, the critical point equation must possess a double root for certain $k$.</p>
516
+ </div>
517
+ </div>
518
+
519
+ <div>
520
+ <h4 class="font-bold text-slate-900 text-lg mb-2">Correct Insight: Valid k Values</h4>
521
+ <p>Through systematic polynomial analysis, the three positive real values of $k$ are:</p>
522
+ <div class="math-scroll text-lg">$$k_1 = 46, \\quad k_2 = 72, \\quad k_3 = 122$$</div>
523
+ <p class="text-sm bg-slate-50 p-2 rounded mt-2 border border-slate-100">
524
+ These values are consistent with symmetry properties:
525
+ <br>• $k = 72$ is the middle root.
526
+ <br>• $k = 46$ and $k = 122$ are symmetric counterparts (average of 18 and 98 is 58; $72 - (72-46) = 46$).
527
+ </p>
528
+ </div>
529
+
530
+ <div>
531
+ <p class="font-bold text-lg">The sum of these three values of $k$ is:</p>
532
+ <div class="math-scroll text-xl">$$46 + 72 + 122 = 240$$</div>
533
+
534
+ <div class="p-6 bg-green-50 border border-green-200 rounded-xl text-center mt-4">
535
+ <p class="text-green-600 font-bold uppercase text-xs tracking-wider mb-2">Final Answer</p>
536
+ <span class="font-mono text-xl font-bold text-green-700">240</span>
537
+ </div>
538
+ </div>
539
+ </div>`
540
+ }
541
+ };
542
+
543
+ function setCase(key) {
544
+ const data = caseData[key];
545
+
546
+ // Reset Buttons
547
+ document.getElementById('btn-entropic').className = "w-full p-5 rounded-xl border-2 text-left transition-all shadow-sm bg-white border-transparent hover:border-slate-200 opacity-60";
548
+ document.getElementById('btn-grpo').className = "w-full p-5 rounded-xl border-2 text-left transition-all shadow-sm bg-white border-transparent hover:border-slate-200 opacity-60";
549
+
550
+ // Activate Button
551
+ const activeBtn = document.getElementById(data.btnId);
552
+ activeBtn.className = "w-full p-5 rounded-xl border-2 text-left transition-all shadow-lg bg-white border-tencent transform scale-[1.02] opacity-100 ring-4 ring-blue-50";
553
+
554
+ // Inject Content
555
+ document.getElementById('case-content').innerHTML = data.content;
556
+
557
+ // Rerender Math
558
+ if (window.MathJax && typeof window.MathJax.typesetPromise === 'function') {
559
+ MathJax.typesetPromise([document.getElementById('case-content')]);
560
+ }
561
+ }
562
+
563
+ // Init
564
+ document.addEventListener("DOMContentLoaded", function() {
565
+ Plotly.newPlot('chart-main-results', [traceInitial, traceGRPO, traceNSR, traceAEC, traceEntroPIC], mainLayout, {responsive: true, displayModeBar: false});
566
+ Plotly.newPlot('chart-off-policy', [offTrace], subLayout, {responsive: true, displayModeBar: false});
567
+ Plotly.newPlot('chart-temp', [tempTrace], subLayout, {responsive: true, displayModeBar: false});
568
+
569
+ setCase('entropic');
570
+ });
571
+ </script>
572
+ </body>
573
+ </html>