Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>EntroPIC: Stable Long-Term Training of LLMs</title> | |
| <!-- Tailwind CSS --> | |
| <script src="https://cdn.tailwindcss.com"></script> | |
| <!-- MathJax --> | |
| <script> | |
| MathJax = { /* Global MathJax configuration object, declared ahead of the mathjax@3 loader script that follows */ | |
| tex: { /* TeX input settings */ | |
| inlineMath: [['$', '$'], ['\\(', '\\)']], /* inline math delimiters: $...$ and \(...\) */ | |
| displayMath: [['$$', '$$'], ['\\[', '\\]']], /* display math delimiters: $$...$$ and \[...\] */ | |
| processEscapes: true, /* per the MathJax docs, allows \$ to stand for a literal dollar sign */ | |
| macros: { /* custom TeX macros, each given as [replacement, argument count] */ | |
| boxed: ['\\fbox{#1}', 1] /* \boxed{x} expands to \fbox{x}. NOTE(review): \fbox presumably sets its argument as text rather than math - confirm this override is intended */ | |
| } | |
| }, | |
| options: { /* document-scanning options */ | |
| ignoreHtmlClass: 'tex2jax_ignore', /* class name marking content MathJax should not process */ | |
| processHtmlClass: 'tex2jax_process' /* class name marking content MathJax should process */ | |
| } | |
| }; | |
| </script> | |
| <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script> | |
| <!-- Plotly.js --> | |
| <script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script> | |
| <!-- Fonts & Icons --> | |
| <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet"> | |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"> | |
| <script> | |
| tailwind.config = { /* Theme configuration for the Tailwind CDN script loaded earlier in <head> */ | |
| theme: { | |
| extend: { /* entries here are added to the default theme */ | |
| fontFamily: { | |
| sans: ['Inter', 'sans-serif'], /* Inter is requested from Google Fonts in <head> */ | |
| mono: ['JetBrains Mono', 'monospace'], /* JetBrains Mono is requested from Google Fonts in <head> */ | |
| }, | |
| colors: { | |
| tencent: '#0052D9', /* brand blue; referenced as border-tencent on the case-study selector button */ | |
| primary: '#2c3e50', /* NOTE(review): no primary-* utility appears in the markup visible here - confirm it is still used */ | |
| } | |
| } | |
| } | |
| } | |
| </script> | |
| <style> | |
| body { | |
| background-color: #f8fafc; | |
| background-image: radial-gradient(#e2e8f0 1px, transparent 1px); | |
| background-size: 24px 24px; | |
| } | |
| .gradient-text { /* paints the text with a blue gradient by clipping the background to the glyphs */ | |
| background: linear-gradient(135deg, #0052D9 0%, #0ea5e9 100%); | |
| -webkit-background-clip: text; | |
| background-clip: text; /* standard property, declared after the prefixed form */ | |
| -webkit-text-fill-color: transparent; /* lets the clipped gradient show through the glyphs */ | |
| } | |
| .highlight-acronym { | |
| color: #0052D9; | |
| font-weight: 700; | |
| } | |
| .math-scroll { | |
| overflow-x: auto; | |
| overflow-y: hidden; | |
| max-width: 100%; | |
| padding: 8px 0; | |
| } | |
| .math-scroll::-webkit-scrollbar { | |
| height: 6px; | |
| } | |
| .math-scroll::-webkit-scrollbar-thumb { | |
| background: #cbd5e1; | |
| border-radius: 3px; | |
| } | |
| /* Case Study Highlights */ | |
| .reflection-highlight { | |
| background-color: #f0fdf4; | |
| border-left: 4px solid #22c55e; | |
| padding: 1rem; | |
| margin: 1rem 0; | |
| border-radius: 0 0.5rem 0.5rem 0; | |
| } | |
| .correction-highlight { | |
| background-color: #f5f3ff; | |
| border-left: 4px solid #8b5cf6; | |
| padding: 1rem; | |
| margin: 1rem 0; | |
| border-radius: 0 0.5rem 0.5rem 0; | |
| } | |
| .author-link { | |
| color: #0052D9; | |
| text-decoration: none; | |
| transition: color 0.2s; | |
| } | |
| .author-link:hover { | |
| color: #00308F; | |
| text-decoration: underline; | |
| } | |
| /* Toggle Button Styles */ | |
| .toggle-btn { | |
| transition: all 0.3s ease; | |
| position: relative; | |
| overflow: hidden; | |
| } | |
| .toggle-btn.active { | |
| background-color: white; | |
| color: #0f172a; | |
| box-shadow: 0 2px 4px rgba(0,0,0,0.05); | |
| font-weight: 600; | |
| } | |
| .toggle-btn.inactive { | |
| color: #64748b; | |
| } | |
| .toggle-btn.inactive:hover { | |
| color: #334155; | |
| background-color: rgba(255,255,255,0.5); | |
| } | |
| /* Chart Background Transition */ | |
| .chart-container { | |
| transition: background-color 1.0s ease-in-out; /* smooth background-color switch */ | |
| } | |
| .bg-theme-blue { | |
| background-color: #eff6ff; /* blue-50 */ | |
| } | |
| .bg-theme-orange { | |
| background-color: #fff7ed; /* orange-50 */ | |
| } | |
| </style> | |
| </head> | |
| <body class="text-slate-800 antialiased"> | |
| <!-- Decorative Background Blur --> | |
| <div class="fixed top-0 left-0 right-0 h-96 bg-gradient-to-b from-blue-50/80 to-transparent -z-10 pointer-events-none"></div> | |
| <!-- Header --> | |
| <header class="pt-24 pb-12"> | |
| <div class="max-w-6xl mx-auto px-4 text-center"> | |
| <h1 class="text-4xl md:text-5xl lg:text-6xl font-bold tracking-tight mb-6 leading-tight"> | |
| <span class="gradient-text">EntroPIC</span>: Towards Stable Long-Term Training of LLMs via | |
| <span class="highlight-acronym">Entro</span>py Stabilization with <br class="hidden lg:block"> | |
| <span class="highlight-acronym">P</span>roportional-<span class="highlight-acronym">I</span>ntegral <span class="highlight-acronym">C</span>ontrol | |
| </h1> | |
| <div class="text-lg mb-6 text-slate-700 font-medium leading-relaxed"> | |
| <div class="flex flex-wrap justify-center items-center gap-x-4 gap-y-2"> | |
| <span><a href="https://yk7333.github.io/" target="_blank" class="author-link">Kai Yang</a><sup>1</sup>,</span> | |
| <span><a href="https://xinxu-ustc.github.io/" target="_blank" class="author-link">Xin Xu</a><sup>1,2</sup>,</span> | |
| <span><a href="https://github.com/kkane99" target="_blank" class="author-link">Yangkun Chen</a><sup>1</sup>,</span> | |
| <span><a href="https://github.com/autoliuweijie" target="_blank" class="author-link">Weijie Liu</a><sup>1</sup>,</span> | |
| <span><a href="https://dmksjfl.github.io/" target="_blank" class="author-link">Jiafei Lyu</a><sup>1</sup>,</span> | |
| <span><a href="https://linzichuan.github.io/" target="_blank" class="author-link">Zichuan Lin</a><sup>1</sup>,</span> | |
| <span><a href="https://scholar.google.com/citations?user=jz5XKuQAAAAJ&hl=en&oi=ao" target="_blank" class="author-link">Deheng Ye</a><sup>1</sup>,</span> | |
| <span><a href="https://github.com/yangsaiyong" target="_blank" class="author-link">Saiyong Yang</a><sup>1†</sup></span> | |
| </div> | |
| <div class="mt-4 text-base text-slate-500"> <!-- Affiliation legend for the superscripts in the author list. NOTE(review): index-to-affiliation mapping inferred from listing order; confirm against the paper --> | |
| <span class="mr-4"><sup>1</sup>Tencent Hunyuan</span> | |
| <span><sup>2</sup>HKUST</span> | |
| </div> | |
| <div class="mt-1 text-xs text-slate-400">† Corresponding Author</div> | |
| </div> | |
| <div class="flex justify-center gap-4 mt-8"> <!-- Primary call-to-action links: paper and code; icons are decorative and hidden from assistive technology --> | |
| <a href="https://arxiv.org/abs/2511.15248" target="_blank" class="flex items-center gap-2 px-8 py-3 bg-slate-900 text-white rounded-full hover:bg-slate-800 transition shadow-xl hover:shadow-2xl transform hover:-translate-y-0.5 duration-200"> | |
| <i class="fas fa-file-pdf" aria-hidden="true"></i> Paper | |
| </a> | |
| <a href="https://github.com/yk7333/EntroPIC" target="_blank" class="flex items-center gap-2 px-8 py-3 bg-white border border-slate-200 text-slate-800 rounded-full hover:bg-slate-50 transition shadow-md hover:shadow-lg transform hover:-translate-y-0.5 duration-200"> | |
| <i class="fab fa-github text-xl" aria-hidden="true"></i> Code | |
| </a> | |
| </div> | |
| </div> | |
| </header> | |
| <main class="max-w-6xl mx-auto px-4 py-10 space-y-24"> | |
| <!-- Abstract --> | |
| <section> | |
| <div class="bg-white/80 backdrop-blur-sm p-8 md:p-10 rounded-3xl shadow-lg border border-white/50 text-justify text-slate-600 leading-relaxed"> | |
| <h2 class="text-2xl font-bold mb-4 text-slate-800">Abstract</h2> | |
| Long-term training of large language models (LLMs) requires maintaining stable exploration to prevent the model from collapsing into sub-optimal behaviors. Entropy is crucial in this context. However, existing RL methods struggle to maintain an appropriate level of entropy as positive samples reduce it while negative samples increase it. | |
| <br><br> | |
| We propose <strong>EntroPIC</strong>, a novel method that uses Proportional-Integral (PI) control to adaptively adjust the loss coefficients of positive and negative samples. This stabilizes entropy throughout training, ensuring efficient exploration and steady progress. | |
| </div> | |
| <div class="mt-10 transform hover:scale-[1.01] transition duration-500"> <!-- Overview figure; onerror clears itself before swapping to the placeholder so a failed placeholder cannot re-trigger the handler --> | |
| <img src="figures/overview.png" alt="EntroPIC Overview" class="w-full rounded-2xl shadow-xl border border-slate-200/60" onerror="this.onerror=null;this.src='https://placehold.co/1200x400?text=Overview+Figure+(Ensure+figures/overview.png+exists)'"> | |
| <p class="text-center text-sm text-slate-400 mt-3">Figure 1: EntroPIC uses PI control to dynamically adjust sample weights based on entropy error.</p> | |
| </div> | |
| </section> | |
| <!-- Method Section --> | |
| <section id="method"> | |
| <h2 class="text-3xl font-bold mb-12 text-center text-slate-800">Methodology</h2> | |
| <div class="mb-16 bg-white rounded-3xl shadow-sm border border-slate-100 p-8"> | |
| <div class="grid grid-cols-1 md:grid-cols-12 gap-12 items-center"> | |
| <div class="md:col-span-5"> | |
| <h3 class="text-xl font-bold mb-4 text-slate-800 flex items-center gap-2"> | |
| <span class="w-8 h-8 bg-blue-100 text-blue-600 rounded-lg flex items-center justify-center text-sm">01</span> | |
| High-Probability Tokens Matter | |
| </h3> | |
| <p class="text-slate-600 leading-relaxed mb-6"> | |
| Not all tokens affect entropy equally. Our analysis reveals distinct impacts based on token probability and advantage. EntroPIC focuses control where it matters most: | |
| </p> | |
| <ul class="space-y-3"> | |
| <li class="flex items-start gap-3 p-3 bg-green-50 rounded-xl border border-green-100"> | |
| <i class="fas fa-arrow-trend-down text-green-600 mt-1"></i> | |
| <span class="text-sm text-slate-700"><strong class="text-green-700">Positive High-Prob:</strong> Standard RL drops entropy too fast. We reduce their weight to maintain exploration.</span> | |
| </li> | |
| <li class="flex items-start gap-3 p-3 bg-red-50 rounded-xl border border-red-100"> | |
| <i class="fas fa-shield-alt text-red-500 mt-1"></i> | |
| <span class="text-sm text-slate-700"><strong class="text-red-700">Negative Low-Prob:</strong> We avoid suppressing rare explorations to prevent degradation.</span> | |
| </li> | |
| </ul> | |
| </div> | |
| <div class="md:col-span-7"> | |
| <div class="rounded-xl overflow-hidden"> <!-- Quadrant figure; onerror clears itself first so a failed placeholder cannot loop --> | |
| <img src="figures/entropy_quadrant.svg" alt="Entropy Quadrant Analysis" class="w-full" onerror="this.onerror=null;this.src='https://placehold.co/600x400?text=Entropy+Quadrant+Analysis'"> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="bg-white rounded-3xl shadow-sm border border-slate-100 p-8"> | |
| <div class="text-center max-w-3xl mx-auto mb-10"> | |
| <h3 class="text-xl font-bold mb-4 flex items-center justify-center gap-2"> | |
| <span class="w-8 h-8 bg-blue-100 text-blue-600 rounded-lg flex items-center justify-center text-sm">02</span> | |
| Precise Control at Any Target | |
| </h3> | |
| <p class="text-slate-600"> | |
| Unlike static coefficients, EntroPIC's PI controller dynamically adjusts $\alpha$ to lock entropy to <strong>any arbitrary target value</strong>. | |
| </p> | |
| </div> | |
| <div class="grid grid-cols-1 md:grid-cols-2 gap-8"> <!-- Two side-by-side figures; each onerror clears itself first so a failed placeholder cannot loop --> | |
| <div class="bg-slate-50 p-4 rounded-2xl border border-slate-100"> | |
| <img src="figures/entropy_diff_target.svg" alt="Entropy at Different Targets" class="w-full" onerror="this.onerror=null;this.src='https://placehold.co/600x400?text=Entropy+Convergence'"> | |
| <p class="text-center text-sm font-bold text-slate-700 mt-3">Entropy Convergence</p> | |
| </div> | |
| <div class="bg-slate-50 p-4 rounded-2xl border border-slate-100"> | |
| <img src="figures/alpha_diff_target.svg" alt="Alpha Dynamics" class="w-full" onerror="this.onerror=null;this.src='https://placehold.co/600x400?text=Alpha+Coefficient+Dynamics'"> | |
| <p class="text-center text-sm font-bold text-slate-700 mt-3">Adaptive Coefficient ($\alpha$)</p> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Experimental Results --> | |
| <section id="results"> | |
| <h2 class="text-3xl font-bold mb-12 text-center text-slate-800">Experimental Results</h2> | |
| <!-- Training Curves --> | |
| <div class="grid grid-cols-1 md:grid-cols-3 gap-6 mb-12"> <!-- Training-curve cards; every image has alt text, and each onerror clears itself first so a failed placeholder cannot loop --> | |
| <div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition"> | |
| <img src="figures/exp_entropy.svg" alt="Entropy curve" class="w-full rounded-xl" onerror="this.onerror=null;this.src='https://placehold.co/400x300?text=Entropy+Curve'"> | |
| <p class="text-center text-sm font-bold mt-3 text-slate-600">Entropy Stability</p> | |
| </div> | |
| <div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition"> | |
| <img src="figures/exp_reward.svg" alt="Training reward curve" class="w-full rounded-xl" onerror="this.onerror=null;this.src='https://placehold.co/400x300?text=Reward+Curve'"> | |
| <p class="text-center text-sm font-bold mt-3 text-slate-600">Training Reward</p> | |
| </div> | |
| <div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition"> | |
| <img src="figures/exp_val.svg" alt="Validation accuracy curve" class="w-full rounded-xl" onerror="this.onerror=null;this.src='https://placehold.co/400x300?text=Validation+Accuracy'"> | |
| <p class="text-center text-sm font-bold mt-3 text-slate-600">Validation Accuracy</p> | |
| </div> | |
| </div> | |
| <!-- Main Results Bar Chart --> | |
| <div class="bg-white p-8 rounded-3xl shadow-lg border border-slate-100 mb-12"> | |
| <div class="flex flex-col md:flex-row justify-between items-start md:items-end mb-8 gap-4"> | |
| <div> | |
| <h3 class="text-2xl font-bold text-slate-800">Main Performance Comparison</h3> | |
| <p class="text-slate-500 mt-1">Comparing performance across mathematical datasets.</p> | |
| </div> | |
| <!-- Metric Toggle Switch --> | |
| <div class="bg-slate-100 p-1.5 rounded-xl flex font-medium text-sm shadow-inner ring-1 ring-slate-200"> | |
| <button onclick="updateCharts('pass')" id="btn-metric-pass" class="toggle-btn active px-5 py-2 rounded-lg flex items-center gap-2"> | |
| <i class="fas fa-chart-bar"></i> Pass@N | |
| </button> | |
| <button onclick="updateCharts('avg')" id="btn-metric-avg" class="toggle-btn inactive px-5 py-2 rounded-lg flex items-center gap-2"> | |
| <i class="fas fa-chart-line"></i> Pass@1 (Avg) | |
| </button> | |
| </div> | |
| </div> | |
| <!-- Chart Container with Background Transition --> | |
| <div id="chart-main-results" class="w-full h-[500px] chart-container bg-theme-blue rounded-2xl p-4"></div> | |
| </div> | |
| <!-- Smaller Charts --> | |
| <div class="grid grid-cols-1 md:grid-cols-2 gap-8"> | |
| <div class="bg-white p-6 rounded-3xl shadow-sm border border-slate-100"> | |
| <h3 class="font-bold text-slate-700 mb-6 text-center">Off-Policy Training (Overall)</h3> | |
| <div id="chart-off-policy" class="w-full h-72 chart-container bg-theme-blue rounded-2xl p-4"></div> | |
| </div> | |
| <div class="bg-white p-6 rounded-3xl shadow-sm border border-slate-100"> | |
| <h3 class="font-bold text-slate-700 mb-6 text-center">High Temperature (T=1.0)</h3> | |
| <div id="chart-temp" class="w-full h-72 chart-container bg-theme-blue rounded-2xl p-4"></div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Case Study --> | |
| <section id="case-study" class="scroll-mt-24"> | |
| <div class="bg-slate-900 text-white rounded-3xl p-1 overflow-hidden shadow-2xl ring-1 ring-white/10"> | |
| <div class="p-10 text-center"> | |
| <h2 class="text-3xl font-bold mb-3">Case Study: Reasoning Dynamics</h2> | |
| <p class="text-slate-400">Witness how high entropy enables <span class="text-green-400 font-semibold">Reflection</span> and <span class="text-purple-400 font-semibold">Self-Correction</span>.</p> | |
| </div> | |
| <div class="bg-white text-slate-800 rounded-2xl m-1 grid grid-cols-1 lg:grid-cols-12 overflow-hidden h-[800px]"> | |
| <div class="lg:col-span-4 bg-slate-50 border-r border-slate-200 p-6 flex flex-col gap-4 overflow-y-auto"> | |
| <div class="text-sm font-mono bg-white p-5 rounded-xl border border-slate-200 shadow-sm mb-4"> | |
| <strong class="text-slate-400 text-xs uppercase block mb-2 tracking-wider">Prompt</strong> | |
| Let | |
| $$f(x)=\frac{(x-18)(x-72)(x-98)(x-k)}{x}.$$ | |
| There exist exactly three positive real values of $k$ such that $f$ has a minimum at exactly two real values of $x$. Find the sum of these three values of $k$. | |
| </div> | |
| <div class="space-y-4"> | |
| <button onclick="setCase('entropic')" id="btn-entropic" class="group w-full p-5 rounded-xl border-2 text-left transition-all shadow-md bg-white border-tencent transform scale-[1.02]"> | |
| <div class="font-bold flex justify-between items-center text-lg"> | |
| <span>EntroPIC Response</span> | |
| <i class="fas fa-check-circle text-green-500 text-xl"></i> | |
| </div> | |
| <div class="text-xs font-bold text-green-600 mt-1 mb-3 uppercase tracking-wide">Correct Answer (240)</div> | |
| <div class="text-xs text-slate-500 flex flex-wrap gap-2"> | |
| <span class="px-2 py-1 bg-green-100 rounded text-green-700 font-medium">Reflection</span> | |
| <span class="px-2 py-1 bg-purple-100 rounded text-purple-700 font-medium">Self-Correction</span> | |
| </div> | |
| </button> | |
| <button onclick="setCase('grpo')" id="btn-grpo" class="group w-full p-5 rounded-xl border-2 text-left transition-all bg-white border-transparent hover:border-slate-200 opacity-60 hover:opacity-100"> | |
| <div class="font-bold flex justify-between items-center text-lg"> | |
| <span>GRPO Response</span> | |
| <i class="fas fa-times-circle text-red-400 text-xl"></i> | |
| </div> | |
| <div class="text-xs font-bold text-red-500 mt-1 mb-3 uppercase tracking-wide">Incorrect Answer (188)</div> | |
| <div class="text-xs text-slate-400"> | |
| Linear reasoning, missing edge cases. | |
| </div> | |
| </button> | |
| </div> | |
| </div> | |
| <div class="lg:col-span-8 p-10 bg-white overflow-y-auto relative"> | |
| <div id="case-content" class="prose prose-slate prose-lg max-w-none pb-12"> | |
| <!-- JS Injected --> | |
| </div> | |
| <div class="absolute bottom-0 left-0 w-full h-24 bg-gradient-to-t from-white to-transparent pointer-events-none"></div> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- Citation --> | |
| <section class="max-w-4xl mx-auto"> | |
| <div class="bg-slate-100 text-slate-700 p-8 rounded-3xl border border-slate-200/60"> | |
| <div class="flex justify-between items-center mb-4"> | |
| <h2 class="text-xl font-bold text-slate-900">Citation</h2> | |
| <button type="button" onclick="const bibtex = this.closest('section').querySelector('pre').innerText.trim(); (navigator.clipboard ? navigator.clipboard.writeText(bibtex) : Promise.reject(new Error('Clipboard API unavailable'))).then(() => alert('Copied!'), () => alert('Copy failed. Please select the BibTeX text and copy it manually.'))" class="text-xs bg-white border border-slate-300 px-3 py-1 rounded hover:bg-slate-50 transition"> <!-- Copies the BibTeX from the <pre> in this section; success is reported only after the clipboard write resolves, and a missing Clipboard API or rejected write shows a failure message instead --> | |
| Copy BibTeX | |
| </button> | |
| </div> | |
| <pre class="font-mono text-xs md:text-sm overflow-x-auto p-4 bg-white border border-slate-200 rounded-xl text-slate-600 shadow-sm selection:bg-blue-100"> | |
| @article{yang2025entropic, | |
| title={EntroPIC: Towards Stable Long-Term Training of LLMs via Entropy Stabilization with Proportional-Integral Control}, | |
| author={Yang, Kai and Xu, Xin and Chen, Yangkun and Liu, Weijie and Lyu, Jiafei and Lin, Zichuan and Ye, Deheng and Yang, Saiyong}, | |
| journal={arXiv preprint arXiv:2511.15248}, | |
| year={2025} | |
| }</pre> | |
| </div> | |
| </section> | |
| </main> | |
| <footer class="bg-white border-t border-slate-200 py-12 mt-12"> | |
| <div class="max-w-6xl mx-auto px-4 text-center text-slate-500 text-sm"> | |
| <p class="mb-2">Project page based on EntroPIC.</p> | |
| <p>© 2025 Tencent Hunyuan. All rights reserved.</p> | |
| </div> | |
| </footer> | |
| <script> /* NOTE(review): this script is empty in this view, yet inline onclick handlers in the markup call updateCharts('pass' | 'avg') and setCase('entropic' | 'grpo'), the #case-content element is marked "JS Injected", and the chart containers have no content of their own. Confirm the script body was not lost; as shown, those handlers would reference undefined functions. */ | |
| </script> | |
| </body> | |
| </html> |