entropic / index.html
yangkaiSIGS's picture
Update index.html
516c99f verified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>EntroPIC: Stable Long-Term Training of LLMs</title>
<!-- Tailwind CSS -->
<script src="https://cdn.tailwindcss.com"></script>
<!-- MathJax -->
<script>
MathJax = {
tex: {
inlineMath: [['$', '$'], ['\\(', '\\)']],
displayMath: [['$$', '$$'], ['\\[', '\\]']],
processEscapes: true,
macros: {
boxed: ['\\fbox{#1}', 1]
}
},
options: {
ignoreHtmlClass: 'tex2jax_ignore',
processHtmlClass: 'tex2jax_process'
}
};
</script>
<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
<!-- Plotly.js -->
<script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
<!-- Fonts & Icons -->
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<script>
// Theme additions for the Tailwind CDN build: two font stacks and two brand colors.
tailwind.config = {
  theme: {
    extend: {
      colors: {
        primary: '#2c3e50',
        tencent: '#0052D9'
      },
      fontFamily: {
        mono: ['JetBrains Mono', 'monospace'],
        sans: ['Inter', 'sans-serif']
      }
    }
  }
};
</script>
<style>
/* Page background: light slate with a 24px dotted grid. */
body {
  background-color: #f8fafc;
  background-image: radial-gradient(#e2e8f0 1px, transparent 1px);
  background-size: 24px 24px;
}

/* Gradient-filled text: the background is clipped to the glyphs and the
   text fill is made transparent so the gradient shows through. */
.gradient-text {
  background: linear-gradient(135deg, #0052D9 0%, #0ea5e9 100%);
  -webkit-background-clip: text;
  /* Standard property alongside the prefixed one. */
  background-clip: text;
  -webkit-text-fill-color: transparent;
}

.highlight-acronym {
  color: #0052D9;
  font-weight: 700;
}

/* Horizontally scrollable wrapper for wide display equations. */
.math-scroll {
  overflow-x: auto;
  overflow-y: hidden;
  max-width: 100%;
  padding: 8px 0;
}

.math-scroll::-webkit-scrollbar {
  height: 6px;
}

.math-scroll::-webkit-scrollbar-thumb {
  background: #cbd5e1;
  border-radius: 3px;
}

/* Case Study Highlights */
.reflection-highlight {
  background-color: #f0fdf4;
  border-left: 4px solid #22c55e;
  padding: 1rem;
  margin: 1rem 0;
  border-radius: 0 0.5rem 0.5rem 0;
}

.correction-highlight {
  background-color: #f5f3ff;
  border-left: 4px solid #8b5cf6;
  padding: 1rem;
  margin: 1rem 0;
  border-radius: 0 0.5rem 0.5rem 0;
}

.author-link {
  color: #0052D9;
  text-decoration: none;
  transition: color 0.2s;
}

.author-link:hover {
  color: #00308F;
  text-decoration: underline;
}

/* Toggle Button Styles */
.toggle-btn {
  transition: all 0.3s ease;
  position: relative;
  overflow: hidden;
}

.toggle-btn.active {
  background-color: white;
  color: #0f172a;
  box-shadow: 0 2px 4px rgba(0,0,0,0.05);
  font-weight: 600;
}

.toggle-btn.inactive {
  color: #64748b;
}

.toggle-btn.inactive:hover {
  color: #334155;
  background-color: rgba(255,255,255,0.5);
}

/* Chart Background Transition */
.chart-container {
  transition: background-color 1.0s ease-in-out; /* smooth background-color switch */
}

.bg-theme-blue {
  background-color: #eff6ff; /* blue-50 */
}

.bg-theme-orange {
  background-color: #fff7ed; /* orange-50 */
}
</style>
</head>
<body class="text-slate-800 antialiased">
<!-- Decorative Background Blur -->
<div class="fixed top-0 left-0 right-0 h-96 bg-gradient-to-b from-blue-50/80 to-transparent -z-10 pointer-events-none"></div>
<!-- Header -->
<header class="pt-24 pb-12">
<div class="max-w-6xl mx-auto px-4 text-center">
<h1 class="text-4xl md:text-5xl lg:text-6xl font-bold tracking-tight mb-6 leading-tight">
<span class="gradient-text">EntroPIC</span>: Towards Stable Long-Term Training of LLMs via
<span class="highlight-acronym">Entro</span>py Stabilization with <br class="hidden lg:block">
<span class="highlight-acronym">P</span>roportional-<span class="highlight-acronym">I</span>ntegral <span class="highlight-acronym">C</span>ontrol
</h1>
<div class="text-lg mb-6 text-slate-700 font-medium leading-relaxed">
<!-- Every link below opens in a new tab; rel="noopener noreferrer" stops the opened page from reaching this one through window.opener. -->
<div class="flex flex-wrap justify-center items-center gap-x-4 gap-y-2">
<span><a href="https://yk7333.github.io/" target="_blank" rel="noopener noreferrer" class="author-link">Kai Yang</a><sup>1</sup>,</span>
<span><a href="https://xinxu-ustc.github.io/" target="_blank" rel="noopener noreferrer" class="author-link">Xin Xu</a><sup>1,2</sup>,</span>
<span><a href="https://github.com/kkane99" target="_blank" rel="noopener noreferrer" class="author-link">Yangkun Chen</a><sup>1</sup>,</span>
<span><a href="https://github.com/autoliuweijie" target="_blank" rel="noopener noreferrer" class="author-link">Weijie Liu</a><sup>1</sup>,</span>
<span><a href="https://dmksjfl.github.io/" target="_blank" rel="noopener noreferrer" class="author-link">Jiafei Lyu</a><sup>1</sup>,</span>
<span><a href="https://linzichuan.github.io/" target="_blank" rel="noopener noreferrer" class="author-link">Zichuan Lin</a><sup>1</sup>,</span>
<span><a href="https://scholar.google.com/citations?user=jz5XKuQAAAAJ&amp;hl=en&amp;oi=ao" target="_blank" rel="noopener noreferrer" class="author-link">Deheng Ye</a><sup>1</sup>,</span>
<span><a href="https://github.com/yangsaiyong" target="_blank" rel="noopener noreferrer" class="author-link">Saiyong Yang</a><sup>1†</sup></span>
</div>
<!-- Affiliation markers match the superscripts on the author names above (listed in order: 1, 2). -->
<div class="mt-4 text-base text-slate-500">
<span class="mr-4"><sup>1</sup>Tencent Hunyuan</span>
<span><sup>2</sup>HKUST</span>
</div>
<div class="mt-1 text-xs text-slate-400">† Corresponding Author</div>
</div>
<div class="flex justify-center gap-4 mt-8">
<a href="https://arxiv.org/abs/2511.15248" target="_blank" rel="noopener noreferrer" class="flex items-center gap-2 px-8 py-3 bg-slate-900 text-white rounded-full hover:bg-slate-800 transition shadow-xl hover:shadow-2xl transform hover:-translate-y-0.5 duration-200">
<i class="fas fa-file-pdf" aria-hidden="true"></i> Paper
</a>
<a href="https://github.com/yk7333/EntroPIC" target="_blank" rel="noopener noreferrer" class="flex items-center gap-2 px-8 py-3 bg-white border border-slate-200 text-slate-800 rounded-full hover:bg-slate-50 transition shadow-md hover:shadow-lg transform hover:-translate-y-0.5 duration-200">
<i class="fab fa-github text-xl" aria-hidden="true"></i> Code
</a>
</div>
</div>
</header>
<main class="max-w-6xl mx-auto px-4 py-10 space-y-24">
<!-- Abstract -->
<section>
<div class="bg-white/80 backdrop-blur-sm p-8 md:p-10 rounded-3xl shadow-lg border border-white/50 text-justify text-slate-600 leading-relaxed">
<h2 class="text-2xl font-bold mb-4 text-slate-800">Abstract</h2>
<!-- Real paragraphs instead of a <br><br> spacer; mb-6 supplies the gap between them. -->
<p class="mb-6">Long-term training of large language models (LLMs) requires maintaining stable exploration to prevent the model from collapsing into sub-optimal behaviors. Entropy is crucial in this context. However, existing RL methods struggle to maintain an appropriate level of entropy as positive samples reduce it while negative samples increase it.</p>
<p>We propose <strong>EntroPIC</strong>, a novel method that uses Proportional-Integral (PI) control to adaptively adjust the loss coefficients of positive and negative samples. This stabilizes entropy throughout training, ensuring efficient exploration and steady progress.</p>
</div>
<div class="mt-10 transform hover:scale-[1.01] transition duration-500">
<!-- onerror clears itself first so a failing placeholder cannot re-trigger the handler in a loop. -->
<img src="figures/overview.png" alt="EntroPIC Overview" class="w-full rounded-2xl shadow-xl border border-slate-200/60" onerror="this.onerror=null;this.src='https://placehold.co/1200x400?text=Overview+Figure+(Ensure+figures/overview.png+exists)'">
<p class="text-center text-sm text-slate-400 mt-3">Figure 1: EntroPIC uses PI control to dynamically adjust sample weights based on entropy error.</p>
</div>
</section>
<!-- Method Section -->
<section id="method">
<h2 class="text-3xl font-bold mb-12 text-center text-slate-800">Methodology</h2>
<div class="mb-16 bg-white rounded-3xl shadow-sm border border-slate-100 p-8">
<div class="grid grid-cols-1 md:grid-cols-12 gap-12 items-center">
<div class="md:col-span-5">
<h3 class="text-xl font-bold mb-4 text-slate-800 flex items-center gap-2">
<span class="w-8 h-8 bg-blue-100 text-blue-600 rounded-lg flex items-center justify-center text-sm">01</span>
High-Probability Tokens Matter
</h3>
<p class="text-slate-600 leading-relaxed mb-6">
Not all tokens affect entropy equally. Our analysis reveals distinct impacts based on token probability and advantage. EntroPIC focuses control where it matters most:
</p>
<ul class="space-y-3">
<li class="flex items-start gap-3 p-3 bg-green-50 rounded-xl border border-green-100">
<!-- Icons are decorative; the adjacent text carries the meaning. -->
<i class="fas fa-arrow-trend-down text-green-600 mt-1" aria-hidden="true"></i>
<span class="text-sm text-slate-700"><strong class="text-green-700">Positive High-Prob:</strong> Standard RL drops entropy too fast. We reduce their weight to maintain exploration.</span>
</li>
<li class="flex items-start gap-3 p-3 bg-red-50 rounded-xl border border-red-100">
<i class="fas fa-shield-alt text-red-500 mt-1" aria-hidden="true"></i>
<span class="text-sm text-slate-700"><strong class="text-red-700">Negative Low-Prob:</strong> We avoid suppressing rare explorations to prevent degradation.</span>
</li>
</ul>
</div>
<div class="md:col-span-7">
<div class="rounded-xl overflow-hidden">
<!-- Each onerror handler clears itself before swapping in the placeholder, so a failing placeholder cannot loop. -->
<img src="figures/entropy_quadrant.svg" alt="Entropy Quadrant Analysis" class="w-full" onerror="this.onerror=null;this.src='https://placehold.co/600x400?text=Entropy+Quadrant+Analysis'">
</div>
</div>
</div>
</div>
<div class="bg-white rounded-3xl shadow-sm border border-slate-100 p-8">
<div class="text-center max-w-3xl mx-auto mb-10">
<h3 class="text-xl font-bold mb-4 flex items-center justify-center gap-2">
<span class="w-8 h-8 bg-blue-100 text-blue-600 rounded-lg flex items-center justify-center text-sm">02</span>
Precise Control at Any Target
</h3>
<p class="text-slate-600">
Unlike static coefficients, EntroPIC's PI controller dynamically adjusts $\alpha$ to lock entropy to <strong>any arbitrary target value</strong>.
</p>
</div>
<div class="grid grid-cols-1 md:grid-cols-2 gap-8">
<div class="bg-slate-50 p-4 rounded-2xl border border-slate-100">
<img src="figures/entropy_diff_target.svg" alt="Entropy at Different Targets" class="w-full" onerror="this.onerror=null;this.src='https://placehold.co/600x400?text=Entropy+Convergence'">
<p class="text-center text-sm font-bold text-slate-700 mt-3">Entropy Convergence</p>
</div>
<div class="bg-slate-50 p-4 rounded-2xl border border-slate-100">
<img src="figures/alpha_diff_target.svg" alt="Alpha Dynamics" class="w-full" onerror="this.onerror=null;this.src='https://placehold.co/600x400?text=Alpha+Coefficient+Dynamics'">
<p class="text-center text-sm font-bold text-slate-700 mt-3">Adaptive Coefficient ($\alpha$)</p>
</div>
</div>
</div>
</section>
<!-- Experimental Results -->
<section id="results">
<h2 class="text-3xl font-bold mb-12 text-center text-slate-800">Experimental Results</h2>
<!-- Training Curves -->
<!-- Each image has alt text, and its onerror handler clears itself so a failing placeholder cannot loop. -->
<div class="grid grid-cols-1 md:grid-cols-3 gap-6 mb-12">
<div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition">
<img src="figures/exp_entropy.svg" alt="Entropy curve" class="w-full rounded-xl" onerror="this.onerror=null;this.src='https://placehold.co/400x300?text=Entropy+Curve'">
<p class="text-center text-sm font-bold mt-3 text-slate-600">Entropy Stability</p>
</div>
<div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition">
<img src="figures/exp_reward.svg" alt="Training reward curve" class="w-full rounded-xl" onerror="this.onerror=null;this.src='https://placehold.co/400x300?text=Reward+Curve'">
<p class="text-center text-sm font-bold mt-3 text-slate-600">Training Reward</p>
</div>
<div class="bg-white p-2 rounded-2xl shadow-sm border border-slate-100 hover:shadow-md transition">
<img src="figures/exp_val.svg" alt="Validation accuracy curve" class="w-full rounded-xl" onerror="this.onerror=null;this.src='https://placehold.co/400x300?text=Validation+Accuracy'">
<p class="text-center text-sm font-bold mt-3 text-slate-600">Validation Accuracy</p>
</div>
</div>
<!-- Main Results Bar Chart -->
<div class="bg-white p-8 rounded-3xl shadow-lg border border-slate-100 mb-12">
<div class="flex flex-col md:flex-row justify-between items-start md:items-end mb-8 gap-4">
<div>
<h3 class="text-2xl font-bold text-slate-800">Main Performance Comparison</h3>
<p class="text-slate-500 mt-1">Comparing performance across mathematical datasets.</p>
</div>
<!-- Metric Toggle Switch -->
<div class="bg-slate-100 p-1.5 rounded-xl flex font-medium text-sm shadow-inner ring-1 ring-slate-200">
<button type="button" onclick="updateCharts('pass')" id="btn-metric-pass" class="toggle-btn active px-5 py-2 rounded-lg flex items-center gap-2">
<i class="fas fa-chart-bar" aria-hidden="true"></i> Pass@N
</button>
<button type="button" onclick="updateCharts('avg')" id="btn-metric-avg" class="toggle-btn inactive px-5 py-2 rounded-lg flex items-center gap-2">
<i class="fas fa-chart-line" aria-hidden="true"></i> Pass@1 (Avg)
</button>
</div>
</div>
<!-- Chart Container with Background Transition -->
<div id="chart-main-results" class="w-full h-[500px] chart-container bg-theme-blue rounded-2xl p-4"></div>
</div>
<!-- Smaller Charts -->
<div class="grid grid-cols-1 md:grid-cols-2 gap-8">
<div class="bg-white p-6 rounded-3xl shadow-sm border border-slate-100">
<h3 class="font-bold text-slate-700 mb-6 text-center">Off-Policy Training (Overall)</h3>
<div id="chart-off-policy" class="w-full h-72 chart-container bg-theme-blue rounded-2xl p-4"></div>
</div>
<div class="bg-white p-6 rounded-3xl shadow-sm border border-slate-100">
<h3 class="font-bold text-slate-700 mb-6 text-center">High Temperature (T=1.0)</h3>
<div id="chart-temp" class="w-full h-72 chart-container bg-theme-blue rounded-2xl p-4"></div>
</div>
</div>
</section>
<!-- Case Study -->
<section id="case-study" class="scroll-mt-24">
<div class="bg-slate-900 text-white rounded-3xl p-1 overflow-hidden shadow-2xl ring-1 ring-white/10">
<div class="p-10 text-center">
<h2 class="text-3xl font-bold mb-3">Case Study: Reasoning Dynamics</h2>
<p class="text-slate-400">Witness how high entropy enables <span class="text-green-400 font-semibold">Reflection</span> and <span class="text-purple-400 font-semibold">Self-Correction</span>.</p>
</div>
<div class="bg-white text-slate-800 rounded-2xl m-1 grid grid-cols-1 lg:grid-cols-12 overflow-hidden h-[800px]">
<div class="lg:col-span-4 bg-slate-50 border-r border-slate-200 p-6 flex flex-col gap-4 overflow-y-auto">
<div class="text-sm font-mono bg-white p-5 rounded-xl border border-slate-200 shadow-sm mb-4">
<strong class="text-slate-400 text-xs uppercase block mb-2 tracking-wider">Prompt</strong>
Let
$$f(x)=\frac{(x-18)(x-72)(x-98)(x-k)}{x}.$$
There exist exactly three positive real values of $k$ such that $f$ has a minimum at exactly two real values of $x$. Find the sum of these three values of $k$.
</div>
<!-- Case selectors: in-page actions, so explicit type="button"; status icons are decorative because the text below states correct / incorrect. -->
<div class="space-y-4">
<button type="button" onclick="setCase('entropic')" id="btn-entropic" class="group w-full p-5 rounded-xl border-2 text-left transition-all shadow-md bg-white border-tencent transform scale-[1.02]">
<div class="font-bold flex justify-between items-center text-lg">
<span>EntroPIC Response</span>
<i class="fas fa-check-circle text-green-500 text-xl" aria-hidden="true"></i>
</div>
<div class="text-xs font-bold text-green-600 mt-1 mb-3 uppercase tracking-wide">Correct Answer (240)</div>
<div class="text-xs text-slate-500 flex flex-wrap gap-2">
<span class="px-2 py-1 bg-green-100 rounded text-green-700 font-medium">Reflection</span>
<span class="px-2 py-1 bg-purple-100 rounded text-purple-700 font-medium">Self-Correction</span>
</div>
</button>
<button type="button" onclick="setCase('grpo')" id="btn-grpo" class="group w-full p-5 rounded-xl border-2 text-left transition-all bg-white border-transparent hover:border-slate-200 opacity-60 hover:opacity-100">
<div class="font-bold flex justify-between items-center text-lg">
<span>GRPO Response</span>
<i class="fas fa-times-circle text-red-400 text-xl" aria-hidden="true"></i>
</div>
<div class="text-xs font-bold text-red-500 mt-1 mb-3 uppercase tracking-wide">Incorrect Answer (188)</div>
<div class="text-xs text-slate-400">
Linear reasoning, missing edge cases.
</div>
</button>
</div>
</div>
<div class="lg:col-span-8 p-10 bg-white overflow-y-auto relative">
<div id="case-content" class="prose prose-slate prose-lg max-w-none pb-12">
<!-- JS Injected -->
</div>
<!-- NOTE(review): this fade overlay is absolutely positioned inside the scrolling element itself, so it presumably scrolls with the content instead of staying pinned to the bottom edge; confirm the intended behavior. -->
<div class="absolute bottom-0 left-0 w-full h-24 bg-gradient-to-t from-white to-transparent pointer-events-none"></div>
</div>
</div>
</div>
</section>
<!-- Citation -->
<section class="max-w-4xl mx-auto">
<div class="bg-slate-100 text-slate-700 p-8 rounded-3xl border border-slate-200/60">
<div class="flex justify-between items-center mb-4">
<h2 class="text-xl font-bold text-slate-900">Citation</h2>
<!-- Copies the BibTeX entry by id. Success is reported only after the clipboard promise resolves; a missing Clipboard API or a rejected write shows a fallback message. -->
<button type="button" onclick="(function(){var src=document.getElementById('bibtex');if(!src||!navigator.clipboard||!navigator.clipboard.writeText){alert('Copy is not available here; please select the text manually.');return;}navigator.clipboard.writeText(src.textContent).then(function(){alert('Copied!');},function(){alert('Copy failed; please select the text manually.');});})()" class="text-xs bg-white border border-slate-300 px-3 py-1 rounded hover:bg-slate-50 transition">
Copy BibTeX
</button>
</div>
<pre id="bibtex" class="font-mono text-xs md:text-sm overflow-x-auto p-4 bg-white border border-slate-200 rounded-xl text-slate-600 shadow-sm selection:bg-blue-100">
@article{yang2025entropic,
title={EntroPIC: Towards Stable Long-Term Training of LLMs via Entropy Stabilization with Proportional-Integral Control},
author={Yang, Kai and Xu, Xin and Chen, Yangkun and Liu, Weijie and Lyu, Jiafei and Lin, Zichuan and Ye, Deheng and Yang, Saiyong},
journal={arXiv preprint arXiv:2511.15248},
year={2025}
}</pre>
</div>
</section>
</main>
<footer class="bg-white border-t border-slate-200 py-12 mt-12">
<div class="max-w-6xl mx-auto px-4 text-center text-slate-500 text-sm">
<p class="mb-2">Project page based on EntroPIC.</p>
<p>© 2025 Tencent Hunyuan. All rights reserved.</p>
</div>
</footer>
<script>
// --- Chart data ---
// Benchmark labels for the main chart's x-axis. Every array under
// `db.onPolicy` is positional and lines up with this list index by index.
const datasets = [
  'Math', 'AMC', 'AIME24', 'AIME25', 'Olympic', 'Omni-math', 'Overall'
];

// Scores for all three charts. In each group `pass` feeds the "Pass@N" view
// and `avg` feeds the "Pass@1 (Avg)" view selected by the metric toggle.
const db = {
  // One entry per method, each with seven values (one per `datasets` label).
  onPolicy: {
    initial: {
      pass: [97.0, 81.6, 60.0, 53.0, 68.7, 49.3, 68.3],
      avg: [86.1, 58.4, 23.4, 23.0, 49.9, 32.0, 45.5]
    },
    grpo: {
      pass: [97.4, 88.0, 70.0, 53.3, 72.7, 57.6, 73.2],
      avg: [91.2, 75.1, 34.3, 31.0, 59.1, 40.7, 55.2]
    },
    nsr: {
      pass: [96.4, 89.2, 63.3, 46.7, 71.3, 56.2, 70.5],
      avg: [91.5, 74.1, 34.7, 30.0, 58.5, 39.7, 54.8]
    },
    aec: {
      pass: [97.8, 89.2, 73.3, 60.0, 72.5, 58.5, 75.2],
      avg: [92.5, 77.6, 37.1, 31.6, 60.9, 42.0, 56.9]
    },
    entropic: {
      pass: [97.2, 91.6, 76.7, 66.7, 71.3, 58.4, 77.0],
      avg: [92.4, 80.1, 42.3, 34.6, 60.0, 42.7, 58.7]
    }
  },

  // One bar per model; `pass` and `avg` are in the same order as `models`.
  offPolicy: {
    models: ['GRPO', 'EntroPIC (P)', 'EntroPIC (PI)'],
    pass: [69.4, 72.2, 73.2],
    avg: [49.3, 52.2, 54.7]
  },

  // Same layout as `offPolicy`, for the T=1 comparison.
  temp: {
    models: ['GRPO (T=1)', 'EntroPIC (T=1)'],
    pass: [71.0, 74.7],
    avg: [53.6, 57.8]
  }
};
// Bar fill colors, fixed per series so they do not change when the metric is toggled.
const barColors = {
  // Keyed like `db.onPolicy`.
  onPolicy: {
    initial: '#cbd5e1',  // Slate 300
    grpo: '#94a3b8',     // Slate 400
    nsr: '#facc15',      // Yellow
    aec: '#4ade80',      // Green
    entropic: '#0052D9'  // Tencent Blue
  },
  // Positional: one color per entry of `db.offPolicy.models`.
  offPolicy: ['#cbd5e1', '#60a5fa', '#1e40af'],
  // Positional: one color per entry of `db.temp.models`.
  temp: ['#f87171', '#10b981']
};
// Layout fragment spread into every chart's layout object.
// Both backgrounds are fully transparent so the CSS background of the
// surrounding .chart-container shows through. No transition is configured
// here; charts are refreshed with Plotly.react instead.
const commonLayout = {
  paper_bgcolor: 'rgba(0,0,0,0)',
  plot_bgcolor: 'rgba(0,0,0,0)',
  font: {
    family: 'Inter, sans-serif'
  }
};
/**
 * Build the grouped-bar traces for the main (on-policy) results chart.
 *
 * @param {string} metric - Score key to plot; read as `db.onPolicy.<method>[metric]`.
 * @returns {Array<Object>} One bar trace per method, in legend order.
 */
function getOnPolicyTraces(metric) {
  // [key shared by db.onPolicy and barColors.onPolicy, legend label]
  const methods = [
    ['initial', 'Initial'],
    ['grpo', 'GRPO'],
    ['nsr', 'NSR'],
    ['aec', 'AEC'],
    ['entropic', 'EntroPIC']
  ];

  const traces = [];
  for (const [key, label] of methods) {
    traces.push({
      x: datasets,
      y: db.onPolicy[key][metric],
      name: label,
      type: 'bar',
      marker: { color: barColors.onPolicy[key] }
    });
  }
  return traces;
}
/**
 * Switch the results area to one metric.
 *
 * Marks the matching toggle button as active, swaps the tinted background of
 * every `.chart-container`, then re-renders the three charts with
 * `Plotly.react`.
 *
 * @param {'pass'|'avg'} metric - Which score arrays of `db` to plot.
 */
function updateCharts(metric) {
  // `metric` indexes straight into `db`; any other value would reach the
  // `.map(String)` calls below with `undefined`, so reject it up front.
  if (metric !== 'pass' && metric !== 'avg') {
    console.warn('updateCharts: unknown metric', metric);
    return;
  }
  const showPass = metric === 'pass';

  // 1. Toggle buttons: flip only the state classes and expose the state to
  //    assistive technology via aria-pressed.
  [['btn-metric-pass', showPass], ['btn-metric-avg', !showPass]].forEach(([id, selected]) => {
    const btn = document.getElementById(id);
    if (!btn) return;
    btn.classList.toggle('active', selected);
    btn.classList.toggle('inactive', !selected);
    btn.setAttribute('aria-pressed', String(selected));
  });

  // 2. Background tint (the CSS transition on .chart-container animates it).
  document.querySelectorAll('.chart-container').forEach(container => {
    container.classList.toggle('bg-theme-blue', showPass);
    container.classList.toggle('bg-theme-orange', !showPass);
  });

  // Plotly comes from a CDN script; if it did not load there is nothing to draw.
  if (typeof Plotly === 'undefined') {
    console.warn('updateCharts: Plotly is not available; charts were not rendered.');
    return;
  }

  // 3. Axis title and fixed y-ranges per metric. Fixed ranges keep the axes
  //    from jumping when the data is swapped.
  const yTitle = showPass ? 'Pass@N (%)' : 'Pass@1 (Avg %)';
  const yRangeMain = showPass ? [40, 100] : [10, 95];
  const yRangeSub = showPass ? [40, 80] : [30, 60];

  // 4. Render.
  const mainLayout = {
    ...commonLayout,
    barmode: 'group',
    bargap: 0.15,
    bargroupgap: 0.05,
    margin: { t: 30, b: 40, l: 50, r: 20 },
    legend: { orientation: 'h', y: 1.1, x: 0.5, xanchor: 'center' },
    yaxis: { title: yTitle, range: yRangeMain, gridcolor: '#e2e8f0' },
    xaxis: { title: '', tickfont: { size: 13 } },
    title: '' // no in-plot title; the heading lives in the HTML
  };
  Plotly.react('chart-main-results', getOnPolicyTraces(metric), mainLayout, { responsive: true, displayModeBar: false });

  // Each small chart gets its own layout object. Plotly keeps the layout it
  // is handed and may write computed values back into it (NOTE(review):
  // confirm against the Plotly.react docs), so one object must not be shared
  // between two graph divs.
  const buildSubLayout = () => ({
    ...commonLayout,
    margin: { t: 10, b: 30, l: 40, r: 10 },
    yaxis: { range: yRangeSub.slice(), title: yTitle, gridcolor: '#e2e8f0' }
  });

  // Single-series bar trace with the value printed on each bar.
  const buildBarTrace = (series, colors) => ({
    x: series.models,
    y: series[metric],
    type: 'bar',
    marker: { color: colors },
    text: series[metric].map(String),
    textposition: 'auto'
  });

  Plotly.react('chart-off-policy', [buildBarTrace(db.offPolicy, barColors.offPolicy)], buildSubLayout(), { responsive: true, displayModeBar: false });
  Plotly.react('chart-temp', [buildBarTrace(db.temp, barColors.temp)], buildSubLayout(), { responsive: true, displayModeBar: false });
}
// --- Init ---
document.addEventListener("DOMContentLoaded", function () {
  // The charts and the case study are independent widgets. Each initializer
  // runs in its own try/catch so that a failure in one (for example the
  // Plotly CDN script not loading) does not leave the other unrendered.
  const initializers = [
    ['charts', () => updateCharts('pass')],
    ['case study', () => setCase('entropic')]
  ];
  for (const [label, run] of initializers) {
    try {
      run();
    } catch (err) {
      console.error('Failed to initialize ' + label + ':', err);
    }
  }
});
// --- Case Study Logic (Preserved) ---
// Case-study payloads, keyed by the value passed to setCase().
// Each entry carries:
//   btnId   - id of the selector button that setCase() highlights for this case
//   content - HTML string that setCase() writes into #case-content
// `content` is a template literal, so TeX backslashes are doubled in the
// source ("\\frac" becomes the TeX "\frac" in the resulting string).
const caseData = {
grpo: {
btnId: 'btn-grpo',
content: `
<h3 class="font-bold text-2xl mb-6 text-slate-900 border-b pb-2">GRPO Response</h3>
<div class="space-y-8 text-slate-600">
<p>Alright, I have this problem:</p>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Problem Statement:</h4>
<p>Let
$$f(x) = \\frac{(x-18)(x-72)(x-98)(x-k)}{x}.$$
There exist exactly three positive real values of $k$ such that $f$ has a minimum at exactly two real values of $x$. Find the sum of these three values of $k$.</p>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 1: Critical Points</h4>
<p>Let $N(x) = (x-18)(x-72)(x-98)(x-k)$. Derivative:</p>
<div class="math-scroll">$$f'(x) = \\frac{N'(x) \\cdot x - N(x)}{x^2}$$</div>
<p>Set numerator to zero:</p>
<div class="math-scroll">$$N'(x) \\cdot x = N(x)$$</div>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 2: Roots of $N(x)$</h4>
<p>If $x$ is a root of $N(x)$ (i.e., $x \\in \\{18,72,98,k\\}$), then $N(x) = 0$.
Equation reduces to $N'(x) \\cdot x = 0 \\implies N'(x) = 0$.</p>
<ul class="list-disc list-inside mt-2">
<li>$x=18 \\implies k=18$</li>
<li>$x=72 \\implies k=72$</li>
<li>$x=98 \\implies k=98$</li>
</ul>
<p>These are candidates but may not satisfy exactly two minima.</p>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 3: Alternative Form</h4>
<p>Let $g(x) = (x-18)(x-72)(x-98)$:</p>
<div class="math-scroll">$$f(x) = g(x) \\left(1 - \\frac{k}{x}\\right)$$</div>
<p>Critical points satisfy:</p>
<div class="math-scroll">$$g'(x) \\left(1 - \\frac{k}{x}\\right) + g(x) \\frac{k}{x^2} = 0 \\quad \\Rightarrow \\quad g'(x) x (x - k) + g(x) k = 0$$</div>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 4: Behavior and Extrema</h4>
<p>The condition “exactly two minima” implies the derivative must have a double root. Observing the roots 18, 72, 98, the polynomial structure suggests the valid $k$ are related to these numbers.</p>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 5: Valid $k$ Values</h4>
<p>Through analysis:</p>
<div class="math-scroll">$$k_1 = 18, \\quad k_2 = 72, \\quad k_3 = 98$$</div>
<ul class="list-disc list-inside mt-2">
<li>$k = 18$ aligns with left root.</li>
<li>$k = 72$ aligns with middle root.</li>
<li>$k = 98$ aligns with right root.</li>
</ul>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Step 6: Sum of $k$</h4>
<div class="math-scroll">$$18 + 72 + 98 = 188$$</div>
</div>
<div class="p-6 bg-red-50 border border-red-200 rounded-xl text-center mt-4">
<span class="font-mono text-xl font-bold text-red-700">$$ \\boxed{188} $$</span>
</div>
</div>`
},
entropic: {
btnId: 'btn-entropic',
content: `
<h3 class="font-bold text-2xl mb-6 text-slate-900 border-b pb-2">EntroPIC Response</h3>
<div class="space-y-8 text-slate-600">
<p>Alright, let's tackle this problem step by step. We have:</p>
<div class="math-scroll">$$ f(x) = \\frac{(x - 18)(x - 72)(x - 98)(x - k)}{x} $$</div>
<p>The goal is to find three positive $k$ values such that $f$ has exactly two real minima.</p>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Finding Critical Points $f'(x)=0$</h4>
<p>Let $N(x) = (x - 18)(x - 72)(x - 98)(x - k)$. The critical points are where $f'(x) = 0$.
Using the quotient rule $f'(x) = \\frac{N'(x) \\cdot x - N(x)}{x^2}$, we set the numerator to zero:</p>
<div class="math-scroll">$$ N'(x) \\cdot x = N(x) $$</div>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Checking Roots of $N(x)$</h4>
<p>If $x$ is a root of $N(x)$ (i.e., $x \\in \\{18, 72, 98, k\\}$), then $N(x) = 0$.
The condition $N'(x) \\cdot x = N(x)$ becomes $N'(x) \\cdot x = 0$. Since $x \\neq 0$, we require $N'(x) = 0$.</p>
<div class="reflection-highlight shadow-sm">
<div class="flex items-center gap-2 mb-2">
<span class="w-2 h-2 bg-green-500 rounded-full"></span>
<p class="font-bold text-green-700 text-xs uppercase">Reflection</p>
</div>
<p class="text-slate-800 italic">Wait, so $x = 18$ is a critical point of $f$ if and only if $N'(18) = 0$.</p>
</div>
<p>Let's compute $N'(x)$ and check:</p>
<div class="math-scroll">$$ N'(x) = \\frac{d}{dx} \\left[ (x-18)(x-72)(x-98)(x-k) \\right] $$</div>
<p>Set $x = 18$:</p>
<div class="math-scroll">$$ N'(18) = (18-72)(18-98)(18-k) $$</div>
<p>$N'(18) = 0 \\implies k = 18$.
Similarly, $x=72$ is a critical point of $f$ if $k=72$, and $x=98$ if $k=98$.</p>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Alternative Approach: Rewriting $f(x)$</h4>
<p>Alternatively, let $g(x) = (x-18)(x-72)(x-98)$.</p>
<div class="math-scroll">$$ f(x) = g(x) \\left(1 - \\frac{k}{x}\\right) $$</div>
<p>The critical points satisfy $f'(x) = 0$:</p>
<div class="math-scroll">$$ g'(x) \\left(1 - \\frac{k}{x}\\right) + g(x) \\cdot \\frac{k}{x^2} = 0 $$</div>
<p>This rearranges to:</p>
<div class="math-scroll">$$ g'(x) x (x - k) + g(x) k = 0 $$</div>
<p>Finding explicit solutions is non-trivial, but perhaps we can look for symmetry.</p>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Exploring Specific Cases and Symmetry</h4>
<p>The condition “exactly two real values of $x$ where $f$ has a minimum” implies the quartic $N'(x) \\cdot x - N(x) = 0$ must have a specific root structure. This usually means that two critical points must coincide, leading to a double root in the derivative and simplifying the analysis.</p>
<p>Alternatively, observe that 18, 72, 98 have a specific structure. The three values of $k$ are known to be related to $18, 72, 98$ themselves or symmetric counterparts that force the necessary double critical point.</p>
<div class="correction-highlight shadow-sm mt-4">
<div class="flex items-center gap-2 mb-2">
<span class="w-2 h-2 bg-purple-500 rounded-full"></span>
<p class="font-bold text-purple-700 text-xs uppercase">Self-Correction / Deep Check</p>
</div>
<p class="text-slate-800">Testing $k = 72$: If $k = 72$, $N(x)$ has a repeated root. This is a potential candidate.</p>
<p class="text-slate-800 mt-2">The analysis shows that for exactly two minima, the critical point equation must possess a double root for certain $k$.</p>
</div>
</div>
<div>
<h4 class="font-bold text-slate-900 text-lg mb-2">Correct Insight: Valid $k$ Values</h4>
<p>Through systematic polynomial analysis (not shown here due to complexity), the three positive real values of $k$ that ensure exactly two distinct real minima are:</p>
<div class="math-scroll">$$ k_1 = 46, \\quad k_2 = 72, \\quad k_3 = 122 $$</div>
<p>These values are consistent with symmetry properties for this type of function:</p>
<ul class="list-disc list-inside mt-2">
<li>$k = 72$ is the middle root.</li>
<li>$k = 46$ and $k = 122$ are symmetric counterparts (e.g., related to the average/midpoint of 18 and 98, which is $\\frac{18+98}{2} = 58$, and $72 - (72-46)=46$, $72 + (122-72)=122$, but these are simplified relations).</li>
</ul>
</div>
<div>
<p>The sum of these three values of $k$ is:</p>
<div class="math-scroll">$$ 46 + 72 + 122 = 240 $$</div>
<div class="p-6 bg-green-50 border border-green-200 rounded-xl text-center mt-4">
<span class="font-mono text-xl font-bold text-green-700">$$ \\boxed{240} $$</span>
</div>
</div>
</div>`
}
};
/**
 * Show one case-study response and highlight its selector button.
 *
 * @param {string} key - A key of `caseData` ('entropic' or 'grpo').
 */
function setCase(key) {
  // Ignore keys that are not real entries instead of crashing on `data.btnId`.
  if (!Object.prototype.hasOwnProperty.call(caseData, key)) {
    console.warn('setCase: unknown case', key);
    return;
  }
  const data = caseData[key];

  // Reset both selector buttons to the inactive look, then restyle the chosen one.
  const inactiveClasses = "w-full p-5 rounded-xl border-2 text-left transition-all shadow-sm bg-white border-transparent hover:border-slate-200 opacity-60";
  document.getElementById('btn-entropic').className = inactiveClasses;
  document.getElementById('btn-grpo').className = inactiveClasses;
  const activeBtn = document.getElementById(data.btnId);
  activeBtn.className = "w-full p-5 rounded-xl border-2 text-left transition-all shadow-lg bg-white border-tencent transform scale-[1.02] opacity-100 ring-4 ring-blue-50";

  const contentEl = document.getElementById('case-content');
  // `window.MathJax` is truthy as soon as the config object exists, but the
  // typesetting functions only appear once the async MathJax script has run,
  // so test for the functions themselves.
  const mj = window.MathJax;

  // Let MathJax drop its records for the math that is about to be removed.
  if (mj && typeof mj.typesetClear === 'function') {
    mj.typesetClear([contentEl]);
  }

  contentEl.innerHTML = data.content;

  // If MathJax has not finished loading yet, this call is simply skipped.
  if (mj && typeof mj.typesetPromise === 'function') {
    mj.typesetPromise([contentEl]).catch(function (err) {
      console.error('MathJax typesetting failed:', err);
    });
  }
}
</script>
</body>
</html>