<!--
akhaliq's picture
akhaliq HF Staff
Upload 37 files
26e0cd3 verified
-->
<!DOCTYPE html>
<html lang="en">
<meta charset="UTF-8" />
<!-- Without a viewport declaration mobile browsers lay the page out at a
     desktop width, so the max-width: 720px media query never applies. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>VibeVoice-Realtime TTS Demo</title>
<style>
/* Colour, border and shadow tokens referenced via var() by the rules in this stylesheet. */
:root {
  --bg: #f5f7fc;
  --surface: #ffffff;
  --accent: #5562ff;
  --accent-strong: #3f4dff;
  --text-primary: #1f2742;
  --text-muted: #5d6789;
  --border: rgba(85, 98, 255, 0.18);
  --shadow: 0 18px 45px rgba(31, 39, 66, 0.08);
}
/* Small explanatory note under the text panel.
   The previous colour (#8a93b5) gives roughly 3:1 contrast on the white
   surface, below the 4.5:1 WCAG AA minimum for 12px text; the muted text
   token (#5d6789) is above that minimum. */
.helper-text {
  font-size: 12px;
  color: var(--text-muted);
}
/* Size every element by its border box. */
* {
  box-sizing: border-box;
}

/* Page background; the flex container centres the app shell horizontally. */
body {
  margin: 0;
  background: var(--bg);
  font-family: 'Inter', 'Segoe UI', Roboto, Helvetica, sans-serif;
  color: var(--text-primary);
  display: flex;
  justify-content: center;
  padding: 48px 20px;
}

/* The card that holds the whole UI, capped at 960px wide. */
.app-shell {
  width: min(960px, 100%);
  background: var(--surface);
  border-radius: 20px;
  padding: 36px 40px 44px;
  box-shadow: var(--shadow);
  display: flex;
  flex-direction: column;
  gap: 28px;
}

h1 {
  margin: 0;
  text-align: center;
  font-size: 30px;
  font-weight: 700;
  letter-spacing: 0.01em;
}

/* Vertical stacks used for sections and labelled fields. */
.panel {
  display: flex;
  flex-direction: column;
  gap: 10px;
}

.field {
  display: flex;
  flex-direction: column;
  gap: 8px;
}

.field-label {
  font-weight: 600;
  font-size: 15px;
  color: var(--text-primary);
}
/* Prompt textarea: vertically resizable between 140px and 240px. */
.text-input {
  width: 100%;
  min-height: 140px;
  max-height: 240px;
  border: 1px solid rgba(31, 39, 66, 0.14);
  border-radius: 12px;
  padding: 14px 16px;
  font-size: 15px;
  line-height: 1.6;
  font-family: inherit;
  background: #f9faff;
  transition: border-color 0.2s, box-shadow 0.2s;
  resize: vertical;
}

/* The default outline is replaced by an accent border plus a 3px ring. */
.text-input:focus {
  outline: none;
  border-color: var(--accent);
  box-shadow: 0 0 0 3px rgba(85, 98, 255, 0.18);
  background: #fff;
}
/* Tinted card wrapping the streaming-text preview. */
#streamingPreviewContainer {
  border-radius: 14px;
  border: 1px solid var(--border);
  background: linear-gradient(135deg, #eef2ff 0%, #f7f9ff 100%);
  padding: 18px 20px;
  box-shadow: inset 0 1px 2px rgba(85, 98, 255, 0.12);
}

#streamingPreviewHeader {
  font-weight: 600;
  color: var(--text-primary);
  display: flex;
  align-items: center;
  gap: 10px;
  font-size: 14px;
  margin-bottom: 8px;
}

#streamingPreviewNote {
  font-weight: 400;
  font-size: 12px;
  color: var(--text-muted);
}

/* Monospace box the preview text is written into; pre-wrap keeps its whitespace. */
#streamingPreview {
  min-height: 70px;
  padding: 10px 12px;
  border-radius: 10px;
  background: rgba(255, 255, 255, 0.9);
  border: 1px solid rgba(85, 98, 255, 0.25);
  font-family: 'Courier New', Courier, monospace;
  font-size: 14px;
  line-height: 1.5;
  color: var(--text-primary);
  white-space: pre-wrap;
}

/* Blinking caret drawn after the text while the streaming-active class is set. */
#streamingPreview.streaming-active::after {
  content: "";
  display: inline-block;
  width: 2px;
  height: 1.1em;
  background: var(--accent);
  margin-left: 2px;
  animation: previewCaret 0.9s steps(1) infinite;
  vertical-align: bottom;
}

/* Caret visible for the first half of each cycle, hidden for the second. */
@keyframes previewCaret {
  0%, 50% {
    opacity: 1;
  }
  51%, 100% {
    opacity: 0;
  }
}
/* Layout for the speaker dropdown, sliders and action buttons. */
.control-panel {
  display: flex;
  flex-direction: column;
  gap: 18px;
}

.inline-field {
  display: flex;
  flex-direction: column;
  gap: 6px;
}

.select-control {
  width: 220px;
  border: 1px solid rgba(31, 39, 66, 0.14);
  border-radius: 10px;
  padding: 8px 12px;
  font-size: 14px;
  font-family: inherit;
  background: #fbfcff;
  color: var(--text-primary);
  transition: border-color 0.2s, box-shadow 0.2s;
}

/* Same focus treatment as the prompt textarea. */
.select-control:focus {
  outline: none;
  border-color: var(--accent);
  box-shadow: 0 0 0 3px rgba(85, 98, 255, 0.18);
  background: #fff;
}

/* A wrapping row of controls: 20px between rows, 28px between items. */
.control-row {
  display: flex;
  align-items: center;
  flex-wrap: wrap;
  gap: 20px 28px;
}

.range-control {
  display: flex;
  align-items: center;
  gap: 12px;
  font-size: 14px;
  color: var(--text-primary);
}

.range-control input[type="range"] {
  width: 200px;
  accent-color: var(--accent);
}

/* Numeric read-out next to a slider; min-width reserves space for the value. */
.range-value {
  font-weight: 600;
  color: var(--text-primary);
  min-width: 42px;
  text-align: right;
}
/* Primary Start/Stop button. */
#playback {
  background: var(--accent);
  color: #fff;
  border: none;
  padding: 10px 24px;
  border-radius: 999px;
  cursor: pointer;
  font-weight: 600;
  font-size: 14px;
  box-shadow: 0 8px 16px rgba(85, 98, 255, 0.25);
  transition: transform 0.15s, box-shadow 0.15s, background 0.15s;
}

#playback:hover {
  transform: translateY(-1px);
  box-shadow: 0 10px 20px rgba(85, 98, 255, 0.28);
}

#playback:active {
  transform: translateY(0);
}

/* The script toggles the "playing" class on this button. */
#playback.playing {
  background: var(--accent-strong);
}

/* Secondary pill buttons (Reset Controls, Save). */
.secondary-btn {
  border: 1px solid rgba(31, 39, 66, 0.18);
  background: #f1f3ff;
  color: var(--text-primary);
  padding: 8px 18px;
  border-radius: 999px;
  cursor: pointer;
  font-size: 13px;
  font-weight: 500;
  transition: background 0.15s, border-color 0.15s;
}

.secondary-btn:hover {
  background: #e6e9ff;
  border-color: rgba(31, 39, 66, 0.26);
}

.secondary-btn:disabled {
  opacity: 0.55;
  cursor: not-allowed;
}
/* Row of "label value unit" read-outs. */
.metrics {
  display: flex;
  flex-wrap: wrap;
  gap: 16px 32px;
  font-size: 14px;
  color: var(--text-muted);
}

.metrics span {
  display: flex;
  align-items: baseline;
  gap: 6px;
}

.metrics span strong {
  color: var(--text-primary);
  font-weight: 600;
}

.metric-unit {
  color: var(--text-muted);
  font-size: 13px;
}

/* Scrollable runtime log, capped at 260px tall. */
#logOutput {
  max-height: 260px;
  overflow-y: auto;
  background: #f7f9ff;
  color: var(--text-primary);
  padding: 16px 18px;
  border: 1px solid rgba(31, 39, 66, 0.12);
  border-radius: 12px;
  font-size: 13px;
  line-height: 1.6;
  box-shadow: inset 0 1px 2px rgba(15, 23, 42, 0.06);
  font-family: 'Fira Code', 'Courier New', Courier, monospace;
  margin-top: 0px;
}
/* Narrow viewports: tighter padding, stacked controls, full-width inputs. */
@media (max-width: 720px) {
  .app-shell {
    padding: 28px 20px 36px;
    gap: 24px;
  }

  .select-control {
    width: 100%;
  }

  .control-row {
    flex-direction: column;
    align-items: flex-start;
    gap: 16px;
  }

  #playback {
    width: 100%;
    text-align: center;
  }
}
</style>
<body>
<div class="app-shell">
<h1>VibeVoice-Realtime TTS Demo</h1>
<!-- Text panel: the prompt textarea plus a read-only preview that the script
fills token by token while a request is running. -->
<section class="panel">
<label class="field">
<span class="field-label">Text</span>
<textarea
id="prompt"
class="text-input"
rows="4"
>Enter your text here and click "Start" to instantly hear the VibeVoice-Realtime TTS output audio.</textarea>
</label>
<div id="streamingPreviewContainer">
<div id="streamingPreviewHeader">
<span>Streaming Input Text</span>
</div>
<!-- aria-live="polite" so assistive technology is told about text appended here. -->
<div id="streamingPreview" aria-live="polite">This area will display the streaming input text in real time.</div>
</div>
</section>
<span class="helper-text">This demo requires the full text to be provided upfront. The model then receives the text via streaming input during synthesis.<br>
For non-punctuation special characters, applying text normalization before processing often yields better results.</span>
<!-- Control panel: speaker choice, generation sliders and the action buttons. -->
<section class="panel control-panel">
  <div class="inline-field">
    <!-- A real <label for> gives the dropdown an accessible name and makes
         the caption clickable; the .field-label class keeps the same look. -->
    <label class="field-label" for="voiceSelect">Speaker</label>
    <select id="voiceSelect" class="select-control">
      <option value="">Loading...</option>
    </select>
  </div>
  <div class="control-row">
    <label class="range-control">
      <span>CFG</span>
      <input id="cfgScale" type="range" min="1" max="3" step="0.05" value="1.5" />
      <span class="range-value" id="cfgValue">1.5</span>
    </label>
    <label class="range-control">
      <span>Inference Steps</span>
      <input id="inferenceSteps" type="range" min="1" max="20" step="1" value="5" />
      <span class="range-value" id="stepsValue">5</span>
    </label>
    <button id="resetControls" type="button" class="secondary-btn">Reset Controls</button>
  </div>
  <div class="control-row">
    <!-- Explicit type="button": the default type is "submit", which would
         submit a form if this markup were ever placed inside one. -->
    <button id="playback" type="button">Start</button>
    <button id="saveAudio" type="button" class="secondary-btn" disabled>Save</button>
  </div>
</section>
<!-- Metrics: the script writes seconds (two decimals) into #modelGenerated
and #playbackElapsed. -->
<section class="panel">
<div class="metrics">
<span>Model Generated Audio<strong id="modelGenerated">0.00</strong><span class="metric-unit">s</span></span>
<span>Audio Played<strong id="playbackElapsed">0.00</strong><span class="metric-unit">s</span></span>
</div>
</section>
<!-- Runtime log: the script re-renders the full text of #logOutput on every new entry. -->
<section class="panel">
<span class="field-label">Runtime Logs</span>
<pre id="logOutput"></pre>
</section>
</div>
<script>
(() => {
// Sample rate (Hz) used for both the AudioContext and the exported WAV header.
const SAMPLE_RATE = 24_000;
// Buffer size passed to createScriptProcessor.
const BUFFER_SIZE = 2048;
// Seconds of audio that must be queued before playback begins.
const PREBUFFER_SEC = 0.1;
// Live audio graph and network handles; null while idle.
let audioCtx = null;
let scriptNode = null;
let socket = null;
// Queue of decoded samples that have been received but not yet played.
let buffer = new Float32Array(0);
let isPlaying = false;
let hasStartedPlayback = false;
// Consecutive all-silent audio callbacks seen after the socket has closed.
let silentFrameCount = 0;
// DOM references looked up once at start-up.
const promptInput = document.getElementById('prompt');
const streamingPreview = document.getElementById('streamingPreview');
const controlBtn = document.getElementById('playback');
const cfgSelect = document.getElementById('cfgScale');
const stepsSelect = document.getElementById('inferenceSteps');
const voiceSelect = document.getElementById('voiceSelect');
const cfgValueLabel = document.getElementById('cfgValue');
const stepsValueLabel = document.getElementById('stepsValue');
const modelGeneratedLabel = document.getElementById('modelGenerated');
const playbackElapsedLabel = document.getElementById('playbackElapsed');
const logOutput = document.getElementById('logOutput');
const resetBtn = document.getElementById('resetControls');
const saveBtn = document.getElementById('saveAudio');
// Playback progress: interval handle, last displayed value, and samples played.
let playbackTimer = null;
let lastPlaybackElapsed = 0;
let playbackSamples = 0;
// Seconds of audio generated so far, as last reported to setModelGenerated.
let modelGeneratedTotal = 0;
// One-shot flags so each milestone is logged only once per run.
let firstBrowserChunkLogged = false;
let playbackStartedLogged = false;
// Log entries currently displayed, plus a counter used to break timestamp ties.
const logEntries = [];
let logSequence = 0;
// Raw 16-bit PCM chunks kept so the audio can be saved as a WAV file.
let recordedChunks = [];
let recordedSamples = 0;
let recordingComplete = false;
// Object URL of the most recent download, kept so it can be revoked later.
let downloadUrl = null;
// Release the object URL held for the last download, if there is one.
const revokeDownloadUrl = () => {
  if (!downloadUrl) {
    return;
  }
  URL.revokeObjectURL(downloadUrl);
  downloadUrl = null;
};
// Enable Save only when some audio has been recorded AND the recording is complete.
const updateSaveButtonState = () => {
  if (saveBtn) {
    const hasAudio = recordedSamples !== 0;
    saveBtn.disabled = !(hasAudio && recordingComplete);
  }
};
// Discard everything captured for export and return the Save button to its disabled state.
const clearRecordedChunks = () => {
  revokeDownloadUrl();
  recordingComplete = false;
  recordedSamples = 0;
  recordedChunks = [];
  updateSaveButtonState();
};
/**
 * Assemble the recorded PCM chunks into a mono, 16-bit WAV file.
 *
 * Reads `recordedChunks` (ArrayBuffers of 16-bit PCM) and `recordedSamples`.
 * Chunk bytes are copied verbatim, so they keep the byte order in which
 * they were received.
 *
 * @returns {Blob|null} An `audio/wav` blob, or null when nothing is recorded.
 */
const createWavBlob = () => {
  if (!recordedSamples) {
    return null;
  }
  const HEADER_BYTES = 44;
  const BYTES_PER_SAMPLE = 2;
  const dataBytes = recordedSamples * BYTES_PER_SAMPLE;
  const wavBuffer = new ArrayBuffer(HEADER_BYTES + dataBytes);
  const view = new DataView(wavBuffer);
  const writeString = (offset, str) => {
    for (let i = 0; i < str.length; i += 1) {
      view.setUint8(offset + i, str.charCodeAt(i));
    }
  };
  // RIFF container header.
  writeString(0, 'RIFF');
  view.setUint32(4, 36 + dataBytes, true);
  writeString(8, 'WAVE');
  // "fmt " sub-chunk: PCM (1), one channel, 16 bits per sample.
  writeString(12, 'fmt ');
  view.setUint32(16, 16, true);
  view.setUint16(20, 1, true);
  view.setUint16(22, 1, true);
  view.setUint32(24, SAMPLE_RATE, true);
  view.setUint32(28, SAMPLE_RATE * BYTES_PER_SAMPLE, true); // byte rate
  view.setUint16(32, BYTES_PER_SAMPLE, true); // block align
  view.setUint16(34, 16, true);
  // "data" sub-chunk.
  writeString(36, 'data');
  view.setUint32(40, dataBytes, true);
  const pcmBytes = new Uint8Array(wavBuffer, HEADER_BYTES, dataBytes);
  let byteOffset = 0;
  recordedChunks.forEach(chunk => {
    // Copy whole samples only. A chunk with an odd byte length used to make
    // `new Int16Array(chunk)` throw a RangeError; the sample counter only
    // counts complete samples, so the trailing byte is dropped here too.
    const wholeBytes = chunk.byteLength - (chunk.byteLength % BYTES_PER_SAMPLE);
    // Never write past the data area, even if the counters disagree.
    const copyBytes = Math.min(wholeBytes, dataBytes - byteOffset);
    if (copyBytes <= 0) {
      return;
    }
    pcmBytes.set(new Uint8Array(chunk, 0, copyBytes), byteOffset);
    byteOffset += copyBytes;
  });
  return new Blob([wavBuffer], { type: 'audio/wav' });
};
// Mirror the CFG slider into its read-out, formatted to three decimals.
const updateCfgDisplay = () => {
  const cfg = Number(cfgSelect.value);
  cfgValueLabel.textContent = cfg.toFixed(3);
};

// Mirror the inference-steps slider into its read-out.
const updateStepsDisplay = () => {
  const steps = Number(stepsSelect.value);
  stepsValueLabel.textContent = steps.toString();
};

// Keep both read-outs in sync with the sliders, then render the initial values.
cfgSelect.addEventListener('input', updateCfgDisplay);
stepsSelect.addEventListener('input', updateStepsDisplay);
updateCfgDisplay();
updateStepsDisplay();
// Build a function that left-pads a value with zeros to the given width.
const zeroPad = width => value => value.toString().padStart(width, '0');
const pad2 = zeroPad(2);
const pad3 = zeroPad(3);

// Current local time formatted as "YYYY-MM-DD HH:MM:SS.mmm".
const formatLocalTimestamp = () => {
  const now = new Date();
  const datePart = [
    now.getFullYear(),
    pad2(now.getMonth() + 1), // getMonth() is zero-based
    pad2(now.getDate()),
  ].join('-');
  const timePart = [now.getHours(), now.getMinutes(), now.getSeconds()]
    .map(pad2)
    .join(':');
  return `${datePart} ${timePart}.${pad3(now.getMilliseconds())}`;
};
// Format a value as seconds with two decimals; anything non-finite renders as "0.00".
const formatSeconds = raw => {
  const seconds = Number(raw);
  if (!Number.isFinite(seconds)) {
    return '0.00';
  }
  return seconds.toFixed(2);
};
// Turn a timestamp string into a Date. An empty value means "now"; a
// "YYYY-MM-DD HH:MM:SS.mmm" string has its first space replaced with "T"
// before being handed to the Date constructor.
const parseTimestamp = value => {
  if (!value) {
    return new Date();
  }
  const spaceSeparated = /\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}/;
  const parseable = spaceSeparated.test(value) ? value.replace(' ', 'T') : value;
  return new Date(parseable);
};
// Record and display the generated-audio total; non-finite input is ignored
// and negative input is clamped to zero.
const setModelGenerated = value => {
  const seconds = Number(value);
  if (Number.isFinite(seconds)) {
    modelGeneratedTotal = Math.max(0, seconds);
    modelGeneratedLabel.textContent = formatSeconds(modelGeneratedTotal);
  }
};
// Record and display the played-audio time, clamped to the range
// [0, modelGeneratedTotal].
const setPlaybackElapsed = value => {
  const nonNegative = Math.max(0, value);
  lastPlaybackElapsed = Math.min(modelGeneratedTotal, nonNegative);
  playbackElapsedLabel.textContent = formatSeconds(lastPlaybackElapsed);
};
// Preview pacing: one token is revealed every 60000 / STREAMING_WPM milliseconds.
const STREAMING_WPM = 180;
const STREAMING_INTERVAL_MS = 60000 / STREAMING_WPM;
// Handle of the pending preview timeout, or null when none is scheduled.
let previewTimeoutId = null;
// Tokens of the prompt being revealed and the index of the next one to show.
let previewTokens = [];
let previewIndex = 0;
// True between start() and stop(); gates live preview refreshes on prompt edits.
let previewActive = false;
// Cancel the pending preview tick, if any.
const clearPreviewTimer = () => {
  if (!previewTimeoutId) {
    return;
  }
  clearTimeout(previewTimeoutId);
  previewTimeoutId = null;
};
// Show a static message in the preview box and hide the blinking caret.
const setPreviewIdle = message => {
  if (streamingPreview) {
    streamingPreview.textContent = message;
    streamingPreview.classList.remove('streaming-active');
  }
};
// Append the next token to the preview and schedule the following tick.
// Once every token has been shown, the caret class is removed and no
// further tick is scheduled.
const schedulePreviewTick = () => {
  if (!streamingPreview) {
    return;
  }
  const hasMore = previewIndex < previewTokens.length;
  streamingPreview.classList.toggle('streaming-active', hasMore);
  if (!hasMore) {
    return;
  }
  streamingPreview.textContent += previewTokens[previewIndex];
  previewIndex += 1;
  previewTimeoutId = setTimeout(schedulePreviewTick, STREAMING_INTERVAL_MS);
};
// Restart the preview from the current prompt text: split it into tokens
// (a run of non-whitespace plus any whitespace after it) and begin ticking.
const updateStreamingPreview = () => {
  if (!streamingPreview) {
    return;
  }
  clearPreviewTimer();
  const text = (promptInput?.value || '').trimEnd();
  previewTokens = text.match(/\S+\s*/g) || [];
  previewIndex = 0;
  streamingPreview.textContent = '';
  schedulePreviewTick();
};
// Empty the log view and its backing list, and zero the generated-audio read-out.
const clearLogs = () => {
  logEntries.length = 0;
  if (logOutput) {
    logOutput.textContent = '';
  }
  modelGeneratedTotal = 0;
  setModelGenerated(0);
};
// Add one entry to the runtime log and re-render it.
//   message   - text to show
//   timestamp - optional timestamp string; defaults to the current local time
// Entries are ordered by timestamp, then by insertion order, and only the
// last 400 are kept.
const appendLog = (message, timestamp) => {
  if (!logOutput) {
    return;
  }
  const stamp = timestamp || formatLocalTimestamp();
  const parsed = parseTimestamp(stamp);
  logSequence += 1;
  logEntries.push({
    timestamp: stamp,
    date: parsed,
    message,
    seq: logSequence,
  });

  const byTimeThenSequence = (a, b) => {
    const delta = a.date.getTime() - b.date.getTime();
    if (delta !== 0) {
      return delta;
    }
    return a.seq - b.seq;
  };
  logEntries.sort(byTimeThenSequence);

  const overflow = logEntries.length - 400;
  if (overflow > 0) {
    logEntries.splice(0, overflow);
  }

  const lines = [];
  for (const item of logEntries) {
    lines.push(`[${item.timestamp}] ${item.message}`);
  }
  logOutput.textContent = lines.join('\n');
  // Keep the newest entry in view.
  logOutput.scrollTop = logOutput.scrollHeight;
};
// Save button handler: build a WAV from the recorded audio and trigger a
// browser download through a temporary <a download> element.
const handleSaveClick = () => {
  if (!recordedSamples) {
    appendLog('[Frontend] Save requested but no audio received yet');
    return;
  }
  const wavBlob = createWavBlob();
  if (!wavBlob) {
    appendLog('[Error] Failed to assemble WAV data for download');
    return;
  }
  // Drop the previous object URL before creating a new one.
  revokeDownloadUrl();
  downloadUrl = URL.createObjectURL(wavBlob);

  const anchor = document.createElement('a');
  // ISO timestamp with ":" and "." replaced by "-" for the file name.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
  anchor.href = downloadUrl;
  anchor.download = `vibevoice_realtime_audio_${stamp}.wav`;

  const host = document.body;
  host.appendChild(anchor);
  anchor.click();
  host.removeChild(anchor);
  appendLog('[Frontend] Audio download triggered');
};
// Stop the interval that refreshes the "Audio Played" read-out.
const stopPlaybackTimer = () => {
  if (!playbackTimer) {
    return;
  }
  clearInterval(playbackTimer);
  playbackTimer = null;
};
// (Re)start the 250 ms interval that converts played samples to seconds
// and updates the "Audio Played" read-out.
const startPlaybackTimer = () => {
  stopPlaybackTimer();
  const refreshElapsed = () => {
    setPlaybackElapsed(playbackSamples / SAMPLE_RATE);
  };
  playbackTimer = setInterval(refreshElapsed, 250);
};
// Fetch /config and fill the speaker dropdown from its `voices` array,
// pre-selecting `default_voice` when it is one of the listed voices.
// On failure or an empty list the dropdown shows one placeholder and stays disabled.
const loadVoices = async () => {
  // Replace the dropdown contents with a single placeholder and disable it.
  const showPlaceholder = label => {
    voiceSelect.innerHTML = '';
    const placeholder = document.createElement('option');
    placeholder.value = '';
    placeholder.textContent = label;
    voiceSelect.appendChild(placeholder);
    voiceSelect.disabled = true;
  };

  try {
    voiceSelect.disabled = true;
    const response = await fetch('/config');
    if (!response.ok) {
      throw new Error(`Failed to fetch config: ${response.status}`);
    }
    const data = await response.json();
    const voices = Array.isArray(data.voices) ? data.voices : [];

    if (voices.length === 0) {
      showPlaceholder('No voices available');
      appendLog('[Error] No voice presets available');
      return;
    }

    voiceSelect.innerHTML = '';
    for (const voice of voices) {
      const option = document.createElement('option');
      option.value = voice;
      option.textContent = voice;
      voiceSelect.appendChild(option);
    }
    const preferred = data.default_voice;
    if (preferred && voices.includes(preferred)) {
      voiceSelect.value = preferred;
    }
    voiceSelect.disabled = false;
    appendLog(`[Frontend] Loaded ${voices.length} voice presets`);
  } catch (err) {
    console.error('Failed to load voices', err);
    showPlaceholder('Load failed');
    appendLog('[Error] Failed to load voice presets');
  }
};
loadVoices();
// Reset Controls: restore both sliders to their defaults and refresh the read-outs.
const handleResetClick = () => {
  cfgSelect.value = '1.5';
  updateCfgDisplay();
  stepsSelect.value = '5';
  updateStepsDisplay();
  appendLog('[Frontend] Controls reset to defaults (CFG=1.5, Steps=5)');
};
resetBtn.addEventListener('click', handleResetClick);
// While a run is active, editing the prompt restarts the streaming preview.
promptInput?.addEventListener('input', () => {
  if (!previewActive) {
    return;
  }
  updateStreamingPreview();
});
/**
 * Handle a text frame from the stream socket.
 *
 * The frame is expected to be JSON of the form
 * `{ type: 'log', event, data, timestamp }`. Known events are written to the
 * runtime log; `model_progress` only updates the generated-audio read-out and
 * `backend_stream_complete` also marks the recording as complete. Anything
 * that is not a log payload is logged verbatim.
 *
 * @param {string} raw - Raw text received from the socket.
 */
const handleLogMessage = raw => {
  let payload;
  try {
    payload = JSON.parse(raw);
  } catch (err) {
    appendLog(`[Error] Failed to parse log message: ${raw}`);
    return;
  }
  if (!payload || payload.type !== 'log') {
    appendLog(`[Log] ${raw}`);
    return;
  }
  const { event, timestamp } = payload;
  // A destructuring default only covers `undefined`; `??` also covers an
  // explicit `"data": null`, which would otherwise make `data.message` throw.
  const data = payload.data ?? {};
  switch (event) {
    case 'backend_request_received':
      // The request details in `data` are not part of the log line, so the
      // locals that used to be computed here (and never read) are gone.
      appendLog('[Backend] Received request', timestamp);
      break;
    case 'backend_first_chunk_sent':
      appendLog('[Backend] Sent first audio chunk', timestamp);
      break;
    case 'model_progress':
      // Progress updates only move the counter; they are not logged.
      if (typeof data.generated_sec !== 'undefined') {
        const generated = Number(data.generated_sec);
        if (Number.isFinite(generated)) {
          setModelGenerated(generated);
        }
      }
      return;
    case 'generation_error':
      appendLog(`[Error] Generation error: ${data.message || 'Unknown error'}`, timestamp);
      break;
    case 'backend_error':
      appendLog(`[Error] Backend error: ${data.message || 'Unknown error'}`, timestamp);
      break;
    case 'client_disconnected':
      appendLog('[Frontend] Client disconnected', timestamp);
      break;
    case 'backend_stream_complete':
      appendLog('[Backend] Backend finished', timestamp);
      recordingComplete = true;
      updateSaveButtonState();
      break;
    default:
      appendLog(`[Log] Event ${event}`, timestamp);
      break;
  }
};
// Reflect the playing state in the main button's label and "playing" class.
const updateButtonLabel = () => {
  const label = isPlaying ? 'Stop' : 'Start';
  controlBtn.textContent = label;
  controlBtn.classList.toggle('playing', isPlaying);
};
// Append decoded samples to the end of the playback queue.
const appendAudio = chunk => {
  const queued = buffer.length;
  const combined = new Float32Array(queued + chunk.length);
  combined.set(buffer);
  combined.set(chunk, queued);
  buffer = combined;
};
// Take `frameCount` samples off the front of the playback queue. If fewer
// are queued, the result is zero-padded up to `frameCount`.
const pullAudio = frameCount => {
  const queued = buffer.length;
  if (queued === 0) {
    return new Float32Array(frameCount);
  }
  if (queued > frameCount) {
    // More than enough queued: hand out the head and keep the tail.
    const head = buffer.subarray(0, frameCount);
    buffer = buffer.subarray(frameCount);
    return head;
  }
  // The queue is drained by this call.
  const remainder = buffer;
  buffer = new Float32Array(0);
  if (remainder.length >= frameCount) {
    return remainder;
  }
  const padded = new Float32Array(frameCount);
  padded.set(remainder, 0);
  return padded;
};
// Close the stream socket if it is open or still connecting, then forget it.
const closeSocket = () => {
  if (socket) {
    const state = socket.readyState;
    if (state === WebSocket.OPEN || state === WebSocket.CONNECTING) {
      socket.close();
    }
  }
  socket = null;
};
// Empty the playback queue and clear the per-run flags. When `resetSamples`
// is true (the default) the played-sample counter and its read-out are zeroed too.
const resetPlaybackFlags = (resetSamples = true) => {
  buffer = new Float32Array(0);
  if (resetSamples) {
    playbackSamples = 0;
    setPlaybackElapsed(0);
  }
  silentFrameCount = 0;
  hasStartedPlayback = false;
  playbackStartedLogged = false;
  firstBrowserChunkLogged = false;
};
/**
 * Disconnect the script processor and close the AudioContext, then drop
 * both references. Failures are logged as warnings and never thrown, so
 * teardown always completes.
 */
const teardownAudio = () => {
  if (scriptNode) {
    try {
      scriptNode.disconnect();
    } catch (err) {
      console.warn('disconnect error', err);
    }
    scriptNode.onaudioprocess = null;
  }
  if (audioCtx) {
    try {
      // close() reports failure by rejecting the Promise it returns, which a
      // synchronous try/catch cannot see; attach a handler so a rejection
      // is logged instead of surfacing as an unhandled rejection.
      const closing = audioCtx.close();
      if (closing && typeof closing.catch === 'function') {
        closing.catch(err => console.warn('audioCtx.close error', err));
      }
    } catch (err) {
      console.warn('audioCtx.close error', err);
    }
  }
  audioCtx = null;
  scriptNode = null;
};
// Return to the idle state: close the socket, tear down the audio graph,
// clear the playback flags (and, optionally, the played-sample counter),
// and stop the elapsed-time timer.
const resetState = (resetSamples = true) => {
  closeSocket();
  teardownAudio();
  resetPlaybackFlags(resetSamples);
  stopPlaybackTimer();
  isPlaying = false;
};
// Build a fresh AudioContext + ScriptProcessor pair that plays samples
// pulled from the shared `buffer` queue.
const createAudioChain = () => {
// Start from a clean slate: drop any previous graph and clear queue/counters.
teardownAudio();
resetPlaybackFlags();
audioCtx = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: SAMPLE_RATE });
// Zero input channels, one output channel.
scriptNode = audioCtx.createScriptProcessor(BUFFER_SIZE, 0, 1);
// Number of queued samples required before playback is allowed to begin.
const minBufferSamples = Math.floor(audioCtx.sampleRate * PREBUFFER_SEC);
scriptNode.onaudioprocess = event => {
const output = event.outputBuffer.getChannelData(0);
const needPrebuffer = !hasStartedPlayback;
// A missing socket counts as closed.
const socketClosed = !socket || socket.readyState === WebSocket.CLOSED || socket.readyState === WebSocket.CLOSING;
if (needPrebuffer) {
// Begin once enough audio is queued, or immediately if no more can arrive.
if (buffer.length >= minBufferSamples || socketClosed) {
hasStartedPlayback = true;
if (!playbackStartedLogged) {
playbackStartedLogged = true;
appendLog('[Frontend] Browser started to play audio');
startPlaybackTimer();
}
} else {
// Still pre-buffering: emit silence for this callback.
output.fill(0);
return;
}
}
const chunk = pullAudio(output.length);
output.set(chunk);
if (hasStartedPlayback) {
// Counts every output frame, including any zero padding from pullAudio.
playbackSamples += output.length;
}
// After the socket has closed and the queue is empty, four consecutive
// all-zero callbacks end playback.
if (socketClosed && buffer.length === 0 && chunk.every(sample => sample === 0)) {
silentFrameCount += 1;
if (silentFrameCount >= 4) {
stop();
}
} else {
silentFrameCount = 0;
}
};
scriptNode.connect(audioCtx.destination);
};
/**
 * Begin a synthesis run: reset state, build the audio graph, open the
 * `/stream` WebSocket with the current text and control values, and wire
 * its handlers. Does nothing if a run is already in progress.
 *
 * Text frames are passed to `handleLogMessage`; binary frames are treated
 * as little-endian 16-bit PCM, queued for playback and kept for export.
 */
const start = () => {
  if (isPlaying) {
    return;
  }
  const textValue = promptInput?.value || '';
  const cfgValue = Number(cfgSelect.value);
  const stepsValue = Number(stepsSelect.value);
  const voiceValue = voiceSelect.value || '';
  clearLogs();
  const cfgDisplay = Number.isFinite(cfgValue) ? cfgValue.toFixed(3) : 'default';
  const stepsDisplay = Number.isFinite(stepsValue) ? stepsValue : 'default';
  appendLog(`[Frontend] Start button clicked, CFG=${cfgDisplay}, Steps=${stepsDisplay}, Speaker=${voiceValue || 'default'}`);
  setModelGenerated(0);
  setPlaybackElapsed(0);
  resetState(true);
  clearRecordedChunks();
  isPlaying = true;
  previewActive = true;
  updateStreamingPreview();
  updateButtonLabel();
  createAudioChain();
  const params = new URLSearchParams();
  params.set('text', textValue);
  if (!Number.isNaN(cfgValue)) {
    params.set('cfg', cfgValue.toFixed(3));
  }
  if (!Number.isNaN(stepsValue)) {
    params.set('steps', stepsValue.toString());
  }
  if (voiceValue) {
    params.set('voice', voiceValue);
  }
  // http -> ws, https -> wss.
  const wsUrl = `${location.origin.replace(/^http/, 'ws')}/stream?${params.toString()}`;
  const ws = new WebSocket(wsUrl);
  ws.binaryType = 'arraybuffer';
  socket = ws;
  // Socket events are delivered asynchronously, so a socket closed by a
  // previous Stop can still fire after a new run has begun. Every handler
  // checks that its socket is still the current one; otherwise a stale
  // `close` would null out the new run's socket and end playback early.
  const isCurrentSocket = () => socket === ws;
  ws.onmessage = event => {
    if (!isCurrentSocket()) {
      return;
    }
    if (typeof event.data === 'string') {
      handleLogMessage(event.data);
      return;
    }
    if (!(event.data instanceof ArrayBuffer)) {
      return;
    }
    const rawBuffer = event.data.slice(0);
    const view = new DataView(rawBuffer);
    // Convert int16 samples to floats in [-1, 1).
    const floatChunk = new Float32Array(view.byteLength / 2);
    for (let i = 0; i < floatChunk.length; i += 1) {
      floatChunk[i] = view.getInt16(i * 2, true) / 32768;
    }
    appendAudio(floatChunk);
    recordedChunks.push(rawBuffer);
    recordedSamples += floatChunk.length;
    updateSaveButtonState();
    if (!firstBrowserChunkLogged) {
      firstBrowserChunkLogged = true;
      appendLog('[Frontend] Received first audio chunk');
    }
  };
  ws.onerror = err => {
    console.error('WebSocket error', err);
    if (!isCurrentSocket()) {
      return;
    }
    // The error event is a plain Event with no message; interpolating it
    // used to log "[object Event]".
    appendLog(`[Error] WebSocket error: ${err?.message || 'connection error'}`);
    stop();
  };
  ws.onclose = () => {
    if (!isCurrentSocket()) {
      return;
    }
    socket = null;
    if (recordedSamples > 0) {
      recordingComplete = true;
      updateSaveButtonState();
    }
  };
};
// End the current run (or simply re-sync the UI if nothing is playing).
// The played-sample counter is kept so the final elapsed time stays visible.
const stop = () => {
  // resetState() clears isPlaying, so remember whether a run was active.
  const wasPlaying = isPlaying;
  resetState(false);
  if (wasPlaying) {
    setPlaybackElapsed(Math.min(lastPlaybackElapsed, modelGeneratedTotal));
    appendLog('[Frontend] Playback stopped');
    if (recordedSamples > 0) {
      recordingComplete = true;
      updateSaveButtonState();
    }
    previewActive = false;
    clearPreviewTimer();
    streamingPreview?.classList.remove('streaming-active');
  }
  updateButtonLabel();
};
// The main button toggles between starting and stopping a run.
controlBtn.addEventListener('click', () => {
  const action = isPlaying ? stop : start;
  action();
});
// Wire the Save button and render the initial button states.
saveBtn?.addEventListener('click', handleSaveClick);
updateButtonLabel();
updateSaveButtonState();

// Release the socket, audio graph, timers and object URL when the page unloads.
const releaseOnUnload = () => {
  resetState();
  clearPreviewTimer();
  revokeDownloadUrl();
};
window.addEventListener('beforeunload', releaseOnUnload);
})();
</script>
</body>
</html>