| import {serve} from "https://deno.land/std/http/server.ts"; |
| import {EdgeSpeechTTS} from "https://esm.sh/@lobehub/tts@1"; |
|
|
/** Short model aliases accepted by the API, mapped to the Edge TTS voice each one selects. */
const VOICE_ALIASES: ReadonlyMap<string, string> = new Map([
  ["ava", "en-US-AvaMultilingualNeural"],
  ["andrew", "en-US-AndrewMultilingualNeural"],
  ["emma", "en-US-EmmaMultilingualNeural"],
  ["brian", "en-US-BrianMultilingualNeural"],
  ["vivienne", "fr-FR-VivienneMultilingualNeural"],
  ["remy", "fr-FR-RemyMultilingualNeural"],
  ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
  ["florian", "de-DE-FlorianMultilingualNeural"],
  ["dmitry", "ru-RU-DmitryNeural"],
  ["svetlana", "ru-RU-SvetlanaNeural"],
]);

/** Voice used when the requested model is not a known alias. */
const FALLBACK_VOICE = "en-US-BrianMultilingualNeural";

/**
 * Parses a `"rate:<number>|pitch:<number>"` settings string.
 *
 * Unknown keys are ignored. A missing, non-string, or non-numeric value leaves
 * the corresponding setting at 0, so `NaN` is never forwarded to the TTS engine.
 */
function parseProsody(voice: unknown): { rate: number; pitch: number } {
  const prosody = { rate: 0, pitch: 0 };
  if (typeof voice !== "string") {
    return prosody;
  }
  for (const part of voice.split("|")) {
    const [key, value] = part.split(":");
    if (key !== "rate" && key !== "pitch") {
      continue;
    }
    const parsed = Number(value);
    if (Number.isFinite(parsed)) {
      prosody[key] = parsed;
    }
  }
  return prosody;
}

/**
 * Synthesizes `text` with Edge TTS and returns the audio as an `audio/mpeg` response.
 *
 * @param model Either a full voice name containing "Neural", which is used
 *   verbatim, or one of the short aliases in `VOICE_ALIASES`. Anything else
 *   falls back to `FALLBACK_VOICE`.
 * @param voice Settings string of the form `"rate:<number>|pitch:<number>"`.
 *   It is only applied when `model` is a full voice name.
 * @param text The text to speak.
 * @returns A response whose body is the synthesized audio.
 */
async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
  // Callers forward unvalidated JSON, so `model` may not be a string at runtime.
  const requestedModel = typeof model === "string" ? model : "";
  const usesFullVoiceName = requestedModel.includes("Neural");

  const voiceName = usesFullVoiceName
    ? requestedModel
    : (VOICE_ALIASES.get(requestedModel) ?? FALLBACK_VOICE);

  // Alias requests ignore `voice` (rate and pitch stay 0), as before.
  // NOTE(review): the demo page sends rate/pitch together with aliases —
  // confirm whether they should apply there as well.
  const { rate, pitch } = usesFullVoiceName ? parseProsody(voice) : { rate: 0, pitch: 0 };

  const tts = new EdgeSpeechTTS();
  const payload = {
    input: text,
    options: { rate, pitch, voice: voiceName },
  };

  const response = await tts.create(payload);
  const mp3Buffer = new Uint8Array(await response.arrayBuffer());
  return new Response(mp3Buffer, {
    headers: { "Content-Type": "audio/mpeg" },
  });
}
|
|
/**
 * Checks that the request declares the expected media type.
 *
 * Only the media type itself is compared, case-insensitively; parameters such
 * as `; charset=utf-8` are ignored, so `application/json; charset=utf-8`
 * satisfies an expected `application/json`.
 *
 * @param req The incoming request.
 * @param expected The required media type, e.g. `"application/json"`.
 * @returns `undefined` when the header matches, otherwise a ready-to-send
 *   400 response.
 */
function validateContentType(req: Request, expected: string): Response | undefined {
  const contentType = req.headers.get("Content-Type");
  const mediaType = (contentType ?? "").split(";")[0].trim().toLowerCase();
  if (mediaType !== expected.trim().toLowerCase()) {
    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
    return new Response("Bad Request", { status: 400 });
  }
  return undefined;
}
|
|
/**
 * Debug endpoint: logs a fixed model, settings string, and sample phrase,
 * then synthesizes that phrase and returns the resulting response.
 */
async function handleDebugRequest() {
  const sample = {
    model: "en-US-BrianMultilingualNeural",
    voice: "rate:0.0|pitch:0.0",
    text: "Приветик! Надеюсь ты меня хорошо слышишь? Алё?!",
  };
  const {model, voice, text} = sample;

  console.log(`model=${model}, voice=${voice}, text=${text}`);
  return synthesizeSpeech(model, voice, text);
}
|
|
/**
 * Handles `POST /v1/audio/speech`: validates the request and synthesizes speech.
 *
 * The JSON body must be an object with a string `model` and a non-empty
 * string `input`. `voice` is optional; when it is absent or not a string, an
 * empty settings string is passed on.
 *
 * @param req The incoming request.
 * @returns The synthesized audio response, 405 for non-POST methods, or 400
 *   for a wrong content type, malformed JSON, or missing/ill-typed fields.
 */
async function handleSynthesisRequest(req: Request): Promise<Response> {
  if (req.method !== "POST") {
    // A 405 response has to tell the client which methods are supported.
    return new Response("Method Not Allowed", {
      status: 405,
      headers: { Allow: "POST" },
    });
  }

  const invalidContentType = validateContentType(req, "application/json");
  if (invalidContentType) return invalidContentType;

  let body: unknown;
  try {
    body = await req.json();
  } catch {
    // Malformed JSON is a client error, not a server failure.
    return new Response("Bad Request: body is not valid JSON", { status: 400 });
  }
  if (typeof body !== "object" || body === null) {
    return new Response("Bad Request: body must be a JSON object", { status: 400 });
  }

  const { model, input, voice } = body as Record<string, unknown>;
  if (typeof model !== "string") {
    return new Response('Bad Request: "model" must be a string', { status: 400 });
  }
  if (typeof input !== "string" || input === "") {
    return new Response('Bad Request: "input" must be a non-empty string', { status: 400 });
  }

  return synthesizeSpeech(model, typeof voice === "string" ? voice : "", input);
}
|
|
|
|
/**
 * Static demo page served at `/`.
 *
 * The page lets a user type text, pick a voice from `/v1/audio/models`, and
 * play or download the audio returned by `/v1/audio/speech`. It also shows
 * curl examples for both endpoints.
 *
 * The embedded browser script is part of this template literal, so its own
 * backticks and `${` sequences are escaped, and a backslash meant for the
 * browser script is written as two.
 */
const DEMO_PAGE_HTML = `<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta content="width=device-width, initial-scale=1.0" name="viewport" />
    <title>tts</title>
    <style>
      body {
        background-color: #121212;
        color: #e0e0e0;
        font-family: Arial, sans-serif;
        margin: 0;
        padding: 20px;
      }

      .container {
        max-width: 800px;
        margin: 0 auto;
        padding: 20px;
        background-color: #1e1e1e;
        border-radius: 8px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
      }

      .input-area, .output-area {
        margin-bottom: 20px;
      }

      .slider-container, .textarea-container, .dropdown-container {
        margin-bottom: 20px;
      }

      label {
        display: block;
        margin-bottom: 8px;
        font-weight: bold;
      }

      input[type="range"] {
        width: 100%;
      }

      .slider-value {
        text-align: center;
        margin-top: 8px;
      }

      textarea {
        max-width: 780px;
        width: calc(100% - 20px);
        height: 100px;
        padding: 10px;
        border: 1px solid #333;
        border-radius: 4px;
        background-color: #2e2e2e;
        color: #e0e0e0;
        resize: none;
      }

      select {
        width: 100%;
        padding: 10px;
        border: 1px solid #333;
        border-radius: 4px;
        background-color: #2e2e2e;
        color: #e0e0e0;
      }

      button {
        width: 100%;
        padding: 10px;
        border: none;
        border-radius: 4px;
        background-color: #6200ea;
        color: #fff;
        font-size: 16px;
        cursor: pointer;
        transition: background-color 0.3s;
      }

      button:hover {
        background-color: #3700b3;
      }

      h1 {
        font-size: 24px;
        margin-bottom: 20px;
      }

      a {
        color: #bb86fc;
        text-decoration: none;
        display: block;
        margin: 10px 0;
      }

      a:hover {
        text-decoration: underline;
      }

      #audioPlayerContainer {
        text-align: center;
      }

      audio {
        width: 100%;
        max-width: 600px;
        margin: 10px 0;
      }

      pre {
        color: #94c890;
        background: #000000;
        padding: 5px 10px;
        margin: 0;
        font-size: 1.12em;
      }
    </style>
  </head>
  <body>
    <div class="container">
      <div class="input-area">
        <div class="textarea-container">
          <label for="inputText">текст:</label>
          <textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
        </div>
        <div class="dropdown-container">
          <label for="voiceSelect">голос:</label>
          <select id="voiceSelect"></select>
        </div>
        <button id="synthesizeButton">синтезировать</button>
      </div>
      <div class="output-area">
        <div id="audioPlayerContainer"></div>
      </div>
      <details>
        <summary>api</summary>
        <p>получить список голосов:</p>
        <pre id="apiVoices"></pre>
        <p>post-запрос для синтеза голоса из текста:</p>
        <pre id="apiExamples"></pre>
      </details>
    </div>
    <script>
      let audio = null;
      let audioUrl = null;

      document.getElementById('synthesizeButton').addEventListener('click', () => {
        const text = document.getElementById('inputText').value || 'приветик! давай поболтаем немного?';
        const rate = '0.0';
        const pitch = '0.0';
        const voice = \`rate:\${rate}|pitch:\${pitch}\`;
        const model = document.getElementById('voiceSelect').value;

        if (audio) {
          audio.pause();
          audio.currentTime = 0;
        }

        fetch('/v1/audio/speech', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({ model, input: text, voice })
        })
          .then(response => {
            if (!response.ok) {
              throw new Error('HTTP ' + response.status);
            }
            return response.blob();
          })
          .then(blob => {
            const audioPlayerContainer = document.getElementById('audioPlayerContainer');

            if (audio) {
              audio.pause();
              audioPlayerContainer.innerHTML = '';
            }
            if (audioUrl) {
              URL.revokeObjectURL(audioUrl);
            }

            audioUrl = URL.createObjectURL(blob);
            audio = new Audio(audioUrl);
            audio.controls = true;
            audioPlayerContainer.appendChild(audio);

            const downloadLink = document.createElement('a');
            downloadLink.href = audioUrl;
            downloadLink.download = 'synthesized_voice.mp3';
            downloadLink.textContent = 'скачать аудио';
            downloadLink.style.display = 'block';
            downloadLink.style.marginTop = '10px';

            audioPlayerContainer.appendChild(downloadLink);
            return audio.play();
          })
          .catch(error => {
            console.error('ошибка при синтезе речи:', error);
          });
      });

      async function fetchModels() {
        try {
          const response = await fetch('/v1/audio/models');
          if (!response.ok) {
            throw new Error('HTTP ' + response.status);
          }
          const models = await response.json();
          const voiceSelect = document.getElementById('voiceSelect');

          models.forEach((model, index) => {
            const option = document.createElement('option');
            option.value = model.model;
            option.textContent = model.model;
            if (index === 1) {
              option.selected = true;
            }
            voiceSelect.appendChild(option);
          });
        } catch (error) {
          console.error('ошибка при получении списка моделей:', error);
        }
      }
      fetchModels();

      function createApiExamples() {
        const origin = window.location.origin;
        document.getElementById('apiVoices').textContent = \`curl \${origin}/v1/audio/models\`;
        document.getElementById('apiExamples').textContent = [
          \`curl \${origin}/v1/audio/speech \\\\\`,
          \`-H 'content-type: application/json' \\\\\`,
          \`--data-raw '{"model":"brian","input":"привет! хрю-хрю!","voice":"rate:0|pitch:0"}' \\\\\`,
          '-o tts_voice.mp3',
          ''
        ].join('\\n');
      }
      createApiExamples();
    </script>
  </body>
</html>`;

/**
 * Serves the demo page.
 *
 * @param _req The incoming request; unused because the page is static.
 * @returns An HTML response containing the demo page.
 */
async function handleDemoRequest(_req: Request): Promise<Response> {
  return new Response(DEMO_PAGE_HTML, {
    // The page contains Cyrillic text, so the encoding is declared in the header as well.
    headers: { "Content-Type": "text/html; charset=utf-8" },
  });
}
|
|
/** Gender label attached to each selectable voice. */
type VoiceGender = "male" | "female";

/** One entry of the `/v1/audio/models` response. */
interface VoiceModel {
  readonly model: string;
  readonly gender: VoiceGender;
}

/** Position of each gender in the listing: male voices come first. */
const GENDER_ORDER: Readonly<Record<VoiceGender, number>> = { male: 0, female: 1 };

/** Voice aliases offered to clients, in their declared order. */
const AVAILABLE_VOICES: readonly VoiceModel[] = [
  { model: "ava", gender: "female" },
  { model: "andrew", gender: "male" },
  { model: "emma", gender: "female" },
  { model: "brian", gender: "male" },
  { model: "vivienne", gender: "female" },
  { model: "remy", gender: "male" },
  { model: "seraphina", gender: "female" },
  { model: "florian", gender: "male" },
  { model: "dmitry", gender: "male" },
  { model: "svetlana", gender: "female" },
];

/**
 * Handles `/v1/audio/models`: returns the available voice aliases as JSON.
 *
 * Male voices are listed before female voices; within each group the declared
 * order is kept. The demo page preselects the second entry of this list, so
 * the ordering is part of the observable behaviour.
 *
 * @returns A JSON response with an array of `{model, gender}` objects.
 */
async function handleVoiceList(): Promise<Response> {
  // Sort a copy so the shared list is never mutated between requests.
  const sortedVoiceList = [...AVAILABLE_VOICES].sort(
    (a, b) => GENDER_ORDER[a.gender] - GENDER_ORDER[b.gender],
  );

  return new Response(JSON.stringify(sortedVoiceList), {
    headers: { "Content-Type": "application/json" },
  });
}
|
|
|
|
/**
 * Dispatches a request to the handler for its path.
 *
 * Each handler is awaited inside the `try` block. Returning the promise
 * without `await` would let a rejection escape the `catch`, so the 500
 * response below would never be produced for asynchronous failures.
 *
 * @param req The incoming request.
 * @returns The handler's response, 404 for unknown paths, or 500 with the
 *   error message when a handler fails.
 */
async function routeRequest(req: Request): Promise<Response> {
  try {
    const { pathname } = new URL(req.url);

    switch (pathname) {
      case "/":
        return await handleDemoRequest(req);
      case "/v1/audio/models":
        return await handleVoiceList();
      case "/tts":
        return await handleDebugRequest();
      case "/v1/audio/speech":
        return await handleSynthesisRequest(req);
      default:
        console.log(`Unhandled path ${pathname}`);
        return new Response("Not Found", { status: 404 });
    }
  } catch (err) {
    // Anything can be thrown, so only read `.message` from real Error objects.
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Error processing request: ${message}`);
    return new Response(`Internal Server Error\n${message}`, {
      status: 500,
    });
  }
}

serve(routeRequest, { port: 7860 });