Spaces:

getapi
/

mstts

Paused

App Files Files Community

mstts / app.ts

getapi

Update app.ts

77b7e19 verified over 1 year ago

raw

history blame contribute delete

10.9 kB

	import {serve} from "https://deno.land/std/http/server.ts";
	import {EdgeSpeechTTS} from "https://esm.sh/@lobehub/tts@1";

	/** Voice used when `model` is not one of the known short aliases. */
	const DEFAULT_VOICE_NAME = "en-US-BrianMultilingualNeural";

	/** Short model aliases accepted by the API, mapped to the voice names passed to EdgeSpeechTTS. */
	const VOICE_ALIASES = new Map<string, string>([
	  ["ava", "en-US-AvaMultilingualNeural"],
	  ["andrew", "en-US-AndrewMultilingualNeural"],
	  ["emma", "en-US-EmmaMultilingualNeural"],
	  ["brian", "en-US-BrianMultilingualNeural"],
	  ["vivienne", "fr-FR-VivienneMultilingualNeural"],
	  ["remy", "fr-FR-RemyMultilingualNeural"],
	  ["seraphina", "de-DE-SeraphinaMultilingualNeural"],
	  ["florian", "de-DE-FlorianMultilingualNeural"],
	  ["dmitry", "ru-RU-DmitryNeural"],
	  ["svetlana", "ru-RU-SvetlanaNeural"],
	]);

	/** Parses a "key:value|key:value" string into a lookup; non-string input and pairs without ":" are ignored. */
	function parseVoiceParams(voice: unknown): Map<string, string> {
	  const params = new Map<string, string>();
	  if (typeof voice !== "string") return params;
	  for (const pair of voice.split("|")) {
	    const separator = pair.indexOf(":");
	    if (separator === -1) continue;
	    params.set(pair.slice(0, separator).trim(), pair.slice(separator + 1).trim());
	  }
	  return params;
	}

	/** Converts an optional numeric string to a number, falling back to 0 for missing or non-finite values. */
	function toFiniteNumber(raw: string | undefined): number {
	  const value = Number(raw ?? 0);
	  return Number.isFinite(value) ? value : 0;
	}

	/**
	 * Synthesizes `text` with EdgeSpeechTTS and returns the audio as an HTTP response.
	 *
	 * @param model Either a full voice name containing "Neural" (used verbatim) or a
	 *   short alias such as "brian"; unknown aliases fall back to DEFAULT_VOICE_NAME.
	 * @param voice Tuning string of the form "rate:<number>|pitch:<number>". It is only
	 *   consulted when `model` is a full voice name; missing or malformed values become 0.
	 * @param text The text to speak.
	 * @returns A Response labelled "audio/mpeg" carrying the bytes produced by the TTS call.
	 * @throws Whatever `EdgeSpeechTTS.create` or reading its body throws; not handled here.
	 */
	async function synthesizeSpeech(model: string, voice: string, text: string): Promise<Response> {
	  let voiceName: string;
	  let rate = 0;
	  let pitch = 0;

	  // The arguments come straight from request JSON, so guard against non-string
	  // values instead of letting `.includes` / `.split` throw a TypeError.
	  if (typeof model === "string" && model.includes("Neural")) {
	    voiceName = model;
	    const params = parseVoiceParams(voice);
	    rate = toFiniteNumber(params.get("rate"));
	    pitch = toFiniteNumber(params.get("pitch"));
	  } else {
	    // NOTE(review): aliases deliberately keep rate/pitch at 0, matching the
	    // previous behaviour — confirm whether `voice` should apply to aliases too.
	    voiceName = VOICE_ALIASES.get(model) ?? DEFAULT_VOICE_NAME;
	  }

	  const tts = new EdgeSpeechTTS();
	  const response = await tts.create({
	    input: text,
	    options: {rate, pitch, voice: voiceName},
	  });

	  // Buffer the whole body so the audio is returned as a single payload.
	  const mp3Buffer = new Uint8Array(await response.arrayBuffer());
	  return new Response(mp3Buffer, {
	    headers: {"Content-Type": "audio/mpeg"},
	  });
	}

	/**
	 * Checks that the request's Content-Type names the expected media type.
	 *
	 * Only the media type itself is compared: parameters such as "; charset=utf-8"
	 * are ignored and the comparison is case-insensitive, so common client headers
	 * like "application/json; charset=utf-8" are accepted.
	 *
	 * @param req The incoming request whose "Content-Type" header is inspected.
	 * @param expected The required media type, e.g. "application/json".
	 * @returns A 400 "Bad Request" response when the header is missing or names a
	 *   different media type; `undefined` when the request is acceptable.
	 */
	function validateContentType(req: Request, expected: string): Response | undefined {
	  const contentType = req.headers.get("Content-Type");

	  // Strip parameters (everything after the first ";") and normalise case.
	  const mediaTypeOf = (value: string): string => {
	    const [mediaType = ""] = value.split(";", 1);
	    return mediaType.trim().toLowerCase();
	  };

	  if (mediaTypeOf(contentType ?? "") !== mediaTypeOf(expected)) {
	    console.log(`Invalid Content-Type ${contentType}, expected ${expected}`);
	    return new Response("Bad Request", {status: 400});
	  }
	  return undefined;
	}

	/**
	 * Debug endpoint: synthesizes a fixed Russian sample phrase with a fixed voice
	 * and neutral rate/pitch, logging the parameters before delegating to
	 * synthesizeSpeech.
	 *
	 * @returns The response produced by synthesizeSpeech for the sample phrase.
	 */
	async function handleDebugRequest() {
	  // Hard-coded sample request; no part of the incoming request is used.
	  const sample = {
	    model: "en-US-BrianMultilingualNeural",
	    voice: "rate:0.0\|pitch:0.0",
	    text: "Приветик! Надеюсь ты меня хорошо слышишь? Алё?!",
	  };

	  console.log(`model=${sample.model}, voice=${sample.voice}, text=${sample.text}`);

	  return synthesizeSpeech(sample.model, sample.voice, sample.text);
	}

	/**
	 * Handles the speech synthesis endpoint.
	 *
	 * Expects a POST with a JSON object body of the form
	 * `{ "model": string, "input": string, "voice"?: string }` and forwards the
	 * fields to synthesizeSpeech.
	 *
	 * @param req The incoming HTTP request.
	 * @returns 405 for non-POST methods, 400 for a wrong Content-Type, unparsable
	 *   JSON, a non-object body, or non-string `model`/`input`; otherwise the
	 *   response produced by synthesizeSpeech.
	 */
	async function handleSynthesisRequest(req: Request) {
	  if (req.method !== "POST") {
	    // A 405 response should tell the client which methods are supported.
	    return new Response("Method Not Allowed", {status: 405, headers: {"Allow": "POST"}});
	  }

	  const invalidContentType = validateContentType(req, "application/json");
	  if (invalidContentType) return invalidContentType;

	  // Client mistakes are reported as 400 instead of surfacing as exceptions.
	  const badRequest = (reason: string) => {
	    console.log(`Invalid synthesis request: ${reason}`);
	    return new Response("Bad Request", {status: 400});
	  };

	  let body: unknown;
	  try {
	    body = await req.json();
	  } catch {
	    return badRequest("body is not valid JSON");
	  }
	  if (typeof body !== "object" || body === null) {
	    return badRequest("body is not a JSON object");
	  }

	  const {model, input, voice} = body as Record<string, unknown>;
	  if (typeof model !== "string") return badRequest("\"model\" must be a string");
	  if (typeof input !== "string") return badRequest("\"input\" must be a string");

	  // `voice` is optional: an absent or non-string value means "no tuning parameters".
	  return synthesizeSpeech(model, typeof voice === "string" ? voice : "", input);
	}


	/**
	 * Serves the demo page as a single self-contained HTML document.
	 *
	 * The page contains a text area, a voice dropdown and a "synthesize" button.
	 * Its inline browser script:
	 *   - fetches `/v1/audio/models` to fill the dropdown (the entry at index 1 is preselected),
	 *   - POSTs `{model, input, voice}` as JSON to `/v1/audio/speech` on click, then plays
	 *     the returned blob in an <audio> element and adds a download link for it,
	 *   - renders two curl examples built from `window.location.origin`.
	 *
	 * The whole page lives in one template literal, so backticks, `${` and backslashes
	 * that belong to the embedded browser script are escaped to survive the outer
	 * literal. Nothing inside the literal may be edited without changing the served page.
	 *
	 * @param req The incoming request; it is not read by this handler.
	 * @returns A Response with the HTML document and Content-Type "text/html".
	 */
	async function handleDemoRequest(req: Request) {
	const html = `<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8" />
	<meta content="width=device-width, initial-scale=1.0" name="viewport" />
	<title>tts</title>
	<style>
	body {
	background-color: #121212;
	color: #e0e0e0;
	font-family: Arial, sans-serif;
	margin: 0;
	padding: 20px;
	}

	.container {
	max-width: 800px;
	margin: 0 auto;
	padding: 20px;
	background-color: #1e1e1e;
	border-radius: 8px;
	box-shadow: 0 4px 8px rgba(0, 0, 0, 0.2);
	}

	.input-area, .output-area {
	margin-bottom: 20px;
	}

	.slider-container, .textarea-container, .dropdown-container {
	margin-bottom: 20px;
	}

	label {
	display: block;
	margin-bottom: 8px;
	font-weight: bold;
	}

	input[type="range"] {
	width: 100%;
	}

	.slider-value {
	text-align: center;
	margin-top: 8px;
	}

	textarea {
	max-width: 780px;
	width: calc(100% - 20px);
	height: 100px;
	padding: 10px;
	border: 1px solid #333;
	border-radius: 4px;
	background-color: #2e2e2e;
	color: #e0e0e0;
	resize: none;
	}

	select {
	width: 100%;
	padding: 10px;
	border: 1px solid #333;
	border-radius: 4px;
	background-color: #2e2e2e;
	color: #e0e0e0;
	}

	button {
	width: 100%;
	padding: 10px;
	border: none;
	border-radius: 4px;
	background-color: #6200ea;
	color: #fff;
	font-size: 16px;
	cursor: pointer;
	transition: background-color 0.3s;
	}

	button:hover {
	background-color: #3700b3;
	}

	h1 {
	font-size: 24px;
	margin-bottom: 20px;
	}

	a {
	color: #bb86fc;
	text-decoration: none;
	}

	a:hover {
	text-decoration: underline;
	}

	#audioPlayerContainer {
	text-align: center;
	}

	audio {
	width: 100%;
	max-width: 600px;
	margin: 10px 0;
	}

	a {
	display: block;
	margin: 10px 0;
	}
	pre {
	color: #94c890;
	background: #000000;
	padding: 5px 10px;
	margin: 0;
	font-size: 1.12em;
	}
	</style>

	</head>
	<body>
	<div class="container">
	<div class="input-area">
	<div class="textarea-container">
	<label for="inputText">текст:</label
	><textarea id="inputText">Привет, хочешь я расскажу сказку?</textarea>
	</div>
	<div class="dropdown-container">
	<label for="voiceSelect">голос:</label>
	<select id="voiceSelect"></select>
	</div>
	<button id="synthesizeButton">синтезировать</button>
	</div>
	<div class="output-area">
	<div id="audioPlayerContainer"></div>
	</div>
	<details>
	<summary>api</summary>
	<p>получить список голосов:</p>
	<pre id="apiVoices"></pre>
	<p>post-запрос для синтеза голоса из текста:</p>
	<pre id="apiExamples"></pre>
	</details>
	</div>
	<script>
	let audio = null;

	document.getElementById('synthesizeButton').addEventListener('click', () => {
	const text = document.getElementById('inputText').value \|\| 'приветик! давай поболтаем немного?';
	const rate = '0.0';
	const pitch = '0.0';
	const voice = \`rate:\${rate}\|pitch:\${pitch}\`;
	const model = document.getElementById('voiceSelect').value;

	if (audio) {
	audio.pause();
	audio.currentTime = 0;
	}

	fetch('/v1/audio/speech', {
	method: 'POST',
	headers: { 'Content-Type': 'application/json' },
	body: JSON.stringify({ model, input: text, voice })
	})
	.then(response => response.blob())
	.then(blob => {
	const audioUrl = URL.createObjectURL(blob);
	const audioPlayerContainer = document.getElementById('audioPlayerContainer');

	if (audio) {
	audio.pause();
	audioPlayerContainer.innerHTML = '';
	}

	audio = new Audio(audioUrl);
	audio.controls = true;
	audioPlayerContainer.appendChild(audio);

	const downloadLink = document.createElement('a');
	downloadLink.href = audioUrl;
	downloadLink.download = 'synthesized_voice.mp3';
	downloadLink.textContent = 'скачать аудио';
	downloadLink.style.display = 'block';
	downloadLink.style.marginTop = '10px';

	audioPlayerContainer.appendChild(downloadLink);
	audio.play();
	});

	});

	async function fetchModels() {
	try {
	const response = await fetch('/v1/audio/models');
	const models = await response.json();
	const voiceSelect = document.getElementById('voiceSelect');

	models.forEach((model, index) => {
	const option = document.createElement('option');
	option.value = model.model;
	option.textContent = model.model;
	if (index === 1) {option.selected = true;}
	voiceSelect.appendChild(option);
	});
	} catch (error) {
	console.error('ошибка при получении списка моделей:', error);
	}
	}
	fetchModels();

	function createApiExamples() {
	const apiExamples = document.getElementById('apiExamples');
	const apiVoices = document.getElementById('apiVoices');
	const currentUrl = window.location.origin;
	const voices_pre = \`curl \${currentUrl}/v1/audio/models\`;
	const examples_pre = \`curl \${currentUrl}/v1/audio/speech \\\\\\\\
	-H 'content-type: application/json' \\\\\\\\
	--data-raw '{"model":"brian","input":"привет! хрю-хрю!","voice":"rate:0\|pitch:0"}' \\\\\\\\
	-o tts_voice.mp3
	\`;
	apiVoices.textContent = voices_pre.replace(/\\\\\\\\/g, '\\\\');
	apiExamples.textContent = examples_pre.replace(/\\\\\\\\/g, '\\\\');

	}
	createApiExamples();
	</script>
	</body></html>`;

	// Return the page as-is; the document declares its own charset via <meta charset="UTF-8">.
	return new Response(html, {
	headers: {"Content-Type": "text/html"},
	});
	}

	/**
	 * Lists the short voice aliases offered by the API as a JSON array of
	 * `{model, gender}` objects, with all male voices first and female voices
	 * after them; within each group the catalogue order is kept.
	 *
	 * @returns A Response with the JSON array and Content-Type "application/json".
	 */
	async function handleVoiceList() {
	  const catalogue = [
	    {model: 'ava', gender: 'female'},
	    {model: 'andrew', gender: 'male'},
	    {model: 'emma', gender: 'female'},
	    {model: 'brian', gender: 'male'},
	    {model: 'vivienne', gender: 'female'},
	    {model: 'remy', gender: 'male'},
	    {model: 'seraphina', gender: 'female'},
	    {model: 'florian', gender: 'male'},
	    {model: 'dmitry', gender: 'male'},
	    {model: 'svetlana', gender: 'female'},
	  ];

	  // Partitioning by gender yields the same order as a stable "male before female" sort.
	  const maleVoices = catalogue.filter((entry) => entry.gender === 'male');
	  const femaleVoices = catalogue.filter((entry) => entry.gender === 'female');
	  const payload = JSON.stringify([...maleVoices, ...femaleVoices]);

	  return new Response(payload, {
	    headers: {"Content-Type": "application/json"},
	  });
	}


	// HTTP entry point: routes each request by path to its handler and listens on port 7860.
	//   "/"                 -> demo page
	//   "/v1/audio/models"  -> voice list
	//   "/tts"              -> fixed debug synthesis
	//   "/v1/audio/speech"  -> synthesis API
	// Any other path yields 404.
	serve(async (req: Request) => {
	  try {
	    const {pathname} = new URL(req.url);

	    // Each handler is awaited inside the try block: returning the bare promise
	    // would let its rejection bypass the catch below, so handler failures
	    // would never reach the 500 response built here.
	    switch (pathname) {
	      case "/":
	        return await handleDemoRequest(req);
	      case "/v1/audio/models":
	        return await handleVoiceList();
	      case "/tts":
	        return await handleDebugRequest();
	      case "/v1/audio/speech":
	        return await handleSynthesisRequest(req);
	      default:
	        console.log(`Unhandled path ${pathname}`);
	        return new Response("Not Found", {status: 404});
	    }
	  } catch (err) {
	    // Anything can be thrown, so only read `.message` from real Error objects.
	    const message = err instanceof Error ? err.message : String(err);
	    console.error(`Error processing request: ${message}`);
	    return new Response(`Internal Server Error\n${message}`, {
	      status: 500,
	    });
	  }
	}, { port: 7860 });