# rewardpilot-web-ui / utils/voice_assistant.py
# sammy786's picture
# Update utils/voice_assistant.py
# f7a5fec verified
# raw
# history blame
# 5.91 kB
"""
ElevenLabs Voice Assistant for RewardPilot
Converts AI text responses to natural speech
"""
import io
import logging
import os
import re
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)

# ElevenLabs is treated as an optional dependency: the module must still import
# when the package is missing, so the outcome is recorded in a flag that the
# rest of the file checks before touching any ElevenLabs name.
try:
    from elevenlabs.types import Voice, VoiceSettings
    from elevenlabs.client import ElevenLabs
except ImportError:
    ELEVENLABS_AVAILABLE = False
    logger.warning("ElevenLabs not installed. Voice features will be disabled.")
else:
    # Reached only when both imports above succeeded.
    ELEVENLABS_AVAILABLE = True
class VoiceAssistant:
    """Handle text-to-speech conversion using ElevenLabs.

    The assistant is ``enabled`` only when the ``elevenlabs`` package was
    imported successfully (``ELEVENLABS_AVAILABLE``) and the
    ``ELEVENLABS_API_KEY`` environment variable is set. When disabled,
    ``text_to_speech`` returns ``None``; ``get_voice_list`` and
    ``create_audio_summary`` do not need the API and keep working.

    Attributes:
        api_key: Value of ``ELEVENLABS_API_KEY`` (``None`` when unset).
        enabled: Whether speech generation can be attempted.
        client: ``ElevenLabs`` client when enabled, otherwise ``None``.
        voices: Mapping of voice name to its ``voice_id`` and ``description``.
    """

    # Maximum number of characters sent per request, *including* the
    # truncation suffix. The limit exists to avoid API errors on long input.
    MAX_TEXT_CHARS = 2500
    # Voice used when the caller passes a name that is not in ``self.voices``.
    DEFAULT_VOICE = "Rachel"
    _TRUNCATION_SUFFIX = "..."
    # Reasoning sentences at or below this length are not read aloud.
    _MIN_SENTENCE_CHARS = 20
    # A sentence ends at a period followed by whitespace or end of text, so
    # decimal points such as "1.5%" do not end the sentence.
    _SENTENCE_END = re.compile(r"\.(?:\s|$)")

    def __init__(self):
        """Read the API key, create the client if possible, register voices."""
        self.api_key = os.getenv("ELEVENLABS_API_KEY")
        self.enabled = ELEVENLABS_AVAILABLE and bool(self.api_key)
        # Always define the attribute so a disabled instance exposes
        # ``client is None`` instead of raising AttributeError.
        self.client = None

        if self.enabled:
            self.client = ElevenLabs(api_key=self.api_key)
            logger.info("✅ ElevenLabs Voice Assistant initialized")
        else:
            logger.warning("⚠️ ElevenLabs disabled (missing API key or library)")

        # Voice configurations
        self.voices = {
            "Rachel": {
                "voice_id": "21m00Tcm4TlvDq8ikWAM",  # Professional female
                "description": "Clear, professional female voice",
            },
            "Adam": {
                "voice_id": "pNInz6obpgDQGcFmaJgB",  # Deep male
                "description": "Deep, authoritative male voice",
            },
            "Bella": {
                "voice_id": "EXAVITQu4vr4xnSDxMaL",  # Friendly female
                "description": "Warm, friendly female voice",
            },
            "Antoni": {
                "voice_id": "ErXwobaYiN019PkySvjV",  # Well-rounded male
                "description": "Well-rounded, versatile male voice",
            },
            "Elli": {
                "voice_id": "MF3mGyEYCl7XYWbV9V6O",  # Young female
                "description": "Young, energetic female voice",
            },
        }

    @classmethod
    def _clip_text(cls, text: str) -> str:
        """Return ``text`` shortened to at most ``MAX_TEXT_CHARS`` characters.

        Text within the limit is returned unchanged. Longer text is cut so
        that the result, including the trailing ``"..."``, fits the limit.
        """
        if len(text) <= cls.MAX_TEXT_CHARS:
            return text
        keep = cls.MAX_TEXT_CHARS - len(cls._TRUNCATION_SUFFIX)
        return text[:keep] + cls._TRUNCATION_SUFFIX

    @classmethod
    def _first_sentence(cls, text: str) -> str:
        """Return the text before the first sentence-ending period, stripped."""
        return cls._SENTENCE_END.split(text, maxsplit=1)[0].strip()

    def text_to_speech(
        self,
        text: str,
        voice_name: str = "Rachel",
        model: str = "eleven_turbo_v2"
    ) -> Optional[bytes]:
        """
        Convert text to speech audio.

        Args:
            text: Text to convert. Text longer than ``MAX_TEXT_CHARS`` is
                truncated and suffixed with "...".
            voice_name: Name of the voice to use (a key of ``self.voices``).
                Unknown names fall back to ``DEFAULT_VOICE`` with a warning.
            model: ElevenLabs model identifier passed through to the client.

        Returns:
            Audio bytes, or None if the assistant is disabled, the text is
            empty/whitespace, or generation failed. This method does not
            raise for API failures; they are logged instead.
        """
        if not self.enabled:
            logger.warning("Voice generation skipped (ElevenLabs not enabled)")
            return None

        if not text or not text.strip():
            logger.warning("Empty text provided for voice generation")
            return None

        # Limit text length to avoid API errors
        if len(text) > self.MAX_TEXT_CHARS:
            text = self._clip_text(text)
            logger.info(
                "Text truncated to %d characters for voice generation",
                self.MAX_TEXT_CHARS,
            )

        # Voice output is best-effort: any failure here must not break the
        # caller, so everything is caught, logged with a traceback, and
        # reported as None.
        try:
            voice_config = self.voices.get(voice_name)
            if voice_config is None:
                logger.warning(
                    "Unknown voice %r; falling back to %s",
                    voice_name,
                    self.DEFAULT_VOICE,
                )
                # Rebind so the log line below names the voice actually used.
                voice_name = self.DEFAULT_VOICE
                voice_config = self.voices[self.DEFAULT_VOICE]
            voice_id = voice_config["voice_id"]

            logger.info(
                "🎤 Generating speech with %s (%d chars)", voice_name, len(text)
            )

            # Generate audio using ElevenLabs
            audio = self.client.generate(
                text=text,
                voice=Voice(
                    voice_id=voice_id,
                    settings=VoiceSettings(
                        stability=0.5,  # Balance between consistency and expressiveness
                        similarity_boost=0.75,  # How closely to match the original voice
                        style=0.0,  # Exaggeration level
                        use_speaker_boost=True  # Enhance clarity
                    )
                ),
                model=model
            )

            # The client may hand back either a complete bytes object or an
            # iterator of chunks; joining a bytes object would fail because
            # iterating bytes yields ints.
            if isinstance(audio, (bytes, bytearray)):
                audio_bytes = bytes(audio)
            else:
                audio_bytes = b"".join(audio)

            logger.info("✅ Generated %d bytes of audio", len(audio_bytes))
            return audio_bytes
        except Exception as e:
            logger.error("❌ Voice generation failed: %s", e, exc_info=True)
            return None

    def get_voice_list(self) -> List[Dict[str, str]]:
        """Get list of available voices as ``{"name", "description"}`` dicts."""
        return [
            {"name": name, "description": config["description"]}
            for name, config in self.voices.items()
        ]

    def create_audio_summary(self, recommendation_data: dict) -> str:
        """
        Create a concise audio-friendly summary of a recommendation.

        Reads the keys ``recommended_card``, ``rewards_earned``,
        ``rewards_rate``, ``merchant``, ``reasoning`` and ``warnings``.
        Missing keys and keys set to ``None`` use spoken defaults.

        Args:
            recommendation_data: Recommendation dict. ``rewards_earned`` may
                be a number or a numeric string; anything else is spoken as
                0.00. ``warnings`` may be a list/tuple or a single string;
                only the first warning is included.

        Returns:
            Audio-optimized text.
        """

        def field(key, default):
            # A key that is present but None should sound like a missing key,
            # not be read out as the word "None".
            value = recommendation_data.get(key)
            return default if value is None else value

        card = field('recommended_card', 'Unknown Card')
        rate = field('rewards_rate', 'N/A')
        merchant = field('merchant', 'this merchant')
        reasoning = field('reasoning', '')

        # Upstream data may carry the amount as a string or None; the ":.2f"
        # format below only accepts real numbers.
        try:
            rewards = float(field('rewards_earned', 0))
        except (TypeError, ValueError):
            rewards = 0.0

        # Create concise, natural-sounding summary
        summary = f"For your purchase at {merchant}, I recommend using your {card}. "
        summary += f"You'll earn {rewards:.2f} dollars in rewards at a rate of {rate}. "

        # Add simplified reasoning (first sentence only)
        if isinstance(reasoning, str) and reasoning:
            first_sentence = self._first_sentence(reasoning)
            if len(first_sentence) > self._MIN_SENTENCE_CHARS:
                summary += f"{first_sentence}. "

        # Add warnings if present
        warnings = field('warnings', [])
        if isinstance(warnings, str):
            # Treat a bare string as one warning rather than indexing its
            # first character.
            warnings = [warnings]
        if isinstance(warnings, (list, tuple)) and warnings:
            summary += "Important note: " + str(warnings[0])

        return summary
# Shared module-level instance, constructed once when this module is imported.
voice_assistant = VoiceAssistant()


def get_voice_assistant() -> VoiceAssistant:
    """Return the shared module-level ``VoiceAssistant`` instance.

    Returns:
        The same ``voice_assistant`` object on every call; no new instance
        is created here.
    """
    return voice_assistant