|
|
""" |
|
|
ElevenLabs Voice Assistant for RewardPilot |
|
|
Converts AI text responses to natural speech |
|
|
""" |
|
|
|
|
|
import os |
|
|
import logging |
|
|
from typing import Optional, List, Dict |
|
|
import io |
|
|
|
|
|
logger = logging.getLogger(__name__)

# ElevenLabs is an optional dependency. Record whether it could be imported so
# the rest of the module can disable voice features instead of failing at
# import time.
try:
    from elevenlabs.types import Voice, VoiceSettings
    from elevenlabs.client import ElevenLabs
except ImportError:
    ELEVENLABS_AVAILABLE = False
    logger.warning("ElevenLabs not installed. Voice features will be disabled.")
else:
    ELEVENLABS_AVAILABLE = True
|
|
|
|
|
|
|
|
class VoiceAssistant:
    """Handle text-to-speech conversion using ElevenLabs.

    The assistant is ``enabled`` only when the ``elevenlabs`` package was
    imported successfully, ``ELEVENLABS_API_KEY`` is set in the environment,
    and the client could be constructed. When it is disabled,
    ``text_to_speech`` returns ``None`` instead of raising.
    """

    # Upper bound on the number of characters sent in a single request.
    MAX_TEXT_CHARS = 2500
    # Voice used when the requested name is not present in ``self.voices``.
    DEFAULT_VOICE = "Rachel"
    # The leading sentence of ``reasoning`` must be longer than this to be spoken.
    MIN_REASONING_CHARS = 20

    def __init__(self):
        """Read the API key, build the client if possible, and register voices."""
        self.api_key = os.getenv("ELEVENLABS_API_KEY")
        self.enabled = ELEVENLABS_AVAILABLE and bool(self.api_key)
        # Always define the attribute so callers can inspect it when disabled.
        self.client = None

        if self.enabled:
            try:
                self.client = ElevenLabs(api_key=self.api_key)
            except Exception as e:
                # A module-level instance is created at import time, so a
                # client failure must disable voice rather than break import.
                self.enabled = False
                logger.error("❌ ElevenLabs client initialization failed: %s", e)
            else:
                logger.info("✅ ElevenLabs Voice Assistant initialized")
        else:
            logger.warning("⚠️ ElevenLabs disabled (missing API key or library)")

        self.voices = {
            "Rachel": {
                "voice_id": "21m00Tcm4TlvDq8ikWAM",
                "description": "Clear, professional female voice",
            },
            "Adam": {
                "voice_id": "pNInz6obpgDQGcFmaJgB",
                "description": "Deep, authoritative male voice",
            },
            "Bella": {
                "voice_id": "EXAVITQu4vr4xnSDxMaL",
                "description": "Warm, friendly female voice",
            },
            "Antoni": {
                "voice_id": "ErXwobaYiN019PkySvjV",
                "description": "Well-rounded, versatile male voice",
            },
            "Elli": {
                "voice_id": "MF3mGyEYCl7XYWbV9V6O",
                "description": "Young, energetic female voice",
            },
        }

    def text_to_speech(
        self,
        text: str,
        voice_name: str = "Rachel",
        model: str = "eleven_turbo_v2"
    ) -> Optional[bytes]:
        """
        Convert text to speech audio

        Args:
            text: Text to convert. Text longer than ``MAX_TEXT_CHARS`` is
                truncated and ends with "...".
            voice_name: Name of voice to use. Unknown names fall back to
                ``DEFAULT_VOICE``.
            model: ElevenLabs model (eleven_turbo_v2 is fastest)

        Returns:
            Audio bytes or None if failed
        """
        if not self.enabled:
            logger.warning("Voice generation skipped (ElevenLabs not enabled)")
            return None

        if not text or not text.strip():
            logger.warning("Empty text provided for voice generation")
            return None

        limit = self.MAX_TEXT_CHARS
        if len(text) > limit:
            # Reserve room for the ellipsis so the result never exceeds the cap.
            text = text[:limit - 3] + "..."
            logger.info(
                "Text truncated to %d characters for voice generation", limit
            )

        # Any failure in the SDK call is logged and reported as None, which is
        # the documented failure value of this method.
        try:
            voice_config = self.voices.get(voice_name)
            if voice_config is None:
                logger.warning(
                    "Unknown voice %r; falling back to %s",
                    voice_name,
                    self.DEFAULT_VOICE,
                )
                # Rebind the name so the log line below reports the voice used.
                voice_name = self.DEFAULT_VOICE
                voice_config = self.voices[voice_name]
            voice_id = voice_config["voice_id"]

            logger.info(
                "🎤 Generating speech with %s (%d chars)", voice_name, len(text)
            )

            audio = self.client.generate(
                text=text,
                voice=Voice(
                    voice_id=voice_id,
                    settings=VoiceSettings(
                        stability=0.5,
                        similarity_boost=0.75,
                        style=0.0,
                        use_speaker_boost=True,
                    ),
                ),
                model=model,
            )

            # The result may be one bytes object or an iterable of chunks;
            # joining a bytes object directly would raise TypeError.
            if isinstance(audio, (bytes, bytearray)):
                audio_bytes = bytes(audio)
            else:
                audio_bytes = b"".join(audio)

            logger.info("✅ Generated %d bytes of audio", len(audio_bytes))
            return audio_bytes

        except Exception as e:
            logger.error("❌ Voice generation failed: %s", e)
            return None

    def get_voice_list(self) -> List[Dict[str, str]]:
        """Get list of available voices as ``{"name", "description"}`` dicts."""
        return [
            {"name": name, "description": config["description"]}
            for name, config in self.voices.items()
        ]

    @staticmethod
    def _first_sentence(text: str) -> str:
        """Return the text before the first sentence-ending period.

        A period only ends the sentence when it is followed by whitespace or
        the end of the text, so decimals such as "4.5" are not split.
        """
        for idx, ch in enumerate(text):
            if ch == "." and (idx + 1 == len(text) or text[idx + 1].isspace()):
                return text[:idx].strip()
        return text.strip()

    def create_audio_summary(self, recommendation_data: dict) -> str:
        """
        Create a concise audio-friendly summary of recommendation

        Args:
            recommendation_data: Normalized recommendation data. The keys read
                are ``recommended_card``, ``rewards_earned``, ``rewards_rate``,
                ``merchant``, ``reasoning`` and ``warnings``; all are optional.

        Returns:
            Audio-optimized text
        """
        card = recommendation_data.get('recommended_card', 'Unknown Card')
        rewards = recommendation_data.get('rewards_earned', 0)
        rate = recommendation_data.get('rewards_rate', 'N/A')
        merchant = recommendation_data.get('merchant', 'this merchant')
        reasoning = recommendation_data.get('reasoning', '')

        summary = f"For your purchase at {merchant}, I recommend using your {card}. "

        # The amount may be None or a non-numeric string; in that case the
        # sentence omits the amount instead of raising a formatting error.
        try:
            rewards_text = f"{float(rewards):.2f}"
        except (TypeError, ValueError):
            rewards_text = None

        if rewards_text is None:
            summary += f"You'll earn rewards at a rate of {rate}. "
        else:
            summary += (
                f"You'll earn {rewards_text} dollars in rewards at a rate of {rate}. "
            )

        if reasoning and isinstance(reasoning, str):
            first_sentence = self._first_sentence(reasoning)
            # Very short fragments add nothing to the spoken summary.
            if len(first_sentence) > self.MIN_REASONING_CHARS:
                summary += f"{first_sentence}. "

        warnings = recommendation_data.get('warnings') or []
        if isinstance(warnings, str):
            # A bare string is one warning; indexing it would give one letter.
            first_warning = warnings
        elif isinstance(warnings, (list, tuple)) and warnings:
            first_warning = str(warnings[0])
        else:
            first_warning = ""

        if first_warning.strip():
            summary += "Important note: " + first_warning

        return summary
|
|
|
|
|
|
|
|
|
|
|
# Module-level singleton: built once when this module is imported and shared
# by every caller of get_voice_assistant().
voice_assistant = VoiceAssistant()


def get_voice_assistant() -> VoiceAssistant:
    """Return the shared module-level ``VoiceAssistant`` instance."""
    return voice_assistant