|
|
import io |
|
|
import base64 |
|
|
import json |
|
|
import os |
|
|
from pathlib import Path |
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import openai |
|
|
from fastrtc import ( |
|
|
AdditionalOutputs, |
|
|
ReplyOnStopWords, |
|
|
Stream, |
|
|
get_stt_model, |
|
|
get_twilio_turn_credentials, |
|
|
) |
|
|
|
|
|
class SambanovaVoiceService:
    """Sambanova AI service with end-to-end voice streaming.

    Wires together three pieces:
      * an OpenAI-compatible client pointed at the Sambanova API (text),
      * a fastrtc speech-to-text model (voice input),
      * an injected TTS service (voice output, optional).
    """

    def __init__(self, tts_service=None):
        """Create the API client, the STT model, and store the TTS service.

        Args:
            tts_service: Optional object providing
                ``text_to_speech(text, language) -> bytes`` and
                ``save_tts_audio(audio_bytes, filename) -> str``.
                When None, voice-output methods degrade gracefully to None.
        """
        # NOTE(review): presumably SAMBANOVA_API_KEY is set in the
        # environment; a missing key only surfaces on the first request —
        # confirm deployment config.
        self.client = openai.OpenAI(
            api_key=os.environ.get("SAMBANOVA_API_KEY"),
            base_url="https://api.sambanova.ai/v1",
        )
        self.stt_model = get_stt_model()
        self.tts_service = tts_service
        print("✅ Sambanova Voice Service initialized với TTS")

    def get_available_models(self):
        """Return the Sambanova chat model identifiers supported here."""
        return [
            "Meta-Llama-3.1-8B-Instruct",
            "Meta-Llama-3.1-70B-Instruct"
        ]

    def generate_response(self, messages, model="Meta-Llama-3.1-8B-Instruct", temperature=0.1, top_p=0.1):
        """Request a single (non-streaming) chat completion.

        Args:
            messages: Chat history in OpenAI message-dict format.
            model: Sambanova model identifier.
            temperature: Sampling temperature.
            top_p: Nucleus-sampling cutoff.

        Returns:
            The assistant's reply text, or an apology string containing
            the error message if the API call fails.
        """
        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                max_tokens=1024,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"❌ Sambanova API Error: {e}")
            return f"Xin lỗi, có lỗi xảy ra: {str(e)}"

    def stream_generate_response(self, messages, model="Meta-Llama-3.1-8B-Instruct", temperature=0.1, top_p=0.1):
        """Stream a chat completion from the Sambanova API.

        Yields:
            ``(text_chunk, full_response_so_far)`` tuples; on failure a
            single ``(error_msg, error_msg)`` tuple is yielded instead.
        """
        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                max_tokens=1024,
                stream=True
            )

            full_response = ""
            for chunk in response:
                # Guard against keep-alive / terminal chunks that carry an
                # empty `choices` list or a None delta (would raise here).
                if chunk.choices and chunk.choices[0].delta.content:
                    text_chunk = chunk.choices[0].delta.content
                    full_response += text_chunk
                    yield text_chunk, full_response

        except Exception as e:
            error_msg = f"❌ Lỗi: {str(e)}"
            yield error_msg, error_msg

    def speech_to_text(self, audio):
        """Transcribe audio to text with the fastrtc STT model.

        Returns:
            The transcription string, or "" if the STT model raises.
        """
        try:
            text = self.stt_model.stt(audio)
            print(f"🎤 STT Result: {text}")
            return text
        except Exception as e:
            print(f"❌ STT Error: {e}")
            return ""

    def text_to_speech(self, text, language='vi'):
        """Synthesize ``text`` to speech and save it as a timestamped MP3.

        Args:
            text: The text to synthesize.
            language: Language code passed through to the TTS service.

        Returns:
            Path of the saved audio file, or None when no TTS service is
            configured, synthesis produced no audio, or an error occurred.
        """
        if self.tts_service is None:
            print("❌ TTS service chưa được khởi tạo")
            return None

        try:
            audio_bytes = self.tts_service.text_to_speech(text, language)
            if audio_bytes:
                # Bug fix: original referenced `time` without importing it,
                # so this line raised NameError on every successful path.
                filename = f"tts_{int(time.time())}.mp3"
                return self.tts_service.save_tts_audio(audio_bytes, filename)
            return None
        except Exception as e:
            print(f"❌ TTS Error: {e}")
            return None

    def generate_response_with_voice(self, messages, model="Meta-Llama-3.1-8B-Instruct", language='vi'):
        """Generate a text reply and synthesize it to speech in one call.

        Returns:
            Dict with keys ``text`` (reply or error string), ``audio``
            (saved MP3 path or None) and ``audio_bytes`` (raw audio bytes
            or None).
        """
        try:
            text_response = self.generate_response(messages, model)

            # Bug fix: synthesize ONCE and reuse the bytes — the original
            # ran TTS twice (once for the file, once for "audio_bytes").
            audio_bytes = None
            audio_filepath = None
            if self.tts_service is not None:
                audio_bytes = self.tts_service.text_to_speech(text_response, language)
                if audio_bytes:
                    filename = f"tts_{int(time.time())}.mp3"
                    audio_filepath = self.tts_service.save_tts_audio(audio_bytes, filename)

            return {
                "text": text_response,
                "audio": audio_filepath,
                "audio_bytes": audio_bytes
            }
        except Exception as e:
            print(f"❌ Error in generate_response_with_voice: {e}")
            return {
                "text": f"Xin lỗi, có lỗi xảy ra: {str(e)}",
                "audio": None,
                "audio_bytes": None
            }