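"""Voice-enabled video chatbot: captures speech from the microphone, generates
a reply with a Hugging Face conversational model, and renders the reply as a
talking-avatar video through the D-ID API, all inside a Streamlit app."""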
import streamlit as st
import speech_recognition as sr
from transformers import pipeline, Conversation
import requests
import time

# Load the chatbot model from Hugging Face
# (the "conversational" pipeline and Conversation class must be available in
# the installed transformers release)
chatbot = pipeline("conversational", model="facebook/blenderbot-400M-distill")
# Function to convert speech to text using SpeechRecognition
def speech_to_text():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:  # requires PyAudio for microphone access
        st.info("Listening...")
        audio = recognizer.listen(source)
    try:
        # Transcribe with Google's free web speech API
        text = recognizer.recognize_google(audio)
        return text
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError:
        return "Speech recognition service is not available."
# Function to generate avatar video using D-ID API
def generate_avatar_video(text_response):
    api_url = "https://api.d-id.com/talks"
    headers = {
        "Authorization": "Basic YOUR_API_KEY",  # Replace with your D-ID API key
        "Content-Type": "application/json"
    }
    payload = {
        "script": {
            "type": "text",
            "input": text_response
        },
        "source_url": "YOUR_AVATAR_IMAGE_URL"  # Replace with a public image URL of the avatar face
    }
    # Creating a talk returns a job id; poll until the rendered video is ready
    response = requests.post(api_url, headers=headers, json=payload)
    talk_id = response.json().get("id")
    if not talk_id:
        return None
    for _ in range(30):  # wait up to ~60 seconds for rendering
        talk = requests.get(f"{api_url}/{talk_id}", headers=headers).json()
        video_url = talk.get("result_url")
        if video_url:
            return video_url
        time.sleep(2)
    return None
# Streamlit app interface
st.title("🗣️ Voice-Enabled Live Video Chatbot")

# Button to start recording
if st.button("Speak"):
    user_input = speech_to_text()
    if user_input:
        st.write(f"**You:** {user_input}")

        # Generate chatbot response using the conversational pipeline
        conversation = chatbot(Conversation(user_input))
        response_text = conversation.generated_responses[-1]
        st.write(f"**Bot:** {response_text}")

        # Generate avatar video with the bot's response
        video_url = generate_avatar_video(response_text)

        # Display the video response, or warn if rendering failed
        if video_url:
            st.video(video_url)
        else:
            st.warning("Could not generate the avatar video.")
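# To run locally (assumed dependency set; adjust names/versions as needed):
#   pip install streamlit SpeechRecognition pyaudio transformers torch requests
#   streamlit run app.py  # replace app.py with this file's name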