File size: 2,631 Bytes
0197ed3 7494646 adb5e2a 0197ed3 adb5e2a 0197ed3 adb5e2a 7494646 adb5e2a 7494646 adb5e2a 7494646 adb5e2a 7494646 adb5e2a 7494646 adb5e2a 7494646 adb5e2a 7494646 adb5e2a 7494646 0197ed3 adb5e2a 0197ed3 adb5e2a 0197ed3 7494646 adb5e2a 0197ed3 adb5e2a 0197ed3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import os
import subprocess
import time

import gradio as gr
import speech_recognition as sr
import torch
from gtts import gTTS
from transformers import pipeline
# Speech-to-text backend: Whisper (base) through the transformers pipeline.
# `device` holds the torch-style name; the pipeline itself is handed a
# device index (0 when CUDA is available, -1 otherwise).
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

speech_to_text = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=-1 if device != "cuda" else 0,
)

# Recognizer instance from the speech_recognition package.
recognizer = sr.Recognizer()
# Function to Play Audio Prompt
def play_audio(text):
    """Speak *text* aloud on the machine that runs this script.

    The text is synthesised with gTTS, written to ``prompt.mp3`` in the
    current working directory (overwritten on every call) and handed to a
    local player: ``mpg321`` on non-Windows systems, the file's default
    association on Windows.

    Args:
        text: The sentence to speak (synthesised with ``lang='en'``).

    Playback is best-effort: a missing or failing player is reported on
    stdout and does not raise. Errors raised by gTTS while synthesising or
    saving still propagate to the caller.
    """
    filename = "prompt.mp3"
    gTTS(text=text, lang='en').save(filename)

    try:
        if os.name == "nt":
            # Equivalent to the shell's `start <file>` without going
            # through cmd.exe; returns as soon as the player is launched.
            os.startfile(filename)
        else:
            # List form: no shell is involved in running the player.
            subprocess.run(["mpg321", filename], check=False)
    except OSError as exc:
        # Player not installed / not launchable: keep the UI flow alive.
        print(f"Audio playback unavailable: {exc}")

    time.sleep(2)  # Give some time for the speech to play
# Function to Capture Name
def capture_name(audio):
    """Play the name prompt, then transcribe the supplied recording.

    Args:
        audio: Input forwarded to the ``speech_to_text`` pipeline (the UI in
            this file wires it to a ``gr.Audio(type="filepath")``
            component). May be ``None`` when nothing was recorded.

    Returns:
        A ``(status, next_step)`` pair of strings. On success ``status``
        carries the transcribed name and ``next_step`` asks for the email
        address; on any failure ``status`` carries the error text and
        ``next_step`` is empty.
    """
    # NOTE(review): the "β" / "π€" prefixes below look like mis-decoded
    # emoji; confirm against the original file's encoding.
    play_audio("Tell me your name")

    # Nothing recorded: say so plainly rather than surfacing whatever
    # exception the pipeline raises for a None input.
    if audio is None:
        return "β Error: No audio received. Please record your name and try again.", ""

    try:
        text = speech_to_text(audio)["text"]
    except Exception as e:  # UI boundary: report the failure, don't crash
        return f"β Error: {e}", ""

    # Drop leading/trailing whitespace from the transcript.
    text = text.strip()
    return f"π€ Name Captured: {text}", "Please provide your email address."
# Function to Capture Email
def capture_email(audio):
    """Play the email prompt, then transcribe the supplied recording.

    Args:
        audio: Input forwarded to the ``speech_to_text`` pipeline (the UI in
            this file wires it to a ``gr.Audio(type="filepath")``
            component). May be ``None`` when nothing was recorded.

    Returns:
        A single status string: the transcribed text on success, or the
        error text on failure. The transcript is returned as spoken; it is
        not validated or reformatted as an email address.
    """
    # NOTE(review): the "β" / "π§" prefixes below look like mis-decoded
    # emoji; confirm against the original file's encoding.
    play_audio("Please provide your email address")

    # Nothing recorded: say so plainly rather than surfacing whatever
    # exception the pipeline raises for a None input.
    if audio is None:
        return "β Error: No audio received. Please record your email and try again."

    try:
        text = speech_to_text(audio)["text"]
    except Exception as e:  # UI boundary: report the failure, don't crash
        return f"β Error: {e}"

    # Drop leading/trailing whitespace from the transcript.
    text = text.strip()
    return f"π§ Email Captured: {text}"
# Gradio Interface
def gradio_interface():
    """Build the two-step (name, then email) voice-capture UI.

    Step 1 sends the name audio to ``capture_name`` and shows its two
    outputs; step 2 sends the email audio to ``capture_email`` and shows its
    single output.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """
    # NOTE(review): several labels below start with what look like
    # mis-decoded emoji; confirm against the original file's encoding.
    header_image_path = "/mnt/data/image.png"

    with gr.Blocks() as demo:
        gr.Markdown("<h1 style='text-align: center;'>π½οΈ AI Dining Assistant</h1>")
        with gr.Column():
            # The header image is optional: skip it when the file is not
            # present on this machine instead of failing the UI build.
            if os.path.isfile(header_image_path):
                gr.Image(header_image_path, elem_id="header_image", show_label=False)
            gr.Markdown("<p style='text-align: center;'>Press the mic button to start...</p>")

            # --- Step 1: name ---
            gr.Markdown("#### π€ Step 1: Tell me your name")
            mic_button = gr.Button("ποΈ Tap to Speak Your Name")
            # Visible so the user can actually supply audio; a hidden
            # component would only ever pass None to the handler.
            audio_input_name = gr.Audio(type="filepath", label="Name audio")
            name_output = gr.Textbox(label="Your Name:")
            email_prompt_output = gr.Textbox(label="Next Step:", interactive=False)
            mic_button.click(
                capture_name,
                inputs=audio_input_name,
                outputs=[name_output, email_prompt_output],
            )

            # --- Step 2: email ---
            gr.Markdown("#### π€ Step 2: Provide your email")
            mic_button_email = gr.Button("ποΈ Tap to Speak Your Email")
            audio_input_email = gr.Audio(type="filepath", label="Email audio")
            email_output = gr.Textbox(label="Your Email:")
            mic_button_email.click(
                capture_email,
                inputs=audio_input_email,
                outputs=email_output,
            )
    return demo
# Launch the Gradio Interface
# Built at module level so `demo` stays available as a module attribute.
demo = gradio_interface()

# Start the server only when this file is executed as a script; importing
# the module no longer blocks inside launch().
if __name__ == "__main__":
    demo.launch(debug=True)
|