File size: 5,566 Bytes
6ebe78d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc0fa32
 
 
 
6ebe78d
cc0fa32
6ebe78d
 
cc0fa32
 
6ebe78d
 
 
 
 
 
 
 
 
 
 
 
bfef320
6ebe78d
 
bfef320
6ebe78d
 
 
 
 
 
 
 
 
 
 
 
 
42b6fe3
bfef320
 
 
42b6fe3
bfef320
6ebe78d
 
 
 
 
 
 
bfef320
6ebe78d
 
 
 
 
bfef320
6ebe78d
bfef320
42b6fe3
6ebe78d
 
955be1e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import os
import tempfile

import gradio as gr
import speech_recognition as sr
from gtts import gTTS
from pydub import AudioSegment

def transcribe_audio(audio):
    """Transcribe a recorded audio file to text using Google speech recognition.

    The incoming file (any format pydub/ffmpeg can decode) is converted to a
    WAV temp file, because ``speech_recognition`` only reads WAV/AIFF/FLAC.

    Parameters:
        audio: Filesystem path to the recorded audio clip.

    Returns:
        The recognized text, or an apologetic English message when the audio
        is unintelligible or the recognition service errors out.
    """
    recognizer = sr.Recognizer()
    segment = AudioSegment.from_file(audio)
    # A unique temp file avoids the collisions a fixed "temp.wav" name would
    # cause when several users (or rapid successive calls) hit this at once.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # pydub reopens the path itself; keep no dangling fd
    try:
        segment.export(wav_path, format="wav")
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
            try:
                text = recognizer.recognize_google(audio_data)
            except sr.UnknownValueError:
                text = "Sorry, I could not understand the audio."
            except sr.RequestError:
                text = "Sorry, there was an error with the speech recognition service."
    finally:
        # Clean up even when export/record raises (the original leaked the
        # temp file on any exception before os.remove).
        os.remove(wav_path)
    return text

def speak_text(text):
    """Synthesize *text* to speech with gTTS and return the saved MP3 path."""
    out_path = "question.mp3"
    gTTS(text).save(out_path)
    return out_path

# Question bank for the listening quiz. Each entry carries:
#   context  - narration spoken before the question (may be empty)
#   question - the prompt itself (spoken together with the context)
#   label    - caption used for the transcription textbox
#   answer   - the model answer; NOTE(review): not read by any code visible
#              in this file, presumably kept for future scoring — confirm.
questions = [
    {"context": "Let's talk about the story, My Dad's birthday party. Today is your dad’s birthday. So you and your family will have a party tonight.", "question": "What will your mom do for the party?", "label": "Transcription of Mom's action", "answer": "She will cook the dinner."},
    {"context": "Look at the page 12 and 13.", "question": "What is she doing?", "label": "Transcription of Mom's action", "answer": "She is cooking."},
    {"context": "How about your brother?", "question": "What will he do for the party?", "label": "Transcription of Brother's action", "answer": "He will sing a birthday song."},
    {"context": "Look at the picture.", "question": "What is he doing?", "label": "Transcription of Brother's action", "answer": "He is singing."},
    {"context": "Okay, next,", "question": "How about you? What will you do for the party?", "label": "Transcription of Your action", "answer": "I will write a birthday card."},
    {"context": "Let's move on to the story 'Owls are special'. Owls are nocturnal.", "question": "When do they sleep?", "label": "Transcription of sleep time", "answer": "They sleep during the day."},
    {"context": "Look at the page 21, they have very special eyes.", "question": "So, what can they do with their special eyes?", "label": "Transcription of eye ability", "answer": "They can see well at night."},
    {"context": "Now, these questions are based on the story 'I will go shopping'. You have many things to buy. First, you need to buy a book. So, you will go to the bookstore. The bookstore is far away.", "question": "How will you go to the bookstore?", "label": "Transcription of transport method", "answer": "I will take the bus."},
    {"context": "Now you need to buy some bread.", "question": "Where will you go?", "label": "Transcription of place", "answer": "I will go to the bakery."},
    {"context": "Let's talk about the story 'Guide dogs'. Blind people can't see. But, they can still do things.", "question": "How can they do?", "label": "Transcription of how blind people do things", "answer": "They have guide dogs."},
    {"context": "Guide dogs help blind people with everything. For example, they bring things for them. They help them. They open doors for them.", "question": "What else can they do for them?", "label": "Transcription of other abilities", "answer": "They can turn the TV on and off."}
]
# Mutable session state shared by the handlers below. NOTE(review): module
# globals are shared across all Gradio sessions, so two simultaneous users
# would share one quiz position — confirm whether single-user use is intended.
current_question = 0   # index of the next question to ask
responses = []         # transcriptions collected so far, in question order

def next_question():
    """Advance the quiz: speak the next question, or show the final summary.

    Returns a 6-tuple of Gradio updates for, in order: question audio,
    answer input, transcription textbox, Next button, final HTML, Restart
    button.
    """
    global current_question
    # Guard clause: no questions left -> hide the quiz widgets, show results.
    if current_question >= len(questions):
        summary = evaluate_responses()
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(value=summary, visible=True),
            gr.update(visible=True),
        )
    entry = questions[current_question]
    spoken = f"{entry.get('context', '')} {entry['question']}"
    audio_path = speak_text(spoken)
    current_question += 1
    return (
        gr.update(value=audio_path, visible=True),
        gr.update(visible=True),
        entry["label"],
        gr.update(visible=True),
        gr.update(visible=False),
        gr.update(visible=False),
    )

def save_response(audio):
    """Transcribe the user's recording and append it to ``responses``.

    Parameters:
        audio: Filesystem path to the recording, or ``None`` — Gradio's
            ``change`` event also fires with ``None`` when the audio input
            is cleared, which previously crashed the transcription step.

    Returns:
        The transcription text shown in the textbox ("" when no audio).
    """
    if audio is None:
        # Input was cleared, not recorded; nothing to transcribe or store.
        return ""
    transcription = transcribe_audio(audio)
    responses.append(transcription)
    return transcription

def evaluate_responses():
    """Render every question with the user's collected answer as HTML."""
    parts = ["<h2>Your Responses:</h2><br>"]
    for idx, entry in enumerate(questions):
        # Questions the user never answered show a placeholder.
        answer = responses[idx] if idx < len(responses) else "No response"
        parts.append(
            f"<b>Q:</b> {entry['question']}<br><b>Your Answer:</b> {answer}<br><br>"
        )
    return "".join(parts)

def reset():
    """Restart the quiz from the first question, clearing prior answers."""
    global current_question, responses
    responses = []
    current_question = 0
    # Re-enter the normal flow so the first question is spoken again.
    return next_question()

# UI wiring. The six-element outputs list below must stay in this exact
# order everywhere, because next_question()/reset() return their updates
# positionally: (question_audio, audio_input, transcription_output,
# btn_next, final_output, btn_restart).
with gr.Blocks() as demo:
    gr.Markdown("### Interactive Questions")

    question_audio = gr.Audio(label="Question", visible=False)        # spoken prompt
    audio_input = gr.Audio(type="filepath", label="Your answer", visible=True)
    transcription_output = gr.Textbox(label="Transcription", visible=True)
    btn_next = gr.Button("Next", visible=True)
    btn_restart = gr.Button("Restart", visible=False)                 # shown only at the end
    final_output = gr.HTML(visible=False)                             # results summary

    def load_first_question():
        # Thin wrapper so demo.load kicks off the quiz on page load.
        return next_question()

    demo.load(load_first_question, outputs=[question_audio, audio_input, transcription_output, btn_next, final_output, btn_restart])

    btn_next.click(next_question, outputs=[question_audio, audio_input, transcription_output, btn_next, final_output, btn_restart])
    btn_restart.click(reset, outputs=[question_audio, audio_input, transcription_output, btn_next, final_output, btn_restart])
    # Fires on every change of the audio widget, including when it is
    # cleared (the handler then receives None).
    audio_input.change(save_response, inputs=audio_input, outputs=transcription_output)

    # NOTE(review): launch() is usually called after the `with` block closes;
    # confirm launching inside the context is intentional here.
    demo.launch()