import gradio as gr import speech_recognition as sr from pydub import AudioSegment import os from gtts import gTTS def transcribe_audio(audio): recognizer = sr.Recognizer() audio = AudioSegment.from_file(audio) audio.export("temp.wav", format="wav") with sr.AudioFile("temp.wav") as source: audio_data = recognizer.record(source) try: text = recognizer.recognize_google(audio_data) except sr.UnknownValueError: text = "Sorry, I could not understand the audio." except sr.RequestError: text = "Sorry, there was an error with the speech recognition service." os.remove("temp.wav") return text def speak_text(text): tts = gTTS(text) tts.save("question.mp3") return "question.mp3" questions = [ {"context": "Let's talk about the story, My Dad's birthday party. Today is your dad’s birthday. So you and your family will have a party tonight.", "question": "What will your mom do for the party?", "label": "Transcription of Mom's action", "answer": "She will cook the dinner."}, {"context": "Look at the page 12 and 13.", "question": "What is she doing?", "label": "Transcription of Mom's action", "answer": "She is cooking."}, {"context": "How about your brother?", "question": "What will he do for the party?", "label": "Transcription of Brother's action", "answer": "He will sing a birthday song."}, {"context": "Look at the picture.", "question": "What is he doing?", "label": "Transcription of Brother's action", "answer": "He is singing."}, {"context": "Okay, next,", "question": "How about you? What will you do for the party?", "label": "Transcription of Your action", "answer": "I will write a birthday card."}, {"context": "Let's move on to the story 'Owls are special'. Owls are nocturnal.", "question": "When do they sleep?", "label": "Transcription of sleep time", "answer": "They sleep during the day."}, {"context": "Look at the page 21, they have very special eyes.", "question": "So, what can they do with their special eyes?", "label": "Transcription of eye ability", "answer": "They can see well at night."}, {"context": "Now, these questions are based on the story 'I will go shopping'. You have many things to buy. First, you need to buy a book. So, you will go to the bookstore. The bookstore is far away.", "question": "How will you go to the bookstore?", "label": "Transcription of transport method", "answer": "I will take the bus."}, {"context": "Now you need to buy some bread.", "question": "Where will you go?", "label": "Transcription of place", "answer": "I will go to the bakery."}, {"context": "Let's talk about the story 'Guide dogs'. Blind people can't see. But, they can still do things.", "question": "How can they do?", "label": "Transcription of how blind people do things", "answer": "They have guide dogs."}, {"context": "Guide dogs help blind people with everything. For example, they bring things for them. They help them. They open doors for them.", "question": "What else can they do for them?", "label": "Transcription of other abilities", "answer": "They can turn the TV on and off."} ] current_question = 0 responses = [] def next_question(): global current_question if current_question < len(questions): context = questions[current_question].get("context", "") question = questions[current_question]["question"] full_text = f"{context} {question}" question_audio = speak_text(full_text) current_question += 1 return gr.update(value=question_audio, visible=True), gr.update(visible=True), questions[current_question-1]["label"], gr.update(visible=True), gr.update(visible=False), gr.update(visible=False) else: final_results = evaluate_responses() return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(value=final_results, visible=True), gr.update(visible=True) def save_response(audio): transcription = transcribe_audio(audio) responses.append(transcription) return transcription def evaluate_responses(): result = "