"""Streamlit Urdu voice assistant.

Speech (from a microphone or an uploaded file) is transcribed with
``speech_recognition``'s Google recogniser, answered by a Groq-hosted chat
model, and the answer is converted to speech with gTTS.
"""

import os
import tempfile
from io import BytesIO

import requests
import speech_recognition as sr
import streamlit as st
from groq import Groq
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play

# Set up Groq API
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)


def _chat_reply(text):
    """Return the model's reply to *text*; any client error propagates."""
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "user", "content": text}
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content


def process_text(text):
    """Ask the Groq chat model to respond to *text*.

    Args:
        text: The user prompt.

    Returns:
        The model's reply, or a string starting with
        ``"Error fetching Groq data:"`` if the request failed.
    """
    try:
        return _chat_reply(text)
    except Exception as e:
        return f"Error fetching Groq data: {e}"


def text_to_speech(text, lang='ur'):
    """Convert *text* to spoken audio with gTTS.

    Args:
        text: The text to speak.
        lang: Language code passed to gTTS (Urdu by default).

    Returns:
        A ``BytesIO`` positioned at the start of the audio written by gTTS.
    """
    tts = gTTS(text=text, lang=lang)
    audio_file = BytesIO()
    tts.write_to_fp(audio_file)
    audio_file.seek(0)
    return audio_file


def _transcribe_file(audio_file):
    """Transcribe a readable audio file object as Urdu speech.

    The bytes are handed to ``sr.AudioFile`` through an in-memory buffer, so
    no temporary file is left behind on disk.

    Raises:
        ValueError: If ``sr.AudioFile`` cannot read the audio.
        sr.UnknownValueError: If the speech could not be understood.
        sr.RequestError: If the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(BytesIO(audio_file.read())) as source:
        audio_data = recognizer.record(source)
    return recognizer.recognize_google(audio_data, language='ur')


def audio_to_text(audio_file):
    """Convert an audio file object to Urdu text.

    Args:
        audio_file: A binary file-like object holding the audio.

    Returns:
        The transcription, or a human-readable message when the speech could
        not be understood or the recognition request failed.
    """
    try:
        return _transcribe_file(audio_file)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"


def _is_mp3(uploaded_file):
    """Return True if the upload looks like an MP3 by MIME type or name."""
    # The reported MIME type for MP3 is not always "audio/mpeg", so fall
    # back to the file extension as well.
    if uploaded_file.type in ("audio/mpeg", "audio/mp3"):
        return True
    return uploaded_file.name.lower().endswith(".mp3")


def _as_wav(uploaded_file):
    """Return a file object with WAV data, converting MP3 uploads in memory."""
    if not _is_mp3(uploaded_file):
        return uploaded_file
    wav_buffer = BytesIO()
    AudioSegment.from_mp3(uploaded_file).export(wav_buffer, format="wav")
    wav_buffer.seek(0)
    return wav_buffer


def _answer_and_speak(text):
    """Show the model's reply to *text* and play it back as audio."""
    # Get response from Groq
    try:
        response_text = _chat_reply(text)
    except Exception as e:
        # Report the failure instead of reading the error message aloud.
        st.error(f"Error fetching Groq data: {e}")
        return
    st.write(f"Response: {response_text}")

    # Convert response to audio (skipped when there is nothing to speak)
    if not response_text or not response_text.strip():
        return
    audio_file = text_to_speech(response_text)
    st.audio(audio_file, format='audio/mp3')


# Streamlit UI
st.title("Urdu Voice Assistant")

mode = st.radio("Choose input method", ("Real-time Voice", "Upload Voice File"))

if mode == "Real-time Voice":
    st.write("Click the button and start speaking.")

    if st.button("Start Recording"):
        st.write("Recording... Please wait.")
        recognizer = sr.Recognizer()
        # NOTE(review): sr.Microphone() opens an input device of the process
        # running this script, not of the visitor's browser - confirm that is
        # the intended deployment.
        with sr.Microphone() as source:
            audio_data = recognizer.listen(source)
        st.write("Processing...")

        try:
            text = recognizer.recognize_google(audio_data, language='ur')
        except sr.UnknownValueError:
            st.write("Sorry, could not understand the audio.")
        except sr.RequestError as e:
            st.write(f"Sorry, there was an error with the request: {e}")
        else:
            st.write(f"You said: {text}")
            _answer_and_speak(text)

elif mode == "Upload Voice File":
    uploaded_file = st.file_uploader("Upload an audio file", type=["wav", "mp3"])

    if uploaded_file:
        st.write("Processing...")

        # Convert uploaded file to WAV format if needed, then transcribe.
        # Failures are reported here rather than being forwarded to the model
        # as if they were the user's words.
        try:
            text = _transcribe_file(_as_wav(uploaded_file))
        except sr.UnknownValueError:
            st.error("Could not understand audio")
        except sr.RequestError as e:
            st.error(f"Could not request results; {e}")
        except ValueError as e:
            st.error(f"Could not read the audio file: {e}")
        else:
            st.write(f"Transcribed Text: {text}")
            _answer_and_speak(text)