import os
import tempfile

import gradio as gr
import speech_recognition as sr
from google.generativeai import GenerativeModel, configure
from gtts import gTTS

# ✅ Load API key from environment variable
GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("❌ Missing API Key! Please set GEMINI_API_KEY as an environment variable.")

# ✅ Configure Gemini securely
configure(api_key=GOOGLE_API_KEY)
gemini_model = GenerativeModel("models/gemini-1.5-flash")


def transcribe_audio(audio_path):
    """Transcribe the recorded audio file to Punjabi text via Google Speech Recognition."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio, language='pa-IN')
    except sr.UnknownValueError:
        return "❌ ਆਵਾਜ਼ ਨੂੰ ਸਮਝਿਆ ਨਹੀਂ ਜਾ ਸਕਿਆ।"  # "Could not understand the audio."
    except sr.RequestError:
        return "❌ ਗੂਗਲ ਸਪੀਚ ਐਪੀਆਈ ਨਾਲ ਕਨੇਕਟ ਨਹੀਂ ਹੋ ਸਕਿਆ।"  # "Could not connect to the Google Speech API."


def get_gemini_response(query):
    """Ask Gemini to answer the query in Punjabi and strip Markdown asterisks from the reply."""
    try:
        # The prompt prefix means "Answer in Punjabi:"
        response = gemini_model.generate_content(f"ਪੰਜਾਬੀ ਵਿੱਚ ਜਵਾਬ ਦਿਓ: {query}")
        return response.text.replace('*', '')
    except Exception as e:
        return f"❌ Gemini ਤਰਫੋਂ ਗਲਤੀ: {str(e)}"  # "Error from Gemini: ..."


def text_to_speech(text, lang='pa'):
    """Convert the response text to Punjabi speech and return the path to a temporary MP3 file."""
    tts = gTTS(text=text, lang=lang)
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    tts.save(temp_file.name)
    return temp_file.name


# ---------------------------
# Combined Function
# ---------------------------
def handle_voice_query(audio_file):
    """Full pipeline: speech-to-text -> Gemini answer -> text-to-speech."""
    query = transcribe_audio(audio_file)
    response = get_gemini_response(query)
    audio_path = text_to_speech(response)
    return query, response, audio_path


# Gradio UI. Labels are in Punjabi: "Ask by Voice", "Ask your question by voice (in Punjabi)",
# "Speak your question", "Transcribed question", "Gemini answer", "Audio answer", "Get answer".
with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ **ਆਵਾਜ਼ ਰਾਹੀਂ ਪੁੱਛੋ**")
    gr.Markdown("### ਆਪਣਾ ਸਵਾਲ ਆਵਾਜ਼ ਰਾਹੀਂ ਪੁੱਛੋ (ਪੰਜਾਬੀ ਵਿੱਚ)")

    audio_input = gr.Audio(type="filepath", label="🎤 ਸਵਾਲ ਬੋਲੋ")
    query_text = gr.Textbox(label="🔍 ਬੋਲਿਆ ਗਿਆ ਸਵਾਲ")
    gemini_response = gr.Textbox(label="📜 Gemini ਜਵਾਬ")
    audio_output = gr.Audio(label="🔊 ਆਵਾਜ਼ੀ ਜਵਾਬ")
    submit_btn = gr.Button("➡️ ਜਵਾਬ ਲਵੋ")

    submit_btn.click(
        fn=handle_voice_query,
        inputs=[audio_input],
        outputs=[query_text, gemini_response, audio_output],
    )

demo.launch()