import asyncio
import os

import gradio as gr
import requests
from dotenv import load_dotenv
from gtts import gTTS
from transformers import pipeline

load_dotenv()

model_id = "sanchit-gandhi/whisper-small-dv"  # update with your model id
pipe = pipeline("automatic-speech-recognition", model=model_id)

hugging_face_token = os.getenv("HUGGING_FACE_TOKEN")


async def query(text, model_id="tiiuae/falcon-7b-instruct"):
    # Send the transcribed text to the Hugging Face Inference API and return the reply.
    api_url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {hugging_face_token}"}
    payload = {"inputs": text}
    print(f"Querying...: {text}")
    # requests.post is blocking, so run it in the default thread-pool executor
    # to keep the coroutine from stalling the event loop.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None, lambda: requests.post(api_url, headers=headers, json=payload)
    )
    data = response.json()
    print(data)
    # Falcon-instruct echoes the prompt on the first line of generated_text;
    # keep the line after it as the model's answer.
    return data[0]["generated_text"].split("\n")[1]


async def transcribe_speech(filepath):
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "english",  # update with the language you've fine-tuned on
        },
        chunk_length_s=30,
        batch_size=8,
    )
    return await query(output["text"])


def main(filepath):
    response = asyncio.run(transcribe_speech(filepath))
    print(response)
    # Synthesize the answer and write it to a separate file instead of
    # overwriting the input recording (gTTS produces MP3 data).
    tts = gTTS(text=response, lang="en", slow=False)
    output_path = "response.mp3"
    tts.save(output_path)
    return output_path


mic_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="audio",
)

file_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="audio",
)

demo = gr.TabbedInterface(
    [mic_transcribe, file_transcribe],
    ["Transcribe Microphone", "Transcribe Audio File"],
)

demo.launch(debug=True, share=True)
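
# --- Optional smoke test (a minimal sketch) ---
# To check the Inference API call in isolation before launching the UI, you
# could comment out demo.launch(...) above (it blocks) and run the query()
# helper directly. The prompt string below is only an illustrative example.
#
#   print(asyncio.run(query("What is the capital of France?")))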