# voice-assistant/app.py
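"""Voice assistant Gradio app: transcribe speech with Whisper, generate a
reply with a hosted LLM via the Hugging Face Inference API, and speak the
reply back with gTTS."""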
import asyncio
import os

import gradio as gr
import requests
from dotenv import load_dotenv
from gtts import gTTS
from transformers import pipeline

# Load HUGGING_FACE_TOKEN (and any other secrets) from a local .env file.
load_dotenv()
model_id = "sanchit-gandhi/whisper-small-dv"  # update with your model id
pipe = pipeline("automatic-speech-recognition", model=model_id)

hugging_face_token = os.getenv("HUGGING_FACE_TOKEN")
async def query(text, model_id="tiiuae/falcon-7b-instruct"):
    """Send the transcript to a hosted text-generation model and return its reply."""
    api_url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {hugging_face_token}"}
    payload = {"inputs": text}
    print(f"Querying...: {text}")
    # requests is blocking, so run the POST in a thread pool executor to keep
    # the event loop responsive.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None, lambda: requests.post(api_url, headers=headers, json=payload)
    )
    data = response.json()
    print(data)
    # The Inference API echoes the prompt inside "generated_text"; strip it
    # and return the first non-empty line of the model's continuation.
    generated = data[0]["generated_text"]
    completion = generated[len(text):] if generated.startswith(text) else generated
    lines = [line for line in completion.split("\n") if line.strip()]
    return lines[0] if lines else completion.strip()
async def transcribe_speech(filepath):
    """Transcribe an audio file with Whisper, then pass the text to the LLM."""
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "english",  # update with the language you've fine-tuned on
        },
        chunk_length_s=30,  # long-form audio is split into 30 s chunks
        batch_size=8,
    )
    return await query(output["text"])
def final(filepath):
    # Bridge Gradio's synchronous callback to the async transcribe-and-query chain.
    return asyncio.run(transcribe_speech(filepath))
def main(filepath):
    response = final(filepath)
    print(response)
    # Synthesize the reply with gTTS and write it to its own file rather than
    # overwriting the input recording (gTTS emits MP3 data, which may not
    # match the input file's extension).
    out_path = os.path.join(os.path.dirname(filepath), "response.mp3")
    gTTS(text=response, lang="en", slow=False).save(out_path)
    return out_path
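# Two entry points into the same pipeline: live microphone input and file upload.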
mic_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="audio",
)
file_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="audio",
)
demo = gr.TabbedInterface(
    [mic_transcribe, file_transcribe],
    ["Transcribe Microphone", "Transcribe Audio File"],
)

demo.launch(debug=True, share=True)
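# On a local machine, share=True also exposes a temporary public *.gradio.live
# URL; on Hugging Face Spaces the app is served automatically.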