# voice-assistant/app.py
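"""Voice assistant Gradio app: transcribe speech with Whisper, generate a
reply with a hosted LLM via the Hugging Face Inference API, and speak the
reply back with gTTS."""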
import asyncio
import os

import gradio as gr
import requests
from dotenv import load_dotenv
from gtts import gTTS
from transformers import pipeline

# Load HUGGING_FACE_TOKEN (and any other secrets) from a local .env file.
load_dotenv()
model_id = "sanchit-gandhi/whisper-small-dv"  # update with your model id
pipe = pipeline("automatic-speech-recognition", model=model_id)

hugging_face_token = os.getenv("HUGGING_FACE_TOKEN")
async def query(text, model_id="tiiuae/falcon-7b-instruct"):
    """Send the transcript to a hosted text-generation model and return its reply."""
    api_url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {hugging_face_token}"}
    payload = {"inputs": text}
    print(f"Querying...: {text}")
    # requests is blocking, so run the POST in a thread pool executor to keep
    # the event loop responsive.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None, lambda: requests.post(api_url, headers=headers, json=payload)
    )
    data = response.json()
    print(data)
    # The Inference API echoes the prompt inside "generated_text"; strip it
    # and return the first non-empty line of the model's continuation.
    generated = data[0]["generated_text"]
    completion = generated[len(text):] if generated.startswith(text) else generated
    lines = [line for line in completion.split("\n") if line.strip()]
    return lines[0] if lines else completion.strip()
async def transcribe_speech(filepath):
    """Transcribe an audio file with Whisper, then pass the text to the LLM."""
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "english",  # update with the language you've fine-tuned on
        },
        chunk_length_s=30,  # long-form audio is split into 30 s chunks
        batch_size=8,
    )
    return await query(output["text"])
def final(filepath):
    # Bridge Gradio's synchronous callback to the async transcribe-and-query chain.
    return asyncio.run(transcribe_speech(filepath))
def main(filepath):
    response = final(filepath)
    print(response)
    # Synthesize the reply with gTTS and write it to its own file rather than
    # overwriting the input recording (gTTS emits MP3 data, which may not
    # match the input file's extension).
    out_path = os.path.join(os.path.dirname(filepath), "response.mp3")
    gTTS(text=response, lang="en", slow=False).save(out_path)
    return out_path
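# Two entry points into the same pipeline: live microphone input and file upload.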
mic_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="audio",
)
file_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="audio",
)
demo = gr.TabbedInterface(
    [mic_transcribe, file_transcribe],
    ["Transcribe Microphone", "Transcribe Audio File"],
)

demo.launch(debug=True, share=True)
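# On a local machine, share=True also exposes a temporary public *.gradio.live
# URL; on Hugging Face Spaces the app is served automatically.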