Spaces:

akjedidtz
/

AIBOT

Runtime error

App Files Files Community

AIBOT / app.py

akjedidtz

Create app.py

3651420 verified 8 months ago

raw

history blame contribute delete

3.76 kB







	import speech_recognition as sr
	from gtts import gTTS
	from pydub import AudioSegment
	from IPython.display import Audio

	import torch
	from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
	import soundfile as sf

	# Setup device: run on the first CUDA GPU when one is available, else the CPU
	if torch.cuda.is_available():
		device = "cuda:0"
	else:
		device = "cpu"





	import os
	from groq import Groq

	# Initialize the Groq client.
	# The API key is read from the GROQ_API_KEY environment variable (e.g. a
	# Hugging Face Space secret) instead of being hard-coded: anything committed
	# to this file is publicly readable.
	# NOTE: any key that was ever committed in this file must be treated as
	# compromised and revoked in the Groq console.
	client = Groq(
		api_key=os.environ.get("GROQ_API_KEY"),
	)











	#@@##

	# Choose the device and the matching precision together: float16 on the
	# first CUDA GPU, float32 on the CPU.
	if torch.cuda.is_available():
		device, torch_dtype = "cuda:0", torch.float16
	else:
		device, torch_dtype = "cpu", torch.float32

	# Load the speech-to-text checkpoint and move it to the chosen device
	model_id = "openai/whisper-medium"
	model = AutoModelForSpeechSeq2Seq.from_pretrained(
		model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
	)
	model.to(device)

	# Load the processor that belongs to the same checkpoint
	processor = AutoProcessor.from_pretrained(model_id)

	from transformers import pipeline
	from gtts import gTTS
	import gradio as gr
	import torch

	# Build the ASR pipeline around the already-loaded model, reusing the
	# tokenizer and feature extractor carried by the processor.
	asr_pipe = pipeline(
		task="automatic-speech-recognition",
		model=model,
		tokenizer=processor.tokenizer,
		feature_extractor=processor.feature_extractor,
		torch_dtype=torch_dtype,
		device=device,
	)

	# Initialize Groq client.
	# The API key comes from the GROQ_API_KEY environment variable (e.g. a
	# Hugging Face Space secret); a key written into this file would be public.
	# NOTE: any key that was ever committed in this file must be treated as
	# compromised and revoked in the Groq console.
	client = Groq(
		api_key=os.environ.get("GROQ_API_KEY")
	)

	# Text-to-Speech function
	def text_to_speech(text):
		"""Synthesize ``text`` with gTTS and return the path of the saved MP3.

		The audio is always written to ``response.mp3`` in the current working
		directory, using the gTTS language code ``'hi'``.

		Returns:
			The string ``"response.mp3"`` on success, or ``None`` if gTTS raised
			anything while building or saving the audio (the error is printed).
		"""
		output_path = "response.mp3"
		try:
			# Build the speech object and write it out in one step
			gTTS(text, lang='hi').save(output_path)
		except Exception as e:
			print(f"Text-to-speech error: {e}")
			return None
		else:
			# Hand the MP3 file path back for playback in Gradio
			return output_path

	# Function to process audio, get model response, and return TTS output
	def process_audio(audio):
		"""Transcribe an audio clip, query the Groq chat model, and voice the reply.

		Args:
			audio: Audio input passed unchanged to ``asr_pipe``; the Gradio
				interface in this file supplies a file path. ``None`` (nothing
				recorded or uploaded) is rejected before any model is called.

		Returns:
			A ``(text, audio_path)`` tuple. On success ``text`` is the model's
			reply and ``audio_path`` is whatever ``text_to_speech`` returned
			(``None`` if synthesis failed). When there is no input or no
			transcript, ``text`` is an explanatory message and ``audio_path``
			is ``None``.
		"""
		# Gradio passes None when the user submits without any audio; the ASR
		# pipeline cannot process that, so answer with a readable message.
		if audio is None:
			print("No audio input was provided.")
			return "No audio input was provided.", None

		# Convert audio to text
		print("Converting audio to text...")
		result = asr_pipe(audio, generate_kwargs={"language": "urdu"})

		# Stop early when transcription produced nothing usable
		if "text" not in result or not result["text"].strip():
			print("Audio-to-text conversion failed or produced no text.")
			return "Audio-to-text conversion failed or no text was detected.", None

		user_ques = result["text"]
		print("Audio-to-text conversion successful. User Question:", user_ques)

		# Prepare messages for model input
		messages = [
			{
				"role": "system",
				"content": "You are a helpful assistant named SSk BOT that stands for (sehar bot) who mostly answers in Roman Urdu. Be professional. No emojis; just Urdu written in English letters, and if you receive a prompt in Urdu font, answer only in English (Roman Urdu).",
			},
			{
				"role": "user",
				"content": user_ques,
			},
		]

		# Get response from Groq model
		print("Getting response from the model...")
		response = client.chat.completions.create(
			messages=messages,
			model="gemma2-9b-it",
		)

		# The Groq SDK returns a typed response object, not a dict, so the
		# reply is read through attributes; subscripting it with ['choices']
		# raises TypeError.
		model_response = response.choices[0].message.content
		print("Model:", model_response)

		# Convert model's response to speech
		audio_path = text_to_speech(model_response)
		return model_response, audio_path

	# Gradio interface: one audio input (delivered to the callback as a file
	# path) and two outputs, the reply text and the synthesized reply audio.
	audio_input = gr.Audio(type="filepath")
	response_text = gr.Textbox(label="Model Response")
	response_audio = gr.Audio(label="Response Audio")

	interface = gr.Interface(
		fn=process_audio,
		inputs=audio_input,
		outputs=[response_text, response_audio],
		title="Real-time ASR to Language Model Response",
		description="Upload an audio file in Urdu, get a text response from the model, and hear the response in English.",
	)

	# Launch the Gradio Interface
	interface.launch()