Spaces:

Samarth991
/

LLAMA-QA-AudioFiles

Sleeping

App Files Files Community

LLAMA-QA-AudioFiles / whisper_app.py

Samarth991

application to run llama-7b on Audio files

2faf743 about 1 year ago

raw

history blame

2.7 kB

	import logging
	import os
	import time

	import openai
	import requests
	import torch as th
	import whisper
	from tenacity import retry, stop_after_attempt, wait_random
	from whisper.audio import SAMPLE_RATE
	# os.environ['OPENAI_API_KEY'] = "sk-<API KEY>"

	class WHISPERModel:
	    """Speech-to-text helper built on a Whisper model.

	    The model is loaded once at construction. The instance can then report
	    the language of a clip, transcribe a remote audio file (translating
	    non-English speech to English), and optionally post-process the result
	    through the OpenAI completion API.
	    """

	    # Seconds to wait for the reachability probe in speech_to_text.
	    _URL_CHECK_TIMEOUT = 30

	    def __init__(self, model_name='small', device='cuda', openai_flag=False):
	        """Load the Whisper checkpoint.

	        Args:
	            model_name: Checkpoint name passed to ``whisper.load_model``.
	            device: Device to load the model on. A CUDA device is replaced
	                by ``'cpu'`` when torch reports that CUDA is unavailable.
	            openai_flag: When true, text obtained from non-English audio is
	                additionally passed through ``translate_text``.
	        """
	        # speech_to_text logs through self.logger, so it has to exist on
	        # every instance.
	        self.logger = logging.getLogger(__name__)
	        if str(device).startswith('cuda') and not th.cuda.is_available():
	            self.logger.warning(
	                "CUDA requested but not available; loading Whisper on CPU"
	            )
	            device = 'cpu'
	        self.device = device
	        self.openai_flag = openai_flag
	        self.model = whisper.load_model(model_name, device=self.device)

	    def get_info(self, audio_data, conv_duration=30):
	        """Return the language Whisper reports for the start of a clip.

	        Args:
	            audio_data: Audio samples as accepted by ``whisper.pad_or_trim``
	                (speech_to_text passes the output of ``whisper.load_audio``).
	            conv_duration: Number of seconds, at ``SAMPLE_RATE``, to keep
	                from the beginning of the clip.

	        Returns:
	            The ``'language'`` entry of the transcription result.
	        """
	        # The length is used as a sample count, so force it to an integer
	        # even when conv_duration is given as a float.
	        clip_audio = whisper.pad_or_trim(
	            audio_data, length=int(SAMPLE_RATE * conv_duration)
	        )
	        result = self.model.transcribe(clip_audio)
	        return result['language']

	    def speech_to_text(self, audio_path):
	        """Transcribe the audio behind a URL.

	        Args:
	            audio_path: HTTP(S) URL of the audio file.

	        Returns:
	            A dict with ``'text'`` (transcript, translated to English when
	            the detected language is not ``'en'``), ``'duration'`` (seconds)
	            and ``'language'`` (detected language code).

	        Raises:
	            ConnectionError: The URL did not answer with HTTP 200.
	            IOError: The audio could not be loaded.
	            requests.RequestException: The probe request itself failed.
	        """
	        self.logger.info("Reading url %s", audio_path)

	        # load_audio below is handed the URL itself, so this request is only
	        # a reachability probe: stream=True avoids pulling the body here and
	        # the timeout keeps a dead host from blocking forever.
	        with requests.get(
	            audio_path, stream=True, timeout=self._URL_CHECK_TIMEOUT
	        ) as response:
	            status_code = response.status_code
	        if status_code != 200:
	            raise ConnectionError(
	                f"Unable to reach URL {audio_path} (HTTP {status_code})"
	            )

	        # Only loading is guarded, so transcription failures are not
	        # mislabelled as load errors.
	        # NOTE(review): load_audio is expected to signal decode failures
	        # with RuntimeError - confirm against the installed Whisper version.
	        try:
	            audio = whisper.load_audio(audio_path)
	        except (OSError, RuntimeError) as err:
	            raise IOError(f"Issue in loading audio {audio_path}") from err

	        conv_language = self.get_info(audio)
	        if conv_language != 'en':
	            res = self.model.transcribe(audio, task='translate')
	            if self.openai_flag:
	                # NOTE(review): the text passed here already comes from
	                # Whisper's 'translate' task; confirm this second pass is
	                # intended.
	                res['text'] = self.translate_text(
	                    res['text'],
	                    orginal_text=conv_language,
	                    convert_to='English',
	                )
	        else:
	            res = self.model.transcribe(audio)

	        return {
	            'text': res['text'],
	            'duration': audio.shape[0] / SAMPLE_RATE,
	            'language': conv_language,
	        }

	    # Retries are bounded so a permanent failure (for example a bad API key)
	    # surfaces as the original exception instead of retrying forever.
	    @retry(
	        wait=wait_random(min=5, max=10),
	        stop=stop_after_attempt(5),
	        reraise=True,
	    )
	    def translate_text(self, text, orginal_text='ar', convert_to='english'):
	        """Translate ``text`` with the OpenAI completion API.

	        Args:
	            text: Text to translate.
	            orginal_text: Name or code of the source language, inserted into
	                the prompt (parameter name kept as-is for compatibility).
	            convert_to: Name of the target language, inserted into the
	                prompt.

	        Returns:
	            The first completion choice, stripped of surrounding whitespace.
	        """
	        # Both fragments must be f-strings; otherwise the closing label is
	        # sent to the model as the literal text "{convert_to}:".
	        prompt = (
	            f'Translate the following {orginal_text} text to {convert_to}:\n\n'
	            f'{orginal_text}: {text}\n{convert_to}:'
	        )
	        # NOTE(review): max_tokens=100 caps the length of the returned
	        # translation - confirm that is enough for full transcripts.
	        response = openai.Completion.create(
	            engine='text-davinci-003',
	            prompt=prompt,
	            max_tokens=100,
	            n=1,
	            stop=None,
	            temperature=0.7
	        )
	        # The translated text is the first (and only, n=1) choice.
	        translation = response.choices[0].text.strip()
	        return translation

	if __name__ == '__main__':
	    # Manual smoke run: build a model with the default settings and
	    # transcribe one sample recording.
	    recording_url = "https://prypto-api.aswat.co/surveillance/recordings/5f53c28b-3504-4b8b-9db5-0c8b69a96233.mp3"
	    transcriber = WHISPERModel()
	    text = transcriber.speech_to_text(recording_url)