whisper_fileStream

Runtime error

App Files Files Community

whisper_fileStream / app.py

mskov

Update app.py

8c29eb3 over 1 year ago

raw

history blame

No virus

3.87 kB


	"""
	Transcribe microphone audio with Whisper, then call the ada model from the
	OpenAI API to predict the next few words of the transcript.
	"""
	import os
	import subprocess
	import sys
	from pprint import pprint


	def _pip_install(*args):
	    """Run ``pip install`` with the interpreter that is executing this script."""
	    # ``python -m pip`` installs into this interpreter's environment; a bare
	    # ``pip`` found on PATH may belong to a different Python installation.
	    # check=False keeps the original best-effort behaviour: a failed install
	    # surfaces later as an ImportError instead of aborting here.
	    subprocess.run([sys.executable, "-m", "pip", "install", *args], check=False)


	# Dependencies are installed at start-up, before the imports that need them.
	_pip_install("--upgrade", "pip")
	_pip_install("git+https://github.com/openai/whisper.git")
	print("Sys: ", sys.executable)
	_pip_install("openai")

	import openai
	import gradio as gr
	import whisper
	from transformers import pipeline
	import torch
	from transformers import AutoModelForCausalLM
	from transformers import AutoTokenizer
	import time
	# import streaming.py
	# from next_word_prediction import GPT2

	#gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
	#tokenizer = AutoTokenizer.from_pretrained("gpt2")

	### /code snippet

	# get gpt2 model
	#generator = pipeline('text-generation', model='gpt2')

	# whisper model specification: loaded once at import time and shared by every request
	model = whisper.load_model("tiny")



	def inference(audio, state=""):
	    """Transcribe a recording and ask the completion model for likely continuations.

	    Args:
	        audio: Path of the recorded audio file; passed to ``whisper.load_audio``.
	        state: Session state value. It is returned unchanged.

	    Returns:
	        A tuple ``(transcript, state, candidates)``: ``transcript`` is the text
	        decoded by Whisper, and ``candidates`` is the fourth completion returned
	        by the API, with newlines removed and split on commas (a list of strings).

	    Raises:
	        KeyError: If the ``Openai_APIkey`` environment variable is not set.
	    """
	    # load audio data and ensure the sample is in the correct format for inference
	    audio = whisper.load_audio(audio)
	    audio = whisper.pad_or_trim(audio)

	    # generate a log-mel spectrogram of the audio data
	    mel = whisper.log_mel_spectrogram(audio).to(model.device)

	    # A separate model.detect_language(mel) call used to run here; its result
	    # was never used, so it has been removed.
	    # NOTE(review): whisper.decode is expected to detect the language itself
	    # when DecodingOptions has no language set -- confirm against whisper docs.
	    options = whisper.DecodingOptions(fp16 = False)
	    # transcribe speech to text
	    result = whisper.decode(model, mel, options)
	    print("result pre gp model from whisper: ", result, ".text ", result.text, "and the data type: ", type(result.text))

	    PROMPT = """The following is an incomplete transcript of a brief conversation.
	Predict the next few words int he transcript to complete the sentence.
	A few examples of transcripts and predictions are provided below:
	Transcript: Tomorrow night we're going out to
	Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
	Transcript: I would like to order a cheeseburger with a side of
	Prediction: Frnech fries, Milkshake, Apple slices, Side salad, Extra katsup
	Transcript: My friend Savanah is
	Prediction: An elecrical engineer, A marine biologist, A classical musician
	Transcript: I need to buy a birthday
	Prediction: Present, Gift, Cake, Card
	Given these examples, predict the next few words in the following sentence:
	"""
	    text = PROMPT + result.text

	    openai.api_key = os.environ["Openai_APIkey"]

	    # NOTE(review): openai.Completion looks like the pre-1.0 client interface;
	    # confirm the installed openai package still provides it.
	    response = openai.Completion.create(
	        model="text-ada-001",
	        #model="text-curie-001",
	        prompt=text,
	        temperature=0.9,
	        max_tokens=8,
	        n=5)

	    # Iterate over whatever the API returned instead of a hard-coded range(5),
	    # so the loop stays in step with the ``n`` requested above.
	    completions = []
	    for choice in response['choices']:
	        print("print1 ", choice['text'])
	        completions.append(choice['text'])
	    print("print3: Responses ", response)
	    print("Object type of response: ", type(response))

	    # Strip newlines, then split each completion into comma-separated candidates.
	    infered = [completion.replace("\n", "").split(',') for completion in completions]

	    # Only the fourth completion's candidates are shown in the UI.
	    return result.text, state, infered[3]



	# get audio from microphone

	# Build and start the UI. gr.Interface (not gr.Blocks) is the class that wraps a
	# single function with ``fn`` / ``inputs`` / ``outputs`` / ``live``; gr.Blocks is
	# a layout container and does not take those arguments.
	gr.Interface(
	    fn=inference,
	    inputs=[
	        # microphone recording, handed to ``inference`` as a file path
	        gr.inputs.Audio(source="microphone", type="filepath"),
	        "state"
	    ],
	    outputs=[
	        "textbox",   # transcript
	        "state",
	        "textbox"    # predicted continuation candidates
	    ],
	    live=True).launch()