# HuggingFace Space file header (from web UI): author mskov, commit ed8df2e,
# "Update app.py", 3.63 kB — converted to a comment so this file parses as Python.
'''
Speech-to-text next-word prediction demo.

Transcribes microphone audio with OpenAI Whisper, then asks the OpenAI
completion API ("text-ada-001") to predict the next few words of the
utterance. Served as a live Gradio interface.
'''
# NOTE(review): dependencies are installed at import time via os.system —
# typical for a HuggingFace Space, but each install must run BEFORE the
# matching import below, so the statement order here is load-bearing.
import os
os.system("pip install --upgrade pip")
from pprint import pprint
os.system("pip install git+https://github.com/openai/whisper.git")
import sys
print("Sys: ", sys.executable)
os.system("pip install openai")
import openai
import gradio as gr
import whisper
from transformers import pipeline
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import time
# The commented-out code below is an abandoned local-GPT2 prediction path,
# superseded by the OpenAI API call inside inference().
# import streaming.py
# from next_word_prediction import GPT2
#gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
#tokenizer = AutoTokenizer.from_pretrained("gpt2")
### /code snippet
# get gpt2 model
#generator = pipeline('text-generation', model='gpt2')
# whisper model specification — "tiny" keeps CPU inference fast enough for a
# live demo at the cost of transcription accuracy.
model = whisper.load_model("tiny")
def inference(audio, state=""):
    """Transcribe an audio clip and predict likely next words.

    Parameters
    ----------
    audio : str
        Filepath of the recorded clip (Gradio Audio with ``type="filepath"``).
    state : str
        Session state; passed through unchanged (accumulation is disabled).

    Returns
    -------
    tuple
        ``(transcript, state, predictions)`` where ``predictions`` is a
        list of 5 newline-stripped completion strings.

    Raises
    ------
    KeyError
        If the ``Openai_APIkey`` environment variable / Space secret is unset.
    """
    # Load the clip and pad/trim it to Whisper's fixed 30-second window.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    # Log-mel spectrogram on the same device the model lives on.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    _, probs = model.detect_language(mel)
    # fp16=False: half precision is unsupported on CPU, where this Space runs.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    # Few-shot prompt: four worked examples steer the completion model toward
    # emitting a comma-separated list of plausible sentence continuations.
    PROMPT = """The following is an incomplete transcript of a brief conversation. Predict a list of the next most probable words to complete the sentence.
Some examples:
Transcript1: Tomorrow night we're going out to
Predictions1: the movies, a restaurant, a baseball game, the theater, a party for a friend
Transcript2: I would like to order a cheeseburger with a side of
Predictions2: french fries, milkshake, apple slices, salad, extra catsup
Transcript3: My friend Savanah is
Predictions3: an electrical engineer, a marine biologist, a classical musician
Transcript4: I need to buy a birthday
Predictions4: present, gift, cake, card
Transcript5: """
    text = PROMPT + result.text + "Prediction5: "

    # Fail fast with KeyError if the secret is missing, rather than sending
    # an unauthenticated request.
    openai.api_key = os.environ["Openai_APIkey"]
    response = openai.Completion.create(
        model="text-ada-001",
        prompt=text,
        temperature=1,
        max_tokens=8,
        n=5,  # sample five independent completions
    )
    # Strip newlines from each sampled completion before display.
    infers = [choice["text"].replace("\n", "") for choice in response["choices"]]
    return result.text, state, infers
# get audio from microphone
gr.Interface(
fn=inference,
inputs=[
gr.inputs.Audio(source="microphone", type="filepath"),
"state"
],
outputs=[
"textbox",
"state",
"textbox"
],
live=True).launch()