'''
Live Gradio demo: transcribe microphone audio with Whisper, then call the
OpenAI text-ada-001 (ada) completion model to predict the next few words.
'''
import os

# install whisper from source at startup (Hugging Face Spaces pattern)
os.system("pip install git+https://github.com/openai/whisper.git")

import openai
import gradio as gr
import whisper
import torch
import time
from pprint import pprint
from transformers import pipeline
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
# import streaming.py
# from next_word_prediction import GPT2
#gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
#tokenizer = AutoTokenizer.from_pretrained("gpt2")
### /code snippet
# get gpt2 model
generator = pipeline('text-generation', model='gpt2')
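# note: this gpt2 pipeline is loaded here but never used by inference() below;
# the next-word suggestions come from the OpenAI API instead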
# whisper model specification
model = whisper.load_model("tiny")
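# "tiny" is the smallest and fastest Whisper checkpoint; a larger one
# (e.g. "base" or "small") would transcribe more accurately at higher latency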
def inference(audio, state=""):
    #time.sleep(2)
    #text = p(audio)["text"]
    #state += text + " "

    # load the recorded audio and pad/trim it to the length Whisper expects
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    # generate a log-mel spectrogram of the audio data
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    # detect the spoken language (probs is currently unused)
    _, probs = model.detect_language(mel)
    # decode the audio data, i.e. transcribe speech to text
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    PROMPT = """The following is a transcript of a conversation. Predict a few nouns, verbs, or adjectives that may be used next. Predict the next few words as a list of options.
A few examples are provided below and then the current transcript is provided.
Examples:
Transcript: Tomorrow night we're going out to
Next: The Movies, A Restaurant, A Baseball Game, The Theater, A Party
Transcript: I would like to order a cheeseburger with a side of
Next: Fries, Milkshake, Apples, Salad, Katsup
"""
    text = PROMPT + result.text

    # ask the ada completion model for four candidate continuations
    openai.api_key = os.environ["Openai_APIkey"]
    response = openai.Completion.create(
        model="text-ada-001",
        prompt=text,
        temperature=1,
        max_tokens=4,
        n=4)
    for i in range(4):
        print(response['choices'][i]['text'])

    #return getText, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
    return result.text, state, response
# get audio from microphone
gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state"
    ],
    outputs=[
        "textbox",
        "state",
        "textbox"
    ],
    live=True).launch()