whisper_fileStream

Running

App Files Files Community

whisper_fileStream / app.py

Firefly777a

Update app.py

4a60ca6 over 1 year ago

raw history blame

No virus

3.76 kB


	'''
	This script calls the model from openai api to predict the next few words.
	'''
	import os
	# os.system("pip install --upgrade pip")
	from pprint import pprint
	# os.system("pip install git+https://github.com/openai/whisper.git")
	import sys
	# print("Sys: ", sys.executable)
	# os.system("pip install openai")
	import openai
	import gradio as gr
	import whisper
	from transformers import pipeline
	import torch
	from transformers import AutoModelForCausalLM
	from transformers import AutoTokenizer
	import time

	EXAMPLE_PROMPT = """This is a tool for helping someone with memory issues remember the next word.

	The predictions follow a few rules:
	1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
	2) The predictions do not repeat themselves.
	3) The predictions focus on suggesting nouns, adjectives, and verbs.
	4) The predictions are related to the context in the transcript.

	EXAMPLES:
	Transcript: Tomorrow night we're going out to
	Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
	Transcript: I would like to order a cheeseburger with a side of
	Prediction: Frnech fries, Milkshake, Apple slices, Side salad, Extra katsup
	Transcript: My friend Savanah is
	Prediction: An elecrical engineer, A marine biologist, A classical musician
	Transcript: I need to buy a birthday
	Prediction: Present, Gift, Cake, Card
	Transcript: """

	# whisper model specification
	model = whisper.load_model("tiny")

	# openai.api_key = os.environ["Openai_APIkey"]

	def debug_inference(audio, prompt, model, temperature, state=""):
	breakpoint()
	# load audio data
	audio = whisper.load_audio(audio)
	# ensure sample is in correct format for inference
	audio = whisper.pad_or_trim(audio)

	# generate a log-mel spetrogram of the audio data
	mel = whisper.log_mel_spectrogram(audio)

	_, probs = model.detect_language(mel)

	# decode audio data
	options = whisper.DecodingOptions(fp16 = False)
	# transcribe speech to text
	result = whisper.decode(model, mel, options)
	print("result pre gp model from whisper: ", result, ".text ", result.text, "and the data type: ", type(result.text))

	text = prompt + result.text + "\nPrediction: "

	response = openai.Completion.create(
	model=model,
	prompt=text,
	temperature=temperature,
	max_tokens=8,
	n=5)

	infers = []
	temp = []
	infered=[]
	for i in range(5):
	print("print1 ", response['choices'][i]['text'])
	temp.append(response['choices'][i]['text'])
	print("print2: infers ", infers)
	print("print3: Responses ", response)
	print("Object type of response: ", type(response))
	#infered = list(map(lambda x: x.split(',')[0], infers))
	#print("Infered type is: ", type(infered))
	infers = list(map(lambda x: x.replace("\n", ""), temp))
	#infered = list(map(lambda x: x.split(','), infers))

	return result.text, state, infers, text

	# get audio from microphone
	gr.Interface(
	fn=debug_inference,
	inputs=[gr.inputs.Audio(source="microphone", type="filepath"),
	gr.inputs.Textbox(lines=15, placeholder="Enter a prompt here"),
	gr.inputs.Dropdown(["text-ada-001", "text-davinci-002", "text-davinci-003", "gpt-3.5-turbo"], label="Model"),
	gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.8, step=0.1, label="Temperature"),
	"state"
	],
	outputs=["textbox","state","textbox", "textbox"],
	examples=[["example_in-the-mood-to-eat.m4a", EXAMPLE_PROMPT, "text-ada-001", 0.8, ""],["","","",0.9,""]],
	live=False).launch()