# HuggingFace Space file header (from web UI): author mskov, commit ed8df2e,
# "Update app.py", 3.63 kB — converted to a comment so this file parses as Python.
'''
Speech-to-text next-word prediction demo.

Transcribes microphone audio with OpenAI Whisper, then asks the OpenAI
completion API ("text-ada-001") to predict the next few words of the
utterance. Served as a live Gradio interface.
'''
# NOTE(review): dependencies are installed at import time via os.system —
# typical for a HuggingFace Space, but each install must run BEFORE the
# matching import below, so the statement order here is load-bearing.
import os
os.system("pip install --upgrade pip")
from pprint import pprint
os.system("pip install git+https://github.com/openai/whisper.git")
import sys
print("Sys: ", sys.executable)
os.system("pip install openai")
import openai
import gradio as gr
import whisper
from transformers import pipeline
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import time
# The commented-out code below is an abandoned local-GPT2 prediction path,
# superseded by the OpenAI API call inside inference().
# import streaming.py
# from next_word_prediction import GPT2
#gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
#tokenizer = AutoTokenizer.from_pretrained("gpt2")
### /code snippet
# get gpt2 model
#generator = pipeline('text-generation', model='gpt2')
# whisper model specification — "tiny" keeps CPU inference fast enough for a
# live demo at the cost of transcription accuracy.
model = whisper.load_model("tiny")
def inference(audio, state=""):
    """Transcribe an audio clip and predict likely next words.

    Parameters
    ----------
    audio : str
        Filepath of the recorded clip (Gradio Audio with ``type="filepath"``).
    state : str
        Session state; passed through unchanged (accumulation is disabled).

    Returns
    -------
    tuple
        ``(transcript, state, predictions)`` where ``predictions`` is a
        list of 5 newline-stripped completion strings.

    Raises
    ------
    KeyError
        If the ``Openai_APIkey`` environment variable / Space secret is unset.
    """
    # Load the clip and pad/trim it to Whisper's fixed 30-second window.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    # Log-mel spectrogram on the same device the model lives on.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    _, probs = model.detect_language(mel)
    # fp16=False: half precision is unsupported on CPU, where this Space runs.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    # Few-shot prompt: four worked examples steer the completion model toward
    # emitting a comma-separated list of plausible sentence continuations.
    PROMPT = """The following is an incomplete transcript of a brief conversation. Predict a list of the next most probable words to complete the sentence.
Some examples:
Transcript1: Tomorrow night we're going out to
Predictions1: the movies, a restaurant, a baseball game, the theater, a party for a friend
Transcript2: I would like to order a cheeseburger with a side of
Predictions2: french fries, milkshake, apple slices, salad, extra catsup
Transcript3: My friend Savanah is
Predictions3: an electrical engineer, a marine biologist, a classical musician
Transcript4: I need to buy a birthday
Predictions4: present, gift, cake, card
Transcript5: """
    text = PROMPT + result.text + "Prediction5: "

    # Fail fast with KeyError if the secret is missing, rather than sending
    # an unauthenticated request.
    openai.api_key = os.environ["Openai_APIkey"]
    response = openai.Completion.create(
        model="text-ada-001",
        prompt=text,
        temperature=1,
        max_tokens=8,
        n=5,  # sample five independent completions
    )
    # Strip newlines from each sampled completion before display.
    infers = [choice["text"].replace("\n", "") for choice in response["choices"]]
    return result.text, state, infers
# get audio from microphone
gr.Interface(
fn=inference,
inputs=[
gr.inputs.Audio(source="microphone", type="filepath"),
"state"
],
outputs=[
"textbox",
"state",
"textbox"
],
live=True).launch()