''' This script calls the model from openai api to predict the next few words. ''' import os # os.system("pip install --upgrade pip") from pprint import pprint # os.system("pip install git+https://github.com/openai/whisper.git") import sys # print("Sys: ", sys.executable) # os.system("pip install openai") import openai import gradio as gr import whisper from transformers import pipeline import torch from transformers import AutoModelForCausalLM from transformers import AutoTokenizer import time EXAMPLE_PROMPT = """This is a tool for helping someone with memory issues remember the next word. The predictions follow a few rules: 1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was. 2) The predictions do not repeat themselves. 3) The predictions focus on suggesting nouns, adjectives, and verbs. 4) The predictions are related to the context in the transcript. EXAMPLES: Transcript: Tomorrow night we're going out to Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend Transcript: I would like to order a cheeseburger with a side of Prediction: Frnech fries, Milkshake, Apple slices, Side salad, Extra katsup Transcript: My friend Savanah is Prediction: An elecrical engineer, A marine biologist, A classical musician Transcript: I need to buy a birthday Prediction: Present, Gift, Cake, Card Transcript: """ # whisper model specification model = whisper.load_model("tiny") # openai.api_key = os.environ["Openai_APIkey"] def debug_inference(audio, prompt, model, temperature, state=""): breakpoint() # load audio data audio = whisper.load_audio(audio) # ensure sample is in correct format for inference audio = whisper.pad_or_trim(audio) # generate a log-mel spetrogram of the audio data mel = whisper.log_mel_spectrogram(audio) _, probs = model.detect_language(mel) # decode audio data options = whisper.DecodingOptions(fp16 = False) # transcribe speech to text result = whisper.decode(model, mel, options) print("result pre gp model from whisper: ", result, ".text ", result.text, "and the data type: ", type(result.text)) text = prompt + result.text + "\nPrediction: " response = openai.Completion.create( model=model, prompt=text, temperature=temperature, max_tokens=8, n=5) infers = [] temp = [] infered=[] for i in range(5): print("print1 ", response['choices'][i]['text']) temp.append(response['choices'][i]['text']) print("print2: infers ", infers) print("print3: Responses ", response) print("Object type of response: ", type(response)) #infered = list(map(lambda x: x.split(',')[0], infers)) #print("Infered type is: ", type(infered)) infers = list(map(lambda x: x.replace("\n", ""), temp)) #infered = list(map(lambda x: x.split(','), infers)) return result.text, state, infers, text # get audio from microphone gr.Interface( fn=debug_inference, inputs=[gr.inputs.Audio(source="microphone", type="filepath"), gr.inputs.Textbox(lines=15, placeholder="Enter a prompt here"), gr.inputs.Dropdown(["text-ada-001", "text-davinci-002", "text-davinci-003", "gpt-3.5-turbo"], label="Model"), gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.8, step=0.1, label="Temperature"), "state" ], outputs=["textbox","state","textbox", "textbox"], examples=[["example_in-the-mood-to-eat.m4a", EXAMPLE_PROMPT, "text-ada-001", 0.8, ""],["","","",0.9,""]], live=False).launch()