''' This script calls the text-ada-001 model from the OpenAI API to predict the next
few words of a live Whisper transcription. The API key is read from the
Openai_APIkey environment variable. '''
import os

# Install runtime dependencies (useful when the hosting environment has no
# requirements.txt). The pre-1.0 OpenAI package is pinned because the legacy
# Completion API is used below.
os.system("pip install --upgrade pip")
os.system("pip install git+https://github.com/openai/whisper.git")
os.system("pip install 'openai<1.0'")

import openai
import gradio as gr
import whisper

# load the Whisper speech-to-text model
model = whisper.load_model("tiny")


def inference(audio, state=""):
    # load the recorded audio and pad/trim it to the window Whisper expects
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # generate a log-mel spectrogram of the audio data
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language (probabilities are logged but not used further)
    _, probs = model.detect_language(mel)
    print("Detected language:", max(probs, key=probs.get))

    # transcribe speech to text
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    print("Whisper transcription:", result.text)

    # few-shot prompt: the model is asked to list likely continuations of the transcript
    PROMPT = """The following is an incomplete transcript of a brief conversation.
Predict a list of the next most probable words to complete the sentence.
Some examples:
Transcript1: Tomorrow night we're going out to
Predictions1: the movies, a restaurant, a baseball game, the theater, a party for a friend
Transcript2: I would like to order a cheeseburger with a side of
Predictions2: french fries, milkshake, apple slices, salad, extra catsup
Transcript3: My friend Savanah is
Predictions3: an electrical engineer, a marine biologist, a classical musician
Transcript4: I need to buy a birthday
Predictions4: present, gift, cake, card
Transcript5: """

    text = PROMPT + result.text + "\nPredictions5: "

    openai.api_key = os.environ["Openai_APIkey"]

    # sample five short completions from the ada model
    response = openai.Completion.create(
        model="text-ada-001",
        # model="text-curie-001",
        prompt=text,
        temperature=1,
        max_tokens=8,
        n=5)

    # collect the sampled completions and strip newlines from each
    infers = [choice["text"].replace("\n", "") for choice in response["choices"]]
    print("Predicted continuations:", infers)

    return result.text, state, infers


# get audio from the microphone and show the transcript, state, and predictions
# (gr.Audio(source=...) targets the Gradio 3.x API)
gr.Interface(
    fn=inference,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        "state"
    ],
    outputs=[
        "textbox",
        "state",
        "textbox"
    ],
    live=True).launch()
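
# --- Optional local fallback (sketch) -----------------------------------------
# The original script's commented-out transformers/GPT-2 imports suggest an
# offline alternative to the OpenAI call. The helper below is a minimal sketch
# of that idea, assuming the Hugging Face `transformers` text-generation
# pipeline; `local_gpt2_predictions` is a hypothetical name, not part of the
# original script. To try it, install transformers and uncomment:
#
# from transformers import pipeline
#
# generator = pipeline("text-generation", model="gpt2")
#
# def local_gpt2_predictions(text, n=5, max_new_tokens=8):
#     # sample n short continuations and strip the prompt prefix from each one
#     outputs = generator(text, max_new_tokens=max_new_tokens,
#                         num_return_sequences=n, do_sample=True)
#     return [o["generated_text"][len(text):].replace("\n", "") for o in outputs]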