"""Transcribe microphone audio with Whisper and suggest the next few words.

The transcript is appended to a few-shot prompt and sent to the OpenAI
``text-ada-001`` completion model, which returns several short candidate
continuations. A Gradio interface wires the microphone to ``inference``.
"""
import os
import time
from pprint import pprint

# Runtime installs: whisper must be installed before it is imported below.
os.system("pip install --upgrade pip")
os.system("pip install git+https://github.com/openai/whisper.git")

import gradio as gr  # noqa: E402
import openai  # noqa: E402
import torch  # noqa: E402
import whisper  # noqa: E402
from transformers import AutoModelForCausalLM  # noqa: E402
from transformers import AutoTokenizer  # noqa: E402
from transformers import pipeline  # noqa: E402

# import streaming.py
# from next_word_prediction import GPT2

# gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
# tokenizer = AutoTokenizer.from_pretrained("gpt2")

# get gpt2 model
# NOTE(review): `generator` is not used anywhere in the visible code; it is
# kept so the module-level name and its load-time behaviour stay unchanged.
generator = pipeline('text-generation', model='gpt2')

# whisper model specification
model = whisper.load_model("tiny")

# Number of candidate continuations requested from the completion model.
NUM_SUGGESTIONS = 5

# Few-shot prompt; the live transcript is appended directly after it.
# NOTE(review): the example text contains typos ("Frnech", "katsup",
# "elecrical"); it is runtime prompt text and is left untouched here.
PROMPT = (
    "The following is a transcript of a conversation. Predict the next few "
    "words in the conversation as a List of options. A few examples are "
    "provided below and then the current transcript is provided. \n"
    "Examples: Transcript: Tomorrow night we're going out to Next: The Movies "
    "to watch a blockbuster, A Restaurant for dinner, A Baseball Game, The "
    "Theater, A Party for a friend Transcript: I would like to order a "
    "cheeseburger with a side of Next: Frnech fries, Milkshake, Apple slices, "
    "Side salad, Extra katsup Transcript: My friend Savanah is Next: An "
    "elecrical engineer, A marine biologist, A classical musician Transcript: "
    "I need to buy a birthday Next: Present, Gift, Cake, Card "
)


def inference(audio, state=""):
    """Transcribe ``audio`` and return next-word suggestions for it.

    Args:
        audio: Path to the recorded audio file, or ``None`` when no
            recording is available.
        state: Opaque session state; returned unchanged.

    Returns:
        A ``(transcript, state, suggestions)`` tuple where ``transcript`` is
        the text decoded by Whisper and ``suggestions`` is a list of
        completion strings (empty when there is nothing to transcribe).

    Raises:
        KeyError: If the ``Openai_APIkey`` environment variable is not set.
    """
    # The live interface can invoke the callback with no recording (for
    # example after the input is cleared); whisper.load_audio(None) would
    # raise, so answer with empty results instead.
    if audio is None:
        return "", state, []

    # load audio data
    audio = whisper.load_audio(audio)
    # ensure sample is in correct format for inference
    audio = whisper.pad_or_trim(audio)

    # generate a log-mel spectrogram of the audio data
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # No separate detect_language() call: its result was never used, and
    # decode() detects the language itself when none is specified.
    options = whisper.DecodingOptions(fp16=False)

    # transcribe speech to text
    result = whisper.decode(model, mel, options)
    print("result pre gp model from whisper: ", result, ".text ", result.text)

    # Nothing was transcribed (e.g. silence): skip the paid completion call.
    if not result.text.strip():
        return result.text, state, []

    # NOTE(review): the examples use a "Transcript: ... Next: ..." layout but
    # the live transcript is appended without those labels -- confirm whether
    # that is intended before changing the prompt format.
    text = PROMPT + result.text

    openai.api_key = os.environ["Openai_APIkey"]

    response = openai.Completion.create(
        model="text-ada-001",
        # model="text-curie-001",
        prompt=text,
        temperature=0.3,
        max_tokens=8,
        n=NUM_SUGGESTIONS,
    )

    # Iterate over whatever the API returned rather than a hard-coded index
    # range, so a shorter-than-requested choice list cannot raise IndexError.
    infers = []
    for choice in response['choices']:
        print("print1 ", choice['text'])
        infers.append(choice['text'])
    print("print2: infers ", infers)
    print("print3: Responses ", response)
    print("Object type of response: ", type(response))

    return result.text, state, infers


# get audio from microphone
gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
        "textbox",
    ],
    live=True,
).launch()