"""This script transcribes microphone audio with OpenAI Whisper, then calls the
OpenAI text-ada-001 completion model to predict the next few words."""

import os

# Install Whisper at startup (a common pattern for hosted demos such as Hugging Face Spaces).
os.system("pip install git+https://github.com/openai/whisper.git")

import gradio as gr
import openai
import whisper
from transformers import pipeline

# GPT-2 text-generation pipeline (loaded here but not used by the current inference path).
generator = pipeline("text-generation", model="gpt2")

# Whisper model specification.
model = whisper.load_model("tiny")

PROMPT = """The following is a transcript of a conversation. Predict a few nouns, verbs, or adjectives that may be used next. Predict the next few words as a list of options. A few examples are provided below and then the current transcript is provided.

Examples:
Transcript: Tomorrow night we're going out to
Next: The Movies, A Restaurant, A Baseball Game, The Theater, A Party

Transcript: I would like to order a cheeseburger with a side of
Next: Fries, Milkshake, Apples, Salad, Ketchup
"""


def inference(audio, state=""):
    # Load the recorded audio and pad/trim it to the length Whisper expects.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Generate a log-mel spectrogram of the audio data.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language (the probabilities are not used further here).
    _, probs = model.detect_language(mel)

    # Transcribe speech to text (fp16 disabled for CPU inference).
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    # Ask text-ada-001 for four short next-word completions of the transcript.
    text = PROMPT + result.text
    openai.api_key = os.environ["Openai_APIkey"]
    response = openai.Completion.create(
        model="text-ada-001",
        prompt=text,
        temperature=1,
        max_tokens=4,
        n=4,
    )

    # Collect the four completions and return them as a readable string for the UI.
    predictions = [choice["text"].strip() for choice in response["choices"]]
    return result.text, state, ", ".join(predictions)


# Get audio from the microphone and run inference live.
gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
        "textbox",
    ],
    live=True,
).launch()
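
# Usage note (a minimal sketch, not part of the original script): the OpenAI key
# is read from the "Openai_APIkey" environment variable above, so something like
# the following is assumed before launching. The filename "app.py" is hypothetical.
#
#   export Openai_APIkey=<your key>
#   python app.py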