"""Suggest likely next words for an unfinished spoken sentence.

Audio recorded in the Gradio UI is transcribed with Whisper, the transcript is
appended to a few-shot prompt, and the OpenAI completions API is asked for
continuations. Five suggestions are shown as buttons; clicking one appends it
to the running conversation.
"""

import os
import sys

import openai
import gradio as gr

# Whisper is installed at start-up, so this must run before `import whisper`.
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper
from transformers import pipeline
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import time
import pandas as pd

# Number of suggestion buttons in the UI; `inference` returns this many labels.
NUM_OPTIONS = 5

# Label used for a suggestion button that has nothing to show (matches the
# buttons' initial value).
_BLANK_OPTION = " "

# NOTE(review): the line breaks in this prompt were reconstructed from a
# whitespace-collapsed copy of the file; confirm against the deployed prompt.
EXAMPLE_PROMPT = """This is a tool for helping someone with memory issues remember the next word.

The predictions follow a few rules:
1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
2) The predictions do not repeat themselves.
3) The predictions focus on suggesting nouns, adjectives, and verbs.
4) The predictions are related to the context in the transcript.
5) The predictions are ordered from most likely to least likely.
6) Five unique predictions are made per transcript.

EXAMPLES:

Transcript: Tomorrow night we're going out to
Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend

Transcript: I would like to order a cheeseburger with a side of
Prediction: French fries, Milkshake, Apple slices, Side salad, Extra catsup

Transcript: My friend Savanah is
Prediction: An electrical engineer, A marine biologist, A classical musician, A developer, A product manager

Transcript: I need to buy a birthday
Prediction: Present, Gift, Cake, Card, balloon

Transcript: """

# Whisper model specification.
asr_model = whisper.load_model("tiny")
openai.api_key = os.environ["Openai_APIkey"]


def transcribe(audio_file):
    """Return the text Whisper transcribes from the audio at *audio_file*."""
    print("Transcribing")
    return asr_model.transcribe(audio_file)["text"]


def _pick_options(completions, count=NUM_OPTIONS):
    """Return exactly *count* button labels taken from *completions*.

    Each completion is a comma-separated list of suggestions. Suggestions are
    taken in order (first completion first), stripped of surrounding
    whitespace and de-duplicated case-insensitively. If fewer than *count*
    distinct suggestions exist, the result is padded with blank labels.
    """
    options = []
    seen = set()
    for completion in completions:
        for candidate in completion.replace("\n", "").split(","):
            label = candidate.strip()
            key = label.casefold()
            if not label or key in seen:
                continue
            seen.add(key)
            options.append(label)
            if len(options) == count:
                return options
    options.extend([_BLANK_OPTION] * (count - len(options)))
    return options


def inference(audio, latest):
    """Transcribe *audio* and predict five ways to continue the sentence.

    Args:
        audio: Path of the recorded audio file, or None when nothing was
            recorded.
        latest: Running conversation list; the new transcript is appended to
            it in place.

    Returns:
        A 7-tuple ``(transcript, op1, op2, op3, op4, op5, latest)`` matching
        the outputs wired to the "Transcribe" button.
    """
    print("The audio is:", audio)
    if audio is None:
        # Nothing was recorded: leave the UI blank instead of crashing in
        # Whisper and spending an API call on an empty transcript.
        blanks = [_BLANK_OPTION] * NUM_OPTIONS
        return ("", *blanks, latest)

    transcript = transcribe(audio)
    if transcript is not None:
        latest.append(transcript)
        tscript = EXAMPLE_PROMPT + str(transcript) + "\nPrediction: "
    else:
        tscript = EXAMPLE_PROMPT
    print("tscript ------- ", tscript)

    # NOTE(review): `openai.Completion` with "text-davinci-003" is the legacy
    # completions interface; confirm it is still available to this deployment.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=tscript,
        temperature=0.8,
        max_tokens=18,
        n=5,
    )
    print("USEAGE: ", response['usage']['completion_tokens'])

    completions = [choice['text'] for choice in response['choices']]
    op1, op2, op3, op4, op5 = _pick_options(completions)
    return transcript, op1, op2, op3, op4, op5, latest


def appendPrediction(val, convoState):
    """Append the clicked suggestion *val* to *convoState* and return the list."""
    convoState.append(val)
    return convoState


# Build the UI: record audio from the microphone, show the transcript and
# five suggestion buttons, and keep a running conversation.
with gr.Blocks() as face:
    with gr.Row():
        # Per-session conversation history; handlers mutate this list in place.
        convoState = gr.State([""])
        with gr.Column():
            audio = gr.Audio(source="microphone", type="filepath")
            transcribe_btn = gr.Button(value="Transcribe")
        with gr.Column():
            script = gr.Textbox(label="Transcribed text")
            option1 = gr.Button(value=" ")
            option2 = gr.Button(value=" ")
            option3 = gr.Button(value=" ")
            option4 = gr.Button(value=" ")
            option5 = gr.Button(value=" ")
            latestConvo = gr.Textbox(label="Running conversation")

    transcribe_btn.click(
        fn=inference,
        inputs=[audio, convoState],
        outputs=[script, option1, option2, option3, option4, option5, latestConvo],
    )
    # Every suggestion button appends its own label to the conversation.
    for option in (option1, option2, option3, option4, option5):
        option.click(
            fn=appendPrediction,
            inputs=[option, convoState],
            outputs=[latestConvo],
        )

face.launch()