'''
This script calls a model from the OpenAI API to predict the next few words
in a conversation, using Whisper to transcribe microphone audio first.
'''
# --- Standard library ---
import os
import sys
import time

# --- Third party ---
import openai
import gradio as gr
import pandas as pd
import torch
from transformers import pipeline
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

# Hugging Face Spaces pattern: install Whisper from its git repo at startup
# so the module is importable below.
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper

# Few-shot prompt: rules plus worked examples that steer the completion
# model toward five comma-separated next-word suggestions.
EXAMPLE_PROMPT = """This is a tool for helping someone with memory issues remember the next word.
The predictions follow a few rules:
1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
2) The predictions do not repeat themselves.
3) The predictions focus on suggesting nouns, adjectives, and verbs.
4) The predictions are related to the context in the transcript.
5) The predictions are ordered from most likely to least likely.
6) Five unique predictions are made per transcript.
EXAMPLES:
Transcript: Tomorrow night we're going out to
Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend
Transcript: I would like to order a cheeseburger with a side of
Prediction: French fries, Milkshake, Apple slices, Side salad, Extra catsup
Transcript: My friend Savanah is
Prediction: An electrical engineer, A marine biologist, A classical musician, A developer, A product manager
Transcript: I need to buy a birthday
Prediction: Present, Gift, Cake, Card, balloon
Transcript: """

# Smallest Whisper checkpoint — fast enough for CPU-only Spaces.
asr_model = whisper.load_model("tiny")

# The OpenAI key is provided through the Space's secret store.
# NOTE(review): raises KeyError if the secret is missing — presumably intended
# to fail fast at startup.
openai.api_key = os.environ["Openai_APIkey"]
def transcribe(audio_file):
    """Run Whisper ASR on `audio_file` (a filesystem path) and return the text."""
    print("Transcribing")
    result = asr_model.transcribe(audio_file)
    return result["text"]
def inference(audio, latest):
    """Transcribe audio and ask the OpenAI completions API for five next-word
    suggestions.

    Args:
        audio: path to the recorded audio clip (gradio Audio, type="filepath").
        latest: running conversation state (list of strings); the new
            transcript is appended to it in place.

    Returns:
        Tuple of (transcript, op1, op2, op3, op4, op5, convoState): the raw
        transcript, five prediction strings for the option buttons, and the
        updated conversation state.
    """
    print("The audio is:", audio)
    transcript = transcribe(audio)
    if transcript is not None:
        latest.append(transcript)
        tscript = EXAMPLE_PROMPT + str(transcript) + "\nPrediction: "
    else:
        tscript = EXAMPLE_PROMPT
    # BUG FIX: the original called ptint(), which raised NameError at runtime.
    print("tscript ------- ", tscript)
    # Request five independent completions (n=5) for the same prompt.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=tscript,
        temperature=0.8,
        max_tokens=18,
        n=5)
    # Strip newlines from each completion's text.
    inferred = [response['choices'][i]['text'].replace("\n", "")
                for i in range(5)]
    print("Responses: ", inferred)
    # Split every completion on commas; column j of the expanded frame holds
    # the j-th comma-separated option of each completion (or None if absent).
    infers = pd.Series(inferred)
    infersNew = infers.str.split(",", n=-1, expand=True)
    print("USAGE: ", response['usage']['completion_tokens'])
    # Take the first option of the first three completions; fall back to the
    # second option of an earlier completion when a column is missing
    # (i.e. the model produced fewer comma-separated options).
    op1 = infersNew[0][0]
    op2 = infersNew[1][0]
    op3 = infersNew[2][0]
    try:
        op4 = infersNew[3][0]
    except KeyError:
        op4 = infersNew[0][1]
    try:
        op5 = infersNew[4][0]
    except KeyError:
        op5 = infersNew[1][1]
    convoState = latest
    return transcript, op1, op2, op3, op4, op5, convoState
def appendPrediction(val, convoState):
    """Add the clicked prediction `val` to the running conversation.

    Mutates `convoState` in place and returns it so gradio can refresh the
    conversation textbox.
    """
    convoState += [val]  # in-place extend of the shared state list
    return convoState
# Build the gradio UI: record from the microphone, transcribe on demand, and
# surface the five predictions as clickable buttons that extend the
# running conversation.
with gr.Blocks() as face:
    with gr.Row():
        convoState = gr.State([""])
        with gr.Column():
            audio = gr.Audio(source="microphone", type="filepath")
            transcribe_btn = gr.Button(value="Transcribe")
        with gr.Column():
            script = gr.Textbox(label="Transcribed text")
            # Five blank buttons; inference() fills their labels with
            # predictions, and clicking one appends it to the conversation.
            option_buttons = [gr.Button(value=" ") for _ in range(5)]
            option1, option2, option3, option4, option5 = option_buttons
            latestConvo = gr.Textbox(label="Running conversation")
    transcribe_btn.click(
        fn=inference,
        inputs=[audio, convoState],
        outputs=[script, option1, option2, option3, option4, option5,
                 latestConvo],
    )
    # Each option button sends its own label into the conversation state.
    for btn in option_buttons:
        btn.click(fn=appendPrediction, inputs=[btn, convoState],
                  outputs=[latestConvo])
face.launch()