'''
This script transcribes microphone audio with Whisper and calls an OpenAI completion
model to predict the next few words in a conversation, offering five suggestions as
clickable buttons in a Gradio interface.
'''
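
# Rough flow of the app (a sketch of what the code below wires together):
#   1. Whisper ("tiny") transcribes the latest microphone clip.
#   2. The transcript is appended to the running conversation and folded into
#      EXAMPLE_PROMPT, which asks the completion model for five suggestions.
#   3. The suggestions are shown on five buttons; clicking one appends that
#      suggestion to the conversation state.
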
import os

import openai
import gradio as gr
import pandas as pd

# Whisper is installed at runtime (e.g. on a Hugging Face Space) before it is imported.
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper


EXAMPLE_PROMPT = """This is a tool for helping someone with memory issues remember the next word. 
The predictions follow a few rules:
1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
2) The predictions do not repeat themselves.
3) The predictions focus on suggesting nouns, adjectives, and verbs.
4) The predictions are related to the context in the transcript.
5) The predictions are ordered from most likely to least likely.
6) Five unique predictions are made per transcript.
    
EXAMPLES:
Transcript: Tomorrow night we're going out to 
Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend   
Transcript: I would like to order a cheeseburger with a side of
Prediction: French fries, Milkshake, Apple slices, Side salad, Extra catsup 
Transcript: My friend Savanah is
Prediction: An electrical engineer, A marine biologist, A classical musician, A developer, A product manager 
Transcript: I need to buy a birthday
Prediction: Present, Gift, Cake, Card, balloon
Transcript:  """
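
# The few-shot examples above imply that each completion should come back as a single
# comma-separated line of options. inference() below relies on that shape: the five
# buttons are filled with the comma-separated items of the first sampled completion,
# backfilling from a second completion when the first yields fewer than five items.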


# Load the Whisper ASR model; the "tiny" checkpoint keeps transcription fast.
asr_model = whisper.load_model("tiny")

openai.api_key = os.environ["Openai_APIkey"]
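# Note: the key above is read from the environment; set Openai_APIkey before
# launching (e.g. `export Openai_APIkey=...` locally, or a platform secret).
# Nothing is hard-coded here.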

    
# Transcribe an audio file to text with Whisper and return just the text.
def transcribe(audio_file):
    print("Transcribing")
    transcription = asr_model.transcribe(audio_file)["text"]
    return transcription

def inference(audio, latest):
    # Transcribe the newest audio clip with Whisper.
    print("The audio is:", audio)
    transcript = transcribe(audio)

    if transcript:
        # Append the new transcript to the running conversation, then build the
        # prompt from the whole conversation so far.
        latest.append(transcript)
        tscript = EXAMPLE_PROMPT + " ".join(latest) + "\nPrediction: "
    else:
        tscript = EXAMPLE_PROMPT

    print("Prompt sent to the model:\n", tscript)

    # Sample five completions so the fallback options below have material to draw on.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=tscript,
        temperature=0.8,
        max_tokens=18,
        n=5)

    # Collect the five completions, stripping newlines.
    inferred = [choice['text'].replace("\n", "") for choice in response['choices']]
    print("Completions:", inferred)
    print("Completion tokens used:", response['usage']['completion_tokens'])

    # Split each completion on commas: rows are completions, columns are options.
    infers = pd.Series(inferred)
    infersNew = infers.str.split(",", expand=True)

    # Take the five options from the first completion; if it produced fewer than
    # five comma-separated items, fall back to options from the second completion.
    op1 = infersNew[0][0]
    op2 = infersNew[1][0]
    op3 = infersNew[2][0]
    try:
        op4 = infersNew[3][0]
    except KeyError:
        op4 = infersNew[0][1]
    try:
        op5 = infersNew[4][0]
    except KeyError:
        op5 = infersNew[1][1]

    convoState = latest
    return transcript, op1, op2, op3, op4, op5, " ".join(convoState)

def appendPrediction(val, convoState):
    # Add the clicked suggestion to the running conversation and refresh the display.
    convoState.append(val)
    return " ".join(convoState)

# Build the Gradio UI: microphone input and transcribe button on the left; transcript,
# five suggestion buttons, and the running conversation on the right.
with gr.Blocks() as face:
    
    with gr.Row():
        convoState = gr.State([""])
        with gr.Column():
            audio = gr.Audio(source="microphone", type="filepath")
            #promptText = gr.Textbox(lines=15, placeholder="Enter a prompt here")
            #dropChoice = gr.Dropdown(choices=["text-ada-001", "text-davinci-002", "text-davinci-003", "gpt-3.5-turbo"], label="Model")
            #sliderChoice = gr.Slider(minimum=0.0, maximum=1.0, default=0.8, step=0.1, label="Temperature")
            transcribe_btn = gr.Button(value="Transcribe")
        with gr.Column():
            script = gr.Textbox(label="Transcribed text")
            #options = gr.Textbox(label="Predictions")
            option1 = gr.Button(value="  ")
            option2 = gr.Button(value="  ")
            option3 = gr.Button(value="  ")
            option4 = gr.Button(value="  ")
            option5 = gr.Button(value="  ")
            #options = gr.Dataset(components=[gr.Radio], samples=["One", "Two", "Three", "Four", "Five"])
            '''options = gr.Dataset(components=[gr.Textbox(visible=False)],
                label="Text Dataset",
                samples=[
                ["One"],
                ["Two"],
                ["Three"],
                ["Four"],
                ["Five"],
                ],
            )'''
            #options = gr.Radio(choices=["One", "Two", "Three", "Four", "Five"])
            latestConvo = gr.Textbox(label="Running conversation")
            #transcribe_btn.click(inference)
    transcribe_btn.click(fn=inference, inputs=[audio, convoState], outputs=[script, option1, option2, option3, option4, option5, latestConvo])
    option1.click(fn=appendPrediction, inputs=[option1, convoState], outputs=[latestConvo])
    option2.click(fn=appendPrediction, inputs=[option2, convoState], outputs=[latestConvo])
    option3.click(fn=appendPrediction, inputs=[option3, convoState], outputs=[latestConvo])
    option4.click(fn=appendPrediction, inputs=[option4, convoState], outputs=[latestConvo])
    option5.click(fn=appendPrediction, inputs=[option5, convoState], outputs=[latestConvo])
    #examples = gr.Examples(examples=["Sedan, Truck, SUV", "Dalmaion, Shepherd, Lab, Mutt"], inputs=[options])


face.launch()