File size: 2,698 Bytes
c8eb530
546a5e2
 
 
 
9cdcc72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

'''
This script transcribes microphone audio with Whisper and then calls the ada model from the OpenAI API to predict the next few words.
'''
import os
#import openai
import os
from pprint import pprint
os.system("pip install git+https://github.com/openai/whisper.git")
import gradio as gr
import whisper
from transformers import pipeline
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import time
# import streaming.py
# from next_word_prediction import GPT2




#gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
#tokenizer = AutoTokenizer.from_pretrained("gpt2")

### /code snippet


# Hugging Face text-generation pipeline backed by GPT-2.
# NOTE(review): nothing visible in this file reads `generator` -- confirm it
# is still needed before paying its load cost at startup.
generator = pipeline(task="text-generation", model="gpt2")

# Whisper speech-to-text model ("tiny" checkpoint); inference() reads it as
# the module-level `model`.
model = whisper.load_model(name="tiny")


        
def inference(audio, state=""):
    """Transcribe a recording with Whisper and ask OpenAI for likely next words.

    Args:
        audio: Path to the recorded audio file. May be ``None`` or empty when
            the interface fires the callback without a recording.
        state: Session value supplied by the interface; it is passed back
            unchanged.

    Returns:
        A ``(transcript, state, predictions)`` tuple. ``transcript`` is the
        text decoded by Whisper and ``predictions`` is a string holding the
        completions returned by the OpenAI API, one per line. When there is no
        audio, both strings are empty.

    Raises:
        KeyError: If the ``Openai_APIkey`` environment variable is not set.
    """
    # Nothing to transcribe: return empty text instead of crashing inside
    # whisper.load_audio.
    if not audio:
        return "", state, ""

    # Imported locally because the module-level `import openai` is commented
    # out; without this the API call below raises NameError.
    import openai

    # Load the recording and pad/trim it to the window length Whisper expects.
    samples = whisper.pad_or_trim(whisper.load_audio(audio))

    # Log-mel spectrogram on the same device as the model.
    mel = whisper.log_mel_spectrogram(samples).to(model.device)

    # Decode speech to text with fp16 disabled.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    PROMPT = """The following is a transcript of a conversation. Predict a few nouns, verbs, or adjectives that may be used next. Predict the next few words as a list of options. 
    A few examples are provided below and then the current transcript is provided.
    Examples:
    Transcript: Tomorrow night we're going out to 
    Next: The Movies, A Restaurant, A Baseball Game, The Theater, A Party   
    Transcript: I would like to order a cheeseburger with a side of
    Next: Fries, Milkshake, Apples, Salad, Katsup 
    """
    # Present the live transcript in the same "Transcript: ... / Next:" shape
    # as the examples so the model is cued to produce the list of options.
    text = PROMPT + "Transcript: " + result.text.strip() + "\n    Next:"

    openai.api_key = os.environ["Openai_APIkey"]

    response = openai.Completion.create(
        model="text-ada-001",
        prompt=text,
        temperature=1,
        max_tokens=4,
        n=4,
    )

    # Iterate over whatever choices came back rather than assuming exactly 4.
    suggestions = [choice["text"].strip() for choice in response["choices"]]
    for suggestion in suggestions:
        print(suggestion)

    # The third output is a textbox, so hand it plain text rather than the raw
    # API response object.
    return result.text, state, "\n".join(suggestions)



# get audio from microphone 

# Build the live interface around inference() and start serving it.
# Inputs: a microphone recording delivered as a file path, plus session state.
# Outputs: a textbox, the session state, and a second textbox, matching the
# three values inference() returns.
gr.Interface(
    fn=inference,
    inputs=[gr.inputs.Audio(source="microphone", type="filepath"), "state"],
    outputs=["textbox", "state", "textbox"],
    live=True,
).launch()