'''
This script transcribes microphone speech with OpenAI Whisper, then calls the
Ada model from the OpenAI API to predict the next few words in the conversation.
'''
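# Pipeline overview: microphone audio -> Whisper transcription -> few-shot
# prompt -> OpenAI Completion API -> candidate next-word options, displayed
# live in a Gradio interface.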
import os

# Install dependencies at runtime (a common pattern in hosted demos such as
# Hugging Face Spaces).
os.system("pip install --upgrade pip")
os.system("pip install git+https://github.com/openai/whisper.git")

import gradio as gr
import openai
import whisper
from transformers import pipeline

# Local GPT-2 text-generation pipeline (loaded here but unused below;
# generation is handled by the OpenAI API instead).
generator = pipeline('text-generation', model='gpt2')

# Whisper speech-to-text model: "tiny" is the fastest checkpoint and suits a
# live demo; "base", "small", "medium", or "large" trade speed for accuracy.
model = whisper.load_model("tiny")


def inference(audio, state=""):
    # load audio data
    audio = whisper.load_audio(audio)
    # ensure sample is in correct format for inference
    audio = whisper.pad_or_trim(audio)

    # generate a log-mel spectrogram of the audio data
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    
    # detect the spoken language; probs (per-language probabilities) is unused here
    _, probs = model.detect_language(mel)

    # decode the audio; fp16=False keeps inference in float32, which works on CPU
    options = whisper.DecodingOptions(fp16=False)
    # transcribe speech to text
    result = whisper.decode(model, mel, options)
    print("Whisper transcription:", result.text)

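    # Few-shot prompt: the worked examples below steer the model to answer in
    # the same "Next: option, option, ..." format for the new transcript.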
    PROMPT = """The following is a transcript of a conversation. Predict the next few words in the conversation as a List of options. 
    A few examples are provided below and then the current transcript is provided.
    Examples:
    Transcript: Tomorrow night we're going out to 
    Next: The Movies to watch a blockbuster, A Restaurant for dinner, A Baseball Game, The Theater, A Party for a friend   
    Transcript: I would like to order a cheeseburger with a side of
    Next: Frnech fries, Milkshake, Apple slices, Side salad, Extra katsup 
    Transcript: My friend Savanah is
    Next: An elecrical engineer, A marine biologist, A classical musician 
    Transcript: I need to buy a birthday
    Next: Present, Gift, Cake, Card
    """
    text = PROMPT + result.text
    
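    # the API key is read from an environment variable; the name must exactly
    # match the secret configured for the deployment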
    openai.api_key = os.environ["Openai_APIkey"]
    
    # request n short completions from the Ada model
    response = openai.Completion.create(
        model="text-ada-001",
        # model="text-curie-001",  # larger, more capable alternative
        prompt=text,
        temperature=0.3,
        max_tokens=8,
        n=5)
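    # response['choices'] is a list of n completion dicts; each one's 'text'
    # field holds a predicted continuation (openai-python < 1.0 response format)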
    infers = [choice['text'] for choice in response['choices']]
    print("Candidate completions:", infers)

    return result.text, state, infers
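

# Optional helper (a sketch, not wired into the demo above): split a single
# completion string such as " French fries, Milkshake" into separate options.
def split_options(completion):
    # drop a leading "Next:" label if the model echoes the prompt format
    text = completion.strip()
    if text.lower().startswith("next:"):
        text = text[len("next:"):]
    # split on commas and discard empty fragments
    return [opt.strip() for opt in text.split(",") if opt.strip()]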



# Gradio app: capture microphone audio and show the transcript, the running
# state, and the predicted next-word options.
# Note: gr.inputs.Audio(source=...) is the legacy Gradio input API (deprecated
# in Gradio 3.x and removed in 4.x).
gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state"
    ],
    outputs=[
        "textbox",
        "state",
        "textbox"
    ],
    live=True).launch()
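
# Local usage sketch (assuming this file is saved as app.py and the key is
# exported under the exact variable name read above):
#   export Openai_APIkey=sk-...
#   python app.py
# Gradio serves the demo at http://127.0.0.1:7860 by default.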