whisper_fileStream

Runtime error

File size: 2,698 Bytes

c8eb530
546a5e2
 
 
 
9cdcc72


'''
This script calls the ada model from openai api to predict the next few words.
'''
import os
#import openai
import os
from pprint import pprint
os.system("pip install git+https://github.com/openai/whisper.git")
import gradio as gr
import whisper
from transformers import pipeline
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import time
# import streaming.py
# from next_word_prediction import GPT2




#gpt2 = AutoModelForCausalLM.from_pretrained("gpt2", return_dict_in_generate=True)
#tokenizer = AutoTokenizer.from_pretrained("gpt2")

### /code snippet


# get gpt2 model
generator = pipeline('text-generation', model='gpt2')

# whisper model specification 
model = whisper.load_model("tiny")


        
def inference(audio, state=""):

    #time.sleep(2)
    #text = p(audio)["text"]
    #state += text + " "
    # load audio data
    audio = whisper.load_audio(audio)
    # ensure sample is in correct format for inference
    audio = whisper.pad_or_trim(audio)

    # generate a log-mel spetrogram of the audio data
    mel = whisper.log_mel_spectrogram(audio).to(model.device)
    
    _, probs = model.detect_language(mel)

    # decode audio data
    options = whisper.DecodingOptions(fp16 = False)
    # transcribe speech to text
    result = whisper.decode(model, mel, options)

    PROMPT = """The following is a transcript of a conversation. Predict a few nouns, verbs, or adjectives that may be used next. Predict the next few words as a list of options. 
    A few examples are provided below and then the current transcript is provided.
    Examples:
    Transcript: Tomorrow night we're going out to 
    Next: The Movies, A Restaurant, A Baseball Game, The Theater, A Party   
    Transcript: I would like to order a cheeseburger with a side of
    Next: Fries, Milkshake, Apples, Salad, Katsup 
    """
    text = PROMPT + result.text
    
    openai.api_key = os.environ["Openai_APIkey"]
    
    response = openai.Completion.create(
                        model="text-ada-001",
                        prompt=text,
                        temperature=1,
                        max_tokens=4,
                        n=4)
    for i in range(4):
        print(response['choices'][i]['text'])
    

    # result.text
    #return getText, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
    return result.text, state, response



# get audio from microphone 

gr.Interface(
        fn=inference, 
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"), 
        "state"
    ],
    outputs=[
        "textbox",
        "state",
        "textbox"
    ],
    live=True).launch()