whisper_fileStream

Runtime error

File size: 4,077 Bytes

c8eb530
8ab0364
 
 
 
 
e912c09
9cdcc72
9f81930
9cdcc72
 
 
 
 
 
 
d2b25d2
1575629
 
 
 
 
 
9cdcc72
1575629
 
 
 
 
 
 
 
 
 
 
3721bac
1575629
 
 
 
 
3721bac
1575629
 
 
 
 
 
17ef5ad
1575629
 
 
17ef5ad
6f01fbc
5a4b240
9cdcc72
1575629
9cdcc72
 
1575629
9cdcc72
1575629
e139dcd
f617c7f
45ffe72
0a1b459
d795229
8ab0364
f617c7f
 
d795229
f617c7f
 
deea3a0
c4563cf
 
dea79d3
4328889
7d9e72a
 
 
9b622fc
d795229
7d9e72a
8ab0364
04abba1
 
 
 
 
 
 
 
 
 
 
 
 
21207ca
f58257c
1120aac
04abba1
 
c891e39
f00c1c8
3721bac
8ab0364


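'''
This script transcribes microphone audio with Whisper, then calls a model from the
OpenAI API to predict the next few words in the conversation. The transcript and
the predictions are shown in a Gradio interface.
'''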
import os
import sys
import openai
import gradio as gr
os.system("pip install git+https://github.com/openai/whisper.git")
import whisper
from transformers import pipeline
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import time


# Few-shot prompt for the completion model: the rules the predictions should
# follow, worked Transcript/Prediction examples, and a trailing "Transcript: "
# so the live transcript can be appended directly after it.
EXAMPLE_PROMPT = """This is a tool for helping someone with memory issues remember the next word. 
The predictions follow a few rules:
1) The predictions are suggestions of ways to continue the transcript as if someone forgot what the next word was.
2) The predictions do not repeat themselves.
3) The predictions focus on suggesting nouns, adjectives, and verbs.
4) The predictions are related to the context in the transcript.
    
EXAMPLES:
Transcript: Tomorrow night we're going out to 
Prediction: The Movies, A Restaurant, A Baseball Game, The Theater, A Party for a friend   
Transcript: I would like to order a cheeseburger with a side of
Prediction: French fries, Milkshake, Apple slices, Side salad, Extra ketchup 
Transcript: My friend Savanah is
Prediction: An electrical engineer, A marine biologist, A classical musician 
Transcript: I need to buy a birthday
Prediction: Present, Gift, Cake, Card
Transcript: """


# whisper model specification
# Load the "tiny" Whisper checkpoint once at import time; transcribe() below
# calls this module-level model.
asr_model = whisper.load_model("tiny")

# Read the OpenAI API key from the "Openai_APIkey" environment variable.
# os.environ[...] raises KeyError here, at import time, if it is not set.
openai.api_key = os.environ["Openai_APIkey"]

    
# Transcribe function
def transcribe(audio_file):
    """Transcribe ``audio_file`` with the module-level Whisper model.

    Prints a progress message, runs ``asr_model.transcribe`` on the given
    audio and returns the ``"text"`` entry of its result.
    """
    print("Transcribing")
    result = asr_model.transcribe(audio_file)
    return result["text"]

def inference(audio, prompt, model, temperature, latest):
    """Transcribe a recording and ask an OpenAI completion model what comes next.

    Args:
        audio: Audio input handed to ``transcribe``; may be ``None`` when
            there is nothing to transcribe.
        prompt: Text placed in front of the transcript in the request.
        model: Name of the OpenAI model to query.
        temperature: Sampling temperature forwarded to the API.
        latest: List of earlier transcripts. The new transcript is appended
            to this list in place.

    Returns:
        A ``(transcript, predictions, conversation)`` tuple: the transcribed
        text, the ``str()`` of the list of predicted continuations, and the
        ``latest`` list.
    """
    print("The audio is:", audio)

    if audio is None:
        # Nothing to transcribe: skip the Whisper and OpenAI calls and leave
        # the conversation history untouched.
        return "", str([]), latest

    # Transcribe with Whisper
    transcript = transcribe(audio)
    if transcript is None:
        # Keep the string concatenation below from failing on None.
        transcript = ""
    else:
        latest.append(transcript)

    text = prompt + transcript + "\nPrediction: "

    response = openai.Completion.create(
        model=model,
        prompt=text,
        temperature=temperature,
        max_tokens=8,
        n=5,
    )

    # One prediction per returned choice, with newlines removed. Iterating
    # over the choices avoids indexing past the end if fewer come back.
    infers = [choice["text"].replace("\n", "") for choice in response["choices"]]
    print("Predictions:", infers)

    return transcript, str(infers), latest

# get audio from microphone
# Layout: inputs (recorder, prompt, model, temperature, button) in the first
# column; outputs (transcript, predictions, running conversation) in the second.
with gr.Blocks() as face:

    with gr.Row():
        # Conversation history passed to inference() as its `latest` argument.
        convoState = gr.State([""])
        with gr.Column():
            audio = gr.Audio(source="microphone", type="filepath")
            promptText = gr.Textbox(lines=15, placeholder="Enter a prompt here")
            # An initial value is set so inference() never receives model=None.
            # NOTE(review): inference() calls openai.Completion.create;
            # "gpt-3.5-turbo" is a chat model and is presumably rejected by
            # that endpoint -- confirm before relying on this choice.
            dropChoice = gr.Dropdown(
                choices=["text-ada-001", "text-davinci-002", "text-davinci-003", "gpt-3.5-turbo"],
                value="text-davinci-003",
                label="Model",
            )
            # `value` (not `default`) sets the slider's initial position.
            sliderChoice = gr.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.1, label="Temperature")
            transcribe_btn = gr.Button(value="Transcribe")
        with gr.Column():
            script = gr.Textbox(label="Transcribed text")
            # inference() returns the predictions as a single string, so a
            # Textbox is used to display them.
            options = gr.Textbox(label="Predictions")
            latestConvo = gr.Textbox(label="Running conversation")
    transcribe_btn.click(
        fn=inference,
        inputs=[audio, promptText, dropChoice, sliderChoice, convoState],
        outputs=[script, options, latestConvo],
    )


face.launch()