File size: 3,067 Bytes
0e621c0
 
 
 
 
 
 
 
 
 
1cc4666
81033d4
 
 
 
0958811
dadeb6e
81033d4
0e621c0
81033d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e621c0
 
 
 
 
 
 
 
 
1cc4666
 
 
0e621c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1cc4666
 
 
0e621c0
 
 
 
1cc4666
32d8e11
0e621c0
 
55f025c
0958811
0e621c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# Speech-to-text demo: Whisper transcription -> OpenAI chat post-processing,
# served through a Gradio Blocks UI.
import whisper
import gradio as gr
import openai 
import os

# Read the API key from the environment; raises KeyError at import time if
# OPENAI_API_KEY is not set (fail-fast before the UI starts).
openai.api_key = os.environ["OPENAI_API_KEY"]

# Module-level Whisper model, loaded once at startup.
# NOTE(review): transcribe() below loads its own "base" model and never uses
# this "small" one — confirm which size is intended.
model = whisper.load_model("small")


#option 1
def transcribe(audio):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        audio: Path to an audio file (Gradio's ``gr.Audio(type="filepath")``
            passes a filepath string).

    Returns:
        The transcribed text as a string.
    """
    # Bug fix: the original reloaded a *"base"* model on every call, which is
    # slow (multi-second model load per request) and silently used a different
    # model size than the "small" model configured at module level. Reuse the
    # model loaded once at import time instead.
    result = model.transcribe(audio)
    return result["text"]

#option 2
# def transcribe(audio):
    
#     #time.sleep(3)
#     # load audio and pad/trim it to fit 30 seconds
#     audio = whisper.load_audio(audio)
#     audio = whisper.pad_or_trim(audio)

#     # make log-Mel spectrogram and move to the same device as the model
#     mel = whisper.log_mel_spectrogram(audio).to(model.device)

#     # detect the spoken language
#     _, probs = model.detect_language(mel)
#     print(f"Detected language: {max(probs, key=probs.get)}")

#     # decode the audio
#     options = whisper.DecodingOptions(fp16 = False)
#     result = whisper.decode(model, mel, options)
#     return result.text
    
    
def process_text(input_text):
    """Return *input_text* converted to upper case.

    Placeholder processing step — swap in real logic as needed.
    """
    return input_text.upper()

def get_completion(prompt, model='gpt-3.5-turbo'):
    """Send *prompt* to the OpenAI chat API and return the assistant's reply.

    Args:
        prompt: User message content (here, the Whisper transcription).
        model: OpenAI chat model name; defaults to ``gpt-3.5-turbo``.

    Returns:
        The content string of the first choice in the API response.
    """
    messages = [
        {"role": "system", "content": """You are a .... You are provided with the transcription of a ... .  \
    Extract the following information from the transcription, replace curly brackets {} with relevant extracted information ... \
    ...the rest of your prompt... 
    
         """
        },
        {"role": "user", "content": prompt}
        ]
    # temperature=0 keeps the extraction deterministic.
    reply = openai.ChatCompletion.create(model=model, messages=messages, temperature=0)
    return reply.choices[0].message['content']

# Build the Gradio UI: record/upload audio, transcribe it, then run the
# transcription through the OpenAI prompt.
with gr.Blocks() as demo:
    
    gr.Markdown("""
    # Title  <br>
    
    Description 
    
    """)

    
    # NOTE(review): `title` is assigned but never used by any component below.
    title = "title"
    # Audio input delivered to transcribe() as a filepath string.
    audio = gr.Audio(type="filepath")
    
    b1 = gr.Button("Transcribe audio")
    b2 = gr.Button("<Placeholder for the prompted action>")
    # b3 = gr.Button("Email report to your doctor")


    # text1 holds the transcription; text2 holds the LLM output.
    text1 = gr.Textbox(lines=5)
    text2 = gr.Textbox(lines=5)

    # NOTE(review): `prompt` aliases text1 but the click handlers below use
    # text1 directly, so this assignment has no effect. process_text() defined
    # above is also never wired to any button — confirm whether either was
    # meant to be used.
    prompt = text1
    
  
    
    # Button 1: audio -> transcription; Button 2: transcription -> completion.
    b1.click(transcribe, inputs=audio, outputs=text1)
    b2.click(get_completion, inputs=text1, outputs=text2)


    # b1.click(transcribe, inputs=audio, outputs=text1)
    # b2.click(get_completion, inputs=prompt, outputs=text2)



# Starts the local Gradio server (blocking).
demo.launch()

#demo.launch(share=True, auth=("username", "password"))

# In this example, the process_text function just converts the input text to uppercase, but you can replace it with your desired function. The Gradio Blocks interface will have two buttons: "Transcribe audio" and "Process text". The first button transcribes the audio and fills the first textbox, and the second button processes the text from the first textbox and fills the second textbox.


# gr.Interface(
#     title = 'OpenAI Whisper ASR Gradio Web UI', 
#     fn=transcribe, 
#     inputs=[
#         gr.inputs.Audio(source="microphone", type="filepath")
#     ],
#     outputs=[
#         "textbox"
#     ],
    
#     live=True).launch()