File size: 2,943 Bytes
38a7da0
 
a990f25
 
08fbba1
 
 
 
38a7da0
3ae5946
 
3f3a4e9
3ae5946
38a7da0
 
4579397
 
 
 
 
 
 
 
 
 
 
 
869e886
4579397
 
 
 
38a7da0
 
84d3dde
 
 
 
 
c279ea6
 
 
 
 
 
 
 
 
 
84d3dde
 
 
aed1a1c
473819b
84d3dde
 
 
08fbba1
473819b
 
 
08fbba1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c279ea6
 
 
 
 
 
 
 
84d3dde
08fbba1
84d3dde
 
 
 
 
 
 
 
 
 
 
 
 
2f8eb98
84d3dde
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import whisper
import gradio as gr
import openai 
import os

# Read the OpenAI API key from the environment; a missing OPENAI_API_KEY
# variable raises KeyError when this module is imported.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Do not hard-code the API key in this file; supply it via OPENAI_API_KEY.

import whisper
import gradio as gr

# Load the Whisper "small" model once at import time; transcribe() reuses it.
model = whisper.load_model("small")

def transcribe(audio):
    """Transcribe a recorded audio clip with the module-level Whisper model.

    Args:
        audio: Path to the audio file to transcribe (the UI's audio component
            is configured with ``type="filepath"``). ``None`` or an empty
            string means no clip is available.

    Returns:
        The decoded text, or an empty string when no audio was supplied.
    """
    # The button can be clicked before anything has been recorded, in which
    # case there is no file to load; return an empty transcript instead of
    # crashing inside whisper.load_audio.
    if audio is None or (isinstance(audio, str) and not audio.strip()):
        return ""

    # Load the audio and pad/trim it to fit 30 seconds.
    samples = whisper.pad_or_trim(whisper.load_audio(audio))

    # Make the log-Mel spectrogram and move it to the same device as the model.
    mel = whisper.log_mel_spectrogram(samples).to(model.device)

    # Detect the spoken language (logged only; decoding does not depend on it).
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio in full precision (fp16 disabled).
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)
    return result.text
    
    
def process_text(input_text):
    """Return ``input_text`` converted to upper case.

    This is a placeholder processing step; swap the body for your own logic.
    """
    return input_text.upper()

def get_completion(prompt, model='gpt-3.5-turbo'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the ``content`` field of the user message.
        model: Name of the chat model to query.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    first_choice = completion.choices[0]
    return first_choice.message['content']

# Instruction template for the summarisation request. The transcript is
# substituted for {transcript} each time the "Process text" button is clicked,
# so the model sees the current textbox contents rather than a value frozen
# when the UI was built.
PROMPT_TEMPLATE = """
You are a world class nurse practitioner. You are provided with text delimited by triple quotes.
Summarize the text and put it in a table format with rows as follows:

1. Patient identification:
2. Chief complaint:
3. Medical history:
4. Family history:
5. Social history:
6. Review of systems:
7. Current medications:
8. Vaccination status:
9. Emotional well-being:
10. Patient concerns and expectations:

\"\"\"{transcript}\"\"\"
"""


def summarize_transcript(transcript):
    """Build the summarisation prompt for ``transcript`` and return the reply.

    Args:
        transcript: Text currently held in the first textbox.

    Returns:
        The text returned by ``get_completion``, or an empty string when the
        transcript is empty or whitespace-only (no request is sent).
    """
    if not transcript or not transcript.strip():
        return ""
    return get_completion(PROMPT_TEMPLATE.format(transcript=transcript))


demo = gr.Blocks()

with demo:
    audio = gr.Audio(source="microphone", type="filepath")

    b1 = gr.Button("Transcribe audio")
    b2 = gr.Button("Process text")

    text1 = gr.Textbox()
    text2 = gr.Textbox()

    # First button: recorded audio -> transcript in the first textbox.
    b1.click(transcribe, inputs=audio, outputs=text1)
    # Second button: the first textbox is passed as an input so the callback
    # receives its current text; without inputs the callback would be called
    # with no arguments and fail.
    b2.click(summarize_transcript, inputs=text1, outputs=text2)


demo.launch()

# The Gradio Blocks interface has two buttons: "Transcribe audio" and "Process text".
# The first button transcribes the recorded audio into the first textbox. The second
# button is meant to process the text from the first textbox and put the result in
# the second textbox. process_text is a placeholder that just upper-cases its input;
# replace it with your own function.


# gr.Interface(
#     title = 'OpenAI Whisper ASR Gradio Web UI', 
#     fn=transcribe, 
#     inputs=[
#         gr.inputs.Audio(source="microphone", type="filepath")
#     ],
#     outputs=[
#         "textbox"
#     ],
    
#     live=True).launch()