sensahin committed on
Commit
abdf131
1 Parent(s): 550fc79
Files changed (2) hide show
  1. app.py +173 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,7 +1,176 @@
 
 
1
  import gradio as gr
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import subprocess
import sys

# Install OpenAI Whisper from source before importing it.
# Invoking pip through the running interpreter (``sys.executable -m pip``)
# guarantees the package lands in the environment this process imports from,
# and passing an argument list avoids going through a shell.
_whisper_install = subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "git+https://github.com/openai/whisper.git",
    ],
    check=False,
)
if _whisper_install.returncode != 0:
    # Keep going (a usable copy may already be installed), but make the
    # failure visible in the logs instead of ignoring it silently.
    print(
        f"warning: pip install of whisper exited with status "
        f"{_whisper_install.returncode}",
        file=sys.stderr,
    )

import gradio as gr
import whisper

# Load the "small" checkpoint once at import time; `inference` reuses it for
# every request.
model = whisper.load_model("small")
 
7
 
8
+
9
+
10
def inference(audio):
    """Transcribe a recorded audio file with the module-level Whisper model.

    Args:
        audio: Path to the audio file to transcribe. May be ``None`` or an
            empty string when the caller has no recording to offer.

    Returns:
        The decoded text, or an empty string when no audio was supplied.
    """
    # Nothing recorded: return an empty transcript instead of failing inside
    # the audio loader.
    if not audio:
        return ""

    samples = whisper.load_audio(audio)
    # Fit the samples to the fixed-length window the model expects
    # (30 seconds according to the Whisper README).
    samples = whisper.pad_or_trim(samples)

    mel = whisper.log_mel_spectrogram(samples).to(model.device)

    # No explicit model.detect_language() call here: its result was unused,
    # and decode() detects the language itself when DecodingOptions.language
    # is left unset.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    print(result.text)
    return result.text
23
+
24
+
25
# Page title and blurb for the demo.
title = "Whisper"

description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification."

# Custom stylesheet for the demo page.
# NOTE: the --tw-ring-shadow calc() needs an explicit "+" between its two
# operands; without the operator the value is invalid CSS.
css = """
        .gradio-container {
            font-family: 'IBM Plex Sans', sans-serif;
        }
        .gr-button {
            color: white;
            border-color: black;
            background: black;
        }
        input[type='range'] {
            accent-color: black;
        }
        .dark input[type='range'] {
            accent-color: #dfdfdf;
        }
        .container {
            max-width: 730px;
            margin: auto;
            padding-top: 1.5rem;
        }

        .details:hover {
            text-decoration: underline;
        }
        .gr-button {
            white-space: nowrap;
        }
        .gr-button:focus {
            border-color: rgb(147 197 253 / var(--tw-border-opacity));
            outline: none;
            box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
            --tw-border-opacity: 1;
            --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
            --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
            --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
            --tw-ring-opacity: .5;
        }
        .footer {
            margin-bottom: 45px;
            margin-top: 35px;
            text-align: center;
            border-bottom: 1px solid #e5e5e5;
        }
        .footer>p {
            font-size: .8rem;
            display: inline-block;
            padding: 0 10px;
            transform: translateY(10px);
            background: white;
        }
        .dark .footer {
            border-color: #303030;
        }
        .dark .footer>p {
            background: #0b0f19;
        }
        .prompt h4{
            margin: 1.25em 0 .25em 0;
            font-weight: bold;
            font-size: 115%;
        }
"""
91
+
92
+ block = gr.Blocks(css=css)
93
+
94
+
95
+
96
+ with block:
97
+ gr.HTML(
98
+ """
99
+ <div style="text-align: center; max-width: 650px; margin: 0 auto;">
100
+ <div
101
+ style="
102
+ display: inline-flex;
103
+ align-items: center;
104
+ gap: 0.8rem;
105
+ font-size: 1.75rem;
106
+ "
107
+ >
108
+ <svg
109
+ width="0.65em"
110
+ height="0.65em"
111
+ viewBox="0 0 115 115"
112
+ fill="none"
113
+ xmlns="http://www.w3.org/2000/svg"
114
+ >
115
+ <rect width="23" height="23" fill="white"></rect>
116
+ <rect y="69" width="23" height="23" fill="white"></rect>
117
+ <rect x="23" width="23" height="23" fill="#AEAEAE"></rect>
118
+ <rect x="23" y="69" width="23" height="23" fill="#AEAEAE"></rect>
119
+ <rect x="46" width="23" height="23" fill="white"></rect>
120
+ <rect x="46" y="69" width="23" height="23" fill="white"></rect>
121
+ <rect x="69" width="23" height="23" fill="black"></rect>
122
+ <rect x="69" y="69" width="23" height="23" fill="black"></rect>
123
+ <rect x="92" width="23" height="23" fill="#D9D9D9"></rect>
124
+ <rect x="92" y="69" width="23" height="23" fill="#AEAEAE"></rect>
125
+ <rect x="115" y="46" width="23" height="23" fill="white"></rect>
126
+ <rect x="115" y="115" width="23" height="23" fill="white"></rect>
127
+ <rect x="115" y="69" width="23" height="23" fill="#D9D9D9"></rect>
128
+ <rect x="92" y="46" width="23" height="23" fill="#AEAEAE"></rect>
129
+ <rect x="92" y="115" width="23" height="23" fill="#AEAEAE"></rect>
130
+ <rect x="92" y="69" width="23" height="23" fill="white"></rect>
131
+ <rect x="69" y="46" width="23" height="23" fill="white"></rect>
132
+ <rect x="69" y="115" width="23" height="23" fill="white"></rect>
133
+ <rect x="69" y="69" width="23" height="23" fill="#D9D9D9"></rect>
134
+ <rect x="46" y="46" width="23" height="23" fill="black"></rect>
135
+ <rect x="46" y="115" width="23" height="23" fill="black"></rect>
136
+ <rect x="46" y="69" width="23" height="23" fill="black"></rect>
137
+ <rect x="23" y="46" width="23" height="23" fill="#D9D9D9"></rect>
138
+ <rect x="23" y="115" width="23" height="23" fill="#AEAEAE"></rect>
139
+ <rect x="23" y="69" width="23" height="23" fill="black"></rect>
140
+ </svg>
141
+ <h1 style="font-weight: 900; margin-bottom: 7px;">
142
+ Whisper
143
+ </h1>
144
+ </div>
145
+ <p style="margin-bottom: 10px; font-size: 94%">
146
+ Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.
147
+ </p>
148
+ </div>
149
+ """
150
+ )
151
+ with gr.Group():
152
+ with gr.Box():
153
+ with gr.Row().style(mobile_collapse=False, equal_height=True):
154
+ audio = gr.Audio(
155
+ label="Input Audio",
156
+ show_label=False,
157
+ source="microphone",
158
+ type="filepath"
159
+ )
160
+
161
+ btn = gr.Button("Transcribe")
162
+ text = gr.Textbox(show_label=False)
163
+
164
+
165
+
166
+
167
+ btn.click(inference, inputs=[audio], outputs=[text])
168
+
169
+ gr.HTML('''
170
+ <div class="footer">
171
+ <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a> - Gradio Demo by 🤗 Hugging Face
172
+ </p>
173
+ </div>
174
+ ''')
175
+
176
+ block.launch()
requirements.txt CHANGED
@@ -1 +1,2 @@
1
- gradio
 
 
1
+ gradio
2
+ openai-whisper