mmichelli mrm8488 committed on
Commit
34f21a1
0 Parent(s):

Duplicate from mrm8488/OpenAI_Whisper_ASR

Browse files

Co-authored-by: Manuel Romero <mrm8488@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +31 -0
  2. README.md +14 -0
  3. app.py +144 -0
  4. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.npy filter=lfs diff=lfs merge=lfs -text
13
+ *.npz filter=lfs diff=lfs merge=lfs -text
14
+ *.onnx filter=lfs diff=lfs merge=lfs -text
15
+ *.ot filter=lfs diff=lfs merge=lfs -text
16
+ *.parquet filter=lfs diff=lfs merge=lfs -text
17
+ *.pickle filter=lfs diff=lfs merge=lfs -text
18
+ *.pkl filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pt filter=lfs diff=lfs merge=lfs -text
21
+ *.pth filter=lfs diff=lfs merge=lfs -text
22
+ *.rar filter=lfs diff=lfs merge=lfs -text
23
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
24
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
25
+ *.tflite filter=lfs diff=lfs merge=lfs -text
26
+ *.tgz filter=lfs diff=lfs merge=lfs -text
27
+ *.wasm filter=lfs diff=lfs merge=lfs -text
28
+ *.xz filter=lfs diff=lfs merge=lfs -text
29
+ *.zip filter=lfs diff=lfs merge=lfs -text
30
+ *.zst filter=lfs diff=lfs merge=lfs -text
31
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: OpenAI Whisper ASR
3
+ emoji: 🗣️🔤
4
+ colorFrom: purple
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.3.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: bigscience-bloom-rail-1.0
11
+ duplicated_from: mrm8488/OpenAI_Whisper_ASR
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+
4
# Load the "base" Whisper checkpoint once at import time; inference() reads
# this module-level model on every call instead of reloading it.
model = whisper.load_model("base")
5
+
6
+
7
+
8
def inference(audio):
    """Detect the spoken language of a recording and transcribe it.

    Args:
        audio: Path to the audio file to transcribe. May be ``None`` or
            empty when the handler is triggered without a recording.

    Returns:
        A ``(language, text)`` tuple: the upper-cased code of the most
        probable language and the decoded transcription. Both elements
        are empty strings when no audio was supplied.
    """
    # Without this guard an empty input reaches whisper.load_audio and the
    # click handler fails with an opaque error instead of a clean result.
    if not audio:
        return "", ""

    waveform = whisper.load_audio(audio)
    # Fit the waveform to the fixed input length the model works on.
    waveform = whisper.pad_or_trim(waveform)

    # Spectrogram must live on the same device as the model weights.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # probs maps language code -> probability; keep the most likely one.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # fp16 is disabled explicitly -- presumably because the app is expected
    # to run on CPU; confirm before enabling half precision.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    return lang.upper(), result.text
21
+
22
+
23
# Heading and blurb for the demo. Neither name is referenced elsewhere in the
# visible part of app.py; the layout embeds its own header HTML.
title = "Open AI Whisper"

description = (
    "Whisper is a general-purpose speech recognition model. "
    "It is trained on a large dataset of diverse audio and is also a "
    "multi-task model that can perform multilingual speech recognition "
    "as well as speech translation and language identification."
)
26
+
27
# Custom stylesheet handed to gr.Blocks(css=...). Covers the container font,
# black buttons, range inputs, the centered footer and dark-mode overrides.
# Fix: the --tw-ring-shadow declaration used `calc(3px var(...))`, which is
# invalid because calc() requires an operator between its operands; the
# missing `+` is restored so the focus ring is actually rendered.
css = """
.gradio-container {
font-family: 'IBM Plex Sans', sans-serif;
}
.gr-button {
color: white;
border-color: black;
background: black;
}
input[type='range'] {
accent-color: black;
}
.dark input[type='range'] {
accent-color: #dfdfdf;
}
.container {
max-width: 730px;
margin: auto;
padding-top: 1.5rem;
}

.details:hover {
text-decoration: underline;
}
.gr-button {
white-space: nowrap;
}
.gr-button:focus {
border-color: rgb(147 197 253 / var(--tw-border-opacity));
outline: none;
box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
--tw-border-opacity: 1;
--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
--tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
--tw-ring-opacity: .5;
}
.footer {
margin-bottom: 45px;
margin-top: 35px;
text-align: center;
border-bottom: 1px solid #e5e5e5;
}
.footer>p {
font-size: .8rem;
display: inline-block;
padding: 0 10px;
transform: translateY(10px);
background: white;
}
.dark .footer {
border-color: #303030;
}
.dark .footer>p {
background: #0b0f19;
}
.prompt h4{
margin: 1.25em 0 .25em 0;
font-weight: bold;
font-size: 115%;
}
"""
89
+
90
# Build the demo UI: header, audio input + button, two output textboxes,
# footer, then start the app. The custom stylesheet is applied via `css`.
# NOTE(review): the source this was recovered from had its indentation
# flattened, so the nesting of the `with` blocks below is reconstructed --
# confirm against the original app.py before relying on the exact layout.
block = gr.Blocks(css=css)



with block:
    # Static page header: title plus a one-paragraph model description.
    gr.HTML(
        """
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px;">
OpenAI Whisper
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.
</p>
</div>
"""
    )
    with gr.Group():
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # Microphone input; type="filepath" means the handler
                # receives a path string rather than raw samples.
                audio = gr.Audio(
                    label="Input Audio",
                    show_label=False,
                    source="microphone",
                    type="filepath"
                )

                btn = gr.Button("Transcribe")

        # Output fields filled from inference()'s (language, text) tuple.
        lang_str = gr.Textbox(label="language")

        text = gr.Textbox(label="Transcription")




        # Wire the button: one input (audio) -> two outputs, in tuple order.
        btn.click(inference, inputs=[audio], outputs=[lang_str,text])

    # Footer crediting the model source; styled by the `.footer` CSS rules.
    gr.HTML('''
<div class="footer">
<p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
</p>
</div>
''')

# Start the Gradio app when this file is executed.
block.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ git+https://github.com/openai/whisper.git