drclab committed on
Commit
e3dc46b
β€’
1 Parent(s): faf1b1b
Files changed (1) hide show
  1. app.py +75 -1
app.py CHANGED
@@ -1,3 +1,77 @@
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- gr.Interface.load("models/facebook/fastspeech2-en-ljspeech").launch()
 
 
 
1
+ # import gradio as gr
2
+
3
+ # gr.Interface.load("models/facebook/fastspeech2-en-ljspeech").launch()
4
+
5
+ # import gradio as gr
6
+
7
+ # gr.Interface.load("models/openai/whisper-large-v2").launch()
8
+
9
  import gradio as gr
10
+ import torch.cuda
11
+ import whisper
12
+ from whisper.tokenizer import LANGUAGES
13
+
14
# Whether a CUDA device is available; forwarded to Whisper's fp16 flag so
# half-precision inference is only requested on GPU.
gpu = torch.cuda.is_available()
# Loaded Whisper model; assigned by interface() before the UI is launched.
model = None

# HTML badge row (GitHub / Colab / Hugging Face Spaces links) rendered in the
# Gradio interface description.
DESCRIPTION = """
<div style="display:flex; gap: 1em; justify-content: center; align-items: center;">
<a target="_blank" href="https://github.com/dsymbol">
<img alt="GitHub" src="https://img.shields.io/github/followers/dsymbol?style=social">
</a>
<a target="_blank" href="https://colab.research.google.com/#fileId=https://huggingface.co/spaces/dsymbol/whisper-webui/blob/main/notebook.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
<a target="_blank" href="https://huggingface.co/spaces/dsymbol/whisper-webui" rel="noopener noreferrer"><img
src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue" alt="Hugging Face Spaces">
</a>
</div>
"""
30
+
31
+
32
def transcribe(recording, file, language, task):
    """Transcribe or translate one audio input with the global Whisper model.

    Exactly one of *recording* (microphone) or *file* (upload) must be set;
    otherwise a short instruction message is returned instead of raising.

    Args:
        recording: Filepath of the microphone capture, or falsy if unused.
        file: Filepath of the uploaded audio, or falsy if unused.
        language: Source language name, or "Detect" for auto-detection.
        task: "Transcribe" or "Translate" (lower-cased before passing on).

    Returns:
        The stripped transcription text, or a validation message.
    """
    # Guard clauses: reject ambiguous or empty input before touching the model.
    if recording and file:
        return "Please only use one field."
    if not (recording or file):
        return "Please use one field."

    audio_path = file or recording
    # Whisper expects None (not a sentinel string) for auto language detection.
    chosen_language = None if language == "Detect" else language
    result = model.transcribe(
        audio_path, task=task.lower(), language=chosen_language, fp16=gpu
    )
    return result["text"].strip()
44
+
45
+
46
def interface(model_name="small"):
    """Load a Whisper checkpoint and build the Gradio UI around it.

    Args:
        model_name: Whisper model size to load (default "small").

    Returns:
        A configured, un-launched ``gr.Interface``.
    """
    # The transcribe() callback reads the model from module scope, so the
    # loaded checkpoint is stored globally rather than passed around.
    global model
    model = whisper.load_model(model_name)

    language_choices = ["Detect"] + sorted(name.title() for name in LANGUAGES.values())

    mic_input = gr.Audio(label="Record", source="microphone", type="filepath")
    upload_input = gr.Audio(label="Upload", source="upload", type="filepath")
    language_input = gr.Dropdown(
        label="Language",
        choices=language_choices,
        value="Detect",
    )
    task_input = gr.Dropdown(
        label="Task",
        choices=["Transcribe", "Translate"],
        value="Transcribe",
        info="Whether to perform X->X speech recognition or X->English translation",
    )

    return gr.Interface(
        fn=transcribe,
        inputs=[mic_input, upload_input, language_input, task_input],
        outputs=gr.Textbox(label="Transcription", lines=26),
        theme=gr.themes.Default(),
        title="Whisper: Transcribe Audio",
        description=DESCRIPTION,
        allow_flagging="never",
    )
73
+
74
 
75
if __name__ == "__main__":
    # Build the UI, enable request queuing, and serve with debug logging.
    app = interface()
    app.queue().launch(debug=True)