edyrkaj committed
Commit 4df5ada
1 Parent(s): 697f01c

Create app.py

Files changed (1)
  1. app.py +92 -0
app.py ADDED
@@ -0,0 +1,92 @@
+ import torch
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+ from datasets import load_dataset
+
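+ # Run inference on the GPU in float16 when available; otherwise fall back to CPU/float32.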
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+
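+ # Load Whisper large-v3-turbo plus its processor (tokenizer + feature extractor).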
+ model_id = "openai/whisper-large-v3-turbo"
+
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
+     model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
+ )
+ model.to(device)
+
+ processor = AutoProcessor.from_pretrained(model_id)
+
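+ # Wire the model and processor into a ready-to-call ASR pipeline.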
+ pipe = pipeline(
+     "automatic-speech-recognition",
+     model=model,
+     tokenizer=processor.tokenizer,
+     feature_extractor=processor.feature_extractor,
+     torch_dtype=torch_dtype,
+     device=device,
+ )
+
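+ # Optional smoke test on a long-form LibriSpeech sample (left commented out):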
+ # dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
+ # sample = dataset[0]["audio"]
+ # result = pipe(sample)
+ # transcript = result["text"]
+
+ import os
+ import gradio as gr
+
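+ # Gradio callback: receive the recorded audio as a file path and return the transcript.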
+ def launch(audio_path):
+     # The ASR pipeline returns a dict with the transcription under "text".
+     result = pipe(audio_path)
+     transcript = result["text"]
+
+     # Optional English -> Albanian translation step; translate() is not defined in this file.
+     # context_str = result["text"]
+     # translate_str = translate(context_str, 'en', 'sq')
+     return transcript
+
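+ # Microphone in, text out. Note: source= is the gradio 3.x Audio API; gradio 4 renamed it to sources=[...].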
+ iface = gr.Interface(launch,
+                      inputs=gr.Audio(label="Audio", source="microphone", type="filepath", elem_id='audio'),
+                      outputs="text")
+
+ iface.launch(share=True)
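+ # launch() blocks the main thread when run as a script, so close() below only runs after the server stops.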
+ # iface.launch(share=True,
+ #              server_port=int(os.environ['PORT1']))
+
+ iface.close()
+
+
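+ # Alternative UI (commented out): a Blocks app whose Speak/Stop button triggers the
+ # hidden audio recorder via a JS click, then transcribes on success.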
+ # def click_js():
+ #     return """function audioRecord() {
+ #         var xPathRes = document.evaluate ('//*[@id="audio"]//button', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
+ #         xPathRes.singleNodeValue.click();}"""
+
+
+ # def action(btn):
+ #     """Changes button text on click"""
+ #     if btn == 'Speak': return 'Stop'
+ #     else: return 'Speak'
+
+
+ # def check_btn(btn):
+ #     """Checks for correct button text before invoking transcribe()"""
+ #     if btn != 'Speak': raise Exception('Recording...')
+
+
+ # def transcribe():
+ #     return 'Success'
+
+
+ # with gr.Blocks() as demo:
+ #     msg = gr.Textbox()
+ #     audio_box = gr.Audio(label="Audio", source="microphone", type="filepath", elem_id='audio')
+
+ #     with gr.Row():
+ #         audio_btn = gr.Button('Speak')
+ #         clear = gr.Button("Clear")
+
+ #     audio_btn.click(fn=action, inputs=audio_btn, outputs=audio_btn).\
+ #         then(fn=lambda: None, _js=click_js()).\
+ #         then(fn=check_btn, inputs=audio_btn).\
+ #         success(fn=transcribe, outputs=msg)
+
+ #     clear.click(lambda: None, None, msg, queue=False)
+
+ # demo.queue().launch(debug=True)
+
+ # print(result["text"])