thak123 committed
Commit 8a1e498
Parent(s): 7fb921e

Update app.py

Files changed (1)
  1. app.py +96 -66
app.py CHANGED
@@ -1,75 +1,105 @@
- from transformers import WhisperTokenizer
- import os
- tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small") #, language="marathi", task="transcribe"
-
- from transformers import pipeline
- import gradio as gr
- import torch
-
- pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"thak123/whisper-small-gom",
-     task="automatic-speech-recognition", tokenizer=tokenizer) # change to "your-username/the-name-you-picked"
-
- # pipe.model.config.forced_decoder_ids = (
- #     pipe.tokenizer.get_decoder_prompt_ids(
- #         language="marathi", task="transcribe"
- #     )
  # )
-
- def transcribe_speech(filepath):
-     output = pipe(
-         filepath,
-         max_new_tokens=256,
-         generate_kwargs={
-             "task": "transcribe",
-             "language": "konkani",
-         },  # update with the language you've fine-tuned on
-         chunk_length_s=30,
-         batch_size=8,
-         padding=True
-     )
-     return output["text"]
-
- demo = gr.Blocks()
-
- mic_transcribe = gr.Interface(
-     fn=transcribe_speech,
-     inputs=gr.Audio(sources="microphone", type="filepath"),
-     outputs=gr.components.Textbox(),
- )
-
- file_transcribe = gr.Interface(
-     fn=transcribe_speech,
-     inputs=gr.Audio(sources="upload", type="filepath"),
-     outputs=gr.components.Textbox(),
- )
- with demo:
-     gr.TabbedInterface(
-         [mic_transcribe, file_transcribe],
-         ["Transcribe Microphone", "Transcribe Audio File"],
-     )
-
- demo.launch(debug=True)
-
- # def transcribe(audio):
- #     # text = pipe(audio)["text"]
- #     # pipe(audio)
- #     text = pipe(audio)
- #     print("op", text)
- #     return text  # pipe(audio) # text
-
- # iface = gr.Interface(
- #     fn=transcribe,
- #     inputs=[gr.Audio(sources=["microphone", "upload"])],
- #     outputs="text",
- #     examples=[
- #         [os.path.join(os.path.dirname("."), "audio/chalyaami.mp3")],
- #         [os.path.join(os.path.dirname("."), "audio/ekdonteen.flac")],
- #         [os.path.join(os.path.dirname("."), "audio/heyatachadjaale.mp3")],
- #     ],
- #     title="Whisper Konkani",
- #     description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
  # )
-
- # iface.launch()
+ # from transformers import WhisperTokenizer
+ # import os
+ # tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small") #, language="marathi", task="transcribe"
+
+ # from transformers import pipeline
+ # import gradio as gr
+ # import torch
+
+ # pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"thak123/whisper-small-gom",
+ #     task="automatic-speech-recognition", tokenizer=tokenizer) # change to "your-username/the-name-you-picked"
+
+ # # pipe.model.config.forced_decoder_ids = (
+ # #     pipe.tokenizer.get_decoder_prompt_ids(
+ # #         language="marathi", task="transcribe"
+ # #     )
+ # # )
+
+ # def transcribe_speech(filepath):
+ #     output = pipe(
+ #         filepath,
+ #         max_new_tokens=256,
+ #         generate_kwargs={
+ #             "task": "transcribe",
+ #             "language": "konkani",
+ #         },  # update with the language you've fine-tuned on
+ #         chunk_length_s=30,
+ #         batch_size=8,
+ #         padding=True
  # )
+ #     return output["text"]
+
+ # demo = gr.Blocks()
+
+ # mic_transcribe = gr.Interface(
+ #     fn=transcribe_speech,
+ #     inputs=gr.Audio(sources="microphone", type="filepath"),
+ #     outputs=gr.components.Textbox(),
  # )
+
+ # file_transcribe = gr.Interface(
+ #     fn=transcribe_speech,
+ #     inputs=gr.Audio(sources="upload", type="filepath"),
+ #     outputs=gr.components.Textbox(),
+ # )
+ # with demo:
+ #     gr.TabbedInterface(
+ #         [mic_transcribe, file_transcribe],
+ #         ["Transcribe Microphone", "Transcribe Audio File"],
+ #     )
+
+ # demo.launch(debug=True)
+
+ # # def transcribe(audio):
+ # #     # text = pipe(audio)["text"]
+ # #     # pipe(audio)
+ # #     text = pipe(audio)
+ # #     print("op", text)
+ # #     return text  # pipe(audio) # text
+
+ # # iface = gr.Interface(
+ # #     fn=transcribe,
+ # #     inputs=[gr.Audio(sources=["microphone", "upload"])],
+ # #     outputs="text",
+ # #     examples=[
+ # #         [os.path.join(os.path.dirname("."), "audio/chalyaami.mp3")],
+ # #         [os.path.join(os.path.dirname("."), "audio/ekdonteen.flac")],
+ # #         [os.path.join(os.path.dirname("."), "audio/heyatachadjaale.mp3")],
+ # #     ],
+ # #     title="Whisper Konkani",
+ # #     description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
+ # # )
+
+ # # iface.launch()
+
+
+ from transformers import WhisperTokenizer, pipeline
+ import gradio as gr
+ import os
+
+ tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="marathi", task="transcribe")
+
+ pipe = pipeline(model="thak123/gom-stt-v3", task="automatic-speech-recognition", tokenizer=tokenizer)
+
+ def transcribe(audio):
+     result = pipe(audio)
+     # The ASR pipeline returns a dict for a single input, so index by key,
+     # not position.
+     text = result["text"]
+     print("op", text)
+     return text
+
+ iface = gr.Interface(
+     fn=transcribe,
+     # type="filepath" hands the callback a path the pipeline can load directly.
+     inputs=[gr.Audio(sources=["microphone", "upload"], type="filepath")],
+     outputs="text",
+     examples=[
+         [os.path.join(os.path.dirname("."), "audio/chalyaami.mp3")],
+         [os.path.join(os.path.dirname("."), "audio/ekdonteen.flac")],
+         [os.path.join(os.path.dirname("."), "audio/heyatachadjaale.mp3")],
+     ],
+     title="Whisper Konkani",
+     description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
+ )
+
+ iface.launch()
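
For quick verification, the new pipeline can be exercised outside Gradio. The sketch below is illustrative only, not part of the commit: it assumes the Space's bundled example file audio/ekdonteen.flac is available locally and that transformers, torch, and ffmpeg are installed.

# Minimal smoke test of the updated pipeline (sketch, assumes local example audio).
from transformers import WhisperTokenizer, pipeline

tokenizer = WhisperTokenizer.from_pretrained(
    "openai/whisper-small", language="marathi", task="transcribe"
)
pipe = pipeline(
    model="thak123/gom-stt-v3",
    task="automatic-speech-recognition",
    tokenizer=tokenizer,
)

# A single filepath input yields {"text": ...}; this mirrors what the Gradio
# callback receives with type="filepath".
result = pipe("audio/ekdonteen.flac")
print(result["text"])

Passing a filepath keeps the input in a form the transformers ASR pipeline accepts directly; the raw (sample_rate, array) tuple produced by Gradio's default numpy mode would not be.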