jhj0517 committed on
Commit
dcaff95
·
unverified ·
2 Parent(s): 1599c90 f314f4c

Merge pull request #141 from jhj0517/feature/add-parameters

Browse files
Files changed (2) hide show
  1. app.py +15 -3
  2. modules/whisper_data_class.py +23 -0
app.py CHANGED
@@ -65,6 +65,8 @@ class App:
65
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
66
  nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
67
  nb_patience = gr.Number(label="Patience", value=1, interactive=True)
 
 
68
  with gr.Row():
69
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
70
  with gr.Row():
@@ -81,7 +83,9 @@ class App:
81
  no_speech_threshold=nb_no_speech_threshold,
82
  compute_type=dd_compute_type,
83
  best_of=nb_best_of,
84
- patience=nb_patience)
 
 
85
  btn_run.click(fn=self.whisper_inf.transcribe_file,
86
  inputs=params + whisper_params.to_list(),
87
  outputs=[tb_indicator, files_subtitles])
@@ -115,6 +119,8 @@ class App:
115
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
116
  nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
117
  nb_patience = gr.Number(label="Patience", value=1, interactive=True)
 
 
118
  with gr.Row():
119
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
120
  with gr.Row():
@@ -131,7 +137,9 @@ class App:
131
  no_speech_threshold=nb_no_speech_threshold,
132
  compute_type=dd_compute_type,
133
  best_of=nb_best_of,
134
- patience=nb_patience)
 
 
135
  btn_run.click(fn=self.whisper_inf.transcribe_youtube,
136
  inputs=params + whisper_params.to_list(),
137
  outputs=[tb_indicator, files_subtitles])
@@ -158,6 +166,8 @@ class App:
158
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
159
  nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
160
  nb_patience = gr.Number(label="Patience", value=1, interactive=True)
 
 
161
  with gr.Row():
162
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
163
  with gr.Row():
@@ -174,7 +184,9 @@ class App:
174
  no_speech_threshold=nb_no_speech_threshold,
175
  compute_type=dd_compute_type,
176
  best_of=nb_best_of,
177
- patience=nb_patience)
 
 
178
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
179
  inputs=params + whisper_params.to_list(),
180
  outputs=[tb_indicator, files_subtitles])
 
65
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
66
  nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
67
  nb_patience = gr.Number(label="Patience", value=1, interactive=True)
68
+ cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
69
+ tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
70
  with gr.Row():
71
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
72
  with gr.Row():
 
83
  no_speech_threshold=nb_no_speech_threshold,
84
  compute_type=dd_compute_type,
85
  best_of=nb_best_of,
86
+ patience=nb_patience,
87
+ condition_on_previous_text=cb_condition_on_previous_text,
88
+ initial_prompt=tb_initial_prompt)
89
  btn_run.click(fn=self.whisper_inf.transcribe_file,
90
  inputs=params + whisper_params.to_list(),
91
  outputs=[tb_indicator, files_subtitles])
 
119
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
120
  nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
121
  nb_patience = gr.Number(label="Patience", value=1, interactive=True)
122
+ cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
123
+ tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
124
  with gr.Row():
125
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
126
  with gr.Row():
 
137
  no_speech_threshold=nb_no_speech_threshold,
138
  compute_type=dd_compute_type,
139
  best_of=nb_best_of,
140
+ patience=nb_patience,
141
+ condition_on_previous_text=cb_condition_on_previous_text,
142
+ initial_prompt=tb_initial_prompt)
143
  btn_run.click(fn=self.whisper_inf.transcribe_youtube,
144
  inputs=params + whisper_params.to_list(),
145
  outputs=[tb_indicator, files_subtitles])
 
166
  dd_compute_type = gr.Dropdown(label="Compute Type", choices=self.whisper_inf.available_compute_types, value=self.whisper_inf.current_compute_type, interactive=True)
167
  nb_best_of = gr.Number(label="Best Of", value=5, interactive=True)
168
  nb_patience = gr.Number(label="Patience", value=1, interactive=True)
169
+ cb_condition_on_previous_text = gr.Checkbox(label="Condition On Previous Text", value=True, interactive=True)
170
+ tb_initial_prompt = gr.Textbox(label="Initial Prompt", value=None, interactive=True)
171
  with gr.Row():
172
  btn_run = gr.Button("GENERATE SUBTITLE FILE", variant="primary")
173
  with gr.Row():
 
184
  no_speech_threshold=nb_no_speech_threshold,
185
  compute_type=dd_compute_type,
186
  best_of=nb_best_of,
187
+ patience=nb_patience,
188
+ condition_on_previous_text=cb_condition_on_previous_text,
189
+ initial_prompt=tb_initial_prompt)
190
  btn_run.click(fn=self.whisper_inf.transcribe_mic,
191
  inputs=params + whisper_params.to_list(),
192
  outputs=[tb_indicator, files_subtitles])
modules/whisper_data_class.py CHANGED
@@ -1,5 +1,6 @@
1
  from dataclasses import dataclass, fields
2
  import gradio as gr
 
3
 
4
 
5
  @dataclass
@@ -13,6 +14,8 @@ class WhisperGradioComponents:
13
  compute_type: gr.Dropdown
14
  best_of: gr.Number
15
  patience: gr.Number
 
 
16
  """
17
  A data class to pass Gradio components to the function before Gradio pre-processing.
18
  See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
@@ -21,26 +24,44 @@ class WhisperGradioComponents:
21
  ----------
22
  model_size: gr.Dropdown
23
  Whisper model size.
 
24
  lang: gr.Dropdown
25
  Source language of the file to transcribe.
 
26
  is_translate: gr.Checkbox
27
  Boolean value that determines whether to translate to English.
28
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
 
29
  beam_size: gr.Number
30
  Int value that is used for decoding option.
 
31
  log_prob_threshold: gr.Number
32
  If the average log probability over sampled tokens is below this value, treat as failed.
 
33
  no_speech_threshold: gr.Number
34
  If the no_speech probability is higher than this value AND
35
  the average log probability over sampled tokens is below `log_prob_threshold`,
36
  consider the segment as silent.
 
37
  compute_type: gr.Dropdown
38
  compute type for transcription.
39
  see more info : https://opennmt.net/CTranslate2/quantization.html
 
40
  best_of: gr.Number
41
  Number of candidates when sampling with non-zero temperature.
 
42
  patience: gr.Number
43
  Beam search patience factor.
 
 
 
 
 
 
 
 
 
 
44
  """
45
 
46
  def to_list(self) -> list:
@@ -66,6 +87,8 @@ class WhisperValues:
66
  compute_type: str
67
  best_of: int
68
  patience: float
 
 
69
  """
70
  A data class to use Whisper parameters in your function after Gradio pre-processing.
71
  See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
 
1
  from dataclasses import dataclass, fields
2
  import gradio as gr
3
+ from typing import Optional
4
 
5
 
6
  @dataclass
 
14
  compute_type: gr.Dropdown
15
  best_of: gr.Number
16
  patience: gr.Number
17
+ condition_on_previous_text: gr.Checkbox
18
+ initial_prompt: gr.Textbox
19
  """
20
  A data class to pass Gradio components to the function before Gradio pre-processing.
21
  See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components
 
24
  ----------
25
  model_size: gr.Dropdown
26
  Whisper model size.
27
+
28
  lang: gr.Dropdown
29
  Source language of the file to transcribe.
30
+
31
  is_translate: gr.Checkbox
32
  Boolean value that determines whether to translate to English.
33
  It's Whisper's feature to translate speech from another language directly into English end-to-end.
34
+
35
  beam_size: gr.Number
36
  Int value that is used for decoding option.
37
+
38
  log_prob_threshold: gr.Number
39
  If the average log probability over sampled tokens is below this value, treat as failed.
40
+
41
  no_speech_threshold: gr.Number
42
  If the no_speech probability is higher than this value AND
43
  the average log probability over sampled tokens is below `log_prob_threshold`,
44
  consider the segment as silent.
45
+
46
  compute_type: gr.Dropdown
47
  compute type for transcription.
48
  see more info : https://opennmt.net/CTranslate2/quantization.html
49
+
50
  best_of: gr.Number
51
  Number of candidates when sampling with non-zero temperature.
52
+
53
  patience: gr.Number
54
  Beam search patience factor.
55
+
56
+ condition_on_previous_text: gr.Checkbox
57
+ if True, the previous output of the model is provided as a prompt for the next window;
58
+ disabling may make the text inconsistent across windows, but the model becomes less prone to
59
+ getting stuck in a failure loop, such as repetition looping or timestamps going out of sync.
60
+
61
+ initial_prompt: gr.Textbox
62
+ Optional text to provide as a prompt for the first window. This can be used to provide, or
63
+ "prompt-engineer" a context for transcription, e.g. custom vocabularies or proper nouns
64
+ to make it more likely to predict those words correctly.
65
  """
66
 
67
  def to_list(self) -> list:
 
87
  compute_type: str
88
  best_of: int
89
  patience: float
90
+ condition_on_previous_text: bool
91
+ initial_prompt: Optional[str]
92
  """
93
  A data class to use Whisper parameters in your function after Gradio pre-processing.
94
  See this documentation for more information about Gradio pre-processing: https://www.gradio.app/docs/components