Spaces: Running on Zero
add stability ts
app.py CHANGED
@@ -1,7 +1,6 @@
 import os
 import time
 import tempfile
-from copy import deepcopy
 from math import floor
 from typing import Optional, List, Dict, Any
 
@@ -162,11 +161,11 @@ def get_prediction(inputs, prompt: Optional[str], punctuate_text: bool = True, s
     generate_kwargs = {"language": "japanese", "task": "transcribe"}
     if prompt:
         generate_kwargs['prompt_ids'] = pipe.tokenizer.get_prompt_ids(prompt, return_tensors='pt').to(device)
-
+    array = inputs["array"]
+    sr = inputs["sampling_rate"]
+    prediction = pipe(inputs, return_timestamps=True, generate_kwargs=generate_kwargs)
     if stabilize_timestamp:
-        prediction['chunks'] = fix_timestamp(pipeline_output=prediction['chunks'],
-                                             audio=inputs["array"],
-                                             sample_rate=inputs["sampling_rate"])
+        prediction['chunks'] = fix_timestamp(pipeline_output=prediction['chunks'], audio=array, sample_rate=sr)
     if punctuate_text:
         prediction['chunks'] = PUNCTUATOR.punctuate(prediction['chunks'])
     text = "".join([c['text'] for c in prediction['chunks']])
@@ -176,9 +175,11 @@ def get_prediction(inputs, prompt: Optional[str], punctuate_text: bool = True, s
     return text, text_timestamped
 
 
-def transcribe(inputs, prompt, punctuate_text, stabilize_timestamp):
+def transcribe(inputs: str, prompt, punctuate_text, stabilize_timestamp):
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    with open(inputs, "rb") as f:
+        inputs = f.read()
     inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
     inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
     return get_prediction(inputs, prompt, punctuate_text, stabilize_timestamp)
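For context, a minimal sketch of how the updated transcribe() signature could be wired into the Space's UI. Since transcribe() now opens the path it receives and reads the raw bytes itself, it pairs with a Gradio Audio input configured with type="filepath". This is an assumption for illustration only: the component labels, defaults, and layout below are not taken from the actual app.py.

```python
# Illustrative wiring only: assumes transcribe() from app.py above and a
# filepath-style Gradio Audio input (the component choices are hypothetical).
import gradio as gr

demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(type="filepath", label="Audio"),   # passes a file path string to transcribe()
        gr.Textbox(label="Prompt (optional)"),
        gr.Checkbox(value=True, label="Punctuate text"),
        gr.Checkbox(value=True, label="Stabilize timestamps"),
    ],
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Transcription with timestamps"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```

With type="filepath", Gradio hands the function a path on disk rather than decoded audio, which matches the new open(inputs, "rb") / ffmpeg_read() flow in the diff.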