csukuangfj
committed on
Commit
•
cfd7673
1
Parent(s):
16e9291
output all texts
Browse files
- .gitattributes +1 -0
- app.py +15 -5
- decode.py +12 -1
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -106,8 +106,8 @@ def process_uploaded_video_file(
|
|
106 |
|
107 |
logging.info(f"Processing uploaded file: {in_filename}")
|
108 |
|
109 |
-
ans = process(language, repo_id, add_punctuation, in_filename)
|
110 |
-
return (in_filename, ans[0]), ans[0], ans[1], ans[2]
|
111 |
|
112 |
|
113 |
def process_uploaded_audio_file(
|
@@ -142,8 +142,9 @@ def process(language: str, repo_id: str, add_punctuation: str, in_filename: str)
|
|
142 |
else:
|
143 |
punct = None
|
144 |
|
145 |
-
result = decode(recognizer, vad, punct, in_filename)
|
146 |
logging.info(result)
|
|
|
147 |
|
148 |
srt_filename = Path(in_filename).with_suffix(".srt")
|
149 |
with open(srt_filename, "w", encoding="utf-8") as f:
|
@@ -156,6 +157,7 @@ def process(language: str, repo_id: str, add_punctuation: str, in_filename: str)
|
|
156 |
str(srt_filename),
|
157 |
build_html_output("Done! Please download the SRT file", "result_item_success"),
|
158 |
result,
|
|
|
159 |
)
|
160 |
|
161 |
|
@@ -205,7 +207,10 @@ with demo:
|
|
205 |
|
206 |
output_info_video = gr.HTML(label="Info")
|
207 |
output_textbox_video = gr.Textbox(
|
208 |
-
label="Recognized speech from uploaded video file"
|
|
|
|
|
|
|
209 |
)
|
210 |
|
211 |
with gr.TabItem("Upload audio from disk"):
|
@@ -222,7 +227,10 @@ with demo:
|
|
222 |
|
223 |
output_info_audio = gr.HTML(label="Info")
|
224 |
output_textbox_audio = gr.Textbox(
|
225 |
-
label="Recognized speech from uploaded audio file"
|
|
|
|
|
|
|
226 |
)
|
227 |
|
228 |
upload_video_button.click(
|
@@ -238,6 +246,7 @@ with demo:
|
|
238 |
output_srt_file_video,
|
239 |
output_info_video,
|
240 |
output_textbox_video,
|
|
|
241 |
],
|
242 |
)
|
243 |
|
@@ -253,6 +262,7 @@ with demo:
|
|
253 |
output_srt_file_audio,
|
254 |
output_info_audio,
|
255 |
output_textbox_audio,
|
|
|
256 |
],
|
257 |
)
|
258 |
|
|
|
106 |
|
107 |
logging.info(f"Processing uploaded file: {in_filename}")
|
108 |
|
109 |
+
ans, all_text = process(language, repo_id, add_punctuation, in_filename)
|
110 |
+
return (in_filename, ans[0]), ans[0], ans[1], ans[2], all_text
|
111 |
|
112 |
|
113 |
def process_uploaded_audio_file(
|
|
|
142 |
else:
|
143 |
punct = None
|
144 |
|
145 |
+
result, all_text = decode(recognizer, vad, punct, in_filename)
|
146 |
logging.info(result)
|
147 |
+
logging.info(all_text)
|
148 |
|
149 |
srt_filename = Path(in_filename).with_suffix(".srt")
|
150 |
with open(srt_filename, "w", encoding="utf-8") as f:
|
|
|
157 |
str(srt_filename),
|
158 |
build_html_output("Done! Please download the SRT file", "result_item_success"),
|
159 |
result,
|
160 |
+
all_text,
|
161 |
)
|
162 |
|
163 |
|
|
|
207 |
|
208 |
output_info_video = gr.HTML(label="Info")
|
209 |
output_textbox_video = gr.Textbox(
|
210 |
+
label="Recognized speech from uploaded video file (srt format)"
|
211 |
+
)
|
212 |
+
all_output_textbox_video = gr.Textbox(
|
213 |
+
label="Recognized speech from uploaded video file (all in one)"
|
214 |
)
|
215 |
|
216 |
with gr.TabItem("Upload audio from disk"):
|
|
|
227 |
|
228 |
output_info_audio = gr.HTML(label="Info")
|
229 |
output_textbox_audio = gr.Textbox(
|
230 |
+
label="Recognized speech from uploaded audio file (srt format)"
|
231 |
+
)
|
232 |
+
all_output_textbox_audio = gr.Textbox(
|
233 |
+
label="Recognized speech from uploaded audio file (all in one)"
|
234 |
)
|
235 |
|
236 |
upload_video_button.click(
|
|
|
246 |
output_srt_file_video,
|
247 |
output_info_video,
|
248 |
output_textbox_video,
|
249 |
+
all_output_textbox_video,
|
250 |
],
|
251 |
)
|
252 |
|
|
|
262 |
output_srt_file_audio,
|
263 |
output_info_audio,
|
264 |
output_textbox_audio,
|
265 |
+
all_output_textbox_audio,
|
266 |
],
|
267 |
)
|
268 |
|
decode.py
CHANGED
@@ -81,6 +81,8 @@ def decode(
|
|
81 |
|
82 |
logging.info("Started!")
|
83 |
|
|
|
|
|
84 |
while True:
|
85 |
# *2 because int16_t has two bytes
|
86 |
data = process.stdout.read(frames_per_read * 2)
|
@@ -116,8 +118,17 @@ def decode(
|
|
116 |
|
117 |
for seg, stream in zip(segments, streams):
|
118 |
seg.text = stream.result.text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
if punct is not None:
|
120 |
seg.text = punct.add_punctuation(seg.text)
|
121 |
segment_list.append(seg)
|
|
|
|
|
|
|
122 |
|
123 |
-
return "\n\n".join(f"{i}\n{seg}" for i, seg in enumerate(segment_list, 1))
|
|
|
81 |
|
82 |
logging.info("Started!")
|
83 |
|
84 |
+
all_text = []
|
85 |
+
|
86 |
while True:
|
87 |
# *2 because int16_t has two bytes
|
88 |
data = process.stdout.read(frames_per_read * 2)
|
|
|
118 |
|
119 |
for seg, stream in zip(segments, streams):
|
120 |
seg.text = stream.result.text.strip()
|
121 |
+
if not all_text:
|
122 |
+
all_text.append(seg.text)
|
123 |
+
elif len(all_text[-1][0].encode()) == 1 and len(seg.text[0].encode()) == 1:
|
124 |
+
all_text.append(" ")
|
125 |
+
all_text.append(seg.text)
|
126 |
+
|
127 |
if punct is not None:
|
128 |
seg.text = punct.add_punctuation(seg.text)
|
129 |
segment_list.append(seg)
|
130 |
+
all_text = " ".join(all_text)
|
131 |
+
if punct is not None:
|
132 |
+
all_text = punct.add_punctuation(all_text)
|
133 |
|
134 |
+
return "\n\n".join(f"{i}\n{seg}" for i, seg in enumerate(segment_list, 1)), all_text
|