LittleLirow committed on
Commit
600bbf7
1 Parent(s): c36acea

Add subtitles file

Browse files
Files changed (1) hide show
  1. subtitles.py +48 -0
subtitles.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from rev_ai import apiclient
4
+
5
def timestamp2frame(ts, fps=10):
    """Convert a split SRT timestamp into a frame index, returned as a string.

    Args:
        ts: Sequence ``[hours, minutes, seconds, milliseconds]``. Every item
            is passed through ``int``, so numeric strings such as ``"05"``
            are accepted. The last field is treated as milliseconds.
        fps: Frames per second used for the conversion.

    Returns:
        The frame index, rounded down to a whole frame, as a decimal string.
    """
    parts = [int(x) for x in ts]
    # Accumulate whole milliseconds first: with an integer ``fps`` the result
    # is then exact, whereas ``(seconds + ms / 1000) * fps`` in floating point
    # can land just below the true value and truncate one frame too low
    # (e.g. 4.350 s at 100 fps -> 434 instead of 435).
    total_ms = (parts[0] * 3600 + parts[1] * 60 + parts[2]) * 1000 + parts[3]
    return str(int(total_ms * fps // 1000))
9
+
10
def _srt_to_deforum(lines, fps):
    """Fold SRT lines into a single prompt string keyed by frame number.

    Args:
        lines: Iterable of SRT text lines (an open text file works).
        fps: Frames per second forwarded to ``timestamp2frame``.

    Returns:
        ``(prompt, last_frame)`` where ``prompt`` looks like
        ``"0: first text | 25: second text "`` and ``last_frame`` is the
        frame number of the last timestamp that was emitted as a key.

    Raises:
        ValueError: If no SRT timestamp line was found in ``lines``.
    """
    timestamp_re = re.compile(r"(\d+:\d+:\d+,\d+)")
    # Text lines: start with a letter and contain no ">" (which excludes the
    # "start --> end" timing lines; cue index lines start with a digit).
    string_re = re.compile(r"^[a-zA-Z][^>]+$")

    pieces = []
    last_frame = None
    for line in lines:
        timestamps = timestamp_re.findall(line)
        if timestamps:
            # Only the first timestamp on the line (the cue start) is used.
            frame = timestamp2frame(re.split(":|,", timestamps[0]), fps)
            pieces.append(frame + ": ")
            # Remember the frame itself instead of re-parsing the finished
            # prompt, where digits inside subtitle text would be mistaken
            # for the last frame number.
            last_frame = int(frame)
        if string_re.search(line):
            pieces.append(line)
            pieces.append(" | ")

    if last_frame is None:
        raise ValueError("no SRT timestamps found in transcript")

    deforum_str = "".join(pieces).replace("\n", "").strip()
    # A "| " directly followed by a letter separates two text lines of the
    # same cue; drop it so the lines merge into one prompt entry.
    deforum_str = re.sub(r"\|\s+(?=[a-zA-Z])", r"", deforum_str)
    # Remove the dangling separator left after the final text line.
    if deforum_str.endswith("|"):
        deforum_str = deforum_str[:-1]
    return deforum_str, last_frame


def audio2subtitle(rev_ai_token, fps=10):
    """Transcribe ``audio_out.mp3`` with Rev AI and build a frame-keyed prompt.

    Submits ``audio_out.mp3`` as a transcription job, waits for it to finish,
    writes the captions to ``audio_out.srt`` and converts that file into a
    ``"frame: text | frame: text "`` string.

    Args:
        rev_ai_token: Access token passed to ``apiclient.RevAiAPIClient``.
        fps: Frames per second used to turn cue start times into frames.

    Returns:
        ``(deforum_str, last_timestamp)``: the prompt string and, as an int,
        the frame number of the last cue start that was emitted.

    Raises:
        RuntimeError: If the job status becomes ``JobStatus.FAILED``.
        ValueError: If the captions contain no SRT timestamp.
    """
    import time  # local import keeps this block self-contained

    speech_file_path = "audio_out.mp3"
    client = apiclient.RevAiAPIClient(rev_ai_token)
    job = client.submit_job_local_file(speech_file_path)

    while True:
        job_details = client.get_job_details(job.id)
        status = str(job_details.status)
        if status == "JobStatus.TRANSCRIBED":
            break
        if status == "JobStatus.FAILED":
            # Without this check a failed job would be polled forever.
            detail = getattr(job_details, "failure_detail", None)
            raise RuntimeError(
                "Rev AI transcription job {} failed: {}".format(job.id, detail)
            )
        # Pause between polls instead of hammering the API in a tight loop.
        time.sleep(2)
    transcript_srt = client.get_captions(job.id)

    with open("audio_out.srt", "w", encoding="utf-8") as f:
        f.write(transcript_srt)

    with open("audio_out.srt", "r", encoding="utf-8") as f:
        return _srt_to_deforum(f, fps)