Spaces:
Sleeping
Sleeping
File size: 1,552 Bytes
600bbf7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import os
import re
from rev_ai import apiclient
def timestamp2frame(ts, fps=10):
    """Convert a split SRT timestamp into a frame index, returned as a string.

    Args:
        ts: Sequence of at least four int-convertible items in the order
            hours, minutes, seconds, milliseconds (e.g. the result of
            splitting "00:01:02,500" on ":" and ",").
        fps: Frames per second used to scale the timestamp.

    Returns:
        The frame index, truncated towards zero, as a decimal string.
    """
    parts = [int(x) for x in ts]
    # Accumulate in integer milliseconds and divide only once at the end.
    # Building fractional seconds first (ms / 1000) and then multiplying by
    # fps lets binary floating-point error leak into the truncation, e.g.
    # 0.58 * 100 == 57.99999999999999, which would yield frame 57, not 58.
    total_ms = ((parts[0] * 60 + parts[1]) * 60 + parts[2]) * 1000 + parts[3]
    return str(int(total_ms * fps / 1000))
def audio2subtitle(rev_ai_token, fps=10, poll_interval=5):
    """Transcribe "audio_out.mp3" with Rev AI and build a Deforum prompt string.

    The audio file is submitted as a Rev AI job, the finished job's captions
    are fetched and saved to "audio_out.srt", and each caption is turned into
    a "<frame>: <text>" entry, with entries separated by " | ".

    Args:
        rev_ai_token: Access token passed to ``RevAiAPIClient``.
        fps: Frames per second used to convert caption start times to frames.
        poll_interval: Seconds to wait between job status checks.

    Returns:
        A tuple ``(deforum_str, last_frame)`` where ``deforum_str`` is the
        prompt schedule and ``last_frame`` is the start frame of the last
        caption (0 if the transcript contains no captions).

    Raises:
        RuntimeError: If the transcription job ends in the FAILED state.
    """
    import time  # local import: only needed for the polling delay

    speech_file_path = "audio_out.mp3"
    client = apiclient.RevAiAPIClient(rev_ai_token)
    job = client.submit_job_local_file(speech_file_path)

    # Poll until the job is done. Sleep between requests instead of hammering
    # the API, and stop on failure rather than looping forever.
    while True:
        job_details = client.get_job_details(job.id)
        status = str(job_details.status)
        if status == "JobStatus.TRANSCRIBED":
            break
        if status == "JobStatus.FAILED":
            detail = getattr(job_details, "failure_detail", None)
            raise RuntimeError(
                f"Rev AI transcription job {job.id} failed: {detail}"
            )
        time.sleep(poll_interval)

    transcript_srt = client.get_captions(job.id)
    # Keep the on-disk copy of the captions; explicit UTF-8 so non-ASCII
    # transcripts do not depend on the platform's locale encoding.
    with open("audio_out.srt", "w", encoding="utf-8") as f:
        f.write(transcript_srt)

    timestamp_re = re.compile(r"(\d+:\d+:\d+,\d+)")
    # Caption text lines: start with a letter and contain no ">" (which
    # excludes the "start --> end" timing lines).
    string_re = re.compile(r"^[a-zA-Z][^>]+$")

    pieces = []
    last_frame = 0
    for line in transcript_srt.splitlines():
        timestamps = timestamp_re.findall(line)
        if timestamps:
            # A timing line holds "start --> end"; only the start is used.
            timestamp = re.split(":|,", timestamps[0])
            frame = timestamp2frame(timestamp, fps)
            # Track the frame here rather than re-parsing the final string,
            # where digits inside caption text could be mistaken for it.
            last_frame = int(frame)
            pieces.append(frame + ": ")
        if string_re.findall(line):
            pieces.append(line + " | ")

    deforum_str = "".join(pieces).strip()
    # Multi-line captions produce "line one | line two"; drop a separator
    # that is directly followed by text so the lines merge into one entry.
    deforum_str = re.sub(r"\|\s+(?=[a-zA-Z])", r"", deforum_str)
    # Remove the trailing separator only if it is actually there.
    if deforum_str.endswith("|"):
        deforum_str = deforum_str[:-1]
    return deforum_str, last_frame