# fearflixai / subtitles.py
# LittleLirow's picture
# Fix last timestamp value
# e3bc9da
import os
import re
import time

from rev_ai import apiclient
def timestamp2frame(ts, fps=10):
    """Convert a split timestamp into a frame index, returned as a string.

    Args:
        ts: Sequence whose first four items are hours, minutes, seconds and
            milliseconds. Each item may be an int or a numeric string
            (e.g. ``"05"``).
        fps: Frames per second used for the conversion.

    Returns:
        The frame index, truncated to a whole frame, as a decimal string.
    """
    parts = [int(x) for x in ts]
    total_ms = (parts[0] * 3600 + parts[1] * 60 + parts[2]) * 1000 + parts[3]
    # Multiply before dividing and stay in integer milliseconds: computing
    # float seconds first (e.g. 4.35 * 100 == 434.99999999999994) can
    # truncate to one frame too few.
    return str(int(total_ms * fps // 1000))
def _srt_to_deforum(srt_lines, fps):
    """Build the prompt string and final frame index from SRT lines.

    Lines holding at least two ``H:M:S,ms`` timestamps are treated as cue
    lines and contribute ``"<start frame>: "``. Lines that start with an
    ASCII letter and contain no ``>`` are treated as caption text and
    contribute the text followed by ``" | "``. Other lines are ignored.

    Args:
        srt_lines: Iterable of SRT lines (e.g. an open text file).
        fps: Frames per second forwarded to ``timestamp2frame``.

    Returns:
        Tuple ``(deforum_str, last_frame)``. ``last_frame`` is the frame of
        the end timestamp of the last cue line, or 0 when no cue line was
        found.
    """
    timestamp_re = re.compile(r"(\d+:\d+:\d+,\d+)")
    string_re = re.compile(r"^[a-zA-Z][^>]+$")

    pieces = []
    last_timestamp = None
    for line in srt_lines:
        timestamps = timestamp_re.findall(line)
        # A cue line carries both a start and an end timestamp; requiring
        # two matches avoids an IndexError on a stray timestamp-like token.
        if len(timestamps) >= 2:
            start = re.split(":|,", timestamps[0])
            last_timestamp = re.split(":|,", timestamps[1])
            pieces.append(timestamp2frame(start, fps) + ": ")
        if string_re.findall(line):
            pieces.append(line + " | ")

    deforum_str = "".join(pieces).replace("\n", "").strip()
    # Drop the separator between consecutive text lines so a multi-line
    # caption stays a single prompt; separators before a frame number stay.
    deforum_str = re.sub(r"\|\s+(?=[a-zA-Z])", r"", deforum_str)
    # Remove the final character (the trailing "|" after the last caption).
    deforum_str = deforum_str[:-1]

    if last_timestamp is None:
        return deforum_str, 0
    return deforum_str, int(timestamp2frame(last_timestamp, fps))


def audio2subtitle(rev_ai_token, fps=10):
    """Transcribe ``audio_out.mp3`` with Rev AI and build a frame-keyed prompt.

    Submits the local audio file, waits for the job to be transcribed,
    writes the returned captions to ``audio_out.srt`` and converts them to
    a ``"<frame>: <text> | <frame>: <text> ..."`` string.

    Args:
        rev_ai_token: Access token passed to ``RevAiAPIClient``.
        fps: Frames per second used to convert timestamps to frame indices.

    Returns:
        Tuple ``(deforum_str, last_frame)`` where ``last_frame`` is the
        integer frame index of the last caption's end timestamp (0 when
        the transcript contains no captions).

    Raises:
        RuntimeError: If the transcription job reports a failed status.
    """
    speech_file_path = "audio_out.mp3"
    srt_path = "audio_out.srt"
    poll_interval = 1.0  # seconds between status checks

    client = apiclient.RevAiAPIClient(rev_ai_token)
    job = client.submit_job_local_file(speech_file_path)

    while True:
        job_details = client.get_job_details(job.id)
        status = str(job_details.status)
        if status == "JobStatus.TRANSCRIBED":
            break
        if status == "JobStatus.FAILED":
            # Without this check a failed job would be polled forever.
            detail = getattr(job_details, "failure_detail", None)
            raise RuntimeError(f"Rev AI transcription job failed: {detail}")
        # Pause between polls instead of hammering the API in a tight loop.
        time.sleep(poll_interval)

    transcript_srt = client.get_captions(job.id)
    with open(srt_path, "w", encoding="utf-8") as f:
        f.write(transcript_srt)

    with open(srt_path, "r", encoding="utf-8") as f:
        return _srt_to_deforum(f, fps)