import os
import re
import time

try:
    from rev_ai import apiclient
except ImportError:
    # The Rev.ai SDK is only needed by audio2subtitle(); keep the pure
    # helpers below importable (and testable) without it.
    apiclient = None

# Timing line of an SRT cue: "HH:MM:SS,mmm --> HH:MM:SS,mmm".
_TIMING_RE = re.compile(r"(\d+:\d+:\d+,\d+)\s*-->\s*(\d+:\d+:\d+,\d+)")
# Caption text: starts with an ASCII letter and contains no ">".  This is the
# filter that keeps cue-index lines and timing lines out of the prompt text.
# NOTE: caption lines starting with a digit or punctuation are skipped too.
_TEXT_RE = re.compile(r"[a-zA-Z][^>]*")


def timestamp2frame(ts, fps=10):
    """Convert a split SRT timestamp into a frame index.

    Args:
        ts: Sequence ``[hours, minutes, seconds, milliseconds]``; each item
            may be an int or a numeric string.
        fps: Frames per second used for the conversion.

    Returns:
        The frame index (truncated toward zero) as a string.

    Raises:
        IndexError: If ``ts`` has fewer than four components.
        ValueError: If a component is not an integer literal.
    """
    parts = [int(x) for x in ts]
    hours, minutes, seconds, millis = parts[0], parts[1], parts[2], parts[3]
    # Work in whole milliseconds: adding ``millis / 1000`` as a float first
    # can land just below an integer (0.29 * 100 == 28.999...) and make the
    # truncation drop a frame.
    total_ms = ((hours * 60 + minutes) * 60 + seconds) * 1000 + millis
    return str(int(total_ms * fps / 1000))


def srt2deforum(lines, fps=10):
    """Turn SRT caption lines into a ``"frame: text | frame: text"`` string.

    Each timing line starts a new cue keyed by the frame of its start time.
    Following lines that start with an ASCII letter and contain no ``>`` are
    joined with single spaces as that cue's text.  Cues that end up with no
    text, and text that appears before the first timing line, are skipped.

    Args:
        lines: Iterable of SRT lines (with or without trailing newlines).
        fps: Frames per second used to convert timestamps.

    Returns:
        ``(prompt, last_frame)`` where ``last_frame`` is the frame index of
        the end time of the last timing line, or ``0`` if there is none.
    """
    cues = []  # (start frame as str, list of text lines)
    last_end = None
    for raw in lines:
        line = raw.strip()
        timing = _TIMING_RE.search(line)
        if timing:
            start, end = (re.split(r"[:,]", stamp) for stamp in timing.groups())
            cues.append((timestamp2frame(start, fps), []))
            last_end = end
        elif cues and _TEXT_RE.fullmatch(line):
            cues[-1][1].append(line)

    prompt = " | ".join(
        frame + ": " + " ".join(texts) for frame, texts in cues if texts
    )
    last_frame = 0 if last_end is None else int(timestamp2frame(last_end, fps))
    return prompt, last_frame


def audio2subtitle(
    rev_ai_token,
    fps=10,
    *,
    speech_file_path="audio_out.mp3",
    srt_path="audio_out.srt",
    poll_interval=5.0,
):
    """Transcribe an audio file with Rev.ai and build a frame-keyed prompt.

    Submits ``speech_file_path`` as a Rev.ai job, polls until the job is
    transcribed, writes the SRT captions to ``srt_path`` and converts them
    with :func:`srt2deforum`.

    Args:
        rev_ai_token: Rev.ai access token.
        fps: Frames per second used to convert caption times to frames.
        speech_file_path: Audio file to upload.
        srt_path: Where the downloaded SRT captions are written.
        poll_interval: Seconds to wait between job status checks.

    Returns:
        ``(prompt, last_frame)`` as described in :func:`srt2deforum`.

    Raises:
        ImportError: If the ``rev_ai`` package is not installed.
        RuntimeError: If Rev.ai reports the job as failed.
    """
    if apiclient is None:
        raise ImportError(
            "audio2subtitle requires the 'rev_ai' package (pip install rev_ai)"
        )

    client = apiclient.RevAiAPIClient(rev_ai_token)
    job = client.submit_job_local_file(speech_file_path)

    while True:
        details = client.get_job_details(job.id)
        status = str(details.status)
        if status == "JobStatus.TRANSCRIBED":
            break
        if status == "JobStatus.FAILED":
            # Without this check a failed job would be polled forever.
            raise RuntimeError(
                "Rev.ai job {} failed: {}".format(
                    job.id, getattr(details, "failure_detail", None)
                )
            )
        # Sleep between checks instead of hammering the API in a tight loop.
        time.sleep(poll_interval)

    transcript_srt = client.get_captions(job.id)
    # Explicit UTF-8 so non-ASCII captions do not depend on the locale codec.
    with open(srt_path, "w", encoding="utf-8") as f:
        f.write(transcript_srt)

    return srt2deforum(transcript_srt.splitlines(), fps)