import datetime
import subprocess
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import whisper
from fastcore.foundation import L
from fastcore.xtras import working_directory

def start_app():
    "Launch the Streamlit front end."
    subprocess.run(["streamlit", "run", "app.py"])

def get_audio(url: str):
    "Download the audio track of `url` into ./audio as a .wav file."
    audio_path = Path("./audio")
    audio_path.mkdir(exist_ok=True)  # working_directory fails if the directory is missing
    with working_directory(audio_path):
        # subprocess.run(['youtube-dl', '-F', 'bestaudio[ext=m4a]', url])
        subprocess.run(["yt-dlp", "-x", "--audio-format", "wav", url])

def get_v_from_url(url):
    "Extract the video id (the `v` query parameter) from a YouTube watch URL."
    _, val = url.split("?v=")
    return val.split("&")[0]

def annotate(audio_src, model_size="tiny"):
    "Transcribe `audio_src` with a Whisper model, on GPU when one is available."
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model(model_size, device=device)
    result = model.transcribe(audio_src)
    return result

def get_time(seconds):
    "Format seconds as a zero-padded HH:MM:SS[.ffffff] string."
    # pad only the whole part so hours get two digits even when a fraction is present
    whole, _, frac = str(datetime.timedelta(seconds=seconds)).partition(".")
    return "{:0>8}".format(whole) + (f".{frac}" if frac else "")
def df_from_result(result):
    "Flatten Whisper's segment list into a DataFrame with readable timestamps."
    df = pd.json_normalize(result["segments"])
    df["start"] = df["start"].apply(get_time)
    df["end"] = df["end"].apply(get_time)
    return df

def find_word_timestamp(df, *words):
    "For each word, collect the start times of the segments whose text contains it."
    l = L()
    for word in words:
        vals = df["text"].str.find(word).values
        arr = np.where(vals >= 0)  # str.find returns -1 when the word is absent
        times = list(df.iloc[arr]["start"].values)
        nt = L(times).map(lambda x: x.split(".")[0])  # drop any fractional seconds
        l.append(nt)
    return l
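# e.g. find_word_timestamp(df, "hello") might return [['00:01:12', '00:03:45']]
# (hypothetical times): one list of segment start times per word searched
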
def generate_srt(df):
    "Render the transcript DataFrame as SubRip (.srt) text."
    s = ""
    # SRT cues are 1-indexed; timestamps use a comma and millisecond precision
    for i, (start, end, text) in enumerate(df[["start", "end", "text"]].values, start=1):
        start = start.replace(".", ",")[:12]
        end = end.replace(".", ",")[:12]
        s += f"{i}\n"
        s += f"{start} --> {end}\n"
        s += f"{text.strip()}\n\n"
    return s

def write_srt(s, name):
    "Write the subtitle string `s` to `name`.srt; the with block closes the file."
    with open(f"{name}.srt", "w") as f:
        f.write(s)
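
# A minimal end-to-end sketch of how the helpers above compose. The URL and
# search word are hypothetical placeholders, and the glob assumes yt-dlp's
# default output template keeps the video id in the downloaded filename.
if __name__ == "__main__":
    url = "https://www.youtube.com/watch?v=abc123"  # hypothetical video URL
    get_audio(url)                                  # download a .wav into ./audio
    vid = get_v_from_url(url)                       # -> "abc123"
    wav = next(Path("./audio").glob(f"*{vid}*.wav"))  # locate the downloaded file
    df = df_from_result(annotate(str(wav), model_size="tiny"))
    print(find_word_timestamp(df, "hello"))         # segments mentioning "hello"
    write_srt(generate_srt(df), vid)                # writes abc123.srt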