Spaces:
Running
Running
File size: 2,843 Bytes
452467a f34b85f 452467a f34b85f 452467a f34b85f 452467a f34b85f 452467a f34b85f 452467a f34b85f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import whisperx as whisper
from deep_translator import GoogleTranslator
import os
from whisperx.utils import write_vtt, write_srt, write_ass, write_tsv, write_txt
def detect_language(filename, model):
    """Detect the spoken language of an audio file.

    Args:
        filename: Path of the audio file handed to ``whisper.load_audio``.
        model: Object exposing ``device`` and ``detect_language(mel)``.

    Returns:
        A dict ``{"detected_language": <key>}`` where ``<key>`` is the entry
        of the probability mapping with the highest value.
    """
    # Load the audio, then pad/trim it (to fit 30 seconds, per the original
    # note) before building the spectrogram.
    waveform = whisper.pad_or_trim(whisper.load_audio(file=filename))

    # The log-Mel spectrogram has to live on the same device as the model.
    mel_spec = whisper.log_mel_spectrogram(waveform).to(model.device)

    _, language_probs = model.detect_language(mel_spec)
    best_language = max(language_probs, key=language_probs.get)

    print(f"Detected language: {best_language}")
    return {"detected_language": best_language}
def translate_to_english(transcription, json=False):
    """Translate the ``"text"`` of every segment to English, in place.

    Args:
        transcription: When ``json`` is true, an iterable of segment dicts;
            otherwise a dict whose ``"segments"`` entry is such an iterable.
            Each segment must have a ``"text"`` key.
        json: Selects which of the two input shapes above is expected.

    Returns:
        The same ``transcription`` object, with each segment's ``"text"``
        replaced by its translation.
    """
    segments = transcription if json else transcription["segments"]

    # One translator is enough for the whole transcript; the source language
    # is auto-detected per request, so there is no need to rebuild it for
    # every segment.
    translator = GoogleTranslator(source="auto", target="en")
    for segment in segments:
        segment["text"] = translator.translate(segment["text"])
    return transcription
def write(filename, dtype, result_aligned):
    """Write the aligned segments to a subtitle/transcript file.

    The output path is ``filename`` with its final extension replaced by the
    one matching ``dtype``, joined onto ``"."`` (so a relative name lands in
    the current working directory).

    Args:
        filename: Name of the source media file; only its stem is used.
        dtype: One of ``"vtt"``, ``"srt"``, ``"ass"``, ``"tsv"`` or
            ``"plain text"``. Any other value is ignored and nothing is
            written.
        result_aligned: Mapping whose ``"segments"`` entry is passed to the
            format-specific writer.
    """
    # Supported formats: dtype -> (output extension, writer function).
    writers = {
        "vtt": (".vtt", write_vtt),
        "srt": (".srt", write_srt),
        "ass": (".ass", write_ass),
        "tsv": (".tsv", write_tsv),
        "plain text": (".txt", write_txt),
    }

    entry = writers.get(dtype)
    if entry is None:
        # Unknown formats have always been a silent no-op; keep that contract
        # so existing callers are unaffected.
        return

    extension, writer = entry
    out_path = os.path.join(".", os.path.splitext(filename)[0] + extension)
    with open(out_path, "w", encoding="utf-8") as out_file:
        writer(result_aligned["segments"], file=out_file)
def read(filename, transc):
    """Return the text of the transcript file that belongs to ``filename``.

    Args:
        filename: Name of the source media file. Its final extension is
            replaced by the transcript extension to find the file to read.
        transc: Transcript format; ``"plain text"`` maps to the ``txt``
            extension, any other value is used as the extension verbatim.

    Returns:
        The file's lines (each keeping its trailing newline) joined with a
        single space.

    Raises:
        OSError: If the transcript file cannot be opened.
    """
    extension = "txt" if transc == "plain text" else transc

    # Strip only the final extension. Splitting on the first "." broke names
    # such as "./audio.mp3" or "my.talk.mp3" and did not match the
    # os.path.splitext-based name used when the transcript is written.
    stem = os.path.splitext(filename)[0]
    print(stem)

    with open(f"{stem}.{extension}", encoding="utf-8") as transcript:
        return " ".join(transcript)
from constants import language_dict
def get_key(val):
    """Return the first key of ``language_dict`` whose value equals ``val``.

    If no entry matches, the string ``"Key not found"`` is returned instead.
    """
    matching_keys = (
        name for name, mapped in language_dict.items() if val == mapped
    )
    return next(matching_keys, "Key not found")
|