# NOTE: removed non-Python page-scrape residue ("Spaces" status lines, file
# size, commit hashes, and a column ruler) that preceded the source and made
# the file unparseable.
import whisper
from tempfile import NamedTemporaryFile
class Transcription:
    """Transcribe a single audio file with an already-loaded Whisper model."""

    def __init__(self, source):
        """
        Args:
            source: path to the audio file to transcribe.
        """
        self.source = source

    def transcribe(self, model):
        """Run language detection and a full transcription of ``self.source``.

        Args:
            model: a loaded ``whisper`` model instance (e.g. from
                ``whisper.load_model``).

        Side effects / results stored on the instance:
            * ``self.raw_output`` / ``self.output``: the Whisper result dict,
              augmented with ``name`` (the source path) and ``language``.
            * ``self.segments``: the segment list, with the bulky ``tokens``
              entry removed from each segment.

        Returns:
            The augmented result dict (``self.output``).
        """
        # Identify the spoken language from a padded/trimmed 30-second sample.
        audio = whisper.load_audio(self.source)
        audio = whisper.pad_or_trim(audio)
        # n_mels is taken from the model itself: 80 for most sizes,
        # 128 for large-v3 — avoids hard-coding per model name.
        mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)
        _, probs = model.detect_language(mel)
        language = max(probs, key=probs.get)

        # Full transcription with per-word timestamps.
        self.raw_output = model.transcribe(
            self.source,
            language=language,
            verbose=True,
            word_timestamps=True,
        )

        # Drop token IDs from each segment; downstream consumers only need
        # the text and timestamps.
        for segment in self.raw_output['segments']:
            del segment['tokens']
        self.segments = self.raw_output['segments']

        self.raw_output.update(
            # BUG FIX: was `self.source[0]`, which stored only the first
            # character of the path (leftover from a list-of-files version).
            name=self.source,
            language=language,
        )
        self.output = self.raw_output
        return self.output