Spaces:
Runtime error
Runtime error
File size: 2,996 Bytes
55263ed fe9f0cf 11c5a18 55263ed 05c1e4b fe9f0cf 11c5a18 2d4c2a0 11c5a18 55263ed 2d4c2a0 05c1e4b 55263ed 11c5a18 55263ed 11c5a18 05c1e4b 11c5a18 eae3e73 05c1e4b eae3e73 05c1e4b eae3e73 6140c54 eae3e73 05c1e4b eae3e73 05c1e4b 55263ed 05c1e4b 55263ed fe9f0cf 05c1e4b fe9f0cf 05c1e4b fe9f0cf 05c1e4b fe9f0cf 05c1e4b fe9f0cf 95f76e3 a1042e5 95f76e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import gradio as gr
import whisper
import os
class GradioInference():
    """Callable Whisper transcription handler for the Gradio interface.

    An instance keeps one Whisper model loaded at a time and reloads it only
    when a different model size is requested. Calling the instance
    transcribes an uploaded audio file and returns either the plain text or
    a timestamped rendering (SubRip or CSV).
    """

    def __init__(self):
        """Populate the dropdown choices and load the default "base" model."""
        self.sizes = ["base", "large"]
        # "none" means "no language forced"; it is mapped to None in __call__.
        self.langs = ["none"] + sorted(whisper.tokenizer.LANGUAGES.values())
        self.current_size = "base"
        self.loaded_model = whisper.load_model(self.current_size)

    def __call__(self, file, lang, size, subs):
        """Transcribe ``file`` and format the result according to ``subs``.

        Args:
            file: Uploaded file. Either an object exposing the on-disk path
                as ``.name`` or a plain path (``str`` / ``os.PathLike``).
                ``None`` (nothing uploaded) yields an empty string.
            lang: Language name passed to Whisper, or ``"none"`` to pass
                ``language=None``.
            size: Whisper model size; the model is reloaded only when this
                differs from the currently loaded size.
            subs: ``".srt"`` or ``".csv"`` for timestamped output. Any other
                value (including ``"None"`` and ``None``) returns the plain
                transcript text.

        Returns:
            The transcript as a string in the requested format.
        """
        if file is None:
            # Nothing was uploaded, so there is nothing to transcribe.
            return ""

        # Accept both a file-like wrapper (path in ``.name``) and a raw path.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name

        if lang == "none":
            lang = None

        if size != self.current_size:
            self.loaded_model = whisper.load_model(size)
            self.current_size = size

        results = self.loaded_model.transcribe(path, language=lang)

        if subs == ".srt":
            return self.srt(results["segments"])
        if subs == ".csv":
            return self.csv(results["segments"])
        # "None", an unselected radio (None) or any unknown value: plain text.
        return results["text"]

    def srt(self, segments):
        """Render segments as SubRip text.

        Args:
            segments: Iterable of mappings with ``start``, ``end`` (seconds)
                and ``text`` keys.

        Returns:
            One numbered cue per segment (numbering starts at 1), each
            followed by a blank line; an empty string for no segments.
        """
        cues = [
            f"{index}\n"
            f"{self.format_time(segment['start'])} --> {self.format_time(segment['end'])}\n"
            f"{segment['text']}\n\n"
            for index, segment in enumerate(segments, start=1)
        ]
        return "".join(cues)

    def csv(self, segments):
        """Render segments as ``start,end,text`` CSV rows.

        The text field is quoted when it contains a comma, a quote or a line
        break, so each segment always produces exactly three columns.

        Args:
            segments: Iterable of mappings with ``start``, ``end`` and
                ``text`` keys.

        Returns:
            One ``\\n``-terminated row per segment; an empty string for no
            segments.
        """
        # Local imports: this method is itself named ``csv``, and the module
        # is only needed here.
        import csv as csv_module
        import io

        buffer = io.StringIO()
        writer = csv_module.writer(buffer, lineterminator="\n")
        for segment in segments:
            writer.writerow((segment["start"], segment["end"], segment["text"]))
        return buffer.getvalue()

    def format_time(self, time):
        """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

        Args:
            time: Non-negative number of seconds.

        Returns:
            The timestamp string, rounded to the nearest millisecond.
        """
        # Work in whole milliseconds so float error (e.g. 1.001 s) cannot
        # truncate the fractional part to the wrong value.
        total_ms = int(round(float(time) * 1000))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        seconds, milliseconds = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
# Single shared handler for the UI; constructing it loads the default model.
gio = GradioInference()
title = "Whisperer MP3"
description = "Transcription de fichiers MP3 en texte à l'aide du modèle Whisper d'OpenAI"

# NOTE(review): `gr.Box` and `.style(...)` belong to the Gradio 3 API and are
# believed to be removed in Gradio 4 — confirm the pinned gradio version.
block = gr.Blocks()
with block:
    # Page header, built from `title` / `description` so the text is defined
    # in one place only.
    gr.HTML(
        f"""
<div style="text-align: center; max-width: 500px; margin: 0 auto;">
<div>
<h1>{title}</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
{description}
</p>
</div>
"""
    )
    with gr.Group():
        with gr.Box():
            # Model size and optional language selection.
            with gr.Row().style(equal_height=True):
                sz = gr.Dropdown(label="Taille du modèle", choices=gio.sizes, value='base')
                lang = gr.Dropdown(label="Langue (facultatif)", choices=gio.langs, value="none")
            # Output format and audio upload.
            with gr.Row().style(equal_height=True):
                # Default to "None" so the handler receives a real choice
                # rather than Python None when the user selects nothing.
                wt = gr.Radio(["None", ".srt", ".csv"], value="None", label="Avec horodatage ?")
                file = gr.File(label="Fichier MP3")
            text = gr.Textbox(label="Transcription", placeholder="Résultat de la transcription", lines=10)
            with gr.Row().style(equal_height=True):
                btn = gr.Button("Transcrire")
            # Input order must match GradioInference.__call__(file, lang, size, subs).
            btn.click(gio, inputs=[file, lang, sz, wt], outputs=[text])
block.launch()
|