Spaces:

TaiYouWeb
/

whisper-multi-model

Sleeping

App Files Files Community

TaiYouWeb committed on Oct 26, 2024

Commit

83922b2

1 Parent(s): 70a9f6b

Initial Commit

Browse files

Files changed (5) hide show

app.py +392 -0
languages.py +147 -0
packages.txt +42 -0
requirements.txt +5 -0
subtitle.py +101 -0

app.py ADDED Viewed

	@@ -0,0 +1,392 @@

+import torch
+import gradio as gr
+import yt_dlp as youtube_dl
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperTokenizer, pipeline
+from transformers.pipelines.audio_utils import ffmpeg_read
+import tempfile
+import os
+import time
+import requests
+from playwright.sync_api import sync_playwright
+from languages import get_language_names
+from subtitle import text_output, subtitle_output
+import subprocess
# `spaces` is an optional dependency: remember whether it imported so that
# gpu_decorator can decide whether to wrap functions with spaces.GPU.
try:
    import spaces
    USING_SPACES = True
except ImportError:
    USING_SPACES = False

# Best-effort runtime install of flash-attn. The child process must inherit the
# current environment (PATH, HOME, virtualenv variables, ...); passing a dict
# that holds only the flag would wipe all of those for the pip subprocess.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)
# Install the browsers Playwright drives in _return_yt_info.
os.system("playwright install")

# Longest YouTube video accepted by _download_yt_audio, in seconds.
YT_LENGTH_LIMIT_S = 360
# Duration passed to spaces.GPU for the transcription entry points.
SPACES_GPU_DURATION = 90
# CUDA device index 0 when a GPU is visible, otherwise the CPU.
device = 0 if torch.cuda.is_available() else "cpu"
def gpu_decorator(duration=60):
    """Build a decorator that applies ``spaces.GPU`` only when it is available.

    Args:
        duration: Value forwarded as ``spaces.GPU(duration=...)``.

    Returns:
        A decorator. When ``USING_SPACES`` is true the decorated function is
        wrapped with ``spaces.GPU``; otherwise it is returned unchanged.
    """
    def actual_decorator(func):
        wrapped = func
        if USING_SPACES:
            wrapped = spaces.GPU(duration=duration)(func)
        return wrapped
    return actual_decorator
def device_info():
    """Print disk, block-device, memory, CPU and GPU diagnostics to stdout.

    Every tool is run on its own, so one missing or failing command (for
    example ``nvidia-smi`` on a CPU-only host) is reported and the remaining
    commands still run.
    """
    commands = (
        ["df", "-h"],
        ["lsblk"],
        ["free", "-h"],
        ["lscpu"],
        ["nvidia-smi"],
    )
    for command in commands:
        try:
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers FileNotFoundError when the executable is absent.
            print(f"Command failed: {e}")
@gpu_decorator(duration=SPACES_GPU_DURATION)
def transcribe(inputs, model, language, batch_size, chunk_length_s, stride_length_s, task, timestamp_mode, progress=gr.Progress(track_tqdm=True)):
    """Transcribe (or translate) an uploaded audio/video file with Whisper.

    Args:
        inputs: Path of the media file; passed to the pipeline as-is and used
            by ``text_output``/``subtitle_output`` to name the result files.
        model: Hub id of the checkpoint to load.
        language: Language name, or "Automatic Detection" to let the model pick.
        batch_size: Batch size forwarded to the pipeline call.
        chunk_length_s: Chunk length forwarded to the pipeline.
        stride_length_s: Stride length forwarded to the pipeline.
        task: Generation task ("transcribe" or "translate").
        timestamp_mode: Forwarded as ``return_timestamps``; a falsy value
            produces plain text, anything else produces subtitle files.
        progress: Gradio progress tracker.

    Returns:
        Whatever ``text_output`` or ``subtitle_output`` returns.

    Raises:
        gr.Error: When no input was given or any step fails.
    """
    try:
        if inputs is None:
            raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
        # Half precision is only dependable on CUDA; fall back to float32 on CPU.
        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
        model_gen = AutoModelForSpeechSeq2Seq.from_pretrained(
            model, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
        )
        model_gen.to(device)
        processor = AutoProcessor.from_pretrained(model)
        tokenizer = WhisperTokenizer.from_pretrained(model)
        pipe = pipeline(
            task="automatic-speech-recognition",
            model=model_gen,
            chunk_length_s=chunk_length_s,
            stride_length_s=stride_length_s,
            tokenizer=tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=torch_dtype,
            model_kwargs={"attn_implementation": "flash_attention_2"},
            device=device,
        )
        # Checkpoints whose id ends in ".en" get neither a language nor a task.
        generate_kwargs = {}
        if not model.endswith(".en"):
            if language != "Automatic Detection":
                generate_kwargs["language"] = language
            generate_kwargs["task"] = task
        output = pipe(inputs, batch_size=batch_size, generate_kwargs=generate_kwargs, return_timestamps=timestamp_mode)
        print(output)
        print({"inputs": inputs, "model": model, "language": language, "batch_size": batch_size, "chunk_length_s": chunk_length_s, "stride_length_s": stride_length_s, "task": task, "timestamp_mode": timestamp_mode})
        if not timestamp_mode:
            return text_output(inputs, output['text'])
        return subtitle_output(inputs, output['chunks'])
    except Exception as e:
        # Surface every failure in the UI while keeping the original traceback.
        raise gr.Error(str(e), duration=10) from e
def _download_yt_audio(yt_url, filename):
    """Download the audio track of a YouTube video to *filename*.

    The video metadata is fetched first so that videos longer than
    ``YT_LENGTH_LIMIT_S`` are rejected before anything is downloaded.

    Args:
        yt_url: URL of the video.
        filename: Output path handed to yt-dlp as ``outtmpl``.

    Raises:
        gr.Error: If metadata extraction fails, the duration is unknown,
            the video is too long, or the download fails.
    """
    info_loader = youtube_dl.YoutubeDL()
    try:
        info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise gr.Error(str(err)) from err

    # Prefer the numeric duration; only parse the display string as a fallback.
    file_length_s = info.get("duration")
    if not file_length_s:
        file_length = info.get("duration_string")
        if not file_length:
            raise gr.Error("Video duration is unavailable.")
        try:
            file_length_s = 0
            for sub_length in file_length.split(":"):
                # Works for "SS", "MM:SS" and "HH:MM:SS" alike.
                file_length_s = file_length_s * 60 + int(sub_length)
        except ValueError as err:
            raise gr.Error("Video duration is unavailable.") from err

    if file_length_s > YT_LENGTH_LIMIT_S:
        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.", duration=10)

    ydl_opts = {
        "outtmpl": filename,
        "format": "bestaudio[ext=m4a]/best",
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([yt_url])
    except (youtube_dl.utils.DownloadError, youtube_dl.utils.ExtractorError) as err:
        # yt-dlp reports download problems as DownloadError. Reuse the metadata
        # already fetched and list only the format ids to keep the message short.
        format_ids = [str(fmt.get("format_id")) for fmt in info.get("formats") or []]
        raise gr.Error(f"Download failed: {err}. Available formats: {', '.join(format_ids)}", duration=10) from err
+def _return_yt_video_id(yt_url):
+    if "https://www.youtube.com/watch?v=" in yt_url:
+        video_id = yt_url.split("?v=")[-1]
+    elif "https://youtu.be/" in yt_url:
+        video_id = yt_url.split("be/")[1]
+    return video_id
def _return_yt_html_embed(yt_url):
    """Return an HTML snippet that embeds the YouTube player for *yt_url*.

    The id comes from user-supplied text, so it is HTML-escaped before being
    placed inside the ``src`` attribute.
    """
    from html import escape

    video_id = _return_yt_video_id(yt_url)
    safe_id = escape(str(video_id), quote=True)
    HTML_str = (
        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{safe_id}"> </iframe>'
        " </center>"
    )
    return HTML_str
def _return_yt_thumbnail(yt_url):
    """Download the video's ``maxresdefault`` thumbnail into a temporary file.

    Args:
        yt_url: URL of the YouTube video.

    Returns:
        Path of the saved ``.jpg`` file, or ``None`` if it could not be fetched.

    Raises:
        ValueError: If no video id can be extracted from *yt_url*.
    """
    video_id = _return_yt_video_id(yt_url)
    if not video_id:
        raise ValueError("Invalid YouTube URL: Unable to extract video ID.")
    thumbnail_url = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
    try:
        # Fetch before creating the file so a failed request leaves no empty
        # temp file behind; the timeout keeps a stalled request from hanging.
        response = requests.get(thumbnail_url, timeout=10)
        if response.status_code != 200:
            raise RuntimeError(f"Failed to retrieve thumbnail. Status code: {response.status_code}")
        with tempfile.NamedTemporaryFile(delete=False, suffix='.jpg') as temp_file:
            temp_file.write(response.content)
            thumbnail_path = temp_file.name
    except (requests.RequestException, OSError, RuntimeError) as e:
        print(f"Error occurred: {e}")
        return None
    return thumbnail_path
def _return_yt_info(yt_url):
    """Scrape title, description and keywords from the video page.

    The page is opened in headless Chromium via Playwright. A missing
    ``description`` or ``keywords`` meta tag yields ``None`` for that field
    instead of discarding the fields that were found.

    Args:
        yt_url: URL of the page to open.

    Returns:
        Three ``gr.Textbox`` components (title, description, keywords). On any
        failure all three are returned hidden.
    """
    def _meta_content(page, name):
        # query_selector returns None when the tag is absent.
        element = page.query_selector(f"meta[name='{name}']")
        return element.get_attribute("content") if element is not None else None

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(yt_url)
                page.wait_for_load_state("networkidle")
                title = page.title()
                description = _meta_content(page, "description")
                keywords = _meta_content(page, "keywords")
            finally:
                # Close the browser even when navigation or scraping fails.
                browser.close()
        gr_title = gr.Textbox(label="YouTube Title", visible=True, value=title)
        gr_description = gr.Textbox(label="YouTube Description", visible=True, value=description)
        gr_keywords = gr.Textbox(label="YouTube Keywords", visible=True, value=keywords)
        return gr_title, gr_description, gr_keywords
    except Exception as e:
        # Best effort: the metadata is optional, so hide the fields on failure.
        print(e)
        return gr.Textbox(visible=False), gr.Textbox(visible=False), gr.Textbox(visible=False)
def return_youtube(yt_url):
    """Collect the preview components shown for a YouTube URL.

    Returns:
        A 5-tuple: embed HTML component, thumbnail image component, then the
        title, description and keywords textboxes from ``_return_yt_info``.
    """
    embed_markup = _return_yt_html_embed(yt_url)
    thumbnail_file = _return_yt_thumbnail(yt_url)
    player = gr.HTML(label="Youtube Video", visible=True, value=embed_markup)
    preview = gr.Image(label="Youtube Thumbnail", visible=True, value=thumbnail_file)
    title_box, description_box, keywords_box = _return_yt_info(yt_url)
    return player, preview, title_box, description_box, keywords_box
@gpu_decorator(duration=SPACES_GPU_DURATION)
def yt_transcribe(yt_url, model, language, batch_size, chunk_length_s, stride_length_s, task, timestamp_mode):
    """Download a YouTube video's audio and transcribe it with Whisper.

    Args:
        yt_url: URL of the video.
        model: Hub id of the checkpoint to load.
        language: Language name, or "Automatic Detection".
        batch_size: Batch size forwarded to the pipeline call.
        chunk_length_s: Chunk length forwarded to the pipeline.
        stride_length_s: Stride length forwarded to the pipeline.
        task: Generation task ("transcribe" or "translate").
        timestamp_mode: Forwarded as ``return_timestamps``; a falsy value
            produces plain text, anything else produces subtitle files.

    Returns:
        A 7-tuple: transcript, output files, title, embed HTML, thumbnail,
        description, keywords. When transcription fails, a warning is shown and
        the first two items are hidden textboxes.
    """
    gr_html, gr_thumbnail, gr_title, gr_description, gr_keywords = return_youtube(yt_url)
    try:
        # Download first so over-long or unavailable videos fail before any
        # model weights are loaded.
        with tempfile.TemporaryDirectory() as tmpdirname:
            filepath = os.path.join(tmpdirname, "video.mp4")
            _download_yt_audio(yt_url, filepath)
            with open(filepath, "rb") as f:
                audio_bytes = f.read()

        # Half precision is only dependable on CUDA; fall back to float32 on CPU.
        torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
        model_gen = AutoModelForSpeechSeq2Seq.from_pretrained(
            model, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
        )
        model_gen.to(device)
        processor = AutoProcessor.from_pretrained(model)
        tokenizer = WhisperTokenizer.from_pretrained(model)
        pipe = pipeline(
            task="automatic-speech-recognition",
            model=model_gen,
            chunk_length_s=chunk_length_s,
            stride_length_s=stride_length_s,
            tokenizer=tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=torch_dtype,
            model_kwargs={"attn_implementation": "flash_attention_2"},
            device=device,
        )

        # The sampling rate must come from the feature extractor that was just
        # loaded; reading it from `pipe` before `pipe` existed raised
        # UnboundLocalError on every call.
        sampling_rate = processor.feature_extractor.sampling_rate
        inputs = {"array": ffmpeg_read(audio_bytes, sampling_rate), "sampling_rate": sampling_rate}

        generate_kwargs = {}
        if not model.endswith(".en"):
            if language != "Automatic Detection":
                generate_kwargs["language"] = language
            generate_kwargs["task"] = task
        output = pipe(inputs, batch_size=batch_size, generate_kwargs=generate_kwargs, return_timestamps=timestamp_mode)
        print(output)
        print({"inputs": yt_url, "model": model, "language": language, "batch_size": batch_size, "chunk_length_s": chunk_length_s, "stride_length_s": stride_length_s, "task": task, "timestamp_mode": timestamp_mode})

        # The output helpers derive file names from a path-like string, so give
        # them the video id rather than the audio dict.
        output_name = _return_yt_video_id(yt_url) or "youtube"
        if not timestamp_mode:
            subtitle, files = text_output(output_name, output['text'])
        else:
            subtitle, files = subtitle_output(output_name, output['chunks'])
        return subtitle, files, gr_title, gr_html, gr_thumbnail, gr_description, gr_keywords
    except Exception as e:
        # Keep the video preview visible and report the failure as a warning.
        error_message = str(e)
        gr.Warning(error_message, duration=10)
        return gr.Textbox(visible=False),gr.Textbox(visible=False), gr_title, gr_html, gr_thumbnail, gr_description, gr_keywords
demo = gr.Blocks()

# Checkpoints offered in every tab's "Model Name" dropdown.
_MODEL_CHOICES = [
    "openai/whisper-tiny",
    "openai/whisper-base",
    "openai/whisper-small",
    "openai/whisper-medium",
    "openai/whisper-large",
    "openai/whisper-large-v1",
    "openai/whisper-large-v2", "distil-whisper/distil-large-v2",
    "openai/whisper-large-v3", "openai/whisper-large-v3-turbo", "distil-whisper/distil-large-v3", "xaviviro/whisper-large-v3-catalan-finetuned-v2",
]


def _transcription_controls():
    """Return fresh option components (model through timestamp mode).

    Each interface needs its own component instances, so this builds a new
    list on every call. The order matches the parameters that follow the
    media input in ``transcribe`` and ``yt_transcribe``.
    """
    return [
        gr.Dropdown(
            choices=list(_MODEL_CHOICES),
            value="openai/whisper-large-v3-turbo",
            label="Model Name",
            allow_custom_value=True,
        ),
        gr.Dropdown(choices=["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language", interactive=True),
        gr.Slider(label="Batch Size", minimum=1, maximum=32, value=16, step=1),
        gr.Slider(label="Chunk Length (s)", minimum=1, maximum=60, value=17.5, step=0.1),
        gr.Slider(label="Stride Length (s)", minimum=1, maximum=30, value=1, step=0.1),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Dropdown(
            choices=[True, False, "word"],
            value=True,
            label="Timestamp Mode",
        ),
    ]


file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=['upload', 'microphone'], type="filepath", label="Audio file"),
        *_transcription_controls(),
    ],
    outputs=[gr.Textbox(label="Output"), gr.File(label="Download Files")],
    title="Whisper: Transcribe Audio",
    flagging_mode="auto",
)
video_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Video(sources=["upload", "webcam"], label="Video file", show_label=False, show_download_button=False, show_share_button=False, streaming=True),
        *_transcription_controls(),
    ],
    outputs=[gr.Textbox(label="Output"), gr.File(label="Download Files")],
    title="Whisper: Transcribe Video",
    flagging_mode="auto",
)
# NOTE: this rebinds the module-level name `yt_transcribe` from the handler
# function to the Interface. `fn=` is evaluated before the assignment, so the
# Interface still receives the function.
yt_transcribe = gr.Interface(
    fn=yt_transcribe,
    inputs=[
        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
        *_transcription_controls(),
    ],
    outputs=[
        gr.Textbox(label="Output"),
        gr.File(label="Download Files"),
        gr.Textbox(label="Youtube Title"),
        gr.HTML(label="Youtube Video"),
        gr.Image(label="Youtube Thumbnail"),
        gr.Textbox(label="Youtube Description"),
        gr.Textbox(label="Youtube Keywords"),
    ],
    title="Whisper: Transcribe YouTube",
    flagging_mode="auto",
)
with demo:
    gr.TabbedInterface(
        interface_list=[file_transcribe, video_transcribe, yt_transcribe],
        tab_names=["Audio", "Video", "YouTube"]
    )
if __name__ == "__main__":
    demo.queue().launch(ssr_mode=False)

languages.py ADDED Viewed

	@@ -0,0 +1,147 @@

class Language:
    """A language entry pairing a short code with its display name."""

    def __init__(self, code, name):
        self.code, self.name = code, name

    def __str__(self):
        return f"Language(code={self.code}, name={self.name})"
# Every language offered in the UI, as (code, display name) pairs turned into
# Language objects. The order of the pairs is the order of the list.
LANGUAGES = [
    Language(code, name)
    for code, name in (
        ('en', 'English'), ('zh', 'Chinese'), ('de', 'German'), ('es', 'Spanish'),
        ('ru', 'Russian'), ('ko', 'Korean'), ('fr', 'French'), ('ja', 'Japanese'),
        ('pt', 'Portuguese'), ('tr', 'Turkish'), ('pl', 'Polish'), ('ca', 'Catalan'),
        ('nl', 'Dutch'), ('ar', 'Arabic'), ('sv', 'Swedish'), ('it', 'Italian'),
        ('id', 'Indonesian'), ('hi', 'Hindi'), ('fi', 'Finnish'), ('vi', 'Vietnamese'),
        ('he', 'Hebrew'), ('uk', 'Ukrainian'), ('el', 'Greek'), ('ms', 'Malay'),
        ('cs', 'Czech'), ('ro', 'Romanian'), ('da', 'Danish'), ('hu', 'Hungarian'),
        ('ta', 'Tamil'), ('no', 'Norwegian'), ('th', 'Thai'), ('ur', 'Urdu'),
        ('hr', 'Croatian'), ('bg', 'Bulgarian'), ('lt', 'Lithuanian'), ('la', 'Latin'),
        ('mi', 'Maori'), ('ml', 'Malayalam'), ('cy', 'Welsh'), ('sk', 'Slovak'),
        ('te', 'Telugu'), ('fa', 'Persian'), ('lv', 'Latvian'), ('bn', 'Bengali'),
        ('sr', 'Serbian'), ('az', 'Azerbaijani'), ('sl', 'Slovenian'), ('kn', 'Kannada'),
        ('et', 'Estonian'), ('mk', 'Macedonian'), ('br', 'Breton'), ('eu', 'Basque'),
        ('is', 'Icelandic'), ('hy', 'Armenian'), ('ne', 'Nepali'), ('mn', 'Mongolian'),
        ('bs', 'Bosnian'), ('kk', 'Kazakh'), ('sq', 'Albanian'), ('sw', 'Swahili'),
        ('gl', 'Galician'), ('mr', 'Marathi'), ('pa', 'Punjabi'), ('si', 'Sinhala'),
        ('km', 'Khmer'), ('sn', 'Shona'), ('yo', 'Yoruba'), ('so', 'Somali'),
        ('af', 'Afrikaans'), ('oc', 'Occitan'), ('ka', 'Georgian'), ('be', 'Belarusian'),
        ('tg', 'Tajik'), ('sd', 'Sindhi'), ('gu', 'Gujarati'), ('am', 'Amharic'),
        ('yi', 'Yiddish'), ('lo', 'Lao'), ('uz', 'Uzbek'), ('fo', 'Faroese'),
        ('ht', 'Haitian creole'), ('ps', 'Pashto'), ('tk', 'Turkmen'), ('nn', 'Nynorsk'),
        ('mt', 'Maltese'), ('sa', 'Sanskrit'), ('lb', 'Luxembourgish'), ('my', 'Myanmar'),
        ('bo', 'Tibetan'), ('tl', 'Tagalog'), ('mg', 'Malagasy'), ('as', 'Assamese'),
        ('tt', 'Tatar'), ('haw', 'Hawaiian'), ('ln', 'Lingala'), ('ha', 'Hausa'),
        ('ba', 'Bashkir'), ('jw', 'Javanese'), ('su', 'Sundanese'),
    )
]
# Lookup by language code -> Language.
_TO_LANGUAGE_CODE = {language.code: language for language in LANGUAGES}
# Lookup by lower-cased display name -> Language.
_FROM_LANGUAGE_NAME = {language.name.lower(): language for language in LANGUAGES}

# Alternative names for some languages. Each alias resolves to the same
# Language object as its canonical code, so both tables hold only Language
# values (the aliases used to map to bare code strings). The aliases stay
# reachable through the code table for backward compatibility and are also
# added to the name table, where a name lookup expects them.
for _alias, _code in (
    ("burmese", "my"),
    ("valencian", "ca"),
    ("flemish", "nl"),
    ("haitian", "ht"),
    ("letzeburgesch", "lb"),
    ("pushto", "ps"),
    ("panjabi", "pa"),
    ("moldavian", "ro"),
    ("moldovan", "ro"),
    ("sinhalese", "si"),
    ("castilian", "es"),
):
    _TO_LANGUAGE_CODE[_alias] = _TO_LANGUAGE_CODE[_code]
    _FROM_LANGUAGE_NAME.setdefault(_alias, _TO_LANGUAGE_CODE[_code])
del _alias, _code
def get_language_from_code(language_code, default=None) -> Language:
    """Return the entry registered under *language_code*.

    String codes are matched case-insensitively and ignoring surrounding
    whitespace, mirroring ``get_language_from_name``.

    Args:
        language_code: Key to look up in ``_TO_LANGUAGE_CODE``.
        default: Value returned when the key is unknown.
    """
    if isinstance(language_code, str):
        language_code = language_code.strip().lower()
    return _TO_LANGUAGE_CODE.get(language_code, default)
def get_language_from_name(language, default=None) -> Language:
    """Return the entry registered under the display name *language*.

    The lookup is case-insensitive and ignores surrounding whitespace.

    Args:
        language: Language name such as "English"; an empty or non-string
            value yields *default*.
        default: Value returned when the name is unknown.
    """
    if not isinstance(language, str) or not language:
        return default
    return _FROM_LANGUAGE_NAME.get(language.strip().lower(), default)
def get_language_names():
    """Return the display names of all entries in ``LANGUAGES``, in list order."""
    names = []
    for entry in LANGUAGES:
        names.append(entry.name)
    return names
if __name__ == "__main__":
    # Manual smoke test: print one lookup by code, one by name, and all names.
    for probe in (
        get_language_from_code('en'),
        get_language_from_name('English'),
        get_language_names(),
    ):
        print(probe)

packages.txt ADDED Viewed

	@@ -0,0 +1,42 @@

+ffmpeg
+libnss3
+libnspr4
+libatk1.0-0
+libatk-bridge2.0-0
+libcups2
+libxcomposite1
+libxdamage1
+libxrandr2
+libgbm1
+libpango-1.0-0
+libpangocairo-1.0-0
+libasound2
+libxshmfence1
+libx11-xcb1
+libxext6
+libxtst6
+libxinerama1
+libwayland-client0
+libwayland-cursor0
+libwayland-egl1
+libdbus-1-3
+libatspi2.0-0
+libdrm2
+libgtk-3-0
+libgdk-pixbuf2.0-0
+libgstreamer1.0-0
+libwoff1
+libgstreamer-plugins-base1.0-0
+libgstreamer-gl1.0-0
+libharfbuzz-icu0
+libenchant-2-2
+libsecret-1-0
+libhyphen0
+libmanette-0.2-0
+libgles2
+libgstreamer1.0-0
+libgstreamer-plugins-base1.0-0
+gstreamer1.0-plugins-good
+gstreamer1.0-plugins-bad
+gstreamer1.0-plugins-ugly

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+transformers
+pydub
+yt-dlp
+accelerate
+playwright

subtitle.py ADDED Viewed

	@@ -0,0 +1,101 @@

class Subtitle:
    """Render timestamped segments as SRT, WebVTT, plain text or LRC.

    Each segment is a mapping with a ``text`` string and a ``timestamp``
    pair ``(start, end)`` in seconds; ``end`` may be ``None``.
    """

    def __init__(self, ext="srt"):
        """Select the output format.

        Args:
            ext: One of "srt", "vtt", "txt" or "lrc".

        Raises:
            KeyError: If *ext* is not a supported format.
        """
        sub_dict = {
            "srt": {
                "coma": ",",
                "header": "",
                "format": self._srt_format,
            },
            "vtt": {
                "coma": ".",
                # The WebVTT signature is the exact upper-case string "WEBVTT".
                "header": "WEBVTT\n\n",
                "format": self._vtt_format,
            },
            "txt": {
                "coma": "",
                "header": "",
                "format": self._txt_format,
            },
            "lrc": {
                "coma": "",
                "header": "",
                "format": self._lrc_format,
            },
        }
        self.ext = ext
        self.coma = sub_dict[ext]["coma"]  # separator before the milliseconds
        self.header = sub_dict[ext]["header"]
        self.format_fn = sub_dict[ext]["format"]

    def timeformat(self, time):
        """Format *time* (seconds) as ``HH:MM:SS<sep>mmm``.

        The value is rounded to whole milliseconds first; truncating the
        fractional part separately turned 1.001 into ``...01,000``.
        """
        total_ms = int(round(time * 1000))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        seconds, milliseconds = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}{self.coma}{milliseconds:03d}"

    def seconds_to_lrc_timestamp(self, time):
        """Format *time* (seconds) as an LRC tag ``[MM:SS.mmm]``.

        Integer millisecond arithmetic keeps the seconds field below 60;
        formatting the float directly could render 59.9996 as ``60.000``.
        """
        total_ms = int(round(time * 1000))
        minutes, remainder = divmod(total_ms, 60_000)
        seconds, milliseconds = divmod(remainder, 1000)
        return f"[{minutes:02d}:{seconds:02d}.{milliseconds:03d}]"

    def _time_range(self, segment):
        """Return formatted (start, end); a missing end falls back to start."""
        start, end = segment['timestamp'][0], segment['timestamp'][1]
        return self.timeformat(start), self.timeformat(end if end is not None else start)

    def _srt_format(self, i, segment):
        start_time, end_time = self._time_range(segment)
        return f"{i + 1}\n{start_time} --> {end_time}\n{segment['text']}\n\n"

    def _vtt_format(self, i, segment):
        start_time, end_time = self._time_range(segment)
        return f"{start_time} --> {end_time}\n{segment['text']}\n\n"

    def _txt_format(self, i, segment):
        return f"{segment['text']}\n"

    def _lrc_format(self, i, segment):
        start_time = self.seconds_to_lrc_timestamp(segment['timestamp'][0])
        return f"{start_time}{segment['text']}\n"

    def get_subtitle(self, segments):
        """Render *segments* in the selected format.

        Leading whitespace is stripped from each text on a copy, so the
        caller's segments are left untouched. A segment that cannot be
        formatted is printed and skipped.

        Returns:
            The format header followed by every rendered segment.
        """
        parts = [self.header]
        for i, segment in enumerate(segments):
            cleaned = {**segment, "text": segment['text'].lstrip()}
            try:
                parts.append(self.format_fn(i, cleaned))
            except Exception as e:
                # Best effort: one malformed segment must not lose the rest.
                print(e, cleaned)
        return "".join(parts)

    def write_subtitle(self, segments, output_file):
        """Write the rendered subtitle to ``<output_file>.<ext>`` as UTF-8."""
        output_file_with_ext = f"{output_file}.{self.ext}"
        subtitle = self.get_subtitle(segments)
        with open(output_file_with_ext, 'w', encoding='utf-8') as f:
            f.write(subtitle)
def write_file(output_file,subtitle):
    """Write the text *subtitle* to *output_file* as UTF-8, replacing any existing file."""
    with open(output_file, mode='w', encoding='utf-8') as handle:
        handle.write(subtitle)
def subtitle_output(inputs, chunks):
    """Write LRC, SRT, VTT and TXT renderings of *chunks* to the working directory.

    Args:
        inputs: Path-like string of the source media; its base name without
            the final extension names the output files.
        chunks: Timestamped segments accepted by ``Subtitle.get_subtitle``.

    Returns:
        A pair ``(lrc_text, files_out)`` where ``files_out`` lists the four
        written file names in the order lrc, srt, vtt, txt.
    """
    import os

    # splitext drops only the last extension, so "my.talk.mp3" gives "my.talk"
    # rather than being cut at the first dot.
    file_name = os.path.splitext(os.path.basename(inputs))[0] or "output"
    rendered = {ext: Subtitle(ext).get_subtitle(chunks) for ext in ("lrc", "srt", "vtt", "txt")}
    files_out = []
    for ext, content in rendered.items():
        path = f"{file_name}.{ext}"
        write_file(path, content)
        files_out.append(path)
    return rendered["lrc"], files_out
def text_output(inputs, text):
    """Write *text* to ``<stem>.txt`` in the working directory.

    Args:
        inputs: Path-like string of the source media; its base name without
            the final extension names the output file.
        text: Transcript to save.

    Returns:
        A pair ``(text, files_out)`` where ``files_out`` holds the written
        file name.
    """
    import os

    # splitext drops only the last extension, so "my.talk.mp3" gives "my.talk"
    # rather than being cut at the first dot.
    file_name = os.path.splitext(os.path.basename(inputs))[0] or "output"
    files_out = [file_name + ".txt"]
    write_file(files_out[0], text)
    return text, files_out