ytdlp-whisper

Running

App Files Files Community

lanbogao committed on Apr 3, 2023

Commit

b1e39f9

•

1 Parent(s): 4962756

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -4

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from fastapi import FastAPI, Response, Request
 import yt_dlp
 import uvicorn
 import re
 CUSTOM_PATH = "/gradio"
@@ -23,6 +24,22 @@ def read_main():
     # Stream the subtitle as a response
     #return StreamingResponse(requests.get(subtitle_url, stream=True).iter_content(chunk_size=1024))
 def get_subtitle(url, lang='en'):
     if lang is None:
@@ -34,18 +51,24 @@ def get_subtitle(url, lang='en'):
         'subtitleslangs': [lang],
         'skip_download': True,
     }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info_dict = ydl.extract_info(url, download=True)
         video_id = info_dict.get("id", None)
         if video_id is None:
             return None
         print(info_dict)
         subtitle_file = f"{video_id}.{lang}.vtt"
         with open(subtitle_file, 'r') as f:
             subtitle_content = f.read()
             subtitle_content = re.sub(r"<[^>]+>", "", subtitle_content)
             return subtitle_content
     return None
 def download_audio(video_url, quality: str = '128', speed: float = None):
@@ -80,15 +103,13 @@ def get_transcript(url, model_size, lang, format):
     if lang == "None":
         lang = None
-    subtitle = get_subtitle(url, lang)
     print(subtitle)
     if subtitle:
         return subtitle
     model = whisper.load_model(model_size)
     result = model.transcribe(download_audio(url), fp16=False, language=lang)
     if format == "None":

 import yt_dlp
 import uvicorn
 import re
+import os
 CUSTOM_PATH = "/gradio"
     # Stream the subtitle as a response
     #return StreamingResponse(requests.get(subtitle_url, stream=True).iter_content(chunk_size=1024))
+def download_subtitle(url: str, lang: Optional[str] = None) -> Optional[str]:
+    ydl_opts = {
+        "writesubtitles": True,
+        "allsubtitles": True,
+        "subtitleslangs": [lang] if lang else [],
+        "skip_download": True,
+    }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        info_dict = ydl.extract_info(url, download=False)
+        if info_dict.get("subtitles"):
+            # get first available subtitle
+            subtitle_url = info_dict["subtitles"][0]["url"]
+            with ydl.urlopen(subtitle_url) as subtitle:
+                return subtitle.read().decode()
+    return None
 def get_subtitle(url, lang='en'):
     if lang is None:
         'subtitleslangs': [lang],
         'skip_download': True,
     }
+    try:
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info_dict = ydl.extract_info(url, download=True)
         video_id = info_dict.get("id", None)
         if video_id is None:
             return None
+        print(os.list)
         print(info_dict)
         subtitle_file = f"{video_id}.{lang}.vtt"
         with open(subtitle_file, 'r') as f:
             subtitle_content = f.read()
             subtitle_content = re.sub(r"<[^>]+>", "", subtitle_content)
             return subtitle_content
+    except error:
+        print(error)
+        return None
     return None
 def download_audio(video_url, quality: str = '128', speed: float = None):
     if lang == "None":
         lang = None
+    subtitle = download_subtitle(url, lang)
     print(subtitle)
     if subtitle:
         return subtitle
     model = whisper.load_model(model_size)
     result = model.transcribe(download_audio(url), fp16=False, language=lang)
     if format == "None":