Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ from fastapi import FastAPI, Response, Request
|
|
5 |
import yt_dlp
|
6 |
import uvicorn
|
7 |
import re
|
|
|
8 |
|
9 |
CUSTOM_PATH = "/gradio"
|
10 |
|
@@ -23,6 +24,22 @@ def read_main():
|
|
23 |
# Stream the subtitle as a response
|
24 |
#return StreamingResponse(requests.get(subtitle_url, stream=True).iter_content(chunk_size=1024))
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
def get_subtitle(url, lang='en'):
|
28 |
if lang is None:
|
@@ -34,18 +51,24 @@ def get_subtitle(url, lang='en'):
|
|
34 |
'subtitleslangs': [lang],
|
35 |
'skip_download': True,
|
36 |
}
|
37 |
-
|
|
|
38 |
info_dict = ydl.extract_info(url, download=True)
|
39 |
video_id = info_dict.get("id", None)
|
40 |
if video_id is None:
|
41 |
return None
|
42 |
|
|
|
43 |
print(info_dict)
|
44 |
subtitle_file = f"{video_id}.{lang}.vtt"
|
45 |
with open(subtitle_file, 'r') as f:
|
46 |
subtitle_content = f.read()
|
47 |
subtitle_content = re.sub(r"<[^>]+>", "", subtitle_content)
|
48 |
return subtitle_content
|
|
|
|
|
|
|
|
|
49 |
return None
|
50 |
|
51 |
def download_audio(video_url, quality: str = '128', speed: float = None):
|
@@ -80,15 +103,13 @@ def get_transcript(url, model_size, lang, format):
|
|
80 |
if lang == "None":
|
81 |
lang = None
|
82 |
|
83 |
-
subtitle =
|
84 |
print(subtitle)
|
85 |
if subtitle:
|
86 |
return subtitle
|
87 |
|
88 |
model = whisper.load_model(model_size)
|
89 |
|
90 |
-
|
91 |
-
|
92 |
result = model.transcribe(download_audio(url), fp16=False, language=lang)
|
93 |
|
94 |
if format == "None":
|
|
|
5 |
import yt_dlp
|
6 |
import uvicorn
|
7 |
import re
|
8 |
+
import os
|
9 |
|
10 |
CUSTOM_PATH = "/gradio"
|
11 |
|
|
|
24 |
# Stream the subtitle as a response
|
25 |
#return StreamingResponse(requests.get(subtitle_url, stream=True).iter_content(chunk_size=1024))
|
26 |
|
27 |
+
def download_subtitle(url: str, lang: Optional[str] = None) -> Optional[str]:
    """Fetch the text of a subtitle track for ``url`` without downloading media.

    The video metadata is extracted with yt-dlp (``download=False``) and the
    subtitle file is then read straight from the URL listed in that metadata.

    Args:
        url: Address of the video page to inspect.
        lang: Preferred subtitle language code. When it is ``None`` or not
            among the available tracks, the first available language is used.

    Returns:
        The decoded subtitle file contents, or ``None`` when the metadata
        lists no subtitle track that carries a URL.
    """
    ydl_opts = {
        "writesubtitles": True,
        "allsubtitles": True,
        "subtitleslangs": [lang] if lang else [],
        "skip_download": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=False)
        # extract_info may yield nothing usable; treat that as "no subtitles".
        subtitles = (info_dict or {}).get("subtitles") or {}
        if not subtitles:
            return None

        # "subtitles" is a mapping of language code -> list of format dicts,
        # so it has to be indexed by language, not by position.
        if lang and lang in subtitles:
            tracks = subtitles[lang]
        else:
            # Fall back to the first available language.
            tracks = next(iter(subtitles.values()))

        # Some format entries may not carry a URL; take the first one that does.
        subtitle_url = next(
            (track.get("url") for track in tracks or [] if track.get("url")),
            None,
        )
        if subtitle_url is None:
            return None

        with ydl.urlopen(subtitle_url) as subtitle:
            return subtitle.read().decode()
|
43 |
|
44 |
def get_subtitle(url, lang='en'):
|
45 |
if lang is None:
|
|
|
51 |
'subtitleslangs': [lang],
|
52 |
'skip_download': True,
|
53 |
}
|
54 |
+
try:
|
55 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
56 |
info_dict = ydl.extract_info(url, download=True)
|
57 |
video_id = info_dict.get("id", None)
|
58 |
if video_id is None:
|
59 |
return None
|
60 |
|
61 |
+
print(os.list)
|
62 |
print(info_dict)
|
63 |
subtitle_file = f"{video_id}.{lang}.vtt"
|
64 |
with open(subtitle_file, 'r') as f:
|
65 |
subtitle_content = f.read()
|
66 |
subtitle_content = re.sub(r"<[^>]+>", "", subtitle_content)
|
67 |
return subtitle_content
|
68 |
+
except error:
|
69 |
+
print(error)
|
70 |
+
return None
|
71 |
+
|
72 |
return None
|
73 |
|
74 |
def download_audio(video_url, quality: str = '128', speed: float = None):
|
|
|
103 |
if lang == "None":
|
104 |
lang = None
|
105 |
|
106 |
+
subtitle = download_subtitle(url, lang)
|
107 |
print(subtitle)
|
108 |
if subtitle:
|
109 |
return subtitle
|
110 |
|
111 |
model = whisper.load_model(model_size)
|
112 |
|
|
|
|
|
113 |
result = model.transcribe(download_audio(url), fp16=False, language=lang)
|
114 |
|
115 |
if format == "None":
|