lanbogao committed on
Commit
43f5428
1 Parent(s): a6f578a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -12
app.py CHANGED
@@ -7,20 +7,22 @@ import yt_dlp
7
  langs = ["None"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
8
  model_size = list(whisper._MODELS.keys())
9
 
10
def get_subtitles(url, langs=None):
    """Look up the subtitle tracks yt-dlp reports for ``url``.

    Nothing is downloaded: the video metadata is extracted with
    ``download=False`` and only its ``"subtitles"`` entry is inspected.

    Args:
        url: Address of the video to query.
        langs: Subtitle language codes passed to yt-dlp as
            ``subtitleslangs``. Defaults to ``['en']``.

    Returns:
        The non-empty ``"subtitles"`` value from the extracted info, or
        ``None`` when the entry is missing or empty.
    """
    # A fresh list per call: a mutable default would be shared between calls
    # and is handed to an external library inside ``ydl_opts``.
    if langs is None:
        langs = ['en']

    # Download subtitles if available
    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': langs,
        'skip_download': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(url, download=False)

    # Collapse "missing" and "present but empty" into a single None result.
    return result.get("subtitles") or None
 
 
24
 
25
  def download_audio(video_url, quality: str = '128', speed: float = None):
26
  ydl_opts = {
@@ -52,10 +54,9 @@ def get_audio(url):
52
  return yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
53
 
54
  def get_transcript(url, model_size, lang, format):
55
- subtitles = get_subtitles(url, langs)
56
- if subtitles:
57
- print(subtitles)
58
- return subtitles.get(lang)
59
 
60
  model = whisper.load_model(model_size)
61
 
 
7
  langs = ["None"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
8
  model_size = list(whisper._MODELS.keys())
9
 
10
def get_subtitle(url, lang='en'):
    """Fetch the subtitle file for ``url`` in language ``lang`` via yt-dlp.

    Args:
        url: Address of the video whose subtitles are wanted.
        lang: Single subtitle language code; passed to yt-dlp as the only
            entry of ``subtitleslangs``.

    Returns:
        The relative path ``"<video id>.<lang>.vtt"`` if that file exists
        after the yt-dlp run, otherwise ``None`` (no video id in the
        extracted info, or no such file on disk).
    """
    # Function-scope import so the existence check below works even if the
    # module does not import ``os`` at top level.
    import os

    # Download subtitles if available
    ydl_opts = {
        'writesubtitles': True,
        'outtmpl': '%(id)s.%(ext)s',
        'subtitleslangs': [lang],
        'skip_download': True,
    }
    # The YoutubeDL instance must be created here; without this context
    # manager ``ydl`` is an undefined name and the call raises NameError.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)

    video_id = info_dict.get("id", None)
    if video_id is None:
        return None

    # NOTE(review): assumes yt-dlp names the subtitle "<id>.<lang>.vtt" for
    # this output template, i.e. that the track is delivered as WebVTT.
    # Confirm for sources that serve other subtitle formats.
    subtitle_file = f"{video_id}.{lang}.vtt"
    if not os.path.exists(subtitle_file):
        return None
    return subtitle_file
26
 
27
  def download_audio(video_url, quality: str = '128', speed: float = None):
28
  ydl_opts = {
 
54
  return yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
55
 
56
  def get_transcript(url, model_size, lang, format):
57
+ subtitle = get_subtitle(url, lang)
58
+ print(subtitle)
59
+ return subtitle
 
60
 
61
  model = whisper.load_model(model_size)
62