tomiwa1a committed on
Commit
97023c4
1 Parent(s): c8f6664

add video info to transcript

Browse files
Files changed (1) hide show
  1. handler.py +14 -4
handler.py CHANGED
@@ -42,18 +42,28 @@ class EndpointHandler():
42
  "verbose": True
43
  }
44
  yt = pytube.YouTube(video_url)
 
 
 
 
 
 
 
 
 
 
45
  stream = yt.streams.filter(only_audio=True)[0]
46
  path_to_audio = f"{yt.video_id}.mp3"
47
  stream.download(filename=path_to_audio)
48
  t0 = time.time()
49
  transcript = self.model.transcribe(path_to_audio, **decode_options)
50
  t1 = time.time()
51
- total = t1-t0
52
- print(f'Finished transcription in {total} seconds')
53
  for segment in transcript['segments']:
54
- # Remove the tokens array, it was making response too verbose
55
  segment.pop('tokens', None)
56
 
 
 
57
 
58
  # postprocess the prediction
59
- return {"transcript": transcript}
 
42
  "verbose": True
43
  }
44
  yt = pytube.YouTube(video_url)
45
+ video_info = {
46
+ 'id': yt.video_id,
47
+ 'thumbnail': yt.thumbnail_url,
48
+ 'title': yt.title,
49
+ 'views': yt.views,
50
+ 'length': yt.length,
51
+ # Although this might seem redundant since we already have the id,
52
+ # it lets the link to the video be opened in one click from the API response
53
+ 'url': f"https://www.youtube.com/watch?v={yt.video_id}"
54
+ }
55
  stream = yt.streams.filter(only_audio=True)[0]
56
  path_to_audio = f"{yt.video_id}.mp3"
57
  stream.download(filename=path_to_audio)
58
  t0 = time.time()
59
  transcript = self.model.transcribe(path_to_audio, **decode_options)
60
  t1 = time.time()
 
 
61
  for segment in transcript['segments']:
62
+ # Remove the tokens array, it makes the response too verbose
63
  segment.pop('tokens', None)
64
 
65
+ total = t1-t0
66
+ print(f'Finished transcription in {total} seconds')
67
 
68
  # postprocess the prediction
69
+ return {"transcript": transcript, 'video': video_info}