add video info to transcript
Browse files- handler.py +14 -4
handler.py
CHANGED
@@ -42,18 +42,28 @@ class EndpointHandler():
|
|
42 |
"verbose": True
|
43 |
}
|
44 |
yt = pytube.YouTube(video_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
stream = yt.streams.filter(only_audio=True)[0]
|
46 |
path_to_audio = f"{yt.video_id}.mp3"
|
47 |
stream.download(filename=path_to_audio)
|
48 |
t0 = time.time()
|
49 |
transcript = self.model.transcribe(path_to_audio, **decode_options)
|
50 |
t1 = time.time()
|
51 |
-
total = t1-t0
|
52 |
-
print(f'Finished transcription in {total} seconds')
|
53 |
for segment in transcript['segments']:
|
54 |
-
# Remove the tokens array, it
|
55 |
segment.pop('tokens', None)
|
56 |
|
|
|
|
|
57 |
|
58 |
# postprocess the prediction
|
59 |
-
return {"transcript": transcript}
|
|
|
42 |
"verbose": True
|
43 |
}
|
44 |
yt = pytube.YouTube(video_url)
|
45 |
+
video_info = {
|
46 |
+
'id': yt.video_id,
|
47 |
+
'thumbnail': yt.thumbnail_url,
|
48 |
+
'title': yt.title,
|
49 |
+
'views': yt.views,
|
50 |
+
'length': yt.length,
|
51 |
+
# Although this might seem redundant since we already have the id,
|
52 |
+
# but it allows the link to the video to be accessed in 1-click in the API response
|
53 |
+
'url': f"https://www.youtube.com/watch?v={yt.video_id}"
|
54 |
+
}
|
55 |
stream = yt.streams.filter(only_audio=True)[0]
|
56 |
path_to_audio = f"{yt.video_id}.mp3"
|
57 |
stream.download(filename=path_to_audio)
|
58 |
t0 = time.time()
|
59 |
transcript = self.model.transcribe(path_to_audio, **decode_options)
|
60 |
t1 = time.time()
|
|
|
|
|
61 |
for segment in transcript['segments']:
|
62 |
+
# Remove the tokens array, it makes the response too verbose
|
63 |
segment.pop('tokens', None)
|
64 |
|
65 |
+
total = t1-t0
|
66 |
+
print(f'Finished transcription in {total} seconds')
|
67 |
|
68 |
# postprocess the prediction
|
69 |
+
return {"transcript": transcript, 'video': video_info}
|