Spaces:
Runtime error
Runtime error
Added UUID & Media Metadata
Browse files
- helperfunctions.py +42 -3
- main.py +73 -107
- media_download.py +33 -2
- models.py +23 -0
- pytorch_test.py +0 -4
helperfunctions.py
CHANGED
@@ -1,7 +1,29 @@
|
|
1 |
import os
|
|
|
|
|
|
|
2 |
import subprocess
|
3 |
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
def extract_audio(video_path):
|
6 |
"""
|
7 |
Extract audio from a video file (MP4 or WebM) and save it as an MP3 file using ffmpeg.
|
@@ -62,7 +84,6 @@ def convert_to_srt_time_format(seconds):
|
|
62 |
'''
|
63 |
Converts seconds into .srt format
|
64 |
'''
|
65 |
-
|
66 |
try:
|
67 |
hours = seconds // 3600
|
68 |
seconds %= 3600
|
@@ -78,7 +99,6 @@ def save_translated_subtitles(subtitles, media_path):
|
|
78 |
'''
|
79 |
Saves the translated subtitles into .srt file
|
80 |
'''
|
81 |
-
|
82 |
try:
|
83 |
# Converting to SRT Format
|
84 |
srt_content = ""
|
@@ -103,4 +123,23 @@ def save_translated_subtitles(subtitles, media_path):
|
|
103 |
return subtitles_filename
|
104 |
|
105 |
except Exception as e:
|
106 |
-
print(f"Error in save_translated_subtitles: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import time
|
3 |
+
import uuid
|
4 |
+
import hashlib
|
5 |
import subprocess
|
6 |
|
7 |
|
8 |
+
def generate_uuid(user_ip, url):
    """
    Generates a uuid based on User IP, URL & timestamp

    Args:
        user_ip: Client IP address as a string.
        url: Requested media URL as a string.

    Returns:
        String form of a version-5 UUID derived from the SHA-1 digests of
        ``user_ip`` and ``url`` combined with the current time in nanoseconds.
    """
    # Hashing User IP and URL using SHA-1 so the raw values are not part of the UUID name
    hashed_user_ip = hashlib.sha1(user_ip.encode('utf-8')).hexdigest()
    hashed_url = hashlib.sha1(url.encode('utf-8')).hexdigest()

    # Nanosecond timestamp: with whole seconds, two requests for the same URL
    # from the same IP within one second would get the same identifier.
    unique_id = f"{hashed_user_ip}-{hashed_url}-{time.time_ns()}"

    # Generate a UUID from the Unique Identifier
    return str(uuid.uuid5(uuid.NAMESPACE_URL, unique_id))
|
26 |
+
|
27 |
def extract_audio(video_path):
|
28 |
"""
|
29 |
Extract audio from a video file (MP4 or WebM) and save it as an MP3 file using ffmpeg.
|
|
|
84 |
'''
|
85 |
Converts seconds into .srt format
|
86 |
'''
|
|
|
87 |
try:
|
88 |
hours = seconds // 3600
|
89 |
seconds %= 3600
|
|
|
99 |
'''
|
100 |
Saves the translated subtitles into .srt file
|
101 |
'''
|
|
|
102 |
try:
|
103 |
# Converting to SRT Format
|
104 |
srt_content = ""
|
|
|
123 |
return subtitles_filename
|
124 |
|
125 |
except Exception as e:
|
126 |
+
print(f"Error in save_translated_subtitles: {e}")
|
127 |
+
|
128 |
+
def convert_audio(input_file, audio_format, audio_quality):
    '''
    Converts the audio according to the given audio parameters

    Args:
        input_file: Path of the audio file to convert.
        audio_format: Extension of the output file (e.g. "mp3").
        audio_quality: Target audio bitrate such as "128kbps"; a value
            without the "bps" suffix (e.g. "128k") is passed through unchanged.

    Returns:
        Path of the converted file, or None if the conversion failed.
    '''
    try:
        # Getting Output File Path
        # (splitext only inspects the file name, so dots in directory names are safe)
        output_file = f"{os.path.splitext(input_file)[0]}_converted.{audio_format}"

        # Stripping the "bps" suffix so that "128kbps" becomes "128k"
        bitrate = audio_quality[:-3] if audio_quality.endswith('bps') else audio_quality

        # Running the command using Subprocess
        # Global options go first; anything placed after the output file is a trailing option
        command = [
            'ffmpeg', '-loglevel', 'quiet',
            '-i', input_file,
            '-b:a', bitrate,
            output_file
        ]
        # check=True turns a non-zero ffmpeg exit status into an exception
        subprocess.run(command, check=True)

        return output_file

    except Exception as e:
        print(f"Error in convert_audio: {e}")
        return None
|
main.py
CHANGED
@@ -1,22 +1,44 @@
|
|
1 |
import os
|
2 |
import json
|
3 |
|
4 |
-
# import pytorch_test
|
5 |
-
|
6 |
import uvicorn
|
7 |
from fastapi import FastAPI, Request
|
8 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
9 |
|
|
|
|
|
10 |
from media_download import YoutubeDownloader
|
11 |
# from transcription import StableWhisper
|
12 |
# from summarizer import Extract_Summary, AudioBookNarration
|
13 |
# from audiobook import AudioBook
|
14 |
-
|
|
|
15 |
|
16 |
|
17 |
### API Configurations
|
18 |
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
# Output Directory for Files Storage
|
22 |
output_folder = 'Output'
|
@@ -44,9 +66,9 @@ app.add_middleware(
|
|
44 |
@app.get("/get_media_metadata")
|
45 |
async def get_media_metadata(request: Request, url: str):
|
46 |
|
47 |
-
# Getting User's IP
|
48 |
-
|
49 |
-
|
50 |
|
51 |
# Getting User's Youtube Downloader
|
52 |
youtube_downloader = YoutubeDownloader(url, output_folder)
|
@@ -59,67 +81,55 @@ async def get_media_metadata(request: Request, url: str):
|
|
59 |
|
60 |
if status:
|
61 |
# Storing Info in the context for this user's session
|
62 |
-
users_context[
|
63 |
-
users_context[
|
64 |
-
# users_context[
|
65 |
-
users_context[
|
66 |
|
67 |
-
return {'status': status, 'media_metadata': media_metadata}
|
68 |
|
69 |
|
70 |
@app.get("/get_media_formats")
|
71 |
-
async def get_media_formats(
|
72 |
-
|
73 |
-
# Getting User's IP
|
74 |
-
# user_ip = request.client.host
|
75 |
-
user_ip = 1
|
76 |
|
77 |
# Downloading Media for User
|
78 |
-
media_formats = users_context[
|
79 |
|
80 |
# Getting Status
|
81 |
status = 1 if media_formats else 0
|
82 |
|
83 |
if status:
|
84 |
# Storing Media Info in the context for this user's session
|
85 |
-
users_context[
|
86 |
|
87 |
return {'status': status, 'media_formats': media_formats}
|
88 |
|
89 |
|
90 |
@app.get("/download_media")
|
91 |
-
async def download_media(
|
92 |
-
|
93 |
-
# Getting User's IP
|
94 |
-
# user_ip = request.client.host
|
95 |
-
user_ip = 1
|
96 |
|
97 |
# Downloading Media for User
|
98 |
-
media_path = users_context[
|
99 |
|
100 |
# Getting Status
|
101 |
status = 1 if media_path else 0
|
102 |
|
103 |
if status:
|
104 |
# Storing Media Info in the context for this user's session
|
105 |
-
users_context[
|
106 |
-
users_context[
|
107 |
|
108 |
return {'status': status, 'media_path': media_path}
|
109 |
|
110 |
|
111 |
@app.get("/get_transcript")
|
112 |
-
async def get_transcript(
|
113 |
-
|
114 |
-
# Getting User's IP
|
115 |
-
# user_ip = request.client.host
|
116 |
-
user_ip = 1
|
117 |
|
118 |
# Retrieving the media_path from the context for this user's session
|
119 |
-
media_path = users_context[
|
120 |
|
121 |
# Checking if the media_type is Video, then extract it's audio
|
122 |
-
media_type = users_context[
|
123 |
if media_type == 'video':
|
124 |
media_path = extract_audio(media_path)
|
125 |
|
@@ -144,21 +154,17 @@ async def get_transcript(request: Request, subtitle_format: str = 'srt', word_le
|
|
144 |
|
145 |
if status:
|
146 |
# Storing Transcript Info in the context for this user's session
|
147 |
-
users_context[
|
148 |
-
users_context[
|
149 |
|
150 |
return {'status': status, "transcript": transcript}
|
151 |
|
152 |
|
153 |
@app.get("/get_translation")
|
154 |
-
async def get_translation(
|
155 |
-
|
156 |
-
# Getting User's IP
|
157 |
-
# user_ip = request.client.host
|
158 |
-
user_ip = 1
|
159 |
|
160 |
# Retrieving the transcript from the context for this user's session
|
161 |
-
transcript = users_context[
|
162 |
|
163 |
# # # NLLB based Translation
|
164 |
# nllb_translator = Translation(transcript, transcript['language'], target_language, 'output_path')
|
@@ -180,24 +186,20 @@ async def get_translation(request: Request, target_language: str = 'en'):
|
|
180 |
|
181 |
if status:
|
182 |
# Storing Translated Transcript Info in the context for this user's session
|
183 |
-
users_context[
|
184 |
-
users_context[
|
185 |
-
# users_context[
|
186 |
|
187 |
return {'status': status, "transcript": translated_transcript, "subtitles": translated_subtitles}
|
188 |
|
189 |
|
190 |
@app.get("/get_summary")
|
191 |
-
async def get_summary(
|
192 |
Response_length: str, Writing_style: str, text_input: str = None):
|
193 |
-
|
194 |
-
# Getting User's IP
|
195 |
-
# user_ip = request.client.host
|
196 |
-
user_ip = 1
|
197 |
|
198 |
# Getting Transcript if not provided
|
199 |
if not text_input:
|
200 |
-
text_input = users_context[
|
201 |
|
202 |
# # Extracting Summary
|
203 |
# summary_extractor = Extract_Summary(text_input=text_input)
|
@@ -219,22 +221,18 @@ async def get_summary(request: Request, Summary_type: str, Summary_strategy: str
|
|
219 |
|
220 |
if status:
|
221 |
# Storing Summary Info in the context for this user's session
|
222 |
-
users_context[
|
223 |
|
224 |
return {'status': status, "summary": output}
|
225 |
|
226 |
|
227 |
@app.get("/get_key_info")
|
228 |
-
async def get_key_info(
|
229 |
Response_length: str, Writing_style: str, text_input: str = None):
|
230 |
-
|
231 |
-
# Getting User's IP
|
232 |
-
# user_ip = request.client.host
|
233 |
-
user_ip = 1
|
234 |
|
235 |
# Getting Transcript if not provided
|
236 |
if not text_input:
|
237 |
-
text_input = users_context[
|
238 |
|
239 |
# # Extracting Summary
|
240 |
# summary_extractor = Extract_Summary(text_input=text_input)
|
@@ -256,52 +254,18 @@ async def get_key_info(request: Request, Summary_type: str, Summary_strategy: st
|
|
256 |
|
257 |
if status:
|
258 |
# Storing Key Info in the context for this user's session
|
259 |
-
users_context[
|
260 |
|
261 |
return {'status': status, "key_info": output}
|
262 |
|
263 |
|
264 |
-
# @app.get("/get_narration")
|
265 |
-
# async def get_narration(request: Request, narration_style: str, text_input: str = None):
|
266 |
-
|
267 |
-
# # Getting User's IP
|
268 |
-
# # user_ip = request.client.host
|
269 |
-
# user_ip = 1
|
270 |
-
|
271 |
-
# # Getting Transcript if not provided
|
272 |
-
# if not text_input:
|
273 |
-
# text_input = users_context[user_ip]['transcript']
|
274 |
-
|
275 |
-
# # # Extracting Narration
|
276 |
-
# # narrator = AudioBookNarration(text_input=text_input)
|
277 |
-
# # output = narrator.define_chain(narration_style=narration_style)
|
278 |
-
|
279 |
-
# temp_dir = 'temp'
|
280 |
-
# file_path = os.path.join(temp_dir, 'narration.txt')
|
281 |
-
# with open(file_path, 'r') as file:
|
282 |
-
# output = file.read()
|
283 |
-
|
284 |
-
|
285 |
-
# # Getting Status
|
286 |
-
# status = 1 if output else 0
|
287 |
-
|
288 |
-
# if status:
|
289 |
-
# # Storing Narration Info in the context for this user's session
|
290 |
-
# users_context[user_ip]['narration'] = output
|
291 |
-
|
292 |
-
# return {'status': status, "narration": output}
|
293 |
-
|
294 |
-
|
295 |
@app.get("/get_audiobook")
|
296 |
-
async def get_audiobook(
|
297 |
-
|
298 |
-
# Getting User's IP
|
299 |
-
# user_ip = request.client.host
|
300 |
-
user_ip = 1
|
301 |
|
302 |
# Getting Transcript if not provided
|
303 |
if not text_input:
|
304 |
-
text_input = users_context[
|
305 |
|
306 |
# # Extracting Narration
|
307 |
|
@@ -312,6 +276,9 @@ async def get_audiobook(request: Request, output_type : str, narration_style: st
|
|
312 |
# audiobook = AudioBook(output_folder=output_folder)
|
313 |
# audio_path = audiobook.generate_audio_from_text(output, speaker=speaker, filename="output_audio")
|
314 |
|
|
|
|
|
|
|
315 |
temp_dir = 'temp'
|
316 |
file_path = os.path.join(temp_dir, 'narration.txt')
|
317 |
|
@@ -322,29 +289,28 @@ async def get_audiobook(request: Request, output_type : str, narration_style: st
|
|
322 |
|
323 |
if status:
|
324 |
# Storing Audiobook path in the context for this user's session
|
325 |
-
users_context[
|
326 |
|
327 |
return {'status': status, "audiobook_path": audio_path}
|
328 |
|
329 |
|
330 |
@app.get("/get_rendered_video")
|
331 |
-
async def get_rendered_video(
|
332 |
|
333 |
-
#
|
334 |
-
#
|
335 |
-
user_ip = 1
|
336 |
|
337 |
-
#
|
338 |
-
media_path = users_context[
|
339 |
|
340 |
# Getting Required Subtitles
|
341 |
-
if subtitles_type == 'original':
|
342 |
-
subtitles_path = users_context[
|
343 |
|
344 |
-
elif subtitles_type == 'translated':
|
345 |
|
346 |
# Getting Translated Subtitles from the context for this user's session
|
347 |
-
translated_subtitles = users_context[
|
348 |
|
349 |
# Saving Translated Subtitles
|
350 |
subtitles_path = save_translated_subtitles(translated_subtitles, media_path)
|
|
|
1 |
import os
|
2 |
import json
|
3 |
|
|
|
|
|
4 |
import uvicorn
|
5 |
from fastapi import FastAPI, Request
|
6 |
from fastapi.middleware.cors import CORSMiddleware
|
7 |
+
from contextlib import asynccontextmanager
|
8 |
|
9 |
+
from models import load_models
|
10 |
+
from helperfunctions import *
|
11 |
from media_download import YoutubeDownloader
|
12 |
# from transcription import StableWhisper
|
13 |
# from summarizer import Extract_Summary, AudioBookNarration
|
14 |
# from audiobook import AudioBook
|
15 |
+
|
16 |
+
global MODELS
|
17 |
|
18 |
|
19 |
### API Configurations
|
20 |
|
21 |
+
# Context Manager for FastAPI Start/Shutdown
|
22 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Context Manager for FastAPI Start/Shutdown

    Loads the ML models before the application starts serving requests and
    clears them again when the application shuts down.
    """
    # Required so the assignment below rebinds the module-level MODELS;
    # without it the loaded models would only live in a local variable.
    global MODELS

    ## FastAPI Startup Code

    # TODO
    # Loading ML models
    print('Loading ML Models..')
    MODELS = load_models()
    print('ML Models Loaded!')

    try:
        yield
    finally:
        ## FastAPI Shutdown Code

        # Cleaning ML Models & Releasing the Resources
        MODELS.clear()
|
39 |
+
|
40 |
+
# Initializing FastAPI App
|
41 |
+
app = FastAPI(lifespan=lifespan)
|
42 |
|
43 |
# Output Directory for Files Storage
|
44 |
output_folder = 'Output'
|
|
|
66 |
@app.get("/get_media_metadata")
|
67 |
async def get_media_metadata(request: Request, url: str):
|
68 |
|
69 |
+
# Getting User's IP & Generating UUID
|
70 |
+
user_ip = request.client.host
|
71 |
+
user_id = generate_uuid(user_ip, url)
|
72 |
|
73 |
# Getting User's Youtube Downloader
|
74 |
youtube_downloader = YoutubeDownloader(url, output_folder)
|
|
|
81 |
|
82 |
if status:
|
83 |
# Storing Info in the context for this user's session
|
84 |
+
users_context[user_id] = dict()
|
85 |
+
users_context[user_id]['downloader'] = youtube_downloader
|
86 |
+
# users_context[user_id]['media_metadata'] = media_metadata
|
87 |
+
users_context[user_id]['url'] = url
|
88 |
|
89 |
+
return {'status': status, 'user_id': user_id, 'media_metadata': media_metadata}
|
90 |
|
91 |
|
92 |
@app.get("/get_media_formats")
async def get_media_formats(user_id: str):
    """
    Returns the media formats offered by the downloader stored for user_id.

    Response: {'status': 1, 'media_formats': <formats>} on success.
    status is 0 when the downloader reports no formats, or when no session
    exists for user_id (media_formats is None in that case).
    """
    # An unknown user_id used to surface as an unhandled KeyError
    try:
        user_context = users_context[user_id]
    except KeyError:
        return {'status': 0, 'media_formats': None}

    # Getting Media Formats from the User's Downloader
    media_formats = user_context['downloader'].get_media_formats()

    # Getting Status
    status = 1 if media_formats else 0

    if status:
        # Storing Media Info in the context for this user's session
        user_context['media_formats'] = media_formats

    return {'status': status, 'media_formats': media_formats}
|
106 |
|
107 |
|
108 |
@app.get("/download_media")
async def download_media(user_id: str, media_type: str, media_format: str, media_quality: str):
    """
    Downloads the media for user_id with the requested type, format and quality.

    Response: {'status': 1, 'media_path': <path>} on success.
    status is 0 when the downloader returns no path, or when no session
    exists for user_id (media_path is None in that case).
    """
    # An unknown user_id used to surface as an unhandled KeyError
    try:
        user_context = users_context[user_id]
    except KeyError:
        return {'status': 0, 'media_path': None}

    # Downloading Media for User
    media_path = user_context['downloader'].download(media_type, media_format, media_quality)

    # Getting Status
    status = 1 if media_path else 0

    if status:
        # Storing Media Info in the context for this user's session
        user_context['media_path'] = media_path
        user_context['media_type'] = media_type

    return {'status': status, 'media_path': media_path}
|
123 |
|
124 |
|
125 |
@app.get("/get_transcript")
|
126 |
+
async def get_transcript(user_id: str, subtitle_format: str = 'srt', word_level: bool = False):
|
|
|
|
|
|
|
|
|
127 |
|
128 |
# Retrieving the media_path from the context for this user's session
|
129 |
+
media_path = users_context[user_id]['media_path']
|
130 |
|
131 |
# Checking if the media_type is Video, then extract it's audio
|
132 |
+
media_type = users_context[user_id]['media_type']
|
133 |
if media_type == 'video':
|
134 |
media_path = extract_audio(media_path)
|
135 |
|
|
|
154 |
|
155 |
if status:
|
156 |
# Storing Transcript Info in the context for this user's session
|
157 |
+
users_context[user_id]['transcript'] = transcript
|
158 |
+
users_context[user_id]['transcript_path'] = transcript_path
|
159 |
|
160 |
return {'status': status, "transcript": transcript}
|
161 |
|
162 |
|
163 |
@app.get("/get_translation")
|
164 |
+
async def get_translation(user_id: str, target_language: str = 'en'):
|
|
|
|
|
|
|
|
|
165 |
|
166 |
# Retrieving the transcript from the context for this user's session
|
167 |
+
transcript = users_context[user_id]['transcript']
|
168 |
|
169 |
# # # NLLB based Translation
|
170 |
# nllb_translator = Translation(transcript, transcript['language'], target_language, 'output_path')
|
|
|
186 |
|
187 |
if status:
|
188 |
# Storing Translated Transcript Info in the context for this user's session
|
189 |
+
users_context[user_id]['translated_transcript'] = translated_transcript
|
190 |
+
users_context[user_id]['translated_subtitles'] = translated_subtitles
|
191 |
+
# users_context[user_id]['transcript_path'] = transcript_path
|
192 |
|
193 |
return {'status': status, "transcript": translated_transcript, "subtitles": translated_subtitles}
|
194 |
|
195 |
|
196 |
@app.get("/get_summary")
|
197 |
+
async def get_summary(user_id: str, Summary_type: str, Summary_strategy: str, Target_Person_type: str,
|
198 |
Response_length: str, Writing_style: str, text_input: str = None):
|
|
|
|
|
|
|
|
|
199 |
|
200 |
# Getting Transcript if not provided
|
201 |
if not text_input:
|
202 |
+
text_input = users_context[user_id]['transcript']
|
203 |
|
204 |
# # Extracting Summary
|
205 |
# summary_extractor = Extract_Summary(text_input=text_input)
|
|
|
221 |
|
222 |
if status:
|
223 |
# Storing Summary Info in the context for this user's session
|
224 |
+
users_context[user_id]['summary'] = output
|
225 |
|
226 |
return {'status': status, "summary": output}
|
227 |
|
228 |
|
229 |
@app.get("/get_key_info")
|
230 |
+
async def get_key_info(user_id: str, Summary_type: str, Summary_strategy: str, Target_Person_type: str,
|
231 |
Response_length: str, Writing_style: str, text_input: str = None):
|
|
|
|
|
|
|
|
|
232 |
|
233 |
# Getting Transcript if not provided
|
234 |
if not text_input:
|
235 |
+
text_input = users_context[user_id]['transcript']
|
236 |
|
237 |
# # Extracting Summary
|
238 |
# summary_extractor = Extract_Summary(text_input=text_input)
|
|
|
254 |
|
255 |
if status:
|
256 |
# Storing Key Info in the context for this user's session
|
257 |
+
users_context[user_id]['key_info'] = output
|
258 |
|
259 |
return {'status': status, "key_info": output}
|
260 |
|
261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
262 |
@app.get("/get_audiobook")
|
263 |
+
async def get_audiobook(user_id: str, narration_style: str, speaker: str = "male", text_input: str = None,
|
264 |
+
audio_format: str = "mp3", audio_quality: str = "128kbps"):
|
|
|
|
|
|
|
265 |
|
266 |
# Getting Transcript if not provided
|
267 |
if not text_input:
|
268 |
+
text_input = users_context[user_id]['transcript']
|
269 |
|
270 |
# # Extracting Narration
|
271 |
|
|
|
276 |
# audiobook = AudioBook(output_folder=output_folder)
|
277 |
# audio_path = audiobook.generate_audio_from_text(output, speaker=speaker, filename="output_audio")
|
278 |
|
279 |
+
# # Converting the Audio to Required Audio Parameters
|
280 |
+
# audio_path = convert_audio(audio_path, audio_format, audio_quality)
|
281 |
+
|
282 |
temp_dir = 'temp'
|
283 |
file_path = os.path.join(temp_dir, 'narration.txt')
|
284 |
|
|
|
289 |
|
290 |
if status:
|
291 |
# Storing Audiobook path in the context for this user's session
|
292 |
+
users_context[user_id]['audiobook_path'] = audio_path
|
293 |
|
294 |
return {'status': status, "audiobook_path": audio_path}
|
295 |
|
296 |
|
297 |
@app.get("/get_rendered_video")
|
298 |
+
async def get_rendered_video(user_id: str, video_format: str, video_quality: str, subtitles_type: str = 'original'):
|
299 |
|
300 |
+
# # Retrieving the media_path from the context for this user's session
|
301 |
+
# media_path = users_context[user_id]['media_path']
|
|
|
302 |
|
303 |
+
# Downloading Video with Required Video Parameters for User
|
304 |
+
media_path = users_context[user_id]['downloader'].download('video', video_format, video_quality)
|
305 |
|
306 |
# Getting Required Subtitles
|
307 |
+
if subtitles_type.lower() == 'original':
|
308 |
+
subtitles_path = users_context[user_id]['transcript_path']
|
309 |
|
310 |
+
elif subtitles_type.lower() == 'translated':
|
311 |
|
312 |
# Getting Translated Subtitles from the context for this user's session
|
313 |
+
translated_subtitles = users_context[user_id]['translated_subtitles']
|
314 |
|
315 |
# Saving Translated Subtitles
|
316 |
subtitles_path = save_translated_subtitles(translated_subtitles, media_path)
|
media_download.py
CHANGED
@@ -2,7 +2,9 @@ import os
|
|
2 |
import re
|
3 |
import json
|
4 |
import time
|
|
|
5 |
import subprocess
|
|
|
6 |
|
7 |
import numpy as np
|
8 |
import pandas as pd
|
@@ -71,6 +73,7 @@ class YoutubeDownloader(MediaDownloader):
|
|
71 |
self.thumbnail_url = self.youtube.thumbnail_url
|
72 |
self.streams = self.youtube.streams
|
73 |
self.streams_df, self.media_formats_dict = self._get_supported_media_formats()
|
|
|
74 |
|
75 |
def get_media_formats(self):
|
76 |
'''
|
@@ -78,17 +81,45 @@ class YoutubeDownloader(MediaDownloader):
|
|
78 |
'''
|
79 |
return self.media_formats_dict
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
def get_media_metadata(self):
|
82 |
'''
|
83 |
Returns a dictionary for media metadata
|
84 |
'''
|
85 |
media_info = {
|
86 |
-
'title': self.title,
|
87 |
-
'
|
|
|
|
|
88 |
'thumbnail_url': self.thumbnail_url
|
89 |
}
|
90 |
return media_info
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
@staticmethod
|
93 |
def __get_quality_int(media_quality):
|
94 |
'''
|
|
|
2 |
import re
|
3 |
import json
|
4 |
import time
|
5 |
+
import locale
|
6 |
import subprocess
|
7 |
+
from yt_dlp import YoutubeDL
|
8 |
|
9 |
import numpy as np
|
10 |
import pandas as pd
|
|
|
73 |
self.thumbnail_url = self.youtube.thumbnail_url
|
74 |
self.streams = self.youtube.streams
|
75 |
self.streams_df, self.media_formats_dict = self._get_supported_media_formats()
|
76 |
+
self.num_likes, self.num_views = self._get_num_likes_views()
|
77 |
|
78 |
def get_media_formats(self):
|
79 |
'''
|
|
|
81 |
'''
|
82 |
return self.media_formats_dict
|
83 |
|
84 |
+
def _get_num_likes_views(self):
    '''
    Looks up the like & view counts of the video at self.url via yt-dlp
    (info extraction only, nothing is downloaded).

    Returns a (num_likes, num_views) tuple; an entry is None when the
    extracted info does not contain that field.
    '''
    with YoutubeDL() as downloader:
        video_info = downloader.extract_info(self.url, download=False)

    likes = video_info.get('like_count', None)
    views = video_info.get('view_count', None)
    return likes, views
|
96 |
+
|
97 |
def get_media_metadata(self):
    '''
    Collects the media metadata into a dictionary:
    title, formatted like & view counts, media length and thumbnail url
    '''
    media_info = {}
    media_info['title'] = self.title
    media_info['num_likes'] = self.__format_number(self.num_likes)
    media_info['num_views'] = self.__format_number(self.num_views)
    media_info['media_length'] = self.media_length
    media_info['thumbnail_url'] = self.thumbnail_url
    return media_info
|
109 |
|
110 |
+
@staticmethod
def __format_number(num):
    '''
    Returns the formatted number
    E.g: Given input 123456789, it returns 123,456,789

    Returns None when num is None (a count that was not available),
    instead of raising a TypeError.
    '''
    if num is None:
        return None

    # Formatting the Number with Commas
    # The ',' format option is locale independent, so the result matches the
    # example above on every host and the process-wide locale is left untouched.
    return f"{int(num):,}"
|
122 |
+
|
123 |
@staticmethod
|
124 |
def __get_quality_int(media_quality):
|
125 |
'''
|
models.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import torch
|
4 |
+
|
5 |
+
|
6 |
+
def load_models():
    '''
    Checks CUDA availability & loads models

    Returns:
        Dictionary of the loaded models (currently empty, see TODO below).
    '''
    try:
        cuda_available = torch.cuda.is_available()
        print(f"CUDA Available: {cuda_available}")

        # Querying the current device raises on hosts without CUDA,
        # so the device name is only printed when CUDA is usable.
        if cuda_available:
            print(f"CUDA Device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

        # TODO: Load Models Code Here (Refactor Existing Models Loading)
        MODELS = {}
        return MODELS

    except KeyboardInterrupt:
        print('Interrupted')
        try:
            sys.exit(0)
        except SystemExit:
            # Force the process to terminate even if SystemExit is intercepted
            os._exit(0)
|
pytorch_test.py
DELETED
@@ -1,4 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
|
3 |
-
print(f"CUDA available: {torch.cuda.is_available()}")
|
4 |
-
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
|
|
|
|
|
|
|
|
|
|