ChandimaPrabath committed on
Commit
836f75b
·
1 Parent(s): 15d97c1

0.0.0.1 Alpha

Browse files
Files changed (9) hide show
  1. .gitignore +10 -0
  2. LoadBalancer.py +302 -0
  3. api.py +63 -0
  4. app.py +18 -1
  5. hf_scrapper.py +42 -0
  6. indexer.py +33 -0
  7. requirements.txt +1 -0
  8. tvdb.py +91 -0
  9. utils.py +64 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ #.env
2
+ .env
3
+ # cache
4
+ tmp
5
+ # pycache
6
+ __pycache__
7
+ # stream-test.py
8
+ stream-test.py
9
+ #test
10
+ test.py
LoadBalancer.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from indexer import indexer
4
+ import re
5
+ from tvdb import fetch_and_cache_json
6
+ from threading import Event, Thread
7
+ import time
8
+ import logging
9
+ from utils import convert_to_gb
10
+ from api import InstancesAPI
11
+
12
+ CACHE_DIR = os.getenv("CACHE_DIR")  # cache directory read from the environment at import time; None when CACHE_DIR is unset
13
+
14
+ download_progress = {}  # module-level dict; the LoadBalancer class in this file never reads or writes it
15
+
16
class LoadBalancer:
    """Track registered instances, poll their reports and route downloads.

    On construction the repository file structure is indexed once and a
    daemon thread is started that periodically fetches a report from every
    registered instance. The reports are merged into ``FILM_STORE`` and
    ``TV_STORE`` (title -> URL on the instance that holds the media) and
    into ``instances_health`` (instance URL -> used/total cache space).
    """

    # Every instance is assumed to offer this much cache space; the reports
    # only carry the amount already used.
    DEFAULT_TOTAL_SPACE = "50 GB"

    def __init__(self, cache_dir, token, repo, polling_interval=4, max_retries=3, initial_delay=1):
        """Create the balancer, index the repository and start polling.

        Args:
            cache_dir (str): Directory used for cached data; created if missing.
            token (str): Access token, stored as ``self.TOKEN``.
            repo (str): Repository name, stored as ``self.REPO``.
            polling_interval (float): Seconds to wait between polling rounds.
            max_retries (int): Stored only; not used by any method of this class.
            initial_delay (float): Stored only; not used by any method of this class.
        """
        self.version = "0.0.0.1 Alpha"
        self.instances = []
        self.instances_health = {}
        self.polling_interval = polling_interval
        self.max_retries = max_retries
        self.initial_delay = initial_delay
        self.stop_event = Event()
        # InstancesAPI keeps a reference to this very list, so instances
        # registered later are visible to it without further wiring.
        self.instances_api = InstancesAPI(self.instances)
        self.CACHE_DIR = cache_dir
        self.TOKEN = token
        self.REPO = repo
        self.FILM_STORE = {}
        self.TV_STORE = {}
        self.file_structure = None

        # Ensure CACHE_DIR exists (exist_ok avoids a check-then-create race).
        os.makedirs(self.CACHE_DIR, exist_ok=True)

        # Index the file structure initially.
        # NOTE(review): indexer() is called without arguments, so the token
        # and repo passed to this constructor are not what it uses.
        self.file_structure = indexer()

        # Poll the instances in a background daemon thread.
        polling_thread = Thread(target=self.start_polling, daemon=True)
        polling_thread.start()

    def register_instance(self, instance_url):
        """Add ``instance_url`` to the polled instances unless already present."""
        if instance_url not in self.instances:
            self.instances.append(instance_url)
            logging.info(f"Registered instance {instance_url}")
        else:
            logging.info(f"Instance {instance_url} is already registered.")

    def remove_instance(self, instance_url):
        """Forget ``instance_url`` together with its health entry."""
        if instance_url in self.instances:
            self.instances.remove(instance_url)
            self.instances_health.pop(instance_url, None)
            logging.info(f"Removed instance {instance_url}")
        else:
            logging.info(f"Instance {instance_url} not found for removal.")

    def get_reports(self):
        """Fetch one report per instance and rebuild the film/TV stores.

        Instances that did not deliver a report are removed. The stores are
        built in temporary dicts and swapped in at the end, so readers never
        observe a half-built store.
        """
        reports = self.instances_api.fetch_reports()

        temp_film_store = {}
        temp_tv_store = {}

        # Iterate over a copy: remove_instance() mutates self.instances.
        for instance_url in self.instances[:]:
            if instance_url in reports:
                report = reports[instance_url]
                logging.info(f"Report from {instance_url}: {report}")
                self.process_report(instance_url, report, temp_film_store, temp_tv_store)
            else:
                logging.error(f"Failed to get report from {instance_url}. Removing instance.")
                self.remove_instance(instance_url)

        self.FILM_STORE = temp_film_store
        self.TV_STORE = temp_tv_store

    def process_report(self, instance_url, report, temp_film_store, temp_tv_store):
        """Merge one instance report into the temporary stores.

        Args:
            instance_url (str): Base URL of the reporting instance.
            report (dict): Report with optional ``film_store``, ``tv_store``
                and ``cache_size`` entries.
            temp_film_store (dict): Updated in place: title -> film URL.
            temp_tv_store (dict): Updated in place:
                title -> season -> episode -> episode URL.
        """
        # Local import: keeps this class self-contained without touching the
        # module's import block.
        from urllib.parse import quote

        film_store = report.get('film_store', {})
        tv_store = report.get('tv_store', {})
        cache_size = report.get('cache_size')

        logging.info(f"Processing report from {instance_url}")

        # Percent-encode every path segment; replacing only spaces leaves
        # characters such as '#', '?' or '%' free to corrupt the URL.
        for title in film_store:
            temp_film_store[title] = f"{instance_url}/api/film/{quote(title, safe='')}"

        for title, seasons in tv_store.items():
            show_urls = temp_tv_store.setdefault(title, {})
            for season, episodes in seasons.items():
                season_urls = show_urls.setdefault(season, {})
                for episode in episodes:
                    season_urls[episode] = (
                        f"{instance_url}/api/tv/{quote(title, safe='')}"
                        f"/{quote(season, safe='')}/{quote(episode, safe='')}"
                    )

        logging.info("Film and TV Stores processed successfully.")
        self.update_instances_health(instance=instance_url, cache_size=cache_size)

    def start_polling(self):
        """Poll all instances until :meth:`stop_polling` is called."""
        logging.info("Starting polling.")
        while not self.stop_event.is_set():
            try:
                self.get_reports()
            except Exception:
                # Thread boundary: one malformed report must not silently
                # end polling for every instance.
                logging.exception("Polling round failed; retrying after the polling interval.")
            # wait() returns as soon as the stop event is set, so stopping
            # does not have to sit out a full sleep.
            self.stop_event.wait(self.polling_interval)
        logging.info("Polling stopped.")

    def stop_polling(self):
        """Ask the polling loop to terminate."""
        logging.info("Stopping polling.")
        self.stop_event.set()

    def start_prefetching(self):
        """Prefetch metadata for the whole file structure.

        Runs synchronously in the calling thread; no thread is started here.
        """
        self.prefetch_metadata()

    #################################################################

    def update_instances_health(self, instance, cache_size):
        """Record the used cache space reported by ``instance``.

        Args:
            instance (str): Instance URL.
            cache_size: Either a dict carrying the value under the
                ``"cache_size"`` key (the shape the original code indexed)
                or the value itself. ``None`` leaves the entry unchanged.
        """
        used = cache_size.get("cache_size") if isinstance(cache_size, dict) else cache_size
        if used is None:
            logging.warning(f"Instance {instance} reported no cache size; health entry left unchanged.")
            return
        self.instances_health[instance] = {"used": used,
                                           "total": self.DEFAULT_TOTAL_SPACE}
        logging.info(f"Updated instance {instance} with cache size {cache_size}")

    def _select_best_instance(self):
        """Return the URL of the instance with the most free space, or None."""
        best_instance = None
        max_free_space = -1
        # Snapshot the items: the polling thread updates this dict concurrently.
        for instance_url, space_info in list(self.instances_health.items()):
            free_space = convert_to_gb(space_info['total']) - convert_to_gb(space_info['used'])
            if free_space > max_free_space:
                max_free_space = free_space
                best_instance = instance_url
        return best_instance

    def download_film_to_best_instance(self, title):
        """Start a film download on the instance with the most free space.

        ``self.instances_health`` looks like this:
            {
                "https://unicone-studio-instance1.hf.space": {
                    "total": "50 GB",
                    "used": "3.33 GB"
                }
            }

        Args:
            title (str): The title of the film.

        Returns:
            dict: ``film_id``, ``status`` and ``progress_url`` on success,
            otherwise a dict with a single ``error`` entry.
        """
        best_instance = self._select_best_instance()
        if not best_instance:
            logging.error("No suitable instance found for downloading the film.")
            return {"error": "No suitable instance found for downloading the film."}

        result = self.instances_api.download_film(best_instance, title)
        # The API client reports failures as {"error": ...}; indexing that
        # for "film_id" used to raise KeyError.
        if not isinstance(result, dict) or "film_id" not in result:
            error = result.get("error") if isinstance(result, dict) else None
            error = error or "Instance response did not contain a film_id."
            logging.error(f"Failed to start film download on {best_instance}: {error}")
            return {"error": error}

        film_id = result["film_id"]
        return {
            "film_id": film_id,
            "status": result.get("status"),
            "progress_url": f'{best_instance}/api/progress/{film_id}',
        }

    def download_episode_to_best_instance(self, title, season, episode):
        """Start an episode download on the instance with the most free space.

        Args:
            title (str): The title of the TV show.
            season (str): The season of the TV show.
            episode (str): The episode of the TV show.

        Returns:
            dict: ``episode_id``, ``status`` and ``progress_url`` on success,
            otherwise a dict with a single ``error`` entry.
        """
        best_instance = self._select_best_instance()
        if not best_instance:
            logging.error("No suitable instance found for downloading the episode.")
            return {"error": "No suitable instance found for downloading the episode."}

        result = self.instances_api.download_episode(best_instance, title, season, episode)
        if not isinstance(result, dict) or "episode_id" not in result:
            error = result.get("error") if isinstance(result, dict) else None
            error = error or "Instance response did not contain an episode_id."
            logging.error(f"Failed to start episode download on {best_instance}: {error}")
            return {"error": error}

        episode_id = result["episode_id"]
        return {
            "episode_id": episode_id,
            "status": result.get("status"),
            "progress_url": f'{best_instance}/api/progress/{episode_id}',
        }

    #################################################################

    def _top_level_entries(self, section):
        """Yield the entries directly inside the top-level directory ``section``."""
        for directory in self.file_structure:
            if directory['type'] == 'directory' and directory['path'] == section:
                yield from directory['contents']

    def find_movie_path(self, title):
        """Return the path of the first film file whose path contains ``title``.

        The match is a case-insensitive substring test. Returns None when
        nothing matches.
        """
        needle = title.lower()
        for film_directory in self._top_level_entries('films'):
            if film_directory['type'] != 'directory':
                continue
            for item in film_directory['contents']:
                if item['type'] == 'file' and needle in item['path'].lower():
                    return item['path']
        return None

    def find_tv_path(self, title):
        """Return the directory path of the first TV show matching ``title``, or None."""
        show = self.get_tv_structure(title)
        return show['path'] if show is not None else None

    def get_tv_structure(self, title):
        """Return the indexed directory entry of the first TV show matching ``title``.

        The match is a case-insensitive substring test on the directory
        path. Returns None when nothing matches.
        """
        needle = title.lower()
        for show_directory in self._top_level_entries('tv'):
            if show_directory['type'] == 'directory' and needle in show_directory['path'].lower():
                return show_directory
        return None

    def get_film_id(self, title):
        """Generate a film ID based on the title."""
        return title.replace(" ", "_").lower()

    @staticmethod
    def _split_title_and_year(original_title):
        """Split 'Name (2020)' or 'Name 2020' into ('Name', 2020).

        Returns ``(original_title, None)`` when no four-digit year is found.
        """
        match = re.search(r'\((\d{4})\)', original_title)
        if match:
            return original_title[:match.start()].strip(), int(match.group(1))
        parts = original_title.rsplit(' ', 1)
        # isdecimal() rather than isdigit(): the latter accepts characters
        # such as superscripts that int() cannot parse.
        if len(parts) > 1 and len(parts[-1]) == 4 and parts[-1].isdecimal():
            return parts[0].strip(), int(parts[-1])
        return original_title, None

    def prefetch_metadata(self):
        """Prefetch metadata for all items in the file structure."""
        for item in self.file_structure:
            if 'contents' not in item:
                continue
            media_type = 'series' if item['path'].startswith('tv') else 'movie'
            for sub_item in item['contents']:
                original_title = sub_item['path'].split('/')[-1]
                title, year = self._split_title_and_year(original_title)
                fetch_and_cache_json(original_title, title, media_type, year)

    def get_all_tv_shows(self):
        """Return every indexed TV show as title -> list of episode records.

        Each record holds the ``season`` directory name, the ``episode``
        file name and the full ``path`` of the episode file.
        """
        tv_shows = {}
        for show_directory in self._top_level_entries('tv'):
            if show_directory['type'] != 'directory':
                continue
            show_title = show_directory['path'].split('/')[-1]
            episodes = tv_shows[show_title] = []
            for season_directory in show_directory['contents']:
                if season_directory['type'] != 'directory':
                    continue
                season = season_directory['path'].split('/')[-1]
                for episode in season_directory['contents']:
                    if episode['type'] == 'file':
                        episodes.append({
                            "season": season,
                            "episode": episode['path'].split('/')[-1],
                            "path": episode['path'],
                        })
        return tv_shows

    def get_all_films(self):
        """Return the directory paths of all indexed films."""
        return [
            film_directory['path']
            for film_directory in self._top_level_entries('films')
            if film_directory['type'] == 'directory'
        ]
api.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import logging
3
+
4
class InstancesAPI:
    """Small HTTP client for the REST API exposed by the instances."""

    def __init__(self, instances, timeout=10):
        """Store the instance list and the request timeout.

        Args:
            instances (list): Instance base URLs. The list object itself is
                kept (not copied), so later changes made by the owner are
                seen here.
            timeout (float): Seconds to wait for each HTTP request before
                giving up; without it an unresponsive instance blocks the
                caller indefinitely.
        """
        self.instances = instances
        self.timeout = timeout

    def _get_json(self, url, stream=False):
        """GET ``url`` and return the decoded JSON body.

        With ``stream=True`` the body is only read after the Content-Type
        header has been inspected, so a large non-JSON payload (for example
        a video file) is never pulled into memory.

        Raises:
            requests.exceptions.RequestException: Transport or HTTP error.
            ValueError: The response is not JSON.
        """
        with requests.get(url, timeout=self.timeout, stream=stream) as response:
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "")
            if stream and content_type and "json" not in content_type.lower():
                raise ValueError(f"Expected a JSON response but got Content-Type {content_type!r}")
            return response.json()

    def _start_download(self, instance_url, url):
        """Call a download endpoint and return its JSON answer or ``{"error": ...}``."""
        try:
            return self._get_json(url, stream=True)
        except (requests.exceptions.RequestException, ValueError) as e:
            logging.error(f"Error contacting instance {instance_url}: {e}")
            return {"error": str(e)}

    def fetch_reports(self):
        """Fetch ``/api/get/report`` from every instance.

        Returns:
            dict: instance URL -> decoded report. Instances that could not
            be reached or answered with invalid JSON are left out.
        """
        reports = {}
        # Iterate over a snapshot: the owner of the list may modify it.
        for instance_url in list(self.instances):
            try:
                reports[instance_url] = self._get_json(f"{instance_url}/api/get/report")
            except (requests.exceptions.RequestException, ValueError) as e:
                logging.error(f"Error contacting instance {instance_url}: {e}")
        return reports

    def download_film(self, instance_url, title):
        """
        Download a film to an instance.

        If the download started, it returns a JSON like this:
        example:
        {"film_id": "my_spy_2020",
        "status": "Download started"}

        If the film has already been downloaded, the instance answers with
        the video file instead of JSON; that case is reported as
        {"error": "..."} without downloading the video.
        """
        return self._start_download(instance_url, f"{instance_url}/api/film/{title}")

    def download_episode(self, instance_url, title, season, episode):
        """
        Download an episode to an instance.

        If the download started, the instance's JSON answer is returned.
        NOTE(review): callers in LoadBalancer read "episode_id" and "status"
        from it - confirm against the instance API.

        If the episode has already been downloaded, the instance presumably
        answers with the video file (as for films); that case is reported
        as {"error": "..."} without downloading the video.
        """
        return self._start_download(instance_url, f"{instance_url}/api/tv/{title}/{season}/{episode}")
app.py CHANGED
@@ -1,7 +1,24 @@
1
  from fastapi import FastAPI
 
 
 
 
 
 
 
 
2
 
3
  app = FastAPI()
4
 
5
  @app.get("/")
6
  def greet_json():
7
- return {"Hello": "World!"}
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI
2
+ from LoadBalancer import LoadBalancer
3
+ import os
4
+
5
+ CACHE_DIR = os.getenv("CACHE_DIR")  # each of these three is None when the variable is not set in the environment
6
+ TOKEN = os.getenv("TOKEN")
7
+ REPO = os.getenv("REPO")
8
+
9
+ load_balancer = LoadBalancer(cache_dir=CACHE_DIR, token=TOKEN, repo=REPO)  # built at import time: LoadBalancer.__init__ indexes the repository and starts the polling thread
10
 
11
  app = FastAPI()
12
 
13
@app.get("/")
def greet_json():
    """Report the running version of the load balancer.

    The value comes from ``load_balancer.version`` so the endpoint cannot
    drift from the version the LoadBalancer itself declares (the previous
    hard-coded "0.0.1 Alpha" disagreed with its "0.0.0.1 Alpha").
    """
    return {"Version": load_balancer.version}
16
+
17
@app.post("/api/register")
def register_instance():
    """Placeholder endpoint for registering an instance.

    Not implemented yet: it accepts no input and returns nothing.
    """
    # register the instance to Instance Register
    return None
21
+
22
@app.get("/api/get/file_structure")
def get_file_structure():
    """Return the file structure held by the load balancer."""
    indexed_structure = load_balancer.file_structure
    return indexed_structure
hf_scrapper.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import json
4
+ from requests.exceptions import RequestException
5
+
6
def get_file_structure(repo, token, path=""):
    """
    Fetches the file structure of a specified Hugging Face repository.

    Args:
        repo (str): The name of the repository.
        token (str): The authorization token for the request.
        path (str, optional): The specific path in the repository. Defaults to "".

    Returns:
        list: The decoded JSON listing for ``path``, or an empty list when
        the request fails or the response is not valid JSON.
    """
    # Local import: keeps this function self-contained.
    from urllib.parse import quote

    # Percent-encode the path (keeping '/') so names containing characters
    # such as '#' or '?' do not truncate or corrupt the URL.
    api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{quote(path)}"
    headers = {'Authorization': f'Bearer {token}'}
    print(f"Fetching file structure from URL: {api_url}")
    try:
        # A timeout keeps one stalled request from blocking the whole index run.
        response = requests.get(api_url, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except (RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        print(f"Error fetching file structure: {e}")
        return []
28
+
29
def write_file_structure_to_json(file_structure, file_path):
    """
    Dump the file structure to ``file_path`` as indented JSON.

    Args:
        file_structure (list): The file structure data.
        file_path (str): Destination of the JSON file.

    I/O failures are reported on stdout instead of being raised.
    """
    try:
        with open(file_path, 'w') as target:
            json.dump(file_structure, target, indent=2)
    except OSError as write_error:
        print(f"Error writing file structure to JSON: {write_error}")
    else:
        print(f'File structure written to {file_path}')
indexer.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from hf_scrapper import get_file_structure, write_file_structure_to_json
2
+ from dotenv import load_dotenv
3
+ import os
4
+
5
+ load_dotenv()  # runs at import time, before indexer() reads TOKEN and REPO from the environment
6
+
7
def index_repository(token, repo, current_path=""):
    """Recursively index ``repo`` starting at ``current_path``.

    Directories become ``{"type", "path", "contents"}`` entries whose
    contents are indexed recursively; every other item is reduced to its
    ``type``, ``size`` and ``path`` (dropping fields such as 'oid' and 'lfs').

    Returns:
        list: The nested structure for ``current_path``.
    """
    entries = []
    for node in get_file_structure(repo, token, current_path):
        if node['type'] != 'directory':
            entries.append({
                "type": node['type'],
                "size": node['size'],
                "path": node['path'],
            })
            continue
        children = index_repository(token, repo, node['path'])
        entries.append({
            "type": "directory",
            "path": node['path'],
            "contents": children,
        })
    return entries
27
+
28
def indexer():
    """Index the repository named by the REPO environment variable.

    The access token is read from the TOKEN environment variable.

    Returns:
        list: The nested file structure of the whole repository.
    """
    token = os.getenv("TOKEN")
    repo = os.getenv("REPO")
    repository_tree = index_repository(token, repo, "")
    print(f"Full file structure for repository '{repo}' has been indexed.")
    return repository_tree
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  fastapi
2
  uvicorn[standard]
 
 
1
  fastapi
2
  uvicorn[standard]
3
+ python-dotenv
tvdb.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tvdb.py
2
+ import os
3
+ import requests
4
+ import urllib.parse
5
+ from datetime import datetime, timedelta
6
+ from dotenv import load_dotenv
7
+ import json
8
+
9
+ load_dotenv()  # runs at import time, before the os.getenv() calls below
10
+ THETVDB_API_KEY = os.getenv("THETVDB_API_KEY")  # sent as "apikey" by authenticate_thetvdb()
11
+ THETVDB_API_URL = os.getenv("THETVDB_API_URL")  # base URL used to build the /login, /search and extended-info URLs
12
+ CACHE_DIR = os.getenv("CACHE_DIR")  # directory where fetch_and_cache_json() writes its JSON files; None if unset
13
+ TOKEN_EXPIRY = None  # set by authenticate_thetvdb() to the time after which the token is refreshed
14
+ THETVDB_TOKEN = None  # bearer token cached by authenticate_thetvdb(); None until the first successful login
15
+
16
def authenticate_thetvdb():
    """Log in to TheTVDB and cache the bearer token in module globals.

    On success ``THETVDB_TOKEN`` holds the token and ``TOKEN_EXPIRY`` is set
    30 days ahead. On any failure (network error, HTTP error, non-JSON or
    unexpectedly shaped response) both are reset to None and the error is
    printed; nothing is raised.
    """
    global THETVDB_TOKEN, TOKEN_EXPIRY
    auth_url = f"{THETVDB_API_URL}/login"
    auth_data = {
        "apikey": THETVDB_API_KEY
    }
    try:
        # A timeout keeps an unresponsive API from blocking the caller forever.
        response = requests.post(auth_url, json=auth_data, timeout=30)
        response.raise_for_status()
        response_data = response.json()
        THETVDB_TOKEN = response_data['data']['token']
        # NOTE(review): 30 days is presumably the token lifetime - confirm against the API docs.
        TOKEN_EXPIRY = datetime.now() + timedelta(days=30)
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: JSON without data.token.
        print(f"Authentication failed: {e}")
        THETVDB_TOKEN = None
        TOKEN_EXPIRY = None
32
+
33
def get_thetvdb_token():
    """Return a TheTVDB token, logging in again when none is cached or it has expired.

    Returns None when authentication fails.
    """
    token_is_usable = bool(THETVDB_TOKEN) and datetime.now() < TOKEN_EXPIRY
    if not token_is_usable:
        authenticate_thetvdb()
    return THETVDB_TOKEN
38
+
39
def fetch_and_cache_json(original_title, title, media_type, year=None):
    """Look up a title on TheTVDB and cache its extended record as JSON.

    The first search hit is used; its extended record (with translations)
    is written to ``CACHE_DIR/<url-quoted original_title>.json``.

    Args:
        original_title (str): Name used for the cache file.
        title (str): Search query.
        media_type (str): Search type filter, 'movie' or 'series'.
        year (int, optional): Restrict the search to this year.

    Returns:
        None. Failures are printed and otherwise ignored.
    """
    request_timeout = 30  # seconds; keeps a stalled API call from blocking prefetching

    search_url = f"{THETVDB_API_URL}/search?query={urllib.parse.quote(title)}&type={media_type}"
    if year:
        search_url += f"&year={year}"

    token = get_thetvdb_token()
    if not token:
        print("Authentication failed")
        return

    if not CACHE_DIR:
        # os.path.join(None, ...) would raise only after both requests were made.
        print("CACHE_DIR is not set; cannot cache metadata")
        return

    headers = {
        "Authorization": f"Bearer {token}",
        "accept": "application/json",
    }

    try:
        # Fetch initial search results
        response = requests.get(search_url, headers=headers, timeout=request_timeout)
        response.raise_for_status()
        data = response.json()

        if not ('data' in data and data['data']):
            return

        # Extract the TVDB ID and type from the first result
        first_result = data['data'][0]
        tvdb_id = first_result.get('tvdb_id')
        result_type = first_result.get('type')

        if not tvdb_id:
            print("TVDB ID not found in the search results")
            return

        # Determine the correct extended URL based on the result's media type
        if result_type == 'movie':
            extended_url = f"{THETVDB_API_URL}/movies/{tvdb_id}/extended?meta=translations"
        elif result_type == 'series':
            extended_url = f"{THETVDB_API_URL}/series/{tvdb_id}/extended?meta=translations"
        else:
            print(f"Unsupported media type: {result_type}")
            return

        # Request the extended information using the TVDB ID
        response = requests.get(extended_url, headers=headers, timeout=request_timeout)
        response.raise_for_status()
        extended_data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        print(f"Error fetching data: {e}")
        return

    # Cache the extended JSON response
    json_cache_path = os.path.join(CACHE_DIR, f"{urllib.parse.quote(original_title)}.json")
    try:
        os.makedirs(CACHE_DIR, exist_ok=True)
        with open(json_cache_path, 'w') as f:
            json.dump(extended_data, f)
    except OSError as e:
        print(f"Error writing cache file {json_cache_path}: {e}")
utils.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
# Compiled once at import time instead of on every call.
_URL_PATTERN = re.compile(
    r'^(?:http|ftp)s?://'  # http://, https://, ftp:// or ftps://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'  # ...or ipv4
    r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'  # ...or ipv6
    r'(?::\d+)?'  # optional port
    # \Z instead of $: "$" also matches just before a trailing newline,
    # which let "http://example.com\n" pass as valid.
    r'(?:/?|[/?]\S+)\Z', re.IGNORECASE)


def is_valid_url(url):
    """
    Validates the URL.

    Args:
        url (str): The URL to validate.

    Returns:
        bool: True if the URL is valid, False otherwise (including when
        ``url`` is not a string).
    """
    if not isinstance(url, str):
        return False
    return _URL_PATTERN.match(url) is not None
22
+
23
# Power of 1024 relating each unit to one GB (same 1024-based steps as
# bytes_to_human_readable).
_GB_EXPONENTS = {
    "B": -3, "KB": -2, "MB": -1, "GB": 0, "TB": 1,
    "PB": 2, "EB": 3, "ZB": 4, "YB": 5,
}


def convert_to_gb(space_str):
    """
    Converts a space string like '50 GB' or '3.33 GB' to a float representing the number of GB.

    The unit is honoured, so '512 MB' yields 0.5 rather than 512.0. A bare
    number without a unit is taken to be in GB already.

    Args:
        space_str (str): The space string to convert, e.g. '3.33 GB',
            '512.0 MB' or '1.5 GiB'.

    Returns:
        float: The space in GB.

    Raises:
        ValueError: If the string is empty, the number cannot be parsed or
            the unit is unknown.
    """
    parts = space_str.split()
    if not parts:
        raise ValueError("space string is empty")
    value = float(parts[0])
    if len(parts) == 1:
        return value
    # Treat binary spellings such as "GiB" like their short form "GB".
    unit = parts[1].upper().replace("IB", "B")
    if unit not in _GB_EXPONENTS:
        raise ValueError(f"unknown size unit: {parts[1]!r}")
    return value * (1024.0 ** _GB_EXPONENTS[unit])
34
+
35
def bytes_to_human_readable(num, suffix="B"):
    """
    Render a byte count with the largest fitting 1024-based prefix.

    Args:
        num (int): The number of bytes.
        suffix (str): Unit suffix appended after the prefix (default 'B').

    Returns:
        str: For example '1.5 KB'; values of 1024**8 and above use 'Y'.
    """
    prefixes = ("", "K", "M", "G", "T", "P", "E", "Z")
    value = num
    position = 0
    while position < len(prefixes):
        if abs(value) < 1024.0:
            return f"{value:3.1f} {prefixes[position]}{suffix}"
        value = value / 1024.0
        position += 1
    return f"{value:.1f} Y{suffix}"
51
+
52
def encode_episodeid(title, season, episode):
    """
    Build an episode ID by joining title, season and episode with underscores.

    Args:
        title (str): The title of the TV show.
        season (str): The season of the TV show.
        episode (str): The episode number.

    Returns:
        str: The encoded episode ID, e.g. 'Show_Season 1_E01'.
    """
    components = (title, season, episode)
    return "_".join(format(component) for component in components)