Spaces:
Paused
Paused
Commit
·
aeed8f1
1
Parent(s):
190ac99
update
Browse files- app.py +30 -34
- hf_scrapper.py +39 -8
- requirements.txt +2 -1
- templates/film_details_page.html +18 -13
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from flask import Flask, jsonify, render_template, request, Response, abort
|
2 |
import os
|
3 |
import urllib.parse
|
4 |
from hf_scrapper import download_and_cache_file, get_system_proxies
|
@@ -8,6 +8,7 @@ from dotenv import load_dotenv
|
|
8 |
import json
|
9 |
import re
|
10 |
from threading import Thread
|
|
|
11 |
|
12 |
load_dotenv()
|
13 |
INDEX_FILE = os.getenv("INDEX_FILE")
|
@@ -58,6 +59,7 @@ thread.daemon = True
|
|
58 |
thread.start()
|
59 |
|
60 |
app = Flask(__name__)
|
|
|
61 |
|
62 |
def get_film_file_path(title):
|
63 |
decoded_title = urllib.parse.unquote(title)
|
@@ -219,41 +221,35 @@ def film_player(title):
|
|
219 |
|
220 |
@app.route('/cached_films/<path:title>')
|
221 |
def serve_cached_film(title):
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
if not os.path.isdir(folder_path):
|
227 |
-
return jsonify({'error': 'Film folder not found'}), 404
|
228 |
-
|
229 |
-
# Get the list of files in the folder
|
230 |
-
files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
|
231 |
-
|
232 |
-
# If no files found, return error
|
233 |
-
if not files:
|
234 |
-
return jsonify({'error': 'No film files found in the folder'}), 404
|
235 |
|
236 |
-
|
237 |
-
file_path = os.path.join(folder_path, files[0])
|
238 |
-
print("fills :"+files)
|
239 |
-
print("file path : "+ file_path)
|
240 |
-
# Determine the MIME type based on the file extension
|
241 |
-
mime_type = 'video/mp4'
|
242 |
-
if file_path.lower().endswith('.mkv'):
|
243 |
-
mime_type = 'video/x-matroska'
|
244 |
-
elif file_path.lower().endswith('.avi'):
|
245 |
-
mime_type = 'video/x-msvideo'
|
246 |
-
elif file_path.lower().endswith('.webm'):
|
247 |
-
mime_type = 'video/webm'
|
248 |
-
elif file_path.lower().endswith('.ts'):
|
249 |
-
mime_type = 'video/mp2t'
|
250 |
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
|
256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
|
258 |
if __name__ == '__main__':
|
259 |
-
|
|
|
1 |
+
from flask import Flask, jsonify, render_template, request, Response, abort, send_from_directory
|
2 |
import os
|
3 |
import urllib.parse
|
4 |
from hf_scrapper import download_and_cache_file, get_system_proxies
|
|
|
8 |
import json
|
9 |
import re
|
10 |
from threading import Thread
|
11 |
+
from flask_socketio import SocketIO, emit
|
12 |
|
13 |
load_dotenv()
|
14 |
INDEX_FILE = os.getenv("INDEX_FILE")
|
|
|
59 |
thread.start()
|
60 |
|
61 |
app = Flask(__name__)
|
62 |
+
socketio = SocketIO(app)
|
63 |
|
64 |
def get_film_file_path(title):
|
65 |
decoded_title = urllib.parse.unquote(title)
|
|
|
221 |
|
222 |
@app.route('/cached_films/<path:title>')
def serve_cached_film(title):
    """Serve the cached copy of the film identified by ``title``.

    Args:
        title: Film title taken from the URL path.

    Returns:
        The file response, or a JSON error with status 404 when the title
        cannot be resolved to a film path.
    """
    # Flask has already URL-decoded the path segment and get_film_file_path()
    # unquotes its argument itself, so no further unquote() is applied here:
    # every extra decode corrupts titles containing a literal '%'.
    film_file_path = get_film_file_path(title)
    if not film_file_path:
        return jsonify({'error': 'Film not found'}), 404

    # The download handler stores films at CACHE_DIR/<film_file_path>; serve
    # from that same location instead of resolving the path against the
    # application root.
    cached_file_path = os.path.join(CACHE_DIR, film_file_path)
    return send_from_directory(os.path.dirname(cached_file_path), os.path.basename(cached_file_path))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
|
231 |
+
@socketio.on('download')
def handle_download(data):
    """Download the requested film into the cache and report progress.

    Args:
        data: Event payload; expected to be a dict with a 'title' key.

    Emits 'download_progress' events to the requesting client only. Each event
    carries either an 'error' message or a 'progress' percentage, optionally
    with a 'status' string.
    """
    # Reject missing/malformed payloads up front instead of letting the path
    # lookup fail on a None title.
    title = data.get('title') if isinstance(data, dict) else None
    if not title:
        emit('download_progress', {'error': 'Film not found'})
        return

    film_file_path = get_film_file_path(title)
    if not film_file_path:
        emit('download_progress', {'error': 'Film not found'})
        return

    cached_file_path = os.path.join(CACHE_DIR, film_file_path)
    if os.path.exists(cached_file_path):
        emit('download_progress', {'progress': 100, 'status': 'already cached'})
        return

    file_url = f"https://huggingface.co/{REPO}/resolve/main/{film_file_path}"

    # socketio.emit() without a target broadcasts to every connected client.
    # Capture the caller's session id so progress goes to that client only.
    client_sid = request.sid

    def download_callback(progress):
        socketio.emit('download_progress', {'progress': progress}, to=client_sid)

    # NOTE(review): download_and_cache_file must accept a `callback` keyword
    # for this call to work - confirm against hf_scrapper.
    success = download_and_cache_file(file_url, TOKEN, cached_file_path, proxies=get_system_proxies(), callback=download_callback)
    if not success:
        emit('download_progress', {'error': 'Failed to download film'})
    else:
        emit('download_progress', {'progress': 100, 'status': 'completed'})
|
253 |
|
254 |
if __name__ == '__main__':
|
255 |
+
socketio.run(app, host='0.0.0.0', port=5000)
|
hf_scrapper.py
CHANGED
@@ -4,6 +4,11 @@ import json
|
|
4 |
import urllib.request
|
5 |
from requests.exceptions import RequestException
|
6 |
from tqdm import tqdm
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
def get_system_proxies():
|
9 |
try:
|
@@ -17,23 +22,35 @@ def get_system_proxies():
|
|
17 |
print(f"Error getting system proxies: {e}")
|
18 |
return {}
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
def download_and_cache_file(file_url, token, cache_path, proxies=None):
|
21 |
print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
|
|
|
22 |
try:
|
23 |
-
response = requests.
|
24 |
response.raise_for_status()
|
25 |
|
26 |
-
# Get the total file size from the headers
|
27 |
total_size = int(response.headers.get('content-length', 0))
|
28 |
-
|
29 |
os.makedirs(os.path.dirname(cache_path), exist_ok=True)
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
34 |
f.write(chunk)
|
|
|
35 |
|
36 |
print(f'File cached to {cache_path} successfully.')
|
|
|
37 |
return True
|
38 |
except RequestException as e:
|
39 |
print(f"Error downloading file: {e}")
|
@@ -41,6 +58,20 @@ def download_and_cache_file(file_url, token, cache_path, proxies=None):
|
|
41 |
print(f"Error writing file {cache_path}: {e}")
|
42 |
return False
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
def get_file_structure(repo, token, path="", proxies=None):
|
45 |
api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
|
46 |
headers = {'Authorization': f'Bearer {token}'}
|
@@ -64,6 +95,6 @@ def write_file_structure_to_json(file_structure, file_path):
|
|
64 |
if __name__ == "__main__":
|
65 |
file_url = "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
|
66 |
token = os.getenv("TOKEN")
|
67 |
-
cache_path = "
|
68 |
proxy = get_system_proxies()
|
69 |
download_and_cache_file(file_url, token, cache_path, proxies=proxy)
|
|
|
4 |
import urllib.request
|
5 |
from requests.exceptions import RequestException
|
6 |
from tqdm import tqdm
|
7 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
8 |
+
|
9 |
+
CACHE_DIR = os.getenv("CACHE_DIR")
|
10 |
+
CACHE_JSON_PATH = os.path.join(CACHE_DIR, "cached_films.json")
|
11 |
+
MAX_WORKERS = 4 # Adjust the number of threads for concurrent downloads
|
12 |
|
13 |
def get_system_proxies():
|
14 |
try:
|
|
|
22 |
print(f"Error getting system proxies: {e}")
|
23 |
return {}
|
24 |
|
25 |
+
def download_file_chunk(url, headers, proxies, start, end):
    """Fetch one inclusive byte range of ``url`` and return it as bytes.

    Args:
        url: URL of the file being downloaded.
        headers: Base request headers; left unmodified.
        proxies: Proxies mapping passed to requests.
        start: First byte offset of the range.
        end: Last byte offset of the range (inclusive).

    Raises:
        RequestException: On an HTTP error, or when the server does not answer
            with a partial-content (206) response.
    """
    # Build a per-call header dict. The caller hands the same dict to several
    # worker threads, so writing 'Range' into it would let one thread
    # overwrite another thread's range before the request is sent.
    range_headers = {**headers, 'Range': f"bytes={start}-{end}"}
    response = requests.get(url, headers=range_headers, proxies=proxies, stream=True)
    response.raise_for_status()
    # A plain 200 means the Range header was ignored and the body is the whole
    # file; using it as a chunk would corrupt the assembled download.
    if response.status_code != 206:
        raise RequestException(f"Server did not honour range request {start}-{end} (status {response.status_code})")
    return response.content
|
30 |
+
|
31 |
def download_and_cache_file(file_url, token, cache_path, proxies=None, callback=None):
    """Download ``file_url`` into ``cache_path`` using parallel range requests.

    Args:
        file_url: URL of the file to fetch.
        token: Bearer token sent in the Authorization header.
        cache_path: Destination path; its parent directory is created.
        proxies: Optional proxies mapping passed to requests.
        callback: Optional callable receiving an integer percentage. Values
            below 100 are reported while downloading; 100 is reported only
            after the finished file is in place at ``cache_path``.

    Returns:
        True when the file was downloaded and moved to ``cache_path``,
        False otherwise.
    """
    print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
    headers = {'Authorization': f'Bearer {token}'}
    # Download into a sibling temporary file so an interrupted transfer never
    # leaves a truncated file at cache_path that looks like a valid cache hit.
    partial_path = f"{cache_path}.part"
    try:
        # requests.head() does not follow redirects unless asked to, and the
        # size must come from the final response, not from a redirect.
        response = requests.head(file_url, headers=headers, proxies=proxies, allow_redirects=True)
        response.raise_for_status()

        total_size = int(response.headers.get('content-length', 0))
        if total_size <= 0:
            print(f"Error downloading file: no content length reported for {file_url}")
            return False

        os.makedirs(os.path.dirname(cache_path), exist_ok=True)

        # Ceiling division, never 0: a zero step would make range() raise.
        chunk_size = max(1, -(-total_size // MAX_WORKERS))
        ranges = [(start, min(start + chunk_size, total_size) - 1) for start in range(0, total_size, chunk_size)]

        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            # Remember each future's start offset: chunks complete in arbitrary
            # order and must be written at their own position in the file.
            # Every task also gets its own copy of the headers.
            futures = {
                executor.submit(download_file_chunk, file_url, dict(headers), proxies, start, end): start
                for start, end in ranges
            }
            downloaded = 0
            with open(partial_path, 'wb') as f, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
                for future in as_completed(futures):
                    chunk = future.result()
                    f.seek(futures[future])
                    f.write(chunk)
                    pbar.update(len(chunk))
                    downloaded += len(chunk)
                    if callback is not None:
                        # Hold back 100% until the file has been moved into place.
                        callback(min(99, downloaded * 100 // total_size))

        os.replace(partial_path, cache_path)
        print(f'File cached to {cache_path} successfully.')
        try:
            update_cache_json(file_url, cache_path)
        except (OSError, ValueError) as e:
            # The film itself is cached; a broken index must not fail the download.
            print(f"Error updating cache JSON: {e}")
        if callback is not None:
            callback(100)
        return True
    except RequestException as e:
        print(f"Error downloading file: {e}")
    except OSError as e:
        print(f"Error writing file {cache_path}: {e}")

    # Best-effort cleanup of the incomplete download; the file may not exist.
    try:
        os.remove(partial_path)
    except OSError:
        pass
    return False
|
60 |
|
61 |
+
def update_cache_json(file_url, cache_path):
    """Record ``cache_path`` in the JSON index stored at CACHE_JSON_PATH.

    The entry is keyed by the file name of ``cache_path``; an existing entry
    with the same name is overwritten.

    Args:
        file_url: Source URL of the cached file (currently unused).
        cache_path: Path of the cached file on disk.
    """
    cache_data = {}
    if os.path.exists(CACHE_JSON_PATH):
        try:
            with open(CACHE_JSON_PATH, 'r') as json_file:
                loaded = json.load(json_file)
        except json.JSONDecodeError as e:
            # A corrupt index should not block caching; rebuild it from scratch.
            print(f"Ignoring unreadable cache JSON {CACHE_JSON_PATH}: {e}")
        else:
            # Only a JSON object can hold title -> path entries.
            if isinstance(loaded, dict):
                cache_data = loaded

    film_title = os.path.basename(cache_path)
    cache_data[film_title] = cache_path

    # Write to a temporary file and swap it in, so a crash or a concurrent
    # reader never sees a half-written index.
    tmp_path = f"{CACHE_JSON_PATH}.tmp"
    with open(tmp_path, 'w') as json_file:
        json.dump(cache_data, json_file, indent=2)
    os.replace(tmp_path, CACHE_JSON_PATH)

    print(f'Updated cache JSON: {CACHE_JSON_PATH} with {film_title}: {cache_path}')
|
74 |
+
|
75 |
def get_file_structure(repo, token, path="", proxies=None):
|
76 |
api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
|
77 |
headers = {'Authorization': f'Bearer {token}'}
|
|
|
95 |
if __name__ == "__main__":
|
96 |
file_url = "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
|
97 |
token = os.getenv("TOKEN")
|
98 |
+
cache_path = os.path.join(CACHE_DIR, "films/Funky Monkey 2004/Funky Monkey (2004) Web-dl 1080p.mp4")
|
99 |
proxy = get_system_proxies()
|
100 |
download_and_cache_file(file_url, token, cache_path, proxies=proxy)
|
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ requests
|
|
4 |
python-dotenv
|
5 |
ffmpy
|
6 |
ffmpeg-python
|
7 |
-
tqdm
|
|
|
|
4 |
python-dotenv
|
5 |
ffmpy
|
6 |
ffmpeg-python
|
7 |
+
tqdm
|
8 |
+
Flask-SocketIO
|
templates/film_details_page.html
CHANGED
@@ -4,6 +4,7 @@
|
|
4 |
<meta charset="UTF-8">
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
<title>Film Details</title>
|
|
|
7 |
<style>
|
8 |
body {
|
9 |
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
@@ -123,6 +124,7 @@
|
|
123 |
<div class="genres" id="film-genres"></div>
|
124 |
<div class="metadata-container" id="film-metadata"></div>
|
125 |
<a href="#" class="play-button" id="play-button" onclick="playFilm()">Play</a>
|
|
|
126 |
</div>
|
127 |
</div>
|
128 |
<script>
|
@@ -170,24 +172,27 @@
|
|
170 |
metadataContainer.appendChild(createMetadataElement('Director', metadata.director));
|
171 |
metadataContainer.appendChild(createMetadataElement('Country', metadata.country));
|
172 |
metadataContainer.appendChild(createMetadataElement('Release Date', metadata.first_air_time));
|
173 |
-
|
174 |
-
const playButton = document.getElementById('play-button');
|
175 |
-
playButton.href = `#`; // Will be handled by JavaScript
|
176 |
}
|
177 |
|
178 |
-
|
179 |
const title = "{{ title }}";
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
185 |
} else {
|
186 |
-
|
|
|
|
|
|
|
187 |
}
|
188 |
-
}
|
189 |
-
alert('Error: ' + error.message);
|
190 |
-
}
|
191 |
}
|
192 |
|
193 |
const urlParams = new URLSearchParams(window.location.search);
|
|
|
4 |
<meta charset="UTF-8">
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
6 |
<title>Film Details</title>
|
7 |
+
<!-- The browser bundle is published in the socket.io-client package; the socket.io server package has no dist/socket.io.min.js. -->
<script src="https://cdn.jsdelivr.net/npm/socket.io-client@4.0.0/dist/socket.io.min.js"></script>
|
8 |
<style>
|
9 |
body {
|
10 |
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
|
|
124 |
<div class="genres" id="film-genres"></div>
|
125 |
<div class="metadata-container" id="film-metadata"></div>
|
126 |
<a href="#" class="play-button" id="play-button" onclick="playFilm()">Play</a>
|
127 |
+
<p id="progress" style="margin-top: 20px; text-align: center;"></p>
|
128 |
</div>
|
129 |
</div>
|
130 |
<script>
|
|
|
172 |
metadataContainer.appendChild(createMetadataElement('Director', metadata.director));
|
173 |
metadataContainer.appendChild(createMetadataElement('Country', metadata.country));
|
174 |
metadataContainer.appendChild(createMetadataElement('Release Date', metadata.first_air_time));
|
|
|
|
|
|
|
175 |
}
|
176 |
|
177 |
+
function playFilm() {
|
178 |
const title = "{{ title }}";
|
179 |
+
const socket = io();
|
180 |
+
const progressElement = document.getElementById('progress');
|
181 |
+
|
182 |
+
socket.emit('download', { title: title });
|
183 |
+
|
184 |
+
socket.on('download_progress', function(data) {
|
185 |
+
if (data.error) {
|
186 |
+
progressElement.textContent = 'Error: ' + data.error;
|
187 |
+
} else if (data.status) {
|
188 |
+
progressElement.textContent = 'Status: ' + data.status;
|
189 |
} else {
|
190 |
+
progressElement.textContent = 'Progress: ' + data.progress + '%';
|
191 |
+
if (data.progress === 100) {
|
192 |
+
window.location.href = `/cached_films/${encodeURIComponent(title)}`;
|
193 |
+
}
|
194 |
}
|
195 |
+
});
|
|
|
|
|
196 |
}
|
197 |
|
198 |
const urlParams = new URLSearchParams(window.location.search);
|