# TRA / cursor_tracker.py
# favoredone's picture
# Upload 29 files
# 016ee5a verified
import os
import json
import requests
import subprocess
import shutil
import time
import re
import threading
import multiprocessing
from typing import Dict, List, Set, Optional
from electron_processing import ElectronSpeedVideoProcessor
from huggingface_hub import HfApi, list_repo_files
import cv2
import numpy as np
from pathlib import Path
import smtplib
from email.message import EmailMessage
# ==== CONFIGURATION ====
# Hugging Face access token and source dataset; both can be overridden
# through environment variables.
HF_TOKEN = os.getenv("HF_TOKEN", "")
SOURCE_REPO_ID = os.getenv("SOURCE_REPO", "Fred808/BG1")

# Path configuration (all relative to the current working directory).
DOWNLOAD_FOLDER = "downloads"
EXTRACT_FOLDER = "extracted"
FRAMES_OUTPUT_FOLDER = "extracted_frames"  # frames extracted from videos
CURSOR_TRACKING_OUTPUT_FOLDER = "cursor_tracking_results"  # cursor tracking results
CURSOR_TEMPLATES_DIR = "cursors"  # cursor template images

# Create every working directory at import time so later stages can assume
# they exist.
for _working_dir in (
    DOWNLOAD_FOLDER,
    EXTRACT_FOLDER,
    FRAMES_OUTPUT_FOLDER,
    CURSOR_TRACKING_OUTPUT_FOLDER,
    CURSOR_TEMPLATES_DIR,
):
    os.makedirs(_working_dir, exist_ok=True)
del _working_dir

# State files (persisted between runs).
DOWNLOAD_STATE_FILE = "download_progress.json"
PROCESS_STATE_FILE = "process_progress.json"
FAILED_FILES_LOG = "failed_files.log"

# Processing parameters.
CHUNK_SIZE = 1
PROCESSING_DELAY = 2
MAX_RETRIES = 3
MIN_FREE_SPACE_GB = 2  # minimum free space, in GB, required before processing

# Frame extraction parameters.
DEFAULT_FPS = 3  # frames saved per second of video

# Cursor tracking parameters.
CURSOR_THRESHOLD = 0.8  # minimum template-match score counted as a detection

# Hugging Face API client shared by the module.
hf_api = HfApi(token=HF_TOKEN)

# Global, in-memory status of the current run; mutated by log_message and by
# the processing functions below.
processing_status = {
    "is_running": False,
    "current_file": None,
    "total_files": 0,
    "processed_files": 0,
    "failed_files": 0,
    "extracted_courses": 0,
    "extracted_videos": 0,
    "extracted_frames_count": 0,
    "tracked_cursors_count": 0,
    "last_update": None,
    "logs": [],
}
def log_message(message: str):
    """Print a timestamped message and record it in ``processing_status``.

    The formatted entry is appended to ``processing_status["logs"]`` (trimmed
    to the 100 most recent entries) and ``processing_status["last_update"]``
    is set to the entry's timestamp.
    """
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] {message}"
    print(entry)

    history = processing_status["logs"]
    history.append(entry)
    processing_status["last_update"] = stamp

    # Keep the in-memory log bounded.
    if len(history) > 100:
        processing_status["logs"] = history[-100:]
def log_failed_file(filename: str, error: str):
    """Append a timestamped ``filename: error`` line to ``FAILED_FILES_LOG``."""
    stamp = time.strftime('%Y-%m-%d %H:%M:%S')
    record = f"{stamp} - {filename}: {error}\n"
    with open(FAILED_FILES_LOG, "a") as failed_log:
        failed_log.write(record)
def get_disk_usage(path: str) -> Dict[str, float]:
    """Return disk usage for the filesystem containing ``path``.

    The result has the keys ``"total"``, ``"free"`` and ``"used"``, each
    expressed in units of 1024**3 bytes.
    """
    usage = shutil.disk_usage(path)
    bytes_per_gb = 1024**3
    return {
        "total": usage.total / bytes_per_gb,
        "free": usage.free / bytes_per_gb,
        "used": usage.used / bytes_per_gb,
    }
def check_disk_space(path: str = ".") -> bool:
    """Return True when ``path`` has at least ``MIN_FREE_SPACE_GB`` free.

    When space is insufficient a warning with the free and used amounts is
    logged and False is returned.
    """
    usage = get_disk_usage(path)
    if not usage["free"] < MIN_FREE_SPACE_GB:
        return True
    log_message(f'⚠️ Low disk space: {usage["free"]:.2f}GB free, {usage["used"]:.2f}GB used')
    return False
def cleanup_temp_files():
    """Delete partial downloads (``*.tmp``) from ``DOWNLOAD_FOLDER``.

    Only files whose name ends in ``.tmp`` are removed; completed downloads
    are left alone. Cleanup is best-effort: a folder that cannot be listed or
    a file that cannot be removed is logged and skipped rather than raised,
    because this runs from the ``finally`` clause of the main loop.
    """
    log_message("🧹 Cleaning up only partial download files...")
    try:
        entries = os.listdir(DOWNLOAD_FOLDER)
    except OSError as exc:
        log_message(f"⚠️ Could not list {DOWNLOAD_FOLDER}: {exc}")
        return

    for name in entries:
        if not name.endswith(".tmp"):
            continue
        try:
            os.remove(os.path.join(DOWNLOAD_FOLDER, name))
        except OSError as exc:
            # Report the failure instead of silently swallowing it.
            log_message(f"⚠️ Could not remove partial download {name}: {exc}")
        else:
            log_message(f"πŸ—‘οΈ Removed partial download: {name}")
def load_json_state(file_path: str, default_value):
    """Return the JSON content of ``file_path``, or ``default_value``.

    ``default_value`` is returned when the file does not exist or when it
    cannot be parsed as JSON (the latter is logged as a corrupted state file).
    """
    if not os.path.exists(file_path):
        return default_value
    try:
        with open(file_path, "r") as state_file:
            state = json.load(state_file)
    except json.JSONDecodeError:
        log_message(f"⚠️ Corrupted state file: {file_path}")
        return default_value
    return state
def save_json_state(file_path: str, data):
    """Serialize ``data`` as indented JSON to ``file_path`` atomically.

    The JSON is written to ``<file_path>.tmp`` first and then moved over the
    target with ``os.replace``. If serialization or writing fails, the
    previous state file is left untouched instead of being truncated.

    Raises:
        TypeError / ValueError: if ``data`` is not JSON-serializable.
        OSError: if the file cannot be written or replaced.
    """
    temp_path = f"{file_path}.tmp"
    try:
        with open(temp_path, "w") as f:
            json.dump(data, f, indent=2)
        os.replace(temp_path, file_path)
    finally:
        # After a successful replace the temp file no longer exists; after a
        # failure, remove the partial file so it does not accumulate.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup; the original error (if any) still propagates.
                pass
def download_with_retry(url: str, dest_path: str, max_retries: int = 3) -> bool:
    """Download ``url`` to ``dest_path`` with retries and disk-space checks.

    The file is streamed to ``dest_path + ".tmp"`` and moved into place only
    after the transfer completes, so ``dest_path`` never holds a partial file.

    Retry policy:
      * Up to ``max_retries`` attempts per round, with a linearly growing
        pause (5s, 10s, ...) between attempts.
      * If an attempt fails because of name resolution, the round is aborted
        and, after a 30 second wait, a new round starts (at most 3 rounds).
      * If a round is exhausted for any other reason the download fails;
        further rounds are reserved for DNS failures.

    Args:
        url: Source URL; requested with the ``HF_TOKEN`` bearer token.
        dest_path: Final path of the downloaded file.
        max_retries: Attempts per round.

    Returns:
        True if the file was downloaded, False otherwise (insufficient disk
        space, file larger than the free space minus a 0.5 GB buffer, or all
        attempts failed).
    """
    if not check_disk_space():
        log_message("❌ Insufficient disk space for download")
        return False

    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    temp_path = dest_path + ".tmp"
    dns_rounds = 3
    # Substrings that identify a name-resolution failure in the error text
    # (Windows, urllib3 and glibc wording respectively).
    dns_markers = (
        "getaddrinfo failed",
        "NameResolutionError",
        "Name or service not known",
        "Temporary failure in name resolution",
    )

    for dns_attempt in range(dns_rounds):
        dns_failure = False
        for attempt in range(max_retries):
            has_more_attempts = attempt < max_retries - 1
            try:
                # Test connection first
                test_response = requests.head(url, headers=headers, timeout=10)
                test_response.raise_for_status()

                with requests.get(url, headers=headers, stream=True, timeout=30) as r:
                    r.raise_for_status()

                    # Refuse files that would not fit, leaving a 0.5 GB buffer.
                    content_length = r.headers.get("content-length")
                    if content_length:
                        size_gb = int(content_length) / (1024**3)
                        disk_info = get_disk_usage(".")
                        if size_gb > disk_info["free"] - 0.5:
                            log_message(f'❌ File too large: {size_gb:.2f}GB, only {disk_info["free"]:.2f}GB free')
                            return False

                    with open(temp_path, "wb") as f:
                        for chunk in r.iter_content(chunk_size=8192):
                            if chunk:  # Filter out keep-alive chunks
                                f.write(chunk)

                # Only rename once the download completed successfully
                os.replace(temp_path, dest_path)
                return True
            except requests.exceptions.ConnectionError as ce:
                log_message(f"Connection error (attempt {attempt + 1}/{max_retries}): {str(ce)}")
                if any(marker in str(ce) for marker in dns_markers):
                    # DNS issue - abandon this round and start a new one
                    dns_failure = True
                    break
                if has_more_attempts:
                    time.sleep(5 * (attempt + 1))
            except requests.exceptions.Timeout as te:
                log_message(f"Timeout error (attempt {attempt + 1}/{max_retries}): {str(te)}")
                if has_more_attempts:
                    time.sleep(5 * (attempt + 1))
            except requests.exceptions.RequestException as e:
                log_message(f"Download error (attempt {attempt + 1}/{max_retries}): {str(e)}")
                if not has_more_attempts:
                    return False
                time.sleep(5 * (attempt + 1))
            except Exception as e:
                log_message(f"Unexpected error (attempt {attempt + 1}/{max_retries}): {str(e)}")
                if not has_more_attempts:
                    return False
                time.sleep(5 * (attempt + 1))

        if not dns_failure:
            # The round ran out of attempts for a non-DNS reason; another
            # round (and a "DNS resolution failed" message) would be wrong.
            break
        if dns_attempt < dns_rounds - 1:
            log_message(f"DNS resolution failed, waiting 30 seconds before retry {dns_attempt + 2}/3...")
            time.sleep(30)  # Longer wait for DNS issues

    log_message("❌ All download attempts failed")
    return False
def is_multipart_rar(filename: str) -> bool:
    """Return True if ``filename`` is a volume of a multi-part RAR archive.

    A volume is recognised by a name ending in ``.part<digits>.rar``
    (case-insensitive), e.g. ``course.part01.rar``. Names that merely contain
    ``.part`` somewhere, such as ``notes.partial.rar``, are not treated as
    volumes.
    """
    return re.search(r"\.part\d+\.rar$", filename, re.IGNORECASE) is not None
def get_rar_part_base(filename: str) -> str:
    """Return the archive base name shared by all volumes of a RAR set.

    For a volume name ending in ``.part<digits>.rar`` (case-insensitive) the
    volume suffix is stripped: ``course.part02.rar`` -> ``course``.
    Otherwise, if the name contains ``.part`` (in any letter case), everything
    from its first occurrence on is dropped; failing that, every ``.rar`` is
    removed from the name.
    """
    volume_suffix = re.search(r"\.part\d+\.rar$", filename, re.IGNORECASE)
    if volume_suffix:
        return filename[:volume_suffix.start()]

    # Fallback for names containing ".part" without a proper volume suffix.
    # The search is case-insensitive so it agrees with the detection logic.
    marker_index = filename.lower().find(".part")
    if marker_index != -1:
        return filename[:marker_index]
    return filename.replace(".rar", "")
def extract_with_retry(rar_path: str, output_dir: str, max_retries: int = 2) -> bool:
    """Extract a RAR archive with ``unrar``, retrying on failure.

    For a multi-part archive, extraction always starts from the first volume,
    which is looked up next to ``rar_path``. The first volume's name is built
    from the given volume's own name by replacing its part number with 1,
    padded to the same number of digits (``x.part3.rar`` -> ``x.part1.rar``,
    ``x.part03.rar`` -> ``x.part01.rar``).

    Each attempt first tests the archive (``unrar t``) and then extracts it
    (``unrar x -o+``); from the second attempt on ``-kb`` is added to the
    extraction command.

    Args:
        rar_path: Path of the archive (any volume of a multi-part set).
        output_dir: Destination passed to ``unrar``.
        max_retries: Maximum number of test+extract attempts.

    Returns:
        True if extraction succeeded, False otherwise (first volume missing,
        ``unrar`` not installed, exit code 10, or all attempts failed).
    """
    filename = os.path.basename(rar_path)

    # For multi-part RARs, we need the first part
    if is_multipart_rar(filename):
        part_match = re.search(r"\.part(\d+)\.rar$", filename, re.IGNORECASE)
        if part_match:
            # Keep the digit width and the rest of the name exactly as given.
            digits = part_match.group(1)
            first_part = (
                filename[:part_match.start(1)]
                + "1".zfill(len(digits))
                + filename[part_match.end(1):]
            )
        else:
            base_name = get_rar_part_base(filename)
            first_part = f"{base_name}.part01.rar"
        first_part_path = os.path.join(os.path.dirname(rar_path), first_part)
        if not os.path.exists(first_part_path):
            log_message(f"⚠️ Multi-part RAR detected but first part not found: {first_part}")
            return False
        rar_path = first_part_path
        log_message(f"πŸ“¦ Processing multi-part RAR starting with: {first_part}")

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            # Test RAR first
            test_cmd = ["unrar", "t", rar_path]
            test_result = subprocess.run(test_cmd, capture_output=True, text=True)
            if test_result.returncode != 0:
                log_message(f"⚠️ RAR test failed: {test_result.stderr}")
                if is_last_attempt:
                    return False
                continue

            # Extract RAR
            cmd = ["unrar", "x", "-o+", rar_path, output_dir]
            if attempt > 0:  # Try recovery on subsequent attempts
                cmd.insert(2, "-kb")
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode == 0:
                log_message(f"βœ… Successfully extracted: {os.path.basename(rar_path)}")
                return True

            error_msg = result.stderr or result.stdout
            log_message(f"⚠️ Extraction attempt {attempt + 1} failed: {error_msg}")
            if "checksum error" in error_msg.lower() or "CRC failed" in error_msg:
                log_message(f"⚠️ Data corruption detected, attempt {attempt + 1}")
            elif result.returncode == 10:
                log_message(f"⚠️ No files to extract (exit code 10)")
                return False
            elif result.returncode == 1:
                log_message(f"⚠️ Non-fatal error (exit code 1)")
        except FileNotFoundError as e:
            # The unrar executable itself is missing; retrying cannot help.
            log_message(f"❌ Extraction exception: {str(e)}")
            return False
        except Exception as e:
            log_message(f"❌ Extraction exception: {str(e)}")
            if is_last_attempt:
                return False

        if not is_last_attempt:
            time.sleep(1)
    return False
# --- Frame Extraction Utilities ---
def ensure_dir(path):
    """Create directory ``path`` (including parents) if it does not exist."""
    os.makedirs(name=path, exist_ok=True)
class ElectronUnit:
    """Base unit that hands out a per-cycle "operation budget".

    The budget is ``switching_frequency`` multiplied by the wall-clock time
    elapsed since the previous call. The physical constants below are nominal
    values copied as-is; they are not validated against any hardware model.
    """

    def __init__(self, unit_id: int):
        """Store ``unit_id`` and initialise the nominal constants and clock."""
        self.unit_id = unit_id
        # Nominal "electron physics" parameters
        self.electron_drift_velocity = 1.96e7  # m/s in silicon
        self.switching_frequency = 8.92e85  # Hz
        self.path_length = 14e-9  # meters (14nm process node)
        self.traverse_time = 8.92e15
        self.ops_per_second = 9.98e15
        self.ops_per_cycle = int(self.ops_per_second / 1000)
        # Wall-clock time (time.time()) of the last budget calculation.
        self.last_cycle_time = time.time()

    def get_operations_this_cycle(self) -> int:
        """Return the operation budget since the previous call (at least 1).

        The budget is ``switching_frequency * elapsed_seconds``, truncated to
        an int. It is clamped to a minimum of 1 so that callers which bound
        their work by this value always make progress, even when the clock
        has not advanced (coarse timer resolution) or has moved backwards.
        ``last_cycle_time`` is updated to the current time.
        """
        current_time = time.time()
        # time.time() is not monotonic; never let the delta go negative.
        time_delta = max(0.0, current_time - self.last_cycle_time)
        # Upper bound on the budget. The previous literal (78.92e555) is
        # beyond float range and already evaluated to infinity.
        electron_transits = float("inf")
        operations = int(min(electron_transits, self.switching_frequency * time_delta))
        self.last_cycle_time = current_time
        return max(1, operations)
def extract_frames(video_path, output_dir, fps=DEFAULT_FPS):
    """Save frames of ``video_path`` as numbered PNG files in ``output_dir``.

    Every ``round(video_fps / fps)``-th frame (at least every frame) is
    written as ``0001.png``, ``0002.png``, ... The video is read until the
    decoder reports no more frames, rather than trusting the container's
    reported frame count.

    Args:
        video_path: Path of the video file.
        output_dir: Directory for the PNG files; created if missing.
        fps: Target number of saved frames per second of video; must be > 0.

    Returns:
        The number of frames written. 0 if ``fps`` is invalid or the video
        cannot be opened.
    """
    log_message(f"[INFO] Extracting frames from {video_path} to {output_dir} at {fps} fps...")
    if not (fps and fps > 0):
        log_message(f"[ERROR] Invalid extraction rate: {fps} fps")
        return 0
    ensure_dir(output_dir)

    # Only used for the statistics logged at the end.
    electron_unit = ElectronUnit(0)

    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        log_message(f"[ERROR] Failed to open video file: {video_path}")
        return 0

    start_time = time.time()
    frame_idx = 0
    saved_count = 0
    output_root = Path(output_dir)
    try:
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        if not (video_fps and video_fps > 0):  # also rejects NaN
            video_fps = 30
            log_message(f"[WARN] Using fallback FPS: {video_fps}")
        # Never 0: a target rate above the video rate means "save every frame".
        frame_interval = max(1, int(round(video_fps / fps)))

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        log_message(f"[DEBUG] Total frames in video: {total_frames}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_idx % frame_interval == 0:
                frame_name = f"{saved_count + 1:04d}.png"
                if cv2.imwrite(str(output_root / frame_name), frame):
                    saved_count += 1
                else:
                    log_message(f"[WARN] Could not write frame: {output_root / frame_name}")
            frame_idx += 1
    finally:
        cap.release()

    # Log processing stats, measured from the start of extraction.
    elapsed = time.time() - start_time
    frames_per_second = frame_idx / elapsed if elapsed > 0 else 0
    log_message(f"Electron-speed frame extraction complete:")
    log_message(f"Extracted {saved_count} frames from {video_path}")
    log_message(f"Processing speed: {frames_per_second:.2f} frames/s")
    log_message(f"Electron drift utilized: {electron_unit.electron_drift_velocity:.2e} m/s")
    return saved_count
# --- Cursor Tracking Utilities ---
def to_rgb(img):
    """Return ``img`` as a 3-channel image, or None if ``img`` is None.

    Despite the name, the conversions used produce BGR channel order:
    2-D (grayscale) input is expanded with ``COLOR_GRAY2BGR`` and 4-channel
    input is reduced with ``COLOR_BGRA2BGR``. Any other input is returned
    unchanged.
    """
    if img is None:
        return None

    conversion = None
    if len(img.shape) == 2:
        conversion = cv2.COLOR_GRAY2BGR
    elif img.shape[2] == 4:
        conversion = cv2.COLOR_BGRA2BGR

    return img if conversion is None else cv2.cvtColor(img, conversion)
def get_mask_from_alpha(template_img):
    """Build a binary mask from the alpha channel of a 4-channel image.

    Returns a ``uint8`` array that is 255 where alpha is non-zero and 0
    elsewhere, or None when ``template_img`` is None or is not a 3-D array
    with exactly four channels.
    """
    if template_img is None:
        return None
    shape = template_img.shape
    if len(shape) != 3 or shape[2] != 4:
        return None
    alpha = template_img[:, :, 3]
    # Any non-zero alpha counts as opaque.
    return (alpha > 0).astype(np.uint8) * 255
def detect_cursor_in_frame_multi(frame, cursor_templates, threshold=CURSOR_THRESHOLD):
    """Locate the cursor in ``frame`` using several templates.

    Each template is matched with ``cv2.TM_CCOEFF_NORMED``; a template with
    an alpha channel uses that channel as the match mask.

    Args:
        frame: Image to search (grayscale, 3-channel or 4-channel array).
        cursor_templates: Mapping of template name -> template image.
        threshold: Minimum score for a match to count as a detection.

    Returns:
        ``(position, confidence, template_name)``. ``position`` is the
        ``(x, y)`` centre of the best match and ``template_name`` its
        template when the best score reaches ``threshold``; otherwise both
        are None. ``confidence`` is the best score seen (-1 if no template
        could be matched).
    """
    best_pos = None
    best_conf = -1
    best_template_name = None
    frame_rgb = to_rgb(frame)

    for template_name, cursor_template in cursor_templates.items():
        template_rgb = to_rgb(cursor_template)
        mask = get_mask_from_alpha(cursor_template)
        if template_rgb is None or frame_rgb is None or template_rgb.shape[2] != frame_rgb.shape[2]:
            log_message(f"[WARN] Skipping template {template_name} due to channel mismatch or load error.")
            continue
        try:
            result = cv2.matchTemplate(frame_rgb, template_rgb, cv2.TM_CCOEFF_NORMED, mask=mask)
        except Exception as e:
            log_message(f"[WARN] matchTemplate failed for {template_name}: {e}")
            continue

        # Normalised masked matching can yield inf/NaN where the denominator
        # is zero; such scores must not win as false detections.
        result[~np.isfinite(result)] = -1.0

        _, max_val, _, max_loc = cv2.minMaxLoc(result)
        if max_val > best_conf:
            best_conf = max_val
            if max_val >= threshold:
                # max_loc is the top-left corner; report the template centre.
                cursor_w, cursor_h = template_rgb.shape[1], template_rgb.shape[0]
                cursor_x = max_loc[0] + cursor_w // 2
                cursor_y = max_loc[1] + cursor_h // 2
                best_pos = (cursor_x, cursor_y)
                best_template_name = template_name

    if best_conf >= threshold:
        return best_pos, best_conf, best_template_name
    return None, best_conf, None
def send_email_with_attachment(subject, body, to_email, from_email, app_password, attachment_path):
    """Send an email with one file attached through Gmail's SMTP-over-SSL.

    The file at ``attachment_path`` is attached as
    ``application/octet-stream`` under its own file name. Reading the
    attachment happens before the send and may raise; failures while
    connecting, logging in or sending are caught and logged.
    """
    message = EmailMessage()
    message["Subject"] = subject
    message["From"] = from_email
    message["To"] = to_email
    message.set_content(body)

    with open(attachment_path, "rb") as attachment:
        payload = attachment.read()
    message.add_attachment(
        payload,
        maintype="application",
        subtype="octet-stream",
        filename=Path(attachment_path).name,
    )

    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
            smtp.login(from_email, app_password)
            smtp.send_message(message)
        log_message(f"[SUCCESS] Email sent to {to_email}")
    except Exception as e:
        log_message(f"[ERROR] Failed to send email: {e}")
class ElectronCursorTracker(ElectronUnit):
    """Cursor tracking unit built on ``ElectronUnit``.

    Attributes:
        tracked_count: Number of frames in which a cursor was detected.
        processed_frames: Number of frames passed to
            ``detect_cursor_in_frame``.
    """

    def __init__(self, unit_id: int):
        """Initialise the base unit and reset both counters to zero."""
        super().__init__(unit_id)
        self.tracked_count = 0
        self.processed_frames = 0

    def to_rgb(self, img):
        """Return ``img`` as a 3-channel image (BGR order), or None.

        Grayscale input is expanded with ``COLOR_GRAY2BGR`` and 4-channel
        input is reduced with ``COLOR_BGRA2BGR``; other input is returned
        unchanged.
        """
        if img is None:
            return None
        if len(img.shape) == 2:
            return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        if img.shape[2] == 4:
            return cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
        return img

    def get_mask_from_alpha(self, template_img):
        """Return a 0/255 ``uint8`` mask from a 4-channel image's alpha, else None."""
        if template_img is not None and len(template_img.shape) == 3 and template_img.shape[2] == 4:
            return (template_img[:, :, 3] > 0).astype(np.uint8) * 255
        return None

    def detect_cursor_in_frame(self, frame, cursor_templates, threshold):
        """Locate the cursor in ``frame`` by matching every template.

        Args:
            frame: Image to search.
            cursor_templates: Mapping of template name -> template image.
            threshold: Minimum ``TM_CCOEFF_NORMED`` score for a detection.

        Returns:
            ``(position, confidence, template_name)``: the ``(x, y)`` centre
            and template name of the best match at or above ``threshold``
            (both None if there is none) and the best score seen (-1 if no
            template could be matched).
        """
        # Keep the inherited cycle clock advancing. The returned budget is
        # deliberately not used to limit the templates: a zero budget would
        # silently skip detection for the whole frame.
        self.get_operations_this_cycle()

        best_pos = None
        best_conf = -1
        best_template_name = None
        frame_rgb = self.to_rgb(frame)

        for template_name, cursor_template in cursor_templates.items():
            template_rgb = self.to_rgb(cursor_template)
            mask = self.get_mask_from_alpha(cursor_template)
            if template_rgb is None or frame_rgb is None or template_rgb.shape[2] != frame_rgb.shape[2]:
                continue
            try:
                result = cv2.matchTemplate(frame_rgb, template_rgb, cv2.TM_CCOEFF_NORMED, mask=mask)
                # Normalised masked matching can yield inf/NaN scores; they
                # must not be reported as detections.
                result[~np.isfinite(result)] = -1.0
                _, max_val, _, max_loc = cv2.minMaxLoc(result)
            except Exception as e:
                log_message(f"[WARN] Template matching failed for {template_name}: {e}")
                continue

            if max_val > best_conf:
                best_conf = max_val
                if max_val >= threshold:
                    # max_loc is the top-left corner; report the centre.
                    cursor_w, cursor_h = template_rgb.shape[1], template_rgb.shape[0]
                    cursor_x = max_loc[0] + cursor_w // 2
                    cursor_y = max_loc[1] + cursor_h // 2
                    best_pos = (cursor_x, cursor_y)
                    best_template_name = template_name

        self.processed_frames += 1
        if best_pos is not None:
            self.tracked_count += 1
        return best_pos, best_conf, best_template_name
def track_cursor(frames_dir, cursor_templates_dir, output_json_path, threshold=CURSOR_THRESHOLD, start_frame=1, email_results=False):
    """Detect the cursor in every frame image and write the results as JSON.

    Frames are the ``*.png`` files in ``frames_dir`` whose stem is an integer
    frame number; they are processed in numeric order, each exactly once.
    Files with a non-numeric stem and unreadable images are skipped with a
    warning.

    Args:
        frames_dir: Directory with the frame images (created if missing).
        cursor_templates_dir: Directory with ``*.png`` cursor templates.
        output_json_path: Destination of the JSON result list; its parent
            directory is created if missing.
        threshold: Minimum match score for a detection.
        start_frame: Frames numbered below this value are ignored.
        email_results: If True, email the JSON file using the ``TO_EMAIL``,
            ``FROM_EMAIL`` and ``GMAIL_APP_PASSWORD`` environment variables.

    Returns:
        The number of frames in which a cursor was detected (0 if no
        templates could be loaded). Failures to write the JSON file or to
        send the email are logged, not raised.
    """
    log_message(f"[INFO] Tracking cursors in {frames_dir}...")
    frames_dir = Path(frames_dir).resolve()
    output_json_path = Path(output_json_path).resolve()
    cursor_templates_dir = Path(cursor_templates_dir).resolve()
    ensure_dir(frames_dir)
    ensure_dir(output_json_path.parent)

    tracker = ElectronCursorTracker(0)

    # Load cursor templates (sorted so tie-breaking between templates is
    # deterministic).
    cursor_templates = {}
    for template_file in sorted(cursor_templates_dir.glob("*.png")):
        template_img = cv2.imread(str(template_file), cv2.IMREAD_UNCHANGED)
        if template_img is not None:
            cursor_templates[template_file.name] = template_img
        else:
            log_message(f"[WARN] Could not load template: {template_file}")
    if not cursor_templates:
        log_message(f"[ERROR] No cursor templates found in: {cursor_templates_dir}")
        return 0

    # Collect the frames to analyse, ordered by numeric frame number.
    pending_frames = []
    for frame_file in frames_dir.glob("*.png"):
        try:
            frame_num = int(frame_file.stem)
        except ValueError:
            log_message(f"[WARN] Skipping frame with non-numeric name: {frame_file}")
            continue
        if frame_num >= start_frame:
            pending_frames.append((frame_num, frame_file))
    pending_frames.sort(key=lambda item: (item[0], item[1].name))
    frame_count = len(pending_frames)

    results = []
    tracked_count = 0
    start_time = time.time()

    for processed_count, (_, frame_file) in enumerate(pending_frames, start=1):
        frame = cv2.imread(str(frame_file), cv2.IMREAD_UNCHANGED)
        if frame is None:
            log_message(f"[WARN] Could not load frame: {frame_file}")
        else:
            pos, conf, template_name = tracker.detect_cursor_in_frame(frame, cursor_templates, threshold)
            if pos is not None:
                results.append({
                    "frame": frame_file.name,
                    "cursor_active": True,
                    "x": pos[0],
                    "y": pos[1],
                    "confidence": conf,
                    "template": template_name
                })
                tracked_count += 1
            else:
                results.append({
                    "frame": frame_file.name,
                    "cursor_active": False,
                    "x": None,
                    "y": None,
                    "confidence": conf,
                    "template": None
                })

        # Log progress periodically
        if processed_count % 100 == 0:
            elapsed = time.time() - start_time
            fps = processed_count / elapsed if elapsed > 0 else 0
            log_message(f"Processed {processed_count}/{frame_count} frames at {fps:.2f} fps")

    try:
        with open(output_json_path, "w") as f:
            json.dump(results, f, indent=2)
    except Exception as e:
        log_message(f"[ERROR] Failed to write output JSON: {e}")
        # Nothing to attach if the file could not be written.
        return tracked_count
    log_message(f"[SUCCESS] Cursor tracking results saved to {output_json_path}")

    if email_results:
        log_message("[INFO] Preparing to email results...")
        to_email = os.environ.get("TO_EMAIL")
        from_email = os.environ.get("FROM_EMAIL")
        app_password = os.environ.get("GMAIL_APP_PASSWORD")
        if not (to_email and from_email and app_password):
            log_message("[ERROR] Email environment variables not set. Please set TO_EMAIL, FROM_EMAIL, and GMAIL_APP_PASSWORD.")
        else:
            try:
                send_email_with_attachment(
                    subject="Cursor Tracking Results",
                    body="See attached JSON results.",
                    to_email=to_email,
                    from_email=from_email,
                    app_password=app_password,
                    attachment_path=output_json_path
                )
            except Exception as e:
                log_message(f"[ERROR] Failed to email results: {e}")
    return tracked_count
def process_rar_file(rar_path: str) -> bool:
    """Extract one RAR archive and run video processing on its contents.

    The archive is extracted into ``EXTRACT_FOLDER/<course name>`` (any
    existing directory of that name is removed first). Extracted video files
    (``.mp4``, ``.avi``, ``.mov``, ``.mkv``) are handed to an
    ``ElectronSpeedVideoProcessor`` and the counters in ``processing_status``
    are updated from its totals.

    Returns:
        True on success (including when the archive contains no videos),
        False if extraction or processing raised; failures are logged and
        recorded with ``log_failed_file``.
    """
    filename = os.path.basename(rar_path)
    processing_status["current_file"] = filename

    # All volumes of a multi-part archive share one course directory.
    course_name = (
        get_rar_part_base(filename)
        if is_multipart_rar(filename)
        else filename.replace(".rar", "")
    )
    extract_dir = os.path.join(EXTRACT_FOLDER, course_name)
    video_suffixes = (".mp4", ".avi", ".mov", ".mkv")

    try:
        log_message(f"πŸ”„ Processing: {filename}")

        # Start from an empty extraction directory.
        if os.path.exists(extract_dir):
            shutil.rmtree(extract_dir, ignore_errors=True)
        os.makedirs(extract_dir, exist_ok=True)

        extracted_ok = extract_with_retry(rar_path, extract_dir)
        if not extracted_ok:
            raise Exception("RAR extraction failed")

        # Count everything that was extracted and collect the videos.
        file_count = 0
        videos = []
        for root, _dirs, names in os.walk(extract_dir):
            file_count += len(names)
            videos.extend(
                os.path.join(root, name)
                for name in names
                if name.lower().endswith(video_suffixes)
            )

        processing_status["extracted_courses"] += 1
        log_message(f"βœ… Successfully extracted \'{course_name}\' ({file_count} files, {len(videos)} videos)")

        if not videos:
            log_message(f"[WARN] No video files found in {course_name}")
            return True

        video_total = len(videos)
        log_message(f"[INFO] Processing {video_total} videos with electron-speed processing")
        processor = ElectronSpeedVideoProcessor(num_cores=multiprocessing.cpu_count())
        frames_output_base = os.path.join(FRAMES_OUTPUT_FOLDER, course_name)
        processor.process_videos(videos, frames_output_base, CURSOR_TEMPLATES_DIR)

        # Fold the processor's totals into the global status.
        processing_status["extracted_videos"] += video_total
        processing_status["extracted_frames_count"] += processor.total_frames
        processing_status["tracked_cursors_count"] += processor.total_cursors

        elapsed = time.time() - processor.start_time
        frames_per_second = processor.total_frames / elapsed if elapsed > 0 else 0
        log_message(f"[INFO] Electron-speed processing complete:")
        log_message(f"[INFO] Processed {video_total} videos")
        log_message(f"[INFO] Extracted {processor.total_frames} frames")
        log_message(f"[INFO] Tracked {processor.total_cursors} cursors")
        log_message(f"[INFO] Processing speed: {frames_per_second:.2f} frames/s")
        log_message(f"[INFO] Electron drift utilized: {processor.cores[0].units[0].electron_drift_velocity:.2e} m/s")
        return True
    except Exception as e:
        error_msg = str(e)
        log_message(f"❌ Processing failed: {error_msg}")
        log_failed_file(filename, error_msg)
        return False
    finally:
        processing_status["current_file"] = None
from electron_processing import ElectronSpeedVideoProcessor
def main_processing_loop(start_index: int = 0):
    """Download and process the next RAR file of the source dataset.

    One file is handled per call. The position in the sorted list of ``.rar``
    files is persisted in ``DOWNLOAD_STATE_FILE`` and the names of
    successfully processed files in ``PROCESS_STATE_FILE``. The index is
    advanced whether or not the file succeeded; failures are counted in
    ``processing_status["failed_files"]``.

    Args:
        start_index: Index to process. Values <= 0 mean "use the saved
            index", so index 0 cannot be requested explicitly.

    Errors are logged rather than raised. ``processing_status["is_running"]``
    is True for the duration of the call and partial downloads are cleaned up
    on exit.
    """
    # Imported locally so the function does not rely on a module-level import.
    from urllib.parse import quote

    processing_status["is_running"] = True
    try:
        # Load state
        process_state = load_json_state(PROCESS_STATE_FILE, {"processed_rars": []})
        processed_rars = process_state.get("processed_rars", [])
        # NOTE(review): without a saved state the run starts at index 25,
        # not 0 — confirm this is intentional.
        download_state = load_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": 25})
        saved_index = download_state.get("next_download_index", 25)

        # Use start_index if provided, otherwise use the saved state
        next_index = start_index if start_index > 0 else saved_index
        log_message(f"πŸ“Š Starting from index {next_index}")
        log_message(f"πŸ“Š Previously processed: {len(processed_rars)} files")

        # Get file list
        try:
            files = list(hf_api.list_repo_files(repo_id=SOURCE_REPO_ID, repo_type="dataset"))
        except Exception as e:
            log_message(f"❌ Failed to get file list: {str(e)}")
            return
        rar_files = sorted([f for f in files if f.endswith(".rar")])
        processing_status["total_files"] = len(rar_files)
        log_message(f"πŸ“ Found {len(rar_files)} RAR files in repository")
        if next_index >= len(rar_files):
            log_message("βœ… All files have been processed!")
            return

        # Process only one file per run
        rar_file = rar_files[next_index]
        filename = os.path.basename(rar_file)

        if filename in processed_rars:
            log_message(f"⏭️ Skipping already processed: {filename}")
            processing_status["processed_files"] += 1
            # Move to next file
            next_index += 1
            save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})
            log_message(f"πŸ“Š Moving to next file. Progress: {next_index}/{len(rar_files)}")
            return

        log_message(f"πŸ“₯ Downloading: {filename}")
        dest_path = os.path.join(DOWNLOAD_FOLDER, filename)

        # Percent-encode the repo path so names containing spaces, '#', '?'
        # and similar characters still form a valid URL ('/' is kept).
        download_url = f"https://huggingface.co/datasets/{SOURCE_REPO_ID}/resolve/main/{quote(rar_file)}"
        if download_with_retry(download_url, dest_path):
            # Process file
            if process_rar_file(dest_path):
                processed_rars.append(filename)
                save_json_state(PROCESS_STATE_FILE, {"processed_rars": processed_rars})
                log_message(f"βœ… Successfully processed: {filename}")
                processing_status["processed_files"] += 1
            else:
                log_message(f"❌ Failed to process: {filename}")
                processing_status["failed_files"] += 1
            # Keep downloaded file
            log_message(f"οΏ½ Keeping downloaded file: {filename}")
        else:
            log_message(f"❌ Failed to download: {filename}")
            processing_status["failed_files"] += 1

        # Update download state for next run
        next_index += 1
        save_json_state(DOWNLOAD_STATE_FILE, {"next_download_index": next_index})

        # Status update
        log_message(f"πŸ“Š Progress: {next_index}/{len(rar_files)} files processed")
        log_message(f'πŸ“Š Extracted: {processing_status["extracted_courses"]} courses')
        log_message(f'πŸ“Š Videos Processed: {processing_status["extracted_videos"]}')
        log_message(f'πŸ“Š Frames Extracted: {processing_status["extracted_frames_count"]}')
        log_message(f'πŸ“Š Cursors Tracked: {processing_status["tracked_cursors_count"]}')
        log_message(f'πŸ“Š Failed: {processing_status["failed_files"]} files')
        if next_index < len(rar_files):
            log_message(f"πŸ”„ Run the script again to process the next file: {os.path.basename(rar_files[next_index])}")
        else:
            log_message("πŸŽ‰ All files have been processed!")

        log_message("πŸŽ‰ Processing complete!")
        log_message(f'πŸ“Š Final stats: {processing_status["extracted_courses"]} courses extracted, {processing_status["extracted_videos"]} videos processed, {processing_status["extracted_frames_count"]} frames extracted, {processing_status["tracked_cursors_count"]} cursors tracked')
    except KeyboardInterrupt:
        log_message("⏹️ Processing interrupted by user")
    except Exception as e:
        log_message(f"❌ Fatal error: {str(e)}")
    finally:
        processing_status["is_running"] = False
        cleanup_temp_files()
# Public names of this module (imported by download_api.py).
__all__ = [
    # Entry point and shared run status
    "main_processing_loop",
    "processing_status",
    # Folder configuration
    "CURSOR_TRACKING_OUTPUT_FOLDER",
    "CURSOR_TEMPLATES_DIR",
    # Helpers
    "log_message",
    "send_email_with_attachment",
    "track_cursor",
    "extract_frames",
    # Tunable defaults
    "DEFAULT_FPS",
    "CURSOR_THRESHOLD",
    "ensure_dir",
]