Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 8,734 Bytes
07505ba a683f71 07505ba a683f71 07505ba a683f71 07505ba a683f71 07505ba 7f4024a 07505ba 033e9d9 07505ba 033e9d9 07505ba 7f4024a 0bc84f3 7f4024a 0bc84f3 7f4024a 033e9d9 7f4024a 033e9d9 a683f71 033e9d9 7f4024a 033e9d9 7f4024a 033e9d9 07505ba 033e9d9 07505ba 033e9d9 07505ba 033e9d9 07505ba 7f4024a 07505ba a683f71 07505ba a683f71 07505ba a683f71 07505ba a683f71 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
import asyncio
import json
from fastapi import HTTPException
import dropbox
from dropbox.files import FolderMetadata, FileMetadata
from datetime import datetime, timedelta, timezone
from config import SanatanConfig
from db import SanatanDatabase
from modules.audio.model import AudioRequest, AudioType
import logging
from modules.dropbox.client import dbx
from fastapi import HTTPException
from enum import Enum
import dropbox
from dropbox.files import FileMetadata
from dropbox.files import FileMetadata
from datetime import datetime, timezone
from fastapi import HTTPException
from typing import List, Set
from datetime import datetime, timezone, timedelta
from fastapi import HTTPException
import dropbox
from dropbox.files import FileMetadata
# Module-level logging setup. basicConfig() attaches a default handler to the
# root logger when it has none; this module's own logger emits INFO and above.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
def list_dropbox_folder_hierarchy(dbx: dropbox.Dropbox, base_path: str = ""):
    """
    Build a nested dict mirroring the Dropbox folder tree rooted at base_path.

    Every subfolder becomes a key (the folder name) whose value is the nested
    dict produced by a recursive call on that folder's ``path_lower``. Files
    found directly in a folder are gathered under the ``"__files__"`` key as
    dicts with ``"name"``, ``"path"`` and ``"download_url"`` (a temporary link
    requested individually for each file).

    The walk is best-effort: a file whose link cannot be generated is skipped
    after printing a message, and any error raised while listing a folder is
    printed and whatever was collected up to that point is returned.

    Args:
        dbx (dropbox.Dropbox): Authenticated Dropbox client.
        base_path (str): Path inside Dropbox ("" means root).

    Returns:
        dict: Folder names mapped to nested dicts, plus an optional
        "__files__" list describing the files in this folder.
    """
    tree = {}
    try:
        print("listing files in", base_path)
        page = dbx.files_list_folder(base_path)
        keep_paging = True
        while keep_paging:
            for entry in page.entries:
                if isinstance(entry, FolderMetadata):
                    # Descend into the subfolder and store its subtree by name.
                    tree[entry.name] = list_dropbox_folder_hierarchy(
                        dbx, entry.path_lower
                    )
                    continue
                if not isinstance(entry, FileMetadata):
                    # Neither a folder nor a file entry: nothing to record.
                    continue
                try:
                    temp_url = dbx.files_get_temporary_link(entry.path_lower).link
                    tree.setdefault("__files__", []).append(
                        {
                            "name": entry.name,
                            "path": entry.path_lower,
                            "download_url": temp_url,
                        }
                    )
                except Exception as link_err:
                    print(
                        f"Could not generate link for {entry.path_lower}: {link_err}"
                    )
            # Listings are paginated; fetch the next page only if one exists.
            keep_paging = page.has_more
            if keep_paging:
                page = dbx.files_list_folder_continue(page.cursor)
    except Exception as e:
        print(f"Error listing folder {base_path}: {e}")
    return tree
# Temporary-link cache used by get_audio_urls:
#   {(scripture_name, global_index, file_type): {"url": ..., "expiry": datetime}}
audio_cache: dict[tuple[str, int, str], dict] = {}
# Lifetime of a cached link; kept below the 4h link expiry so links are
# refreshed before they stop working.
CACHE_TTL = timedelta(hours=3, minutes=30)
# Lifetime of a cached folder listing.
AUDIO_LIST_CACHE_TTL = timedelta(hours=24)
# Folder-listing cache used by get_audio_urls:
#   {scripture_name: {"entries": [...], "expiry": datetime}}
audio_list_cache: dict[str, dict] = {}
async def get_audio_urls(req: AudioRequest):
    """
    Return temporary download links for every audio file of one verse.

    Files are looked up in ``/<scripture_name>/audio`` and selected when their
    name starts with ``"<global_index>-"``. The rest of the name, minus its
    last extension, is used as the file type and as the key of the result.

    Two module-level caches are consulted: ``audio_list_cache`` for the folder
    listing and ``audio_cache`` for individual links. Entries are reused until
    their stored ``"expiry"`` has passed.

    NOTE(review): the Dropbox calls below are synchronous and are not awaited,
    so they run inline inside this coroutine; confirm that is acceptable for
    the event loop serving it.

    Args:
        req (AudioRequest): Supplies ``scripture_name`` and ``global_index``.

    Returns:
        dict: ``{file_type: url}``; the value is ``None`` for a file whose
        link request raised a Dropbox ``ApiError``.

    Raises:
        HTTPException: 404 when listing the folder raises ``ApiError``, or
            when no file matches the requested index.
    """
    base_path = f"/{req.scripture_name}/audio"
    prefix = f"{req.global_index}-"
    urls = {}
    now = datetime.now(timezone.utc)

    # --- 1. Folder listing: reuse the cached one while it is still valid ---
    listing_record = audio_list_cache.get(req.scripture_name)
    listing_is_fresh = bool(listing_record) and listing_record["expiry"] > now
    if not listing_is_fresh:
        try:
            page = dbx.files_list_folder(base_path)
            entries = page.entries
            while page.has_more:
                page = dbx.files_list_folder_continue(page.cursor)
                entries.extend(page.entries)
        except dropbox.exceptions.ApiError:
            raise HTTPException(status_code=404, detail="Audio directory not found")
        audio_list_cache[req.scripture_name] = {
            "entries": entries,
            "expiry": now + AUDIO_LIST_CACHE_TTL,
        }
    else:
        entries = listing_record["entries"]

    # --- 2. Keep only the files that belong to the requested index ---
    matching_files = [
        candidate
        for candidate in entries
        if isinstance(candidate, FileMetadata) and candidate.name.startswith(prefix)
    ]
    if not matching_files:
        raise HTTPException(status_code=404, detail="No audio files found")

    # --- 3. One link per file, served from the link cache when possible ---
    for matched in matching_files:
        filename = matched.name
        # "<index>-<type>.<ext>" -> "<type>"
        file_type = filename[len(prefix):].rsplit(".", 1)[0]
        cache_key = (req.scripture_name, req.global_index, file_type)

        hit = audio_cache.get(cache_key)
        if hit and hit["expiry"] > now:
            urls[file_type] = hit["url"]
            continue

        try:
            fresh_link = dbx.files_get_temporary_link(f"{base_path}/{filename}").link
        except dropbox.exceptions.ApiError:
            # Best effort: report the file type but without a usable link.
            urls[file_type] = None
        else:
            urls[file_type] = fresh_link
            audio_cache[cache_key] = {"url": fresh_link, "expiry": now + CACHE_TTL}
    return urls
async def cleanup_audio_url_cache(interval_seconds: int = 600):
    """
    Run forever, evicting expired entries from ``audio_cache``.

    On each pass every entry whose ``"expiry"`` is at or before the current
    UTC time is deleted, a message is printed if anything was removed, and the
    coroutine then sleeps for ``interval_seconds`` before the next pass.

    Args:
        interval_seconds (int): Pause between two cleanup passes, in seconds.
    """
    while True:
        cutoff = datetime.now(timezone.utc)
        # Collect the keys first so the dict is not mutated while iterating it.
        expired_keys = [
            cache_key
            for cache_key in audio_cache
            if audio_cache[cache_key]["expiry"] <= cutoff
        ]
        for cache_key in expired_keys:
            audio_cache.pop(cache_key)
        if expired_keys:
            # Debug output only when something was actually evicted.
            print(f"Cleaned up {len(expired_keys)} expired cache entries")
        await asyncio.sleep(interval_seconds)
from datetime import datetime, timezone, timedelta
# In-memory cache used by get_global_indices_with_audio:
#   {(scripture_name, audio_type.value): {"indices": [...], "expiry": datetime}}
_audio_indices_cache: dict[tuple[str, str], dict] = {}
# Lifetime of a cached index list.
CACHE_TTL_2 = timedelta(minutes=10)
def _parse_audio_filename(name: str):
    """
    Split an audio file name of the form ``<global_index>-<type>.<ext>``.

    Returns:
        tuple[int, str] | None: ``(global_index, file_type)`` where the type is
        the text after the first "-" with its last extension removed, stripped
        and lower-cased; ``None`` when the name has no "-" or the part before
        the first "-" is not an integer.
    """
    index_text, separator, remainder = name.partition("-")
    if not separator:
        return None
    try:
        global_index = int(index_text)
    except ValueError:
        return None
    return global_index, remainder.rsplit(".", 1)[0].strip().lower()


async def get_global_indices_with_audio(scripture_name: str, audio_type: AudioType):
    """
    Return the sorted global indices of a scripture filtered by audio type.

    The files in ``/<scripture_name>/audio`` are listed and their names parsed
    as ``<global_index>-<type>.<ext>``. Depending on ``audio_type``:

    * ``AudioType.any``  -> indices that have at least one audio file;
    * ``AudioType.none`` -> indices in ``1..total_verses`` (verse count taken
      from the database) that have no audio file at all;
    * any other value    -> indices having a file whose type starts with
      ``audio_type.value``.

    Results are kept in ``_audio_indices_cache`` for ``CACHE_TTL_2``. A copy of
    the list is always returned, so callers may modify it without affecting
    the cached value.

    NOTE(review): the ``AudioType.none`` branch assumes global indices run
    from 1 to ``total_verses`` inclusive; confirm they are not 0-based.

    Args:
        scripture_name (str): Scripture whose audio folder is inspected.
        audio_type (AudioType): Filter to apply.

    Returns:
        list[int]: Matching global indices in ascending order.

    Raises:
        HTTPException: 404 when listing the audio folder raises ``ApiError``.
    """
    now = datetime.now(timezone.utc)
    cache_key = (scripture_name, audio_type.value)

    cached = _audio_indices_cache.get(cache_key)
    if cached and cached["expiry"] > now:
        # Copy so a caller mutating the result cannot corrupt the cache entry.
        return list(cached["indices"])

    # Step 1: list every entry of the audio folder, following pagination.
    base_path = f"/{scripture_name}/audio"
    entries = []
    try:
        result = dbx.files_list_folder(base_path)
        entries.extend(result.entries)
        while result.has_more:
            result = dbx.files_list_folder_continue(result.cursor)
            entries.extend(result.entries)
    except dropbox.exceptions.ApiError as err:
        raise HTTPException(
            status_code=404, detail="Audio directory not found"
        ) from err

    # Step 2: parse each file name once into (global_index, file_type).
    parsed = []
    for entry in entries:
        if not isinstance(entry, FileMetadata):
            continue
        info = _parse_audio_filename(entry.name)
        if info is not None:
            parsed.append(info)

    # Step 3: apply the requested filter.
    if audio_type == AudioType.none:
        indices_with_audio = {global_index for global_index, _ in parsed}
        db = SanatanDatabase()
        config = SanatanConfig()
        total_verses = db.count(
            collection_name=config.get_collection_name(scripture_name=scripture_name)
        )
        indices = set(range(1, total_verses + 1)) - indices_with_audio
    elif audio_type == AudioType.any:
        indices = {global_index for global_index, _ in parsed}
    else:
        indices = {
            global_index
            for global_index, file_type in parsed
            if file_type.startswith(audio_type.value)
        }

    # Sort once, cache that list, and return an independent copy of it.
    ordered = sorted(indices)
    _audio_indices_cache[cache_key] = {
        "indices": ordered,
        "expiry": now + CACHE_TTL_2,
    }
    return list(ordered)
if __name__ == "__main__":
    # Manual smoke test using the module-level Dropbox client.
    # Alternative calls kept for reference:
    #   data = list_dropbox_folder_hierarchy(dbx, "")
    #   data = asyncio.run(
    #       get_audio_urls(AudioRequest(scripture_name="divya_prabandham", global_index=0))
    #   )
    #   print(json.dumps(data, indent=2))
    lookup = get_global_indices_with_audio(
        scripture_name="divya_prabandham",
        audio_type=AudioType.upanyasam,
    )
    upanyasam_indices = asyncio.run(lookup)
    # Only the number of matching indices is reported.
    print(len(upanyasam_indices))
|