Spaces:
Build error
Build error
import base64 | |
import re | |
import shutil | |
import time | |
from datetime import datetime | |
from pathlib import Path | |
import enum | |
import pandas as pd | |
from shortGPT.audio.audio_utils import downloadYoutubeAudio, get_asset_duration | |
from shortGPT.database.db_document import TinyMongoDocument | |
AUDIO_EXTENSIONS = {".mp3", ".m4a", ".wav", ".flac", ".aac", ".ogg", ".wma", ".opus"} | |
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".svg", ".webp"} | |
VIDEO_EXTENSIONS = {".mp4", ".mkv", ".flv", ".avi", ".mov", ".wmv", ".webm", ".m4v"} | |
TEMPLATE_ASSETS_DB_PATH = '.database/template_asset_db.json' | |
ASSETS_DB_PATH = '.database/asset_db.json' | |
class AssetType(enum.Enum): | |
VIDEO = "video" | |
AUDIO = "audio" | |
IMAGE = "image" | |
BACKGROUND_MUSIC = "background music" | |
BACKGROUND_VIDEO = "background video" | |
OTHER = "other" | |
class AssetDatabase: | |
""" | |
Class for managing assets, both local and remote. | |
The class provides methods to add, remove, get and sync assets. | |
It uses a MongoDB-like database to store information about the assets. | |
""" | |
if not Path(ASSETS_DB_PATH).exists() and Path(TEMPLATE_ASSETS_DB_PATH).exists(): | |
shutil.copy(TEMPLATE_ASSETS_DB_PATH, ASSETS_DB_PATH) | |
local_assets = TinyMongoDocument("asset_db", "asset_collection", "local_assets", create=True) | |
remote_assets = TinyMongoDocument("asset_db", "asset_collection", "remote_assets", create=True) | |
if not remote_assets._get('subscribe animation'): | |
remote_assets._save({ | |
'subscribe animation':{ | |
"type": AssetType.VIDEO.value, | |
"url": "https://www.youtube.com/watch?v=72WhUT0OM98", | |
"ts": datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
} | |
}) | |
def asset_exists(cls, name: str) -> bool: | |
return name in cls.local_assets._get() or name in cls.remote_assets._get() | |
def add_local_asset(cls, name: str, asset_type: AssetType, path: str): | |
cls.local_assets._save({ | |
name: { | |
"type": asset_type.value, | |
"path": path, | |
"ts": datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
} | |
}) | |
def add_remote_asset(cls, name: str, asset_type: AssetType, url: str): | |
cls.remote_assets._save({ | |
name: { | |
"type": asset_type.value, | |
"url": url, | |
"ts": datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
} | |
}) | |
def remove_asset(cls, name: str): | |
if name in cls.local_assets._get(): | |
cls._remove_local_asset(name) | |
elif name in cls.remote_assets._get(): | |
cls.remote_assets._delete(name) | |
else: | |
raise ValueError(f"Asset '{name}' does not exist in the database.") | |
def get_df(cls, source=None) -> pd.DataFrame: | |
data = [] | |
if source is None or source == 'local': | |
for key, asset in cls.local_assets._get().items(): | |
data.append({'name': key, | |
'type': asset['type'], | |
'link': asset['path'], | |
'source': 'local', | |
'ts': asset.get('ts') | |
}) | |
if source is None or source == 'youtube': | |
for key, asset in cls.remote_assets._get().items(): | |
data.append({'name': key, | |
'type': asset['type'], | |
'link': asset['url'], | |
'source': 'youtube' if 'youtube' in asset['url'] else 'internet', | |
'ts': asset.get('ts') | |
}) | |
df = pd.DataFrame(data) | |
if (not df.empty): | |
df.sort_values(by='ts', ascending=False, inplace=True) | |
return df.drop(columns='ts') | |
return df | |
def sync_local_assets(cls): | |
""" | |
Loads all local assets from the static-assets folder into the database. | |
""" | |
local_assets = cls.local_assets._get() | |
local_paths = {asset['path'] for asset in local_assets.values()} | |
for path in Path('public').rglob('*'): | |
if path.is_file() and str(path) not in local_paths: | |
cls._add_local_asset_from_path(path) | |
def get_asset_link(cls, key: str) -> str: | |
""" | |
Get the link to an asset. | |
Args: | |
key (str): Name of the asset. | |
Returns: | |
str: Link to the asset. | |
""" | |
if key in cls.local_assets._get(): | |
return cls._update_local_asset_timestamp_and_get_link(key) | |
elif key in cls.remote_assets._get(): | |
return cls._get_remote_asset_link(key) | |
else: | |
raise ValueError(f"Asset '{key}' does not exist in the database.") | |
def get_asset_duration(cls, key: str) -> str: | |
""" | |
Get the duration of an asset. | |
Args: | |
key (str): Name of the asset. | |
Returns: | |
str: Duration of the asset. | |
""" | |
if key in cls.local_assets._get(): | |
return cls._get_local_asset_duration(key) | |
elif key in cls.remote_assets._get(): | |
return cls._get_remote_asset_duration(key) | |
else: | |
raise ValueError(f"Asset '{key}' does not exist in the database.") | |
def _remove_local_asset(cls, name: str): | |
""" | |
Remove a local asset from the database. | |
Args: | |
name (str): Name of the asset. | |
""" | |
asset = cls.local_assets._get(name) | |
if 'required' not in asset: | |
try: | |
Path(asset['path']).unlink() | |
except FileNotFoundError as e: | |
print(f"File not found: {e}") | |
cls.local_assets._delete(name) | |
def _add_local_asset_from_path(cls, path: Path): | |
""" | |
Add a local asset to the database from a file path. | |
Args: | |
path (Path): Path to the asset. | |
""" | |
file_ext = path.suffix | |
if file_ext in AUDIO_EXTENSIONS: | |
asset_type = AssetType.AUDIO | |
elif file_ext in IMAGE_EXTENSIONS: | |
asset_type = AssetType.IMAGE | |
elif file_ext in VIDEO_EXTENSIONS: | |
asset_type = AssetType.VIDEO | |
else: | |
asset_type = AssetType.OTHER | |
cls.local_assets._save({ | |
path.stem: { | |
"path": str(path), | |
"type": asset_type.value, | |
"ts": datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
} | |
}) | |
def _update_local_asset_timestamp_and_get_link(cls, key: str) -> str: | |
""" | |
Update the timestamp of a local asset and get its link. | |
Args: | |
key (str): Name of the asset. | |
Returns: | |
str: Link to the asset. | |
""" | |
asset = cls.local_assets._get(key) | |
asset['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
cls.local_assets._save({key: asset}) | |
return asset['path'] | |
def _get_remote_asset_link(cls, key: str) -> str: | |
""" | |
Get the link to a remote asset. | |
Args: | |
key (str): Name of the asset. | |
Returns: | |
str: Link to the asset. | |
""" | |
asset = cls.remote_assets._get(key) | |
asset['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
cls.remote_assets._save({key: asset}) | |
if 'youtube' in asset['url']: | |
return cls._get_youtube_asset_link(key, asset) | |
return asset['url'] | |
def _get_local_asset_duration(cls, key: str) -> str: | |
""" | |
Get the duration of a local asset. | |
Args: | |
key (str): Name of the asset. | |
Returns: | |
str: Duration of the asset. | |
""" | |
asset = cls.local_assets._get(key) | |
asset['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
cls.local_assets._save({key: asset}) | |
if 'duration' not in asset and asset['duration'] is not None: | |
_, duration = cls._update_local_asset_duration(key) | |
return duration | |
return asset['duration'] | |
def _get_remote_asset_duration(cls, key: str) -> str: | |
""" | |
Get the duration of a remote asset. | |
Args: | |
key (str): Name of the asset. | |
Returns: | |
str: Duration of the asset. | |
""" | |
asset = cls.remote_assets._get(key) | |
asset['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
cls.remote_assets._save({key: asset}) | |
if 'duration' in asset and asset['duration'] is not None: | |
return asset['duration'] | |
_, duration = cls._update_youtube_asset_duration(key) | |
return duration | |
def _update_local_asset_duration(cls, key: str) -> str: | |
""" | |
Update the duration of a local asset. | |
Args: | |
key (str): Name of the asset. | |
Returns: | |
str: Duration of the asset. | |
""" | |
asset = cls.local_assets._get(key) | |
path = Path(asset['path']) | |
if any(t in asset['type'] for t in ['audio', 'video', 'music']): | |
_, duration = get_asset_duration(str(path)) | |
asset['duration'] = duration | |
else: | |
duration = None | |
cls.local_assets._save({key: asset}) | |
return str(path), duration | |
def _update_youtube_asset_duration(cls, key: str) -> str: | |
""" | |
Update the duration of a Youtube asset. | |
Args: | |
key (str): Name of the asset. | |
Returns: | |
str: Duration of the asset. | |
""" | |
asset = cls.remote_assets._get(key) | |
youtube_url = asset['url'] | |
remote_url, duration = get_asset_duration(youtube_url, isVideo="video" in asset['type']) | |
asset.update({ | |
"remote_url": base64.b64encode(remote_url.encode()).decode('utf-8'), | |
"duration": duration, | |
}) | |
cls.remote_assets._save({key: asset}) | |
return remote_url, duration | |
def _get_youtube_asset_link(cls, key: str, asset: dict) -> str: | |
""" | |
Get the link to a Youtube asset. | |
Args: | |
key (str): Name of the asset. | |
asset (dict): Asset data. | |
Returns: | |
str: Link to the asset. | |
""" | |
if any(t in asset['type'] for t in ['audio', 'music']): | |
local_audio_file, duration = downloadYoutubeAudio(asset['url'], f"public/{key}.wav") | |
cls.local_assets._save({ | |
key: { | |
'path': local_audio_file, | |
'duration': duration, | |
'type': 'audio', | |
'ts': datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
} | |
}) | |
return local_audio_file | |
if 'remote_url' in asset: | |
asset['remote_url'] = base64.b64decode(asset['remote_url']).decode('utf-8') | |
expire_timestamp_match = re.search(r"expire=(\d+)", asset['remote_url']) | |
not_expired = expire_timestamp_match and int(expire_timestamp_match.group(1)) > time.time() + 1800 | |
if not_expired and 'duration' in asset: | |
return asset['remote_url'] | |
remote_url, _ = cls._update_youtube_asset_duration(key) | |
return remote_url | |