# Futyn-Maker: Deploy the app (7e1f5f6)
import json
import os
import sys
from pathlib import Path
from typing import Dict, Any, List
from loguru import logger
from omegaconf import OmegaConf
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
def process_json_files(
        raw_data_path: str) -> tuple[List[Dict[str, str]], List[Dict[str, Any]]]:
    """
    Process all JSON files in the raw data folder.

    Every directory entry whose name ends with ``.json`` is parsed. The file
    name without the ``.json`` extension is used as the ``public_vk`` value
    for the public and for each of its memes. Entries are visited in sorted
    name order so the returned lists are identical from run to run
    (``os.listdir`` on its own returns entries in arbitrary order).

    Args:
        raw_data_path (str): Path to the folder containing JSON files. Each
            file is expected to hold an object with a ``name`` key and a
            ``posts`` collection whose items have ``text`` and ``image_url``
            keys.

    Returns:
        tuple: Lists of public and meme data to be added to the database.
            The first list holds ``{"public_vk", "public_name"}`` dicts (one
            per file); the second holds ``{"public_vk", "text",
            "image_url"}`` dicts (one per post).

    Raises:
        KeyError: If a file lacks ``name``/``posts`` or a post lacks
            ``text``/``image_url``.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
        OSError: If the folder or one of its files cannot be read.
    """
    publics_to_add: List[Dict[str, str]] = []
    memes_to_add: List[Dict[str, Any]] = []

    # Sort the listing: the order of os.listdir() is unspecified, and the
    # order of these lists decides the insertion order downstream.
    for filename in sorted(os.listdir(raw_data_path)):
        if not filename.endswith('.json'):
            continue

        public_vk = filename[:-len('.json')]  # Remove .json extension
        file_path = os.path.join(raw_data_path, filename)

        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        publics_to_add.append({
            "public_vk": public_vk,
            "public_name": data['name']
        })

        posts = data['posts']
        memes_to_add.extend(
            {
                "public_vk": public_vk,
                "text": post['text'],
                "image_url": post['image_url']
            }
            for post in posts
        )

        logger.info(
            f"Processed file: {filename}, found {len(posts)} memes")

    return publics_to_add, memes_to_add
def main():
    """
    Rebuild the database schema and populate it from the raw JSON data.

    Steps:
        1. Load ``config.yaml`` and create an engine from
           ``config['database']['url']``.
        2. Drop every table known to ``Base.metadata`` and create them again
           (destructive: existing rows are lost).
        3. Parse the JSON files found in ``config['data_folders']['raw_data']``.
        4. Insert the publics, map each ``public_vk`` to the id of its
           inserted row, and insert the memes with that ``public_id``.

    The session is always closed, even when a step fails; the exception
    itself is left to propagate to the caller.
    """
    # Project-local imports stay inside the function: the script entry point
    # adds the project root to sys.path before calling main().
    from src.db.models import Base
    from src.db import crud

    logger.add("logs/make_db.log", rotation="10 MB")

    # Load configuration as a plain Python container
    config = OmegaConf.load('config.yaml')
    config = OmegaConf.to_container(config)

    engine = create_engine(config['database']['url'])

    # Drop all existing tables and create new ones
    Base.metadata.drop_all(bind=engine)
    Base.metadata.create_all(bind=engine)

    SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    db = SessionLocal()
    try:
        raw_data_path = config['data_folders']['raw_data']
        publics_to_add, memes_to_add = process_json_files(raw_data_path)

        # Add all publics to the database
        added_publics = crud.add_publics(db, publics_to_add)

        # Create a mapping of public_vk to public_id
        public_vk_to_id = {
            public.public_vk: public.id for public in added_publics
        }

        # Swap each meme's public_vk for the id of the inserted public
        for meme in memes_to_add:
            meme['public_id'] = public_vk_to_id[meme.pop('public_vk')]

        # Add all memes to the database
        crud.add_memes(db, memes_to_add)

        logger.info(
            f"Added {len(added_publics)} publics and {len(memes_to_add)} memes to the database")
    finally:
        # Release the session whether or not population succeeded.
        db.close()

    logger.info("Database population completed")
if __name__ == "__main__":
    # Make the directory two levels above this script importable before
    # main() runs its project-local imports.
    script_path = Path(__file__).resolve()
    project_root = script_path.parents[1]
    sys.path.insert(0, str(project_root))

    main()