# htr_demo/helper/utils.py
# Last commit 3810c45 (Gabriel): "fixed bug with utils for loading db"
# File size: 3.57 kB
import hashlib
import os
import shutil
import sqlite3
import uuid
from datetime import datetime
import gradio as gr
import huggingface_hub
import pandas as pd
import pytz
from apscheduler.schedulers.background import BackgroundScheduler
class TrafficDataHandler:
    """Record page-load and action events in SQLite and mirror them to a Hub dataset repo.

    All state lives on the class and every method is a classmethod or
    staticmethod, so the class is used directly and never instantiated.
    Each recorded event is written to the local SQLite file, copied into the
    local clone of the dataset repository together with a CSV export, and
    pushed to the Hub.
    """

    # Working copy of the database that inserts are written to.
    _DB_FILE_PATH = "./traffic_data.db"
    # Copy of the database inside the local clone of the dataset repository.
    _DB_TEMP_PATH = "./data/traffic_data.db"
    # Hub access token, read from the environment once at class-definition time.
    _TOKEN = os.environ.get("HUB_TOKEN")
    # Time zone used for the timestamps stored in the database.
    _TZ = "Europe/Stockholm"
    # Interval, in minutes, used by `_initialize_and_schedule_backup`.
    _INTERVAL_MIN_UPDATE = 30
    # Constructed at class-definition time, i.e. when this module is imported.
    _repo = huggingface_hub.Repository(
        local_dir="data", repo_type="dataset", clone_from="Riksarkivet/traffic_demo_data", use_auth_token=_TOKEN
    )
    # NOTE(review): this is class-level state shared by every caller. Each
    # `onload_store_metric_data` call overwrites it, so later events from an
    # older session are stored under the most recently generated UUID.
    _session_uuid = None

    @classmethod
    def _pull_repo_data(cls) -> None:
        """Pull the dataset repo and overwrite the working database with the repo copy."""
        cls._repo.git_pull()
        shutil.copyfile(cls._DB_TEMP_PATH, cls._DB_FILE_PATH)

    @staticmethod
    def _hash_ip(ip_address: str) -> str:
        """Return the hex SHA-256 digest of *ip_address*.

        NOTE(review): the hash is unsalted; confirm that this is acceptable
        for the anonymisation requirements of the stored data.
        """
        return hashlib.sha256(ip_address.encode()).hexdigest()

    @classmethod
    def _current_time_in_sweden(cls) -> str:
        """Return the current time in `_TZ` formatted as ``YYYY-MM-DD HH:MM:SS``."""
        swedish_tz = pytz.timezone(cls._TZ)
        return datetime.now(swedish_tz).strftime("%Y-%m-%d %H:%M:%S")

    @classmethod
    def onload_store_metric_data(cls, request: gr.Request) -> None:
        """Start a new session id, prepare the database and record a ``"load"`` event.

        Args:
            request: Gradio request; ``request.client.host`` is hashed and stored.
        """
        cls._session_uuid = str(uuid.uuid1())
        cls._setup_database()
        hashed_host = cls._hash_ip(request.client.host)
        cls._backup_and_update_database(hashed_host, "load")

    @classmethod
    def store_metric_data(cls, action, request: gr.Request) -> None:
        """Record *action* for the client that issued *request*.

        Args:
            action: Label stored in the ``action`` column.
            request: Gradio request; ``request.client.host`` is hashed and stored.
        """
        hashed_host = cls._hash_ip(request.client.host)
        cls._backup_and_update_database(hashed_host, action)

    @classmethod
    def _commit_host_to_database(cls, hashed_host, action) -> None:
        """Insert one event row into ``ip_data`` in the working database."""
        db = sqlite3.connect(cls._DB_FILE_PATH)
        try:
            # `with db` only commits or rolls back the transaction; it does
            # not close the connection, hence the explicit close() below.
            with db:
                db.execute(
                    "INSERT INTO ip_data(current_time, hashed_ip, session_uuid, action) VALUES(?,?,?,?)",
                    [cls._current_time_in_sweden(), hashed_host, cls._session_uuid, action],
                )
        finally:
            db.close()

    @classmethod
    def _setup_database(cls) -> None:
        """Refresh the working database from the repo and make sure ``ip_data`` exists.

        The pull runs first because it overwrites the working database file;
        creating the table before the pull would be discarded by the copy.
        """
        cls._pull_repo_data()
        db = sqlite3.connect(cls._DB_FILE_PATH)
        try:
            with db:
                # IF NOT EXISTS avoids reading the whole table just to find
                # out whether it is present.
                db.execute(
                    """
                    CREATE TABLE IF NOT EXISTS ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                    current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
                    hashed_ip TEXT,
                    session_uuid TEXT,
                    action TEXT)
                    """
                )
        finally:
            db.close()

    @classmethod
    def _backup_and_update_database(cls, hashed_host, action) -> None:
        """Insert an event, then export the database and a CSV dump to the repo and push.

        Args:
            hashed_host: Value stored in the ``hashed_ip`` column.
            action: Value stored in the ``action`` column.
        """
        cls._commit_host_to_database(hashed_host, action)
        shutil.copyfile(cls._DB_FILE_PATH, cls._DB_TEMP_PATH)

        db = sqlite3.connect(cls._DB_FILE_PATH)
        try:
            ip_data = db.execute("SELECT * FROM ip_data").fetchall()
        finally:
            db.close()

        pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip", "session_uuid", "action"]).to_csv(
            "./data/ip_data.csv", index=False
        )
        # blocking=False is passed so the caller does not wait for the push.
        cls._repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")

    @classmethod
    def _initialize_and_schedule_backup(cls, hashed_host, action) -> None:
        """Run one backup immediately and schedule it every `_INTERVAL_MIN_UPDATE` minutes.

        NOTE(review): the scheduled job calls `_backup_and_update_database`
        with the same arguments, which inserts a new row on every run, and a
        new scheduler is started on every call. Confirm that this is intended.
        """
        cls._backup_and_update_database(hashed_host, action)
        scheduler = BackgroundScheduler()
        scheduler.add_job(
            cls._backup_and_update_database, "interval", minutes=cls._INTERVAL_MIN_UPDATE, args=(hashed_host, action)
        )
        scheduler.start()