import hashlib
import os
import shutil
import sqlite3
import uuid
from datetime import datetime

import gradio as gr
import huggingface_hub
import pandas as pd
import pytz
from apscheduler.schedulers.background import BackgroundScheduler


class TrafficDataHandler:
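    """Collect anonymized usage metrics from a Gradio app and back them up
    to a Hugging Face dataset repository via a local SQLite database."""
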
    _DB_FILE_PATH = "./traffic_data.db"
    _DB_TEMP_PATH = "./data/traffic_data.db"
    _TOKEN = os.environ.get("HUB_TOKEN")
    _TZ = "Europe/Stockholm"
    _INTERVAL_MIN_UPDATE = 30
    _repo = huggingface_hub.Repository(
        local_dir="data", repo_type="dataset", clone_from="Riksarkivet/traffic_demo_data", use_auth_token=_TOKEN
    )
    _session_uuid = None

    @classmethod
    def _pull_repo_data(cls):
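        """Pull the latest dataset repo state and copy its DB snapshot to the working path."""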
        cls._repo.git_pull()
        shutil.copyfile(cls._DB_TEMP_PATH, cls._DB_FILE_PATH)

    @staticmethod
    def _hash_ip(ip_address):
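        """Return a SHA-256 hex digest of the client IP so raw addresses are never stored."""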
        return hashlib.sha256(ip_address.encode()).hexdigest()

    @classmethod
    def _current_time_in_sweden(cls):
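        """Return the current time in the Europe/Stockholm timezone as a formatted string."""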
        swedish_tz = pytz.timezone(cls._TZ)
        return datetime.now(swedish_tz).strftime("%Y-%m-%d %H:%M:%S")

    @classmethod
    def onload_store_metric_data(cls, request: gr.Request):
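        """Record a 'load' event when a new session starts, creating the session UUID and DB."""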
        cls._session_uuid = str(uuid.uuid1())
        cls._setup_database()
        hashed_host = cls._hash_ip(request.client.host)
        cls._backup_and_update_database(hashed_host, "load")

    @classmethod
    def store_metric_data(cls, action, request: gr.Request):
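        """Record an arbitrary user action for the current session."""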
        hashed_host = cls._hash_ip(request.client.host)
        cls._backup_and_update_database(hashed_host, action)

    @classmethod
    def _commit_host_to_database(cls, hashed_host, action):
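        """Insert one event row (timestamp, hashed IP, session UUID, action) into SQLite."""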
        with sqlite3.connect(cls._DB_FILE_PATH) as db:
            db.execute(
                "INSERT INTO ip_data(current_time, hashed_ip, session_uuid, action) VALUES(?,?,?,?)",
                [cls._current_time_in_sweden(), hashed_host, cls._session_uuid, action],
            )

    @classmethod
    def _setup_database(cls):
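        """Ensure the ip_data table exists locally, then sync the latest repo snapshot."""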
        with sqlite3.connect(cls._DB_FILE_PATH) as db:
            try:
                db.execute("SELECT * FROM ip_data").fetchall()
            except sqlite3.OperationalError:
                db.execute(
                    """
                    CREATE TABLE ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                                          current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
                                          hashed_ip TEXT,
                                          session_uuid TEXT,
                                          action TEXT)
                    """
                )
        cls._pull_repo_data()

    @classmethod
    def _backup_and_update_database(cls, hashed_host, action):
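        """Commit the event, copy the DB into the repo checkout, export a CSV, and push."""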
        cls._commit_host_to_database(hashed_host, action)
        shutil.copyfile(cls._DB_FILE_PATH, cls._DB_TEMP_PATH)

        with sqlite3.connect(cls._DB_FILE_PATH) as db:
            ip_data = db.execute("SELECT * FROM ip_data").fetchall()
            pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip", "session_uuid", "action"]).to_csv(
                "./data/ip_data.csv", index=False
            )

        cls._repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")

    @classmethod
    def _initialize_and_schedule_backup(cls, hashed_host, action):
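        """Run an immediate backup and schedule recurring backups every _INTERVAL_MIN_UPDATE minutes."""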
        cls._backup_and_update_database(hashed_host, action)
        scheduler = BackgroundScheduler()
        scheduler.add_job(
            cls._backup_and_update_database, "interval", minutes=cls._INTERVAL_MIN_UPDATE, args=(hashed_host, action)
        )
        scheduler.start()