File size: 5,736 Bytes
7a7f67a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import json
import logging
import pathlib
import pandas as pd
import gradio as gr
import schedule
import time
from datetime import datetime, timezone
from src.envs import API
# Set up logging: configure the root logger to emit INFO-level records and
# above, then create this module's own logger (named after the module) which
# the code below uses for all vote-related messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class VoteManager:
    """Record user votes for pending models and upload them to a dataset repo.

    Votes are stored locally as one JSON object per line in
    ``<votes_path>/votes_data.jsonl``. Each record written by :meth:`add_vote`
    has the keys ``model``, ``revision``, ``username`` and ``timestamp``.
    A set of ``(model, revision, username)`` tuples is kept in memory so that
    a repeated vote by the same user for the same model revision is rejected.

    Args:
        votes_path: Directory that holds (or will hold) the votes JSONL file.
        eval_requests_path: Directory whose sub-folders contain the ``*.json``
            evaluation request files used to look up a model's revision.
        repo_id: Identifier of the dataset repository the votes file is
            uploaded to.
    """

    # File name used both on disk and as the path inside the dataset repo.
    VOTES_FILENAME = "votes_data.jsonl"
    # Keys of a vote record; also the columns of an empty vote table, so that
    # grouping by model/revision works before the first vote is cast.
    VOTE_COLUMNS = ("model", "revision", "username", "timestamp")

    def __init__(self, votes_path, eval_requests_path, repo_id):
        self.votes_path = votes_path
        self.eval_requests_path = eval_requests_path
        self.repo_id = repo_id
        # Votes recorded locally since the last successful upload.
        self.votes_to_upload = []
        self.init_vote_dataset()

    def _votes_file(self) -> pathlib.Path:
        """Return the path of the local votes JSONL file."""
        return pathlib.Path(self.votes_path) / self.VOTES_FILENAME

    def init_vote_dataset(self):
        """(Re)load the votes from disk and rebuild the duplicate-check set."""
        self.vote_dataset = self.read_vote_dataset()
        self.vote_check_set = self.make_check_set(self.vote_dataset)

    def read_vote_dataset(self) -> pd.DataFrame:
        """Load every vote record from the local JSONL file.

        Blank lines are ignored; lines that are not a valid JSON object are
        logged and skipped so that one damaged line does not make all votes
        unreadable.

        Returns:
            A DataFrame with one row per vote. When the file is missing or
            holds no records, an empty DataFrame that still has the
            ``VOTE_COLUMNS`` columns is returned.
        """
        records = []
        votes_file = self._votes_file()
        if votes_file.exists():
            with open(votes_file, "r", encoding="utf-8") as f:
                for line_number, raw_line in enumerate(f, start=1):
                    line = raw_line.strip()
                    if not line:
                        continue
                    try:
                        record = json.loads(line)
                    except json.JSONDecodeError as e:
                        logger.warning(
                            f"Skipping malformed vote on line {line_number} of {votes_file}: {e}"
                        )
                        continue
                    if not isinstance(record, dict):
                        logger.warning(
                            f"Skipping non-object vote on line {line_number} of {votes_file}"
                        )
                        continue
                    records.append(record)
        if not records:
            # A column-less frame would make groupby(["model", "revision"])
            # raise KeyError, so give the empty table its expected columns.
            return pd.DataFrame(columns=list(self.VOTE_COLUMNS))
        return pd.DataFrame(records)

    def make_check_set(self, vote_dataset: pd.DataFrame) -> set:
        """Build the set of ``(model, revision, username)`` tuples already voted.

        Args:
            vote_dataset: Vote table as returned by :meth:`read_vote_dataset`.

        Returns:
            A set with one tuple per distinct vote; empty when there are no rows.
        """
        if vote_dataset.empty:
            return set()
        return set(
            zip(
                vote_dataset["model"],
                vote_dataset["revision"],
                vote_dataset["username"],
            )
        )

    def get_model_revision(self, selected_model: str) -> str:
        """Fetch the revision for the given model from the request files.

        Every ``*.json`` file in each sub-folder of ``eval_requests_path`` is
        inspected; the first file (in directory iteration order) whose
        ``model`` field equals ``selected_model`` decides the result.

        Returns:
            That file's ``revision`` field, or ``"main"`` when the field is
            absent, no file matches, or the requests directory does not exist.
        """
        requests_dir = pathlib.Path(self.eval_requests_path)
        if not requests_dir.is_dir():
            return "main"
        for user_folder in requests_dir.iterdir():
            if not user_folder.is_dir():
                continue
            for request_file in user_folder.glob("*.json"):
                try:
                    with open(request_file, "r", encoding="utf-8") as f:
                        data = json.load(f)
                except (OSError, ValueError) as e:
                    # ValueError covers both invalid JSON and undecodable bytes.
                    logger.warning(f"Skipping unreadable request file {request_file}: {e}")
                    continue
                if isinstance(data, dict) and data.get("model") == selected_model:
                    return data.get("revision", "main")
        return "main"

    def create_request_vote_df(self, pending_models_df: gr.Dataframe):
        """Attach vote counts to the pending-models table and sort by them.

        The argument is used as a pandas DataFrame (``.empty``, ``.columns``,
        ``pd.merge``). Votes are re-read from disk on every call.

        Args:
            pending_models_df: Table of pending models; it is merged on its
                ``model_name`` and ``revision`` columns.

        Returns:
            The input unchanged when it is empty or has no ``model_name``
            column. Otherwise a new table with a ``vote_count`` column
            (0 where nobody voted), sorted by ``vote_count`` descending then
            ``model_name`` ascending, with the ``model_name`` and ``model``
            columns dropped.
        """
        if pending_models_df.empty or "model_name" not in pending_models_df.columns:
            return pending_models_df
        self.vote_dataset = self.read_vote_dataset()
        vote_counts = (
            self.vote_dataset.groupby(["model", "revision"])
            .size()
            .reset_index(name="vote_count")
        )
        pending_models_df_votes = pd.merge(
            pending_models_df,
            vote_counts,
            left_on=["model_name", "revision"],
            right_on=["model", "revision"],
            how="left",
        )
        # Models without any vote come out of the left merge as NaN.
        pending_models_df_votes["vote_count"] = pending_models_df_votes["vote_count"].fillna(0)
        pending_models_df_votes = pending_models_df_votes.sort_values(
            by=["vote_count", "model_name"], ascending=[False, True]
        )
        # The merge keys are no longer needed once counting and sorting are done.
        pending_models_df_votes = pending_models_df_votes.drop(["model_name", "model"], axis=1)
        return pending_models_df_votes

    # Function to be called when a user votes for a model
    def add_vote(
        self,
        selected_model: str,
        pending_models_df: gr.Dataframe,
        profile: gr.OAuthProfile | None
    ):
        """Record one vote by the logged-in user for ``selected_model``.

        The vote is appended to the local JSONL file, queued for upload and
        added to the duplicate-check set.

        Args:
            selected_model: Name of the model being voted for.
            pending_models_df: Current pending-models table, passed on to
                :meth:`create_request_vote_df` after a successful vote.
            profile: OAuth profile of the voter, or ``None`` when not logged in.

        Returns:
            The refreshed table from :meth:`create_request_vote_df` on
            success. ``None`` (after a ``gr.Warning``) when no model is
            selected, the user is not logged in, the user already voted for
            this model revision, or the vote could not be written.
        """
        # "str" is presumably a placeholder the UI sends when nothing has been
        # picked -- TODO confirm; None and "" are treated the same way.
        if not selected_model or selected_model == "str":
            gr.Warning("No model selected")
            return
        if profile is None:
            gr.Warning("Hub Login required")
            return
        vote_username = profile.username
        model_revision = self.get_model_revision(selected_model)
        # Hashable key identifying "this user voted for this model revision".
        check_tuple = (selected_model, model_revision, vote_username)
        if check_tuple in self.vote_check_set:
            gr.Warning("Already voted for this model")
            return
        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        vote_obj = {
            "model": selected_model,
            "revision": model_revision,
            "username": vote_username,
            "timestamp": current_time
        }
        # Append the vote to the JSONL file. This is a UI event handler, so any
        # failure is reported to the user instead of propagating.
        try:
            votes_file = self._votes_file()
            votes_file.parent.mkdir(parents=True, exist_ok=True)
            with open(votes_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(vote_obj) + "\n")
        except Exception as e:
            logger.error(f"Failed to write vote to file: {e}")
            gr.Warning("Failed to record vote. Please try again")
            return
        logger.info(f"Vote added locally: {vote_obj}")
        self.votes_to_upload.append(vote_obj)
        self.vote_check_set.add(check_tuple)
        gr.Info(f"Voted for {selected_model}")
        return self.create_request_vote_df(pending_models_df)

    def upload_votes(self):
        """Upload the local votes file to the dataset repo if votes are pending.

        Does nothing when no vote was recorded since the last successful
        upload. The whole JSONL file is uploaded as ``votes_data.jsonl`` to
        ``repo_id``. On failure the error is logged and the pending votes are
        kept so that a later call tries again.
        """
        if not self.votes_to_upload:
            return
        # Remember how many votes were pending before the upload started, so a
        # vote queued while the upload is running stays pending afterwards.
        pending_count = len(self.votes_to_upload)
        votes_file = self._votes_file()
        try:
            with open(votes_file, "rb") as f:
                API.upload_file(
                    path_or_fileobj=f,
                    path_in_repo=self.VOTES_FILENAME,
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    commit_message="Updating votes_data.jsonl with new votes",
                )
        except Exception as e:
            logger.error(f"Failed to upload votes to repository: {e}")
            return
        logger.info("Votes uploaded to votes repository")
        del self.votes_to_upload[:pending_count]
def run_scheduler(vote_manager):
    """Run pending ``schedule`` jobs forever, checking once per second.

    This function never returns. ``vote_manager`` is accepted but not used
    inside the loop.
    """
    poll_interval_seconds = 1
    while True:
        schedule.run_pending()
        time.sleep(poll_interval_seconds)
|