|
import json |
|
import logging |
|
import pathlib |
|
import pandas as pd |
|
import gradio as gr |
|
import schedule |
|
import time |
|
from datetime import datetime, timezone |
|
|
|
from src.envs import API |
|
|
|
|
|
# Configure the root logger once at import time so INFO-and-above records
# from this module are emitted.
_LOG_LEVEL = logging.INFO
logging.basicConfig(level=_LOG_LEVEL)

# Logger named after this module; used by everything defined below.
logger = logging.getLogger(name=__name__)
|
|
|
class VoteManager:
    """Record user votes for pending models and publish them to a dataset repo.

    Votes are appended to a local ``votes_data.jsonl`` file inside
    ``votes_path``, one JSON object per line with the keys ``model``,
    ``revision``, ``username`` and ``timestamp``.  :meth:`upload_votes`
    pushes that whole file to the ``repo_id`` dataset repository.

    Attributes:
        votes_path: Directory that holds ``votes_data.jsonl``.
        eval_requests_path: Directory of per-user folders containing the
            evaluation request ``*.json`` files.
        repo_id: Repository the votes file is uploaded to.
        vote_dataset: DataFrame with the votes read from disk.
        vote_check_set: Set of ``(model, revision, username)`` tuples used to
            reject duplicate votes.
        votes_to_upload: Votes recorded since the last successful upload.
    """

    # File name of the vote log, both on disk and in the dataset repository.
    _VOTES_FILENAME = "votes_data.jsonl"
    # Keys of a vote record; used to give an empty dataset a usable schema.
    _VOTE_COLUMNS = ("model", "revision", "username", "timestamp")

    def __init__(self, votes_path, eval_requests_path, repo_id):
        """Store the configuration and load the votes already on disk."""
        self.votes_path = votes_path
        self.eval_requests_path = eval_requests_path
        self.repo_id = repo_id
        self.votes_to_upload = []
        self.init_vote_dataset()

    @property
    def _votes_file(self) -> pathlib.Path:
        """Path of the local JSON-lines vote log."""
        return pathlib.Path(self.votes_path) / self._VOTES_FILENAME

    def init_vote_dataset(self):
        """(Re)load the votes from disk and rebuild the duplicate-check set."""
        self.vote_dataset = self.read_vote_dataset()
        self.vote_check_set = self.make_check_set(self.vote_dataset)

    def read_vote_dataset(self) -> pd.DataFrame:
        """Read ``votes_data.jsonl`` into a DataFrame.

        Returns:
            One row per recorded vote.  When the file is missing or holds no
            votes, an empty DataFrame that still has the vote columns, so
            callers can group or merge on ``model``/``revision`` safely.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        records = []
        votes_file = self._votes_file
        if votes_file.exists():
            with open(votes_file, "r", encoding="utf-8") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    # Tolerate blank lines (e.g. a trailing newline or a
                    # manual edit) instead of failing on json.loads("").
                    if line:
                        records.append(json.loads(line))
        if not records:
            return pd.DataFrame(columns=list(self._VOTE_COLUMNS))
        return pd.DataFrame(records)

    def make_check_set(self, vote_dataset: pd.DataFrame) -> set:
        """Return the set of ``(model, revision, username)`` tuples in the data."""
        return {
            (row.model, row.revision, row.username)
            for row in vote_dataset.itertuples(index=False, name="vote")
        }

    def get_model_revision(self, selected_model: str) -> str:
        """Fetch the revision for the given model from the request files.

        Scans every ``*.json`` file in each sub-folder of
        ``eval_requests_path`` and returns the ``revision`` of the first
        request whose ``model`` equals ``selected_model``.

        Returns:
            The matching request's revision, or ``"main"`` when the request
            has no ``revision`` key, no request matches, or the requests
            directory does not exist.
        """
        requests_dir = pathlib.Path(self.eval_requests_path)
        if not requests_dir.is_dir():
            # No request files available: use the same fallback as "no match".
            return "main"
        for user_folder in requests_dir.iterdir():
            if not user_folder.is_dir():
                continue
            for file in user_folder.glob("*.json"):
                with open(file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                if isinstance(data, dict) and data.get("model") == selected_model:
                    return data.get("revision", "main")
        return "main"

    def create_request_vote_df(self, pending_models_df: pd.DataFrame):
        """Attach vote counts to the pending-models table and sort by them.

        Args:
            pending_models_df: Table of pending models.  It is returned
                unchanged when it is empty or has no ``model_name`` column;
                otherwise it must also have a ``revision`` column.

        Returns:
            The table with a ``vote_count`` column added (0 for models without
            votes), sorted by descending vote count and then by model name,
            with the ``model_name`` column removed.
        """
        if pending_models_df.empty or "model_name" not in pending_models_df.columns:
            return pending_models_df

        # Re-read from disk so the counts include the most recent votes.
        self.vote_dataset = self.read_vote_dataset()
        vote_counts = (
            self.vote_dataset.groupby(["model", "revision"])
            .size()
            .reset_index(name="vote_count")
        )

        pending_models_df_votes = pd.merge(
            pending_models_df,
            vote_counts,
            left_on=["model_name", "revision"],
            right_on=["model", "revision"],
            how="left",
        )

        # Models that nobody voted for have no match in the left join.
        pending_models_df_votes["vote_count"] = pending_models_df_votes["vote_count"].fillna(0)
        pending_models_df_votes = pending_models_df_votes.sort_values(
            by=["vote_count", "model_name"], ascending=[False, True]
        )

        return pending_models_df_votes.drop(["model_name", "model"], axis=1)

    def add_vote(
        self,
        selected_model: str,
        pending_models_df: pd.DataFrame,
        profile: gr.OAuthProfile | None,
    ):
        """Record one vote by the logged-in user for ``selected_model``.

        Args:
            selected_model: Name of the model being voted for.  An empty
                value or the literal ``"str"`` counts as "nothing selected"
                (NOTE(review): ``"str"`` is presumably a placeholder sent by
                the UI; confirm against the caller).
            pending_models_df: Current pending-models table, passed through
                to :meth:`create_request_vote_df` after a successful vote.
            profile: OAuth profile of the voter, or ``None`` when the user is
                not logged in.

        Returns:
            The refreshed table from :meth:`create_request_vote_df` when the
            vote was stored.  ``None`` after a warning was shown: no model
            selected, not logged in, duplicate vote, or a write failure.
        """
        if not selected_model or selected_model == "str":
            gr.Warning("No model selected")
            return

        if profile is None:
            gr.Warning("Hub Login required")
            return

        vote_username = profile.username
        model_revision = self.get_model_revision(selected_model)

        # One vote per (model, revision, user).
        check_tuple = (selected_model, model_revision, vote_username)
        if check_tuple in self.vote_check_set:
            gr.Warning("Already voted for this model")
            return

        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        vote_obj = {
            "model": selected_model,
            "revision": model_revision,
            "username": vote_username,
            "timestamp": current_time,
        }

        try:
            with open(self._votes_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(vote_obj) + "\n")
            logger.info(f"Vote added locally: {vote_obj}")

            self.votes_to_upload.append(vote_obj)
        except Exception as e:
            logger.error(f"Failed to write vote to file: {e}")
            gr.Warning("Failed to record vote. Please try again")
            return

        # Only mark the vote as cast once it has been written to disk.
        self.vote_check_set.add(check_tuple)
        gr.Info(f"Voted for {selected_model}")

        return self.create_request_vote_df(pending_models_df)

    def upload_votes(self):
        """Upload the local votes file if votes were added since the last upload.

        The whole ``votes_data.jsonl`` file is sent through
        ``API.upload_file``.  The pending list is cleared only after the call
        succeeds; a failure is logged and the pending votes are kept.
        """
        if not self.votes_to_upload:
            return

        try:
            with open(self._votes_file, "rb") as f:
                API.upload_file(
                    path_or_fileobj=f,
                    path_in_repo=self._VOTES_FILENAME,
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    commit_message="Updating votes_data.jsonl with new votes",
                )
            logger.info("Votes uploaded to votes repository")
            self.votes_to_upload.clear()
        except Exception as e:
            logger.error(f"Failed to upload votes to repository: {e}")
|
|
|
def run_scheduler(vote_manager):
    """Run due ``schedule`` jobs forever, checking once per second.

    This function never returns, so it is meant to be run where blocking is
    acceptable.  ``vote_manager`` is accepted but not used by the loop itself.
    """
    pause_seconds = 1
    while True:
        schedule.run_pending()
        time.sleep(pause_seconds)
|
|