"""Global variables used in the space. """ import os import json from huggingface_hub import HfApi import jsonlines import gradio as gr from src.constants import DATASET_NAME, HF_TOKEN, ASSETS_FOLDER, CONCEPTS hf_api: HfApi all_metadata: dict all_votes: dict def setup(): global hf_api global all_metadata global all_votes hf_api = HfApi(token=HF_TOKEN) hf_api.snapshot_download( local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}", repo_id=DATASET_NAME, repo_type="dataset", ) all_metadata = {} for split in ["train", "validation", "test"]: all_metadata[split] = [] with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader: for row in reader: all_metadata[split].append(row) all_votes = {} for file in os.listdir(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes"): with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{file}") as f: key = file.split(".")[0] all_votes[key] = json.load(f) def get_metadata(split): global all_metadata global hf_api hf_api.hf_hub_download( repo_id=DATASET_NAME, filename="metadata.jsonl", subfolder=f"data/{split}", repo_type="dataset", local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}", ) all_metadata[split] = [] with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader: for row in reader: all_metadata[split].append(row) def save_metadata(split): global all_metadata values = all_metadata[split] with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl", mode='w') as writer: writer.write_all(values) hf_api.upload_file( path_or_fileobj=f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl", path_in_repo=f"data/{split}/metadata.jsonl", repo_id=DATASET_NAME, repo_type="dataset", ) def update_votes( username: str, current_image: str, voted_concepts: list, ): global all_metadata global all_votes current_split, idx = current_image.split(":") idx = int(idx) s_id = all_metadata[current_split][idx]["id"] if s_id not in all_votes: all_votes[s_id] = {} all_votes[s_id][username] = {c: c in voted_concepts for c in CONCEPTS} new_concepts = compute_concepts(all_votes[s_id]) for concept, concept_value in new_concepts.items(): all_metadata[current_split][idx][concept] = concept_value def compute_concepts(votes): vote_sum = {c: 0 for c in CONCEPTS} for vote in votes.values(): for c in CONCEPTS: if c not in vote: continue vote_sum[c] += 2 * vote[c] - 1 return {c: vote_sum[c] > 0 if vote_sum[c] != 0 else None for c in CONCEPTS} def save_current_work( username: str, ): global all_metadata global all_votes global hf_api hf_api.snapshot_download( local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}", repo_id=DATASET_NAME, repo_type="dataset", allow_patterns=["*/metadata.jsonl", "votes/*"], ) new_votes = {} for file in os.listdir(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes"): with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{file}") as f: key = file.split(".")[0] new_votes[key] = json.load(f) for key in all_votes: if username in all_votes[key]: if key not in new_votes: new_votes[key] = {} new_votes[key][username] = all_votes[key][username] for key in new_votes: with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json", "w") as f: json.dump(new_votes[key], f) all_votes = new_votes new_metadata = {} for split in ["train", "validation", "test"]: new_metadata[split] = [] with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader: for row in reader: s_id = row["id"] if s_id in all_votes: row.update(compute_concepts(all_votes[s_id])) new_metadata[split].append(row) with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl", mode='w') as writer: writer.write_all(new_metadata[split]) all_metadata = new_metadata hf_api.upload_folder( folder_path=f"{ASSETS_FOLDER}/{DATASET_NAME}", repo_id=DATASET_NAME, repo_type="dataset", allow_patterns=["*/metadata.jsonl", "votes/*"], ) def get_votes(key): global all_votes global hf_api try: hf_api.hf_hub_download( repo_id=DATASET_NAME, filename=f"votes/{key}.json", repo_type="dataset", local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}", ) with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json") as f: all_votes[key] = json.load(f) except: pass def save_votes(key): global all_votes global hf_api with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json", "w") as f: json.dump(all_votes[key], f) hf_api.upload_file( path_or_fileobj=f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json", path_in_repo=f"votes/{key}.json", repo_id=DATASET_NAME, repo_type="dataset", ) if gr.NO_RELOAD: setup()