"""Global variables used in the space. | |
""" | |
import os | |
import json | |
from huggingface_hub import HfApi | |
import jsonlines | |
import gradio as gr | |
from src.constants import DATASET_NAME, HF_TOKEN, ASSETS_FOLDER, CONCEPTS | |
hf_api: HfApi | |
all_metadata: dict | |
all_votes: dict | |
def setup():
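    """Download the dataset snapshot and load all metadata and votes into memory."""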
    global hf_api
    global all_metadata
    global all_votes
    hf_api = HfApi(token=HF_TOKEN)
    # Pull a full local copy of the dataset repository.
    hf_api.snapshot_download(
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )
    # Load per-split metadata from the JSON Lines files.
    all_metadata = {}
    for split in ["train", "validation", "test"]:
        all_metadata[split] = []
        with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
            for row in reader:
                all_metadata[split].append(row)
    # Load every vote file, keyed by sample id (the filename stem).
    all_votes = {}
    for file in os.listdir(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes"):
        key = file.split(".")[0]
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{file}") as f:
            all_votes[key] = json.load(f)

def get_metadata(split):
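    """Refresh the in-memory metadata for one split from the Hub."""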
    global all_metadata
    global hf_api
    # Re-download only this split's metadata file.
    hf_api.hf_hub_download(
        repo_id=DATASET_NAME,
        filename="metadata.jsonl",
        subfolder=f"data/{split}",
        repo_type="dataset",
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
    )
    all_metadata[split] = []
    with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
        for row in reader:
            all_metadata[split].append(row)

def save_metadata(split):
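    """Write one split's metadata to disk and upload it to the Hub."""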
    global all_metadata
    values = all_metadata[split]
    with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl", mode="w") as writer:
        writer.write_all(values)
    hf_api.upload_file(
        path_or_fileobj=f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl",
        path_in_repo=f"data/{split}/metadata.jsonl",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )

def update_votes(
    username: str,
    current_image: str,
    voted_concepts: list,
):
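    """Record a user's vote for one image and refresh the derived concept labels."""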
    global all_metadata
    global all_votes
    # current_image is encoded as "<split>:<index>".
    current_split, idx = current_image.split(":")
    idx = int(idx)
    s_id = all_metadata[current_split][idx]["id"]
    if s_id not in all_votes:
        all_votes[s_id] = {}
    # A concept is voted True if it appears in voted_concepts, False otherwise.
    all_votes[s_id][username] = {c: c in voted_concepts for c in CONCEPTS}
    # Recompute the aggregated concept labels from all users' votes.
    new_concepts = compute_concepts(all_votes[s_id])
    for concept, concept_value in new_concepts.items():
        all_metadata[current_split][idx][concept] = concept_value

def compute_concepts(votes):
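    """Aggregate per-user votes into one label per concept by majority.

    Each True vote counts +1 and each False vote -1; a concept resolves to
    True or False when the tally is non-zero, and to None (undecided) on a
    tie. For example, {"alice": {"cat": True}, "bob": {"cat": False}}
    yields {"cat": None}.
    """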
    vote_sum = {c: 0 for c in CONCEPTS}
    for vote in votes.values():
        for c in CONCEPTS:
            if c not in vote:
                continue
            vote_sum[c] += 2 * vote[c] - 1  # True -> +1, False -> -1
    return {c: vote_sum[c] > 0 if vote_sum[c] != 0 else None for c in CONCEPTS}

def save_current_work(
    username: str,
):
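    """Merge this user's local votes with the latest Hub state and push everything back."""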
    global all_metadata
    global all_votes
    global hf_api
    # Fetch the latest votes and metadata so concurrent edits are not clobbered.
    hf_api.snapshot_download(
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        repo_id=DATASET_NAME,
        repo_type="dataset",
        allow_patterns=["*/metadata.jsonl", "votes/*"],
    )
    new_votes = {}
    for file in os.listdir(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes"):
        key = file.split(".")[0]
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{file}") as f:
            new_votes[key] = json.load(f)
    # Overlay this user's in-memory votes onto the freshly downloaded ones.
    for key in all_votes:
        if username in all_votes[key]:
            if key not in new_votes:
                new_votes[key] = {}
            new_votes[key][username] = all_votes[key][username]
    for key in new_votes:
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json", "w") as f:
            json.dump(new_votes[key], f)
    all_votes = new_votes
    # Recompute concept labels for every sample that has votes.
    new_metadata = {}
    for split in ["train", "validation", "test"]:
        new_metadata[split] = []
        with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
            for row in reader:
                s_id = row["id"]
                if s_id in all_votes:
                    row.update(compute_concepts(all_votes[s_id]))
                new_metadata[split].append(row)
        with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl", mode="w") as writer:
            writer.write_all(new_metadata[split])
    all_metadata = new_metadata
    # Push the merged votes and updated metadata back to the Hub.
    hf_api.upload_folder(
        folder_path=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        repo_id=DATASET_NAME,
        repo_type="dataset",
        allow_patterns=["*/metadata.jsonl", "votes/*"],
    )

def get_votes(key):
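    """Refresh the in-memory votes for one sample from the Hub; skip silently if unavailable."""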
    global all_votes
    global hf_api
    try:
        hf_api.hf_hub_download(
            repo_id=DATASET_NAME,
            filename=f"votes/{key}.json",
            repo_type="dataset",
            local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        )
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json") as f:
            all_votes[key] = json.load(f)
    except Exception:
        # No vote file exists for this key yet; keep the current state.
        pass

def save_votes(key):
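    """Write one sample's votes to disk and upload them to the Hub."""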
    global all_votes
    global hf_api
    with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json", "w") as f:
        json.dump(all_votes[key], f)
    hf_api.upload_file(
        path_or_fileobj=f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json",
        path_in_repo=f"votes/{key}.json",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )

# Run setup once per process; gr.NO_RELOAD keeps it from re-running on Gradio hot reloads.
if gr.NO_RELOAD:
    setup()