Spaces:
Runtime error
Runtime error
File size: 5,439 Bytes
1f64946 de554eb 1f64946 9b44168 1f64946 de554eb 1f64946 de554eb 1f64946 28030d1 1f64946 de554eb 1f64946 ed12cc6 1f64946 ed12cc6 1f64946 9b44168 36c11e4 9b44168 36c11e4 9b44168 de554eb 1f64946 de554eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
"""Global variables used in the space.
"""
import os
import json
from huggingface_hub import HfApi
import jsonlines
import gradio as gr
from src.constants import DATASET_NAME, HF_TOKEN, ASSETS_FOLDER, CONCEPTS
hf_api: HfApi
all_metadata: dict
all_votes: dict
def setup():
    """Download the dataset snapshot and populate the module globals.

    Fills `hf_api` with an authenticated client, `all_metadata` with the
    per-split rows from each metadata.jsonl, and `all_votes` with the
    per-sample vote files found under the votes/ folder.
    """
    global hf_api
    global all_metadata
    global all_votes
    hf_api = HfApi(token=HF_TOKEN)
    hf_api.snapshot_download(
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )
    all_metadata = {}
    for split in ["train", "validation", "test"]:
        rows = []
        with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
            rows.extend(reader)
        all_metadata[split] = rows
    all_votes = {}
    votes_dir = f"{ASSETS_FOLDER}/{DATASET_NAME}/votes"
    for filename in os.listdir(votes_dir):
        # vote files are named "<sample_id>.json"
        key = filename.split(".")[0]
        with open(f"{votes_dir}/{filename}") as f:
            all_votes[key] = json.load(f)
def get_metadata(split):
    """Re-download metadata.jsonl for *split* from the Hub and reload it.

    Overwrites `all_metadata[split]` with the freshly fetched rows.
    """
    global all_metadata
    global hf_api
    hf_api.hf_hub_download(
        repo_id=DATASET_NAME,
        filename="metadata.jsonl",
        subfolder=f"data/{split}",
        repo_type="dataset",
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
    )
    path = f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl"
    with jsonlines.open(path) as reader:
        all_metadata[split] = [row for row in reader]
def save_metadata(split):
    """Write `all_metadata[split]` back to disk and push it to the Hub."""
    global all_metadata
    path = f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl"
    with jsonlines.open(path, mode='w') as writer:
        writer.write_all(all_metadata[split])
    hf_api.upload_file(
        path_or_fileobj=path,
        path_in_repo=f"data/{split}/metadata.jsonl",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )
def update_votes(
    username: str,
    current_image: str,
    voted_concepts: list,
):
    """Record *username*'s vote for one sample and refresh its labels.

    *current_image* is formatted "<split>:<index>". The user's vote maps
    every known concept to whether it appears in *voted_concepts*; the
    sample's metadata is then updated from the aggregated votes.
    """
    global all_metadata
    global all_votes
    split, idx_str = current_image.split(":")
    index = int(idx_str)
    sample_id = all_metadata[split][index]["id"]
    all_votes.setdefault(sample_id, {})[username] = {
        concept: concept in voted_concepts for concept in CONCEPTS
    }
    for concept, value in compute_concepts(all_votes[sample_id]).items():
        all_metadata[split][index][concept] = value
def compute_concepts(votes):
    """Aggregate per-user boolean votes into one label per concept.

    Each True vote counts +1 and each False vote -1; a concept resolves to
    the sign of its tally, or None on a tie (including zero votes).
    """
    tally = dict.fromkeys(CONCEPTS, 0)
    for user_vote in votes.values():
        for concept in CONCEPTS:
            if concept in user_vote:
                tally[concept] += 1 if user_vote[concept] else -1
    result = {}
    for concept in CONCEPTS:
        score = tally[concept]
        result[concept] = None if score == 0 else score > 0
    return result
def save_current_work(
    username: str,
):
    """Merge *username*'s local votes with the remote state and push everything.

    Re-downloads the latest votes and metadata from the Hub, overlays only
    this user's entries from the in-memory ``all_votes`` onto the fresh
    remote votes (so other users' concurrent votes are preserved), rewrites
    the vote files and per-split metadata on disk, updates the globals, and
    uploads the changed files back to the dataset repo.
    """
    global all_metadata
    global all_votes
    global hf_api
    # Pull the current remote state first so we merge on top of it.
    hf_api.snapshot_download(
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        repo_id=DATASET_NAME,
        repo_type="dataset",
        allow_patterns=["*/metadata.jsonl", "votes/*"],
    )
    # Freshly downloaded votes: one JSON file per sample id.
    new_votes = {}
    for file in os.listdir(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes"):
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{file}") as f:
            key = file.split(".")[0]
            new_votes[key] = json.load(f)
    # Overlay ONLY this user's in-memory votes; other users' remote votes win.
    for key in all_votes:
        if username in all_votes[key]:
            if key not in new_votes:
                new_votes[key] = {}
            new_votes[key][username] = all_votes[key][username]
    # Persist the merged vote records locally before uploading.
    for key in new_votes:
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json", "w") as f:
            json.dump(new_votes[key], f)
    all_votes = new_votes
    # Recompute concept labels for every row that has votes, per split.
    new_metadata = {}
    for split in ["train", "validation", "test"]:
        new_metadata[split] = []
        with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
            for row in reader:
                s_id = row["id"]
                if s_id in all_votes:
                    row.update(compute_concepts(all_votes[s_id]))
                new_metadata[split].append(row)
        with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl", mode='w') as writer:
            writer.write_all(new_metadata[split])
    all_metadata = new_metadata
    # Push votes and metadata back in one folder upload.
    hf_api.upload_folder(
        folder_path=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        repo_id=DATASET_NAME,
        repo_type="dataset",
        allow_patterns=["*/metadata.jsonl", "votes/*"],
    )
def get_votes(key):
    """Best-effort refresh of the vote record for *key* from the Hub.

    Downloads votes/<key>.json and loads it into ``all_votes[key]``.
    Failures (e.g. the file does not exist because no one has voted on
    this sample yet) are silently ignored, leaving any cached value as-is.
    """
    global all_votes
    global hf_api
    try:
        hf_api.hf_hub_download(
            repo_id=DATASET_NAME,
            filename=f"votes/{key}.json",
            repo_type="dataset",
            local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        )
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json") as f:
            all_votes[key] = json.load(f)
    except Exception:
        # A bare `except:` would also swallow KeyboardInterrupt/SystemExit;
        # `Exception` keeps the deliberate best-effort behavior without that.
        pass
def save_votes(key):
    """Write the vote record for *key* to disk and push it to the Hub."""
    global all_votes
    global hf_api
    local_path = f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json"
    with open(local_path, "w") as f:
        json.dump(all_votes[key], f)
    hf_api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=f"votes/{key}.json",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )
# Initialize the globals once per process; gr.NO_RELOAD prevents this from
# re-running on every file change under Gradio's auto-reload dev mode.
if gr.NO_RELOAD:
    setup()
|