explore-label-concepts / src /global_variables.py
imenelydiaker's picture
integrate-new-concepts (#3)
36c11e4 verified
raw
history blame
5.44 kB
"""Global variables used in the space.
"""
import os
import json
from huggingface_hub import HfApi
import jsonlines
import gradio as gr
from src.constants import DATASET_NAME, HF_TOKEN, ASSETS_FOLDER, CONCEPTS
# Shared mutable state for the whole Space; populated by setup() (invoked at
# the bottom of this module) and mutated by the functions below.
hf_api: HfApi  # authenticated Hugging Face Hub client
all_metadata: dict  # split name ("train"/"validation"/"test") -> list of metadata row dicts
all_votes: dict  # sample id -> {username: {concept: bool}}
def setup():
    """Download the dataset snapshot and load all metadata and votes into the
    module-level globals (`hf_api`, `all_metadata`, `all_votes`)."""
    global hf_api
    global all_metadata
    global all_votes

    hf_api = HfApi(token=HF_TOKEN)
    hf_api.snapshot_download(
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )

    # One metadata.jsonl per split; keep rows in file order.
    all_metadata = {}
    for split in ["train", "validation", "test"]:
        split_path = f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl"
        with jsonlines.open(split_path) as reader:
            all_metadata[split] = [row for row in reader]

    # One JSON file per voted-on sample, named <sample_id>.json.
    all_votes = {}
    votes_dir = f"{ASSETS_FOLDER}/{DATASET_NAME}/votes"
    for filename in os.listdir(votes_dir):
        sample_id = filename.split(".")[0]
        with open(f"{votes_dir}/{filename}") as f:
            all_votes[sample_id] = json.load(f)
def get_metadata(split):
    """Re-download the metadata file for *split* from the Hub and reload it
    into ``all_metadata[split]``, replacing the in-memory copy."""
    global all_metadata
    global hf_api
    hf_api.hf_hub_download(
        repo_id=DATASET_NAME,
        filename="metadata.jsonl",
        subfolder=f"data/{split}",
        repo_type="dataset",
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
    )
    local_path = f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl"
    with jsonlines.open(local_path) as reader:
        all_metadata[split] = list(reader)
def save_metadata(split):
    """Write the in-memory metadata for *split* to its local JSONL file and
    upload that file to the dataset repo on the Hub."""
    global all_metadata
    local_path = f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl"
    with jsonlines.open(local_path, mode='w') as writer:
        writer.write_all(all_metadata[split])
    hf_api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=f"data/{split}/metadata.jsonl",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )
def update_votes(
    username: str,
    current_image: str,
    voted_concepts: list,
):
    """Record *username*'s concept votes for one sample and refresh its flags.

    *current_image* has the form ``"<split>:<index>"``. The user's vote is
    stored as ``{concept: bool}`` for every concept in CONCEPTS, then the
    sample's per-concept metadata values are recomputed from all users' votes.
    """
    global all_metadata
    global all_votes
    split_name, index_str = current_image.split(":")
    index = int(index_str)
    sample_id = all_metadata[split_name][index]["id"]
    votes_for_sample = all_votes.setdefault(sample_id, {})
    votes_for_sample[username] = {c: c in voted_concepts for c in CONCEPTS}
    for concept, value in compute_concepts(votes_for_sample).items():
        all_metadata[split_name][index][concept] = value
def compute_concepts(votes):
    """Aggregate per-user boolean votes into one verdict per concept.

    *votes* maps username -> {concept: bool}. Each True vote counts +1 and
    each False vote -1 (a user's missing concepts are skipped). Returns a
    dict mapping every concept in CONCEPTS to True (net positive), False
    (net negative), or None when the tally is zero (tie or no votes).
    """
    tally = dict.fromkeys(CONCEPTS, 0)
    for user_vote in votes.values():
        for concept in CONCEPTS:
            if concept in user_vote:
                # bool arithmetic: True -> +1, False -> -1
                tally[concept] += 2 * user_vote[concept] - 1
    verdict = {}
    for concept in CONCEPTS:
        if tally[concept] == 0:
            verdict[concept] = None
        else:
            verdict[concept] = tally[concept] > 0
    return verdict
def save_current_work(
    username: str,
):
    """Merge *username*'s in-memory votes with the latest remote state, recompute
    concept flags for every voted-on sample, and push everything back to the Hub.

    The remote snapshot is re-downloaded first so concurrent edits by other
    users are preserved; only this user's entries are overlaid on top.
    """
    global all_metadata
    global all_votes
    global hf_api
    # Refresh local copies of all metadata files and vote files from the Hub
    # before merging, so we don't clobber other users' recent votes.
    hf_api.snapshot_download(
        local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        repo_id=DATASET_NAME,
        repo_type="dataset",
        allow_patterns=["*/metadata.jsonl", "votes/*"],
    )
    # Reload the (now up-to-date) vote files from disk.
    new_votes = {}
    for file in os.listdir(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes"):
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{file}") as f:
            key = file.split(".")[0]  # filename is <sample_id>.json
            new_votes[key] = json.load(f)
    # Overlay only this user's in-memory votes on the freshly downloaded state.
    for key in all_votes:
        if username in all_votes[key]:
            if key not in new_votes:
                new_votes[key] = {}
            new_votes[key][username] = all_votes[key][username]
    # Persist the merged votes locally (one JSON file per sample).
    for key in new_votes:
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json", "w") as f:
            json.dump(new_votes[key], f)
    all_votes = new_votes
    # Rebuild every split's metadata, recomputing concept flags for any sample
    # that has votes, and rewrite the local metadata.jsonl files.
    new_metadata = {}
    for split in ["train", "validation", "test"]:
        new_metadata[split] = []
        with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
            for row in reader:
                s_id = row["id"]
                if s_id in all_votes:
                    row.update(compute_concepts(all_votes[s_id]))
                new_metadata[split].append(row)
        with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl", mode='w') as writer:
            writer.write_all(new_metadata[split])
    all_metadata = new_metadata
    # Upload the merged votes and regenerated metadata in one batch.
    hf_api.upload_folder(
        folder_path=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        repo_id=DATASET_NAME,
        repo_type="dataset",
        allow_patterns=["*/metadata.jsonl", "votes/*"],
    )
def get_votes(key):
    """Best-effort refresh of the vote file for sample *key* from the Hub.

    Downloads ``votes/<key>.json`` from the dataset repo and loads it into
    ``all_votes[key]``. If the file does not exist remotely or the download
    fails, ``all_votes`` is left unchanged.
    """
    global all_votes
    global hf_api
    try:
        hf_api.hf_hub_download(
            repo_id=DATASET_NAME,
            filename=f"votes/{key}.json",
            repo_type="dataset",
            local_dir=f"{ASSETS_FOLDER}/{DATASET_NAME}",
        )
        with open(f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json") as f:
            all_votes[key] = json.load(f)
    except Exception:
        # Deliberate best-effort: a missing remote vote file is normal for
        # samples nobody has voted on yet. Narrowed from a bare `except:` so
        # KeyboardInterrupt/SystemExit are no longer silently swallowed.
        pass
def save_votes(key):
    """Persist ``all_votes[key]`` to its local JSON file and upload that file
    to the dataset repo on the Hub."""
    global all_votes
    global hf_api
    local_path = f"{ASSETS_FOLDER}/{DATASET_NAME}/votes/{key}.json"
    with open(local_path, "w") as f:
        json.dump(all_votes[key], f)
    hf_api.upload_file(
        path_or_fileobj=local_path,
        path_in_repo=f"votes/{key}.json",
        repo_id=DATASET_NAME,
        repo_type="dataset",
    )
# Initialize globals once at import time. gr.NO_RELOAD prevents setup() (and
# its dataset download) from re-running on Gradio's auto-reload re-imports.
if gr.NO_RELOAD:
    setup()