Spaces:

mulsi
/

explore-label-concepts

Running

App Files Community

new-concepts

by Xmaster6y - opened Apr 20

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+40

-59

Files changed (6) hide show

README.md +1 -1
src/constants.py +2 -16
src/global_variables.py +4 -5
src/label_interface.py +16 -18
src/sample_interface.py +16 -18
src/vote_interface.py +1 -1

README.md CHANGED Viewed

@@ -11,4 +11,4 @@ license: mit
 hf_oauth: true
 ---
- Check out the configuration reference at: https://huggingface.co/docs/hub/spaces-config-reference.

 hf_oauth: true
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

src/constants.py CHANGED Viewed

@@ -5,31 +5,17 @@ import os
 import pathlib
-DATASET_NAME = "mulsi/fruit-vegetable-concepts"
 CONCEPTS = [
-    # Environment
-    "stem",
-    "leaf",
-    "tail",
-    "seed",
-    "pulp",
-    "soil",
-    "tree",
     # Shapes
-    "ovaloid",
     "sphere",
-    "cylinder",
     "cube",
     # Colors
-    "black",
-    "purple",
     "red",
-    "blue",
     "green",
-    "brown",
     "orange",
     "yellow",
-    "white",
 ]
 ASSETS_FOLDER = pathlib.Path(__file__).parent / "assets"

 import pathlib
+DATASET_NAME = "Xmaster6y/fruit-vegetable-concepts"
 CONCEPTS = [
     # Shapes
     "sphere",
     "cube",
+    "cylinder",
     # Colors
     "red",
     "green",
     "orange",
     "yellow",
 ]
 ASSETS_FOLDER = pathlib.Path(__file__).parent / "assets"

src/global_variables.py CHANGED Viewed

@@ -26,7 +26,7 @@ def setup():
         repo_type="dataset",
     )
     all_metadata = {}
-    for split in ["train", "test"]:
         all_metadata[split] = []
         with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
             for row in reader:
@@ -78,8 +78,7 @@ def update_votes(
         all_votes[s_id] = {}
     all_votes[s_id][username] = {c: c in voted_concepts for c in CONCEPTS}
     new_concepts = compute_concepts(all_votes[s_id])
-    for concept, concept_value in new_concepts.items():
-        all_metadata[current_split][idx][concept] = concept_value
 def compute_concepts(votes):
     vote_sum = {c: 0 for c in CONCEPTS}
@@ -117,13 +116,13 @@ def save_current_work(
             json.dump(new_votes[key], f)
     all_votes = new_votes
     new_metadata = {}
-    for split in ["train", "test"]:
         new_metadata[split] = []
         with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
             for row in reader:
                 s_id = row["id"]
                 if s_id in all_votes:
-                    row.update(compute_concepts(all_votes[s_id]))
                 new_metadata[split].append(row)
         with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl", mode='w') as writer:
             writer.write_all(new_metadata[split])

         repo_type="dataset",
     )
     all_metadata = {}
+    for split in ["train", "validation", "test"]:
         all_metadata[split] = []
         with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
             for row in reader:
         all_votes[s_id] = {}
     all_votes[s_id][username] = {c: c in voted_concepts for c in CONCEPTS}
     new_concepts = compute_concepts(all_votes[s_id])
+    all_metadata[current_split][idx]["concepts"] = new_concepts
 def compute_concepts(votes):
     vote_sum = {c: 0 for c in CONCEPTS}
             json.dump(new_votes[key], f)
     all_votes = new_votes
     new_metadata = {}
+    for split in ["train", "validation", "test"]:
         new_metadata[split] = []
         with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl") as reader:
             for row in reader:
                 s_id = row["id"]
                 if s_id in all_votes:
+                    row["concepts"] = compute_concepts(all_votes[s_id])
                 new_metadata[split].append(row)
         with jsonlines.open(f"{ASSETS_FOLDER}/{DATASET_NAME}/data/{split}/metadata.jsonl", mode='w') as writer:
             writer.write_all(new_metadata[split])

src/label_interface.py CHANGED Viewed

@@ -12,7 +12,7 @@ from src.constants import CONCEPTS, ASSETS_FOLDER, DATASET_NAME
 def filter_sample(sample, concepts, username, sample_type):
-    has_concepts = all([sample[c] for c in concepts])
     if not has_concepts:
         return False
     if "votes" in sample and username in sample["votes"]:
@@ -53,14 +53,14 @@ def get_next_image(
         except KeyError:
             voted_concepts = []
             unseen_concepts = []
-        tie_concepts = [c for c in CONCEPTS if sample[c] is None]
         return (
             image_path,
             voted_concepts,
             f"{split}:{sample_idx}",
             sample["class"],
-            {c: sample[c] for c in CONCEPTS},
             unseen_concepts,
             tie_concepts,
             filtered_indices,
@@ -117,7 +117,7 @@ with gr.Blocks() as interface:
                 with gr.Row():
                     split = gr.Radio(
                         label="Split",
-                        choices=["train", "test"],
                         value="train",
                     )
                     sample_type = gr.Radio(
@@ -130,20 +130,6 @@ with gr.Blocks() as interface:
                     multiselect=True,
                     choices=CONCEPTS,
                 )
-            with gr.Row():
-                next_button = gr.Button(
-                    value="Next",
-                )
-                gr.LoginButton()
-                submit_button = gr.Button(
-                    value="Local Submit",
-                )
-            with gr.Row():
-                save_button = gr.Button(
-                    value="Save",
-                )
             with gr.Group():
                 voted_concepts = gr.CheckboxGroup(
                     label="Voted Concepts",
@@ -158,6 +144,18 @@ with gr.Blocks() as interface:
                     choices=CONCEPTS,
                 )
             with gr.Group():
                 gr.Markdown(
                     "##  # Image Info",

 def filter_sample(sample, concepts, username, sample_type):
+    has_concepts = all([sample["concepts"].get(c, False) for c in concepts])
     if not has_concepts:
         return False
     if "votes" in sample and username in sample["votes"]:
         except KeyError:
             voted_concepts = []
             unseen_concepts = []
+        tie_concepts = [c for c in sample["concepts"] if sample["concepts"][c] is None]
         return (
             image_path,
             voted_concepts,
             f"{split}:{sample_idx}",
             sample["class"],
+            sample["concepts"],
             unseen_concepts,
             tie_concepts,
             filtered_indices,
                 with gr.Row():
                     split = gr.Radio(
                         label="Split",
+                        choices=["train", "validation", "test"],
                         value="train",
                     )
                     sample_type = gr.Radio(
                     multiselect=True,
                     choices=CONCEPTS,
                 )
             with gr.Group():
                 voted_concepts = gr.CheckboxGroup(
                     label="Voted Concepts",
                     choices=CONCEPTS,
                 )
+            with gr.Row():
+                next_button = gr.Button(
+                    value="Next",
+                )
+                gr.LoginButton()
+                submit_button = gr.Button(
+                    value="Local Submit",
+                )
+            with gr.Row():
+                save_button = gr.Button(
+                    value="Save",
+                )
             with gr.Group():
                 gr.Markdown(
                     "##  # Image Info",

src/sample_interface.py CHANGED Viewed

@@ -37,14 +37,14 @@ def get_image(
     except KeyError:
         voted_concepts = []
         unseen_concepts = []
-    tie_concepts = [c for c in CONCEPTS if sample[c] is None]
     return (
         image_path,
         voted_concepts,
         f"{split}:{sample_idx}",
         sample["class"],
-        {c: sample[c] for c in CONCEPTS},
         str(sample_idx),
         unseen_concepts,
         tie_concepts,
@@ -104,7 +104,7 @@ with gr.Blocks() as interface:
                 )
                 split = gr.Radio(
                     label="Split",
-                    choices=["train", "test"],
                     value="train",
                 )
                 index = gr.Textbox(
@@ -112,6 +112,19 @@ with gr.Blocks() as interface:
                     label="Index",
                     max_lines=1,
                 )
             with gr.Row():
                 prev_button = gr.Button(
@@ -128,21 +141,6 @@ with gr.Blocks() as interface:
                 save_button = gr.Button(
                     value="Save",
                 )
-            with gr.Group():
-                voted_concepts = gr.CheckboxGroup(
-                    label="Voted Concepts",
-                    choices=CONCEPTS,
-                )
-                unseen_concepts = gr.CheckboxGroup(
-                    label="Previously Unseen Concepts",
-                    choices=CONCEPTS,
-                )
-                tie_concepts = gr.CheckboxGroup(
-                    label="Tie Concepts",
-                    choices=CONCEPTS,
-                )
             with gr.Group():
                 gr.Markdown(
                     "##  # Image Info",

     except KeyError:
         voted_concepts = []
         unseen_concepts = []
+    tie_concepts = [c for c in sample["concepts"] if sample["concepts"][c] is None]
     return (
         image_path,
         voted_concepts,
         f"{split}:{sample_idx}",
         sample["class"],
+        sample["concepts"],
         str(sample_idx),
         unseen_concepts,
         tie_concepts,
                 )
                 split = gr.Radio(
                     label="Split",
+                    choices=["train", "validation", "test"],
                     value="train",
                 )
                 index = gr.Textbox(
                     label="Index",
                     max_lines=1,
                 )
+            with gr.Group():
+                voted_concepts = gr.CheckboxGroup(
+                    label="Voted Concepts",
+                    choices=CONCEPTS,
+                )
+                unseen_concepts = gr.CheckboxGroup(
+                    label="Previously Unseen Concepts",
+                    choices=CONCEPTS,
+                )
+                tie_concepts = gr.CheckboxGroup(
+                    label="Tie Concepts",
+                    choices=CONCEPTS,
+                )
             with gr.Row():
                 prev_button = gr.Button(
                 save_button = gr.Button(
                     value="Save",
                 )
             with gr.Group():
                 gr.Markdown(
                     "##  # Image Info",

src/vote_interface.py CHANGED Viewed

@@ -28,7 +28,7 @@ with gr.Blocks() as interface:
                 )
                 split = gr.Radio(
                     label="Split",
-                    choices=["train", "test"],
                     value="train",
                 )

                 )
                 split = gr.Radio(
                     label="Split",
+                    choices=["train", "validation", "test"],
                     value="train",
                 )