"""Gradio explorer that shows a random sample of the WHOOPS! dataset in a grid.

Each grid cell shows one image; the remaining columns of the example are
rendered as textboxes inside a collapsed accordion below the image.
"""
import random

import gradio as gr
from datasets import load_dataset

whoops = load_dataset("nlphuji/whoops")['test']
print("Loaded WMTIS, first example:")
print(whoops[0])
dataset_size = len(whoops)
print(f"all dataset size: {dataset_size}")

# Column names referenced by this script.
IMAGE = 'image'
IMAGE_DESIGNER = 'image_designer'
DESIGNER_EXPLANATION = 'designer_explanation'
CROWD_CAPTIONS = 'crowd_captions'
CROWD_EXPLANATIONS = 'crowd_explanations'
CROWD_UNDERSPECIFIED_CAPTIONS = 'crowd_underspecified_captions'
SELECTED_CAPTION = 'selected_caption'
COMMONSENSE_CATEGORY = 'commonsense_category'
QA = 'question_answering_pairs'
IMAGE_ID = 'image_id'

# Columns rendered directly in the grid cell; every other dataset column is
# rendered inside the "Click for details" accordion.
left_side_columns = [IMAGE]
right_side_columns = [x for x in whoops.features.keys() if x not in left_side_columns]

# List-valued columns that are displayed as a numbered list.
enumerate_cols = [CROWD_CAPTIONS, CROWD_EXPLANATIONS, CROWD_UNDERSPECIFIED_CAPTIONS]

# Despite its name, this maps a column name to the emoji string appended to
# that column's textbox label.
emoji_to_label = {
    IMAGE_DESIGNER: '🎨, 🧑‍🎨, 💻',
    DESIGNER_EXPLANATION: '💡, 🤔, 🧑‍🎨',
    CROWD_CAPTIONS: '👥, 💬, 📝',
    CROWD_EXPLANATIONS: '👥, 💡, 🤔',
    CROWD_UNDERSPECIFIED_CAPTIONS: '👥, 💬, 👎',
    QA: '❓, 🤔, 💡',
    IMAGE_ID: '🔍, 📄, 💾',
    COMMONSENSE_CATEGORY: '🤔, 📚, 💡',
    SELECTED_CAPTION: '📝, 👌, 💬',
}

target_size = (1024, 1024)  # size passed to resize() for every displayed image
MAX_LINES = 30  # max_lines of the textboxes inside the details accordion


def list_to_string(lst) -> str:
    """Render the items of *lst* as a 1-based numbered list, one item per line."""
    return '\n'.join(f'{number}. {item}' for number, item in enumerate(lst, start=1))


def get_instance_values(example) -> list:
    """Return the display value of every column of *example*.

    The result is ordered as ``left_side_columns + right_side_columns``.
    Columns in ``enumerate_cols`` and the QA column become numbered-list
    strings; every other column value is passed through unchanged.
    """
    values = []
    for k in left_side_columns + right_side_columns:
        if k in enumerate_cols:
            value = list_to_string(example[k])
        elif k == QA:
            # Assumes each entry is a (question, answer) pair -- only
            # positions 0 and 1 of each entry are read.
            value = list_to_string([f"Q: {x[0]} A: {x[1]}" for x in example[k]])
        else:
            value = example[k]
        values.append(value)
    return values


def _field_label(key) -> str:
    """Return the textbox label for column *key*.

    The label is the humanised column name followed by its emojis. A column
    without an entry in ``emoji_to_label`` gets the plain name instead of
    raising ``KeyError``, since the column list comes from the dataset.
    """
    label = key.capitalize().replace("_", " ")
    emojis = emoji_to_label.get(key)
    return f"{label} {emojis}" if emojis else label


def plot_image(index: int) -> None:
    """Create the Gradio components for example *index* of ``whoops_sample``.

    The image is resized to ``target_size`` and labelled with the example's
    commonsense category; the remaining columns go into a collapsed
    accordion. Components are created, not returned; this function is called
    from within the ``gr.Blocks`` layout built below.
    """
    # Fetch the row once and reuse it for the image, its label and the details.
    example = whoops_sample[index]
    instance_values = get_instance_values(example)

    # get_instance_values() yields one value per column, left columns first.
    left_count = len(left_side_columns)
    left_values = instance_values[:left_count]
    right_values = instance_values[left_count:]

    for key, value in zip(left_side_columns, left_values):
        if key == IMAGE:
            gr.Image(value=value.resize(target_size), label=example[COMMONSENSE_CATEGORY])
        else:
            gr.Textbox(value=value, label=_field_label(key))

    with gr.Accordion("Click for details", open=False):
        for key, value in zip(right_side_columns, right_values):
            gr.Textbox(value=value, label=_field_label(key), max_lines=MAX_LINES)


columns_number = 4
rows_number = 20

# Clamp the sample to the dataset size so select() is never asked for indices
# past the end of the dataset.
sample_size = min(dataset_size, columns_number * rows_number)
whoops_sample = whoops.shuffle().select(range(sample_size))

index = 0
with gr.Blocks() as demo:
    gr.Markdown("# WHOOPS! Dataset Explorer")
    for row_num in range(rows_number):
        if index >= sample_size:
            break  # every sampled example has been placed
        with gr.Row():
            for col_num in range(columns_number):
                # Always create the column so a partial last row keeps the
                # same cell width as the full rows.
                with gr.Column():
                    if index < sample_size:
                        plot_image(index)
                        index += 1

demo.launch()