"""Gradio app for browsing random FiD-Light outputs on six KILT tasks.

At import time the script loads one JSON file of examples per (task, result
split) pair from ``folder_path``, builds a Gradio UI with one tab per task,
and launches it. Selecting a result split re-samples the examples shown in
every tab.
"""

import json
import os
import random

import gradio as gr

# Directory holding the per-task, per-split example files.
folder_path = "./data/fid-light-xl/"

# Task identifiers as they appear in the example file names.
tasks = [
    "fever",
    "hotpotqa",
    "nq",
    "trivia_qa",
    "structured_zeroshot",
    "trex",
]

# Tab titles; labels[i] is the display name of tasks[i].
labels = [
    "FEVER",
    "HotpotQA",
    "NQ",
    "TriviaQA",
    "zsRE",
    "T-REx",
]

# Result splits offered in the radio selector (user-facing text).
modes = [
    "Perfect (Both text & provenance are correct)",
    "Double Failure (Both text & provenance are wrong)",
    "Correct Text, Wrong Provenance (R-Precision < 1)",
    "Wrong Text, Correct Provenance (R-Precision == 1)",
]

# Maps each user-facing split name to the suffix used in the file names.
modes_map = {
    modes[0]: "perfect",
    modes[1]: "double_failure",
    modes[2]: "wrong_passage",
    modes[3]: "wrong_text",
}

# Upper bound on the number of examples rendered per task and split.
EXAMPLES_PER_TASK = 10

# data[task][mode] -> examples loaded from the matching JSON file.
data = {}
# total_num_per_task[task] -> number of examples summed over all splits.
total_num_per_task = {}

for task in tasks:
    data[task] = {}
    total_num_per_task[task] = 0
    for mode in modes:
        file_name = "examples_" + task + "_" + modes_map[mode] + ".json"
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(os.path.join(folder_path, file_name), encoding="utf-8") as f:
            data[task][mode] = json.load(f)
        total_num_per_task[task] += len(data[task][mode])


def _render_provenance(provenance):
    """Return a numbered Markdown list of the ``text`` of each provenance entry.

    An empty ``provenance`` yields a placeholder line instead.
    """
    if not provenance:
        return "<< No provenance returned >>\n\n"
    return "".join(
        f"**{number})** {prov['text']}\n\n"
        for number, prov in enumerate(provenance, start=1)
    )


def _render_example(example):
    """Return the Markdown section for a single example dict.

    Reads the ``query``, ``target_text``, ``output_text`` and
    ``output_provenance`` keys; ``target_provenance`` is rendered only when
    the key is present.
    """
    parts = [
        "#### Query \n" + example["query"] + "\n",
        "#### Target Text\n" + "\n\n".join(example["target_text"]) + "\n",
    ]
    if "target_provenance" in example:
        parts.append(
            "#### Target Provenance\n"
            + _render_provenance(example["target_provenance"])
            + "\n"
        )
    parts.append("#### Output Text\n" + str(example["output_text"]) + "\n")
    parts.append(
        "#### Output Provenance\n"
        + _render_provenance(example["output_provenance"])
        + "\n"
    )
    parts.append("\n----\n")
    return "".join(parts)


def render_examples(selected_mode):
    """Render a fresh random sample of examples for every task.

    Args:
        selected_mode: One of the entries of ``modes`` (the user-facing split
            name), used as the key into ``data[task]``.

    Returns:
        A list of Markdown strings, one per entry of ``tasks`` and in the same
        order. Each string starts with a statistics header for the split and
        is followed by up to ``EXAMPLES_PER_TASK`` randomly chosen examples.

    Raises:
        KeyError: If ``selected_mode`` is not a known split.
    """
    all_rendered = []
    for task in tasks:
        pool = data[task][selected_mode]
        total = total_num_per_task[task]
        # Guard against a task without any examples (would divide by zero).
        share = len(pool) / total * 100 if total else 0.0
        sections = [
            "## Statistics\nNumber of examples in this category: **"
            + str(len(pool))
            + "** ("
            + str(round(share, 2))
            + "%)\n\n----\n"
        ]
        # random.sample raises ValueError when asked for more items than the
        # population holds, so clamp the sample size for small splits.
        examples = random.sample(pool, min(EXAMPLES_PER_TASK, len(pool)))
        sections.extend(_render_example(example) for example in examples)
        all_rendered.append("".join(sections))
    return all_rendered


with gr.Blocks() as interface:
    gr.Markdown(
        "# FiD-Light Output Explorer \n"
        + "This is a random data output explorer for the retrieval augmented generation model FiD-Light on six KILT tasks (showing static dev set results).\n\n"
        + "*FiD-Light: Efficient and Effective Retrieval-Augmented Text Generation \nSebastian Hofstätter, Jiecao Chen, Karthik Raman, Hamed Zamani* \n[https://arxiv.org/abs/2209.14290](https://arxiv.org/abs/2209.14290) \n\n"
        + "*Every time you click on a result split we load up a new set of 10 random examples from this split for all the tasks.*"
    )

    selected = gr.Radio(modes, value=modes[0], label="Result Split", interactive=True)

    # One Markdown field per task tab, pre-filled with an initial sample.
    text_fields = []
    init_data = render_examples(selected.value)
    for i, task in enumerate(tasks):
        with gr.Tab(labels[i]):
            text_fields.append(gr.Markdown(init_data[i]))

    # Changing the split re-renders all tabs; the returned list is mapped
    # onto text_fields in order.
    selected.change(fn=render_examples, inputs=selected, outputs=text_fields)

interface.launch()