"""Gradio demo: score a user-written answer to an ELI5 question with GEM-metrics."""

import random

import gem_metrics
import gradio as gr
from datasets import load_dataset

# Fixed seed so the same example questions are offered on every start-up.
random.seed(42)

raw_datasets = load_dataset("eli5")
validation_split = raw_datasets["validation_eli5"]
validation_len = len(validation_split)

# randrange excludes the upper bound; randint(0, validation_len) could return
# validation_len itself and index one past the end of the split.
random_insts = [random.randrange(validation_len) for _ in range(10)]

questions = []  # sampled question titles, in sampling order
q2ref = {}  # question title -> list of reference answer texts
for inst in random_insts:
    row = validation_split[inst]
    question = row["title"]
    questions.append(question)
    q2ref[question] = row["answers"]["text"]


def calc_rouge_score(og_question, pred):
    """Score ``pred`` against the reference answers stored for ``og_question``.

    Despite the name, both BLEU and ROUGE are requested from GEM-metrics.

    Args:
        og_question: Question title; expected to be one of the sampled
            ``questions`` so that references are available in ``q2ref``.
        pred: The predicted answer text to evaluate.

    Returns:
        Whatever ``gem_metrics.compute`` returns for the prediction, or an
        empty dict when there is no prediction or the question is unknown.
    """
    # A cleared text field may arrive as "" rather than None; neither can be scored.
    if not pred:
        return {}

    # Free-text input may not match a sampled question; avoid a KeyError.
    gold = q2ref.get(og_question)
    if gold is None:
        return {}

    preds = gem_metrics.texts.Predictions([pred])
    refs = gem_metrics.texts.References([gold])
    return gem_metrics.compute(preds, refs, metrics_list=['bleu', 'rouge'])


# NOTE(review): each example row supplies only the question while the
# interface declares two inputs -- confirm Gradio accepts partial examples.
iface = gr.Interface(
    fn=calc_rouge_score,
    inputs=[gr.Textbox(placeholder=questions[0]), "text"],
    outputs=["text"],
    examples=[[item] for item in questions],
)
iface.launch()