import gradio as gr from gradio_client import Client import os import json import datasets def save_to_dataset(image_path, question, answer_p2s_base, answer_p2s_large, answer_layoutlm, answer_donut): # Create a dataset dictionary dataset_dict = { "image": image_path, "question": question, "answer_p2s_base": answer_p2s_base, "answer_p2s_large": answer_p2s_large, "answer_layoutlm": answer_layoutlm, "answer_donut": answer_donut } # Convert the dictionary to a Dataset object dataset = datasets.Dataset.from_dict(dataset_dict) # Save the dataset to Hugging Face dataset.save_to_disk("img_question_dataset") def generate_answer(image_path, question, model_name, space_id): try: if model_name == "qtoino-pix2struct": client = Client(f"https://{model_name}.hf.space/--replicas/uax51/") else: client = Client(f"https://{model_name}.hf.space/") result = client.predict(image_path, question, api_name="/predict") if result.endswith(".json"): with open(result, "rb") as json_file: output = json.loads(json_file.read()) if model_name == "TusharGoel-LayoutLM-DocVQA": return output["label"] else: return output["answer"] else: return result except Exception: gr.Warning(f"The {model_name} Space is currently unavailable. Please try again later.") return "" def generate_answers(image_path, question): answer_p2s_base = generate_answer(image_path, question, model_name = "qtoino-pix2struct", space_id = "Pix2Struct") answer_p2s_large = generate_answer(image_path, question, model_name = "akdeniz27-pix2struct-DocVQA", space_id = "Pix2Struct Large") answer_layoutlm = generate_answer(image_path, question, model_name = "TusharGoel-LayoutLM-DocVQA", space_id = "LayoutLM DocVQA") answer_donut = generate_answer(image_path, question, model_name = "nielsr-donut-docvqa", space_id = "Donut DocVQA") # Save the data to the dataset save_to_dataset(image_path, question, answer_p2s_base, answer_p2s_large, answer_layoutlm, answer_donut) return answer_p2s_base, answer_p2s_large, answer_layoutlm, answer_donut examples = [["docvqa_example.png", "How many items are sold?"], ["document-question-answering-input.png", "What is the objective?"]] title = "# Interactive demo: comparing document question answering (VQA) models" css = """ #mkd { height: 500px; overflow: auto; border: 1px solid #ccc; } """ with gr.Blocks(css=css) as demo: gr.HTML("