"""Gradio app that converts an uploaded document with Docling.

The UI lets the user upload a file, pick an output format (Markdown or JSON),
and shows the converted text together with a small metadata payload.
"""

import json

import gradio as gr
from docling.document_converter import DocumentConverter
import spaces


@spaces.GPU
def convert_document(file, output_format):
    """Convert an uploaded document with Docling.

    Args:
        file: Value delivered by the ``gr.File`` input. Either an object
            exposing the path of the uploaded file through a ``name``
            attribute, or the path itself as a string.
        output_format: ``"Markdown"`` or ``"JSON"``. Any other value
            (including ``None`` when no radio option is selected) yields the
            text ``"Unsupported format"``.

    Returns:
        A ``(converted_text, metadata)`` tuple. ``converted_text`` is the
        exported document as a string; ``metadata`` is a dict whose
        ``"Available Attributes"`` entry lists ``dir()`` of the converted
        document (placeholder metadata).

    Raises:
        gr.Error: If no file was uploaded.
    """
    # Without this guard a click with no upload fails on `None.name` with an
    # opaque AttributeError; gr.Error surfaces a readable message in the UI.
    if file is None:
        raise gr.Error("Please upload a document before converting.")

    # Accept both a file-like object carrying `.name` and a plain path string.
    source_path = getattr(file, "name", file)

    # Load the document and convert it using Docling.
    converter = DocumentConverter()
    result = converter.convert(source_path)
    document = result.document

    # Choose the output format.
    if output_format == "Markdown":
        converted_text = document.export_to_markdown()
    elif output_format == "JSON":
        # NOTE(review): DoclingDocument appears to expose `export_to_dict()`
        # rather than `export_to_json()`; serialize the dict ourselves.
        # Confirm against the installed docling version.
        converted_text = json.dumps(
            document.export_to_dict(), ensure_ascii=False, indent=2
        )
    else:
        converted_text = "Unsupported format"

    # Placeholder metadata: the attribute names available on the document.
    metadata = {
        "Available Attributes": dir(document),
    }

    return converted_text, metadata


# Define the Gradio interface using the Blocks component syntax.
with gr.Blocks() as app:
    gr.Markdown("# Document Converter with Docling")
    gr.Markdown("Upload a document, choose the output format, and get the converted text with metadata.")

    file_input = gr.File(label="Upload Document")
    format_input = gr.Radio(["Markdown", "JSON"], label="Choose Output Format")

    output_text = gr.Textbox(label="Converted Document")
    output_metadata = gr.JSON(label="Metadata")

    # Define the process button and bind it to the conversion function.
    convert_button = gr.Button("Convert")
    convert_button.click(
        convert_document,
        inputs=[file_input, format_input],
        outputs=[output_text, output_metadata],
    )

app.launch(debug=True)