Spaces:
Runtime error
Runtime error
| from langchain_docling import DoclingLoader | |
| from langchain_docling.loader import ExportType | |
| # Import required classes for building a custom converter | |
| from docling.document_converter import DocumentConverter, PdfFormatOption, InputFormat | |
| from docling.datamodel.pipeline_options import PdfPipelineOptions | |
| from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend | |
| import spaces | |
def convert_to_markdown(file_objs, url, do_ocr, do_table_structure):
    """Convert one or more PDF sources to Markdown using Docling.

    Args:
        file_objs: Uploaded file source(s). Takes precedence over ``url``
            when non-empty. Presumably a path or a list of paths, since it
            is forwarded as ``DoclingLoader(file_path=...)`` -- confirm
            against the caller.
        url: Fallback source used when ``file_objs`` is ``None`` or empty.
        do_ocr: Value assigned to ``PdfPipelineOptions.do_ocr``.
        do_table_structure: Value assigned to
            ``PdfPipelineOptions.do_table_structure``.

    Returns:
        The Markdown of every loaded document, separated by a blank line.
        For a single document this is exactly that document's
        ``page_content``; if the loader yields nothing, an empty string.

    Raises:
        ValueError: If neither ``file_objs`` nor ``url`` provides a source.
    """
    # An empty upload list must not mask a supplied URL, so select by
    # truthiness rather than only checking for None.
    source = file_objs if file_objs else url
    if not source:
        # Fail early with a clear message instead of an opaque loader error
        # (or an IndexError on the result) further down.
        raise ValueError("Provide at least one file or a URL to convert.")

    pipeline_options = PdfPipelineOptions()
    pipeline_options.do_ocr = do_ocr
    pipeline_options.do_table_structure = do_table_structure

    pdf_format_options = PdfFormatOption(
        pipeline_options=pipeline_options,
        backend=PyPdfiumDocumentBackend,
    )
    # The converter is restricted to PDF input and configured with the
    # options built above.
    doc_converter = DocumentConverter(
        allowed_formats=[InputFormat.PDF],
        format_options={InputFormat.PDF: pdf_format_options},
    )

    # Pass the custom converter to the DoclingLoader.
    loader = DoclingLoader(
        file_path=source,
        export_type=ExportType.MARKDOWN,
        converter=doc_converter,
    )
    docs = loader.load()

    # Keep the output of every returned document instead of silently
    # dropping everything after the first one.
    return "\n\n".join(doc.page_content for doc in docs)