# Spaces: Running on Zero
import unittest

from zsgdp.parsers.docling_parser import _export_markdown, normalize_docling_markdown
from zsgdp.schema import DocumentProfile, PageProfile
class FakeDoclingDocument:
    """Minimal stand-in for a Docling document, used as a test double.

    It exposes only ``export_to_markdown`` and always returns the same
    fixed Markdown string, so tests can compare against a known value.
    """

    def export_to_markdown(self) -> str:
        """Return a fixed Markdown sample: an H1 heading and a small table.

        The table has a header row (``A``, ``B``), a separator row, and a
        single data row (``1``, ``2``).
        """
        return "# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 |"
class DoclingParserTests(unittest.TestCase):
    """Tests for ``_export_markdown`` and ``normalize_docling_markdown``."""

    def test_export_markdown_uses_docling_method(self):
        """``_export_markdown`` returns the text of ``export_to_markdown()``.

        The fake document's ``export_to_markdown`` yields a fixed string;
        the helper's result must equal that string exactly.
        """
        self.assertEqual(
            _export_markdown(FakeDoclingDocument()),
            "# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 |",
        )

    def test_normalize_docling_markdown_emits_schema(self):
        """Normalizing a heading-plus-table sample yields the expected candidate.

        Checks the parser name, the element and table counts, and the
        ``source_parser`` tag on the first page entry.
        """
        # Single-page PDF profile passed to the normalizer.
        profile = DocumentProfile(
            doc_id="d1",
            source_path="sample.pdf",
            file_type="pdf",
            page_count=1,
            extension=".pdf",
            pages=[PageProfile(page_num=1, digital_text_chars=20)],
        )

        candidate = normalize_docling_markdown(
            markdown="# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 |",
            profile=profile,
            source_path="sample.pdf",
        )

        self.assertEqual(candidate.parser_name, "docling")
        # NOTE(review): presumably the two elements are the heading and the
        # table -- confirm against normalize_docling_markdown.
        self.assertEqual(len(candidate.elements), 2)
        self.assertEqual(len(candidate.tables), 1)
        # Page entries are indexed like mappings with a "source_parser" key.
        self.assertEqual(candidate.pages[0]["source_parser"], "docling")
# Run the test suite when this module is executed directly as a script;
# importing the module does not trigger the runner.
if __name__ == "__main__":
    unittest.main()