# zeroshotGPU/tests/test_docling_parser.py
# Arjunvir Singh
# Initial commit: zeroshotGPU MVP with full eval surface
# db06ffa
import unittest
from zsgdp.parsers.docling_parser import _export_markdown, normalize_docling_markdown
from zsgdp.schema import DocumentProfile, PageProfile
class FakeDoclingDocument:
    """Minimal stand-in for a Docling document used by the parser tests.

    It exposes only ``export_to_markdown``, which is the single method the
    tests in this file call on it (indirectly, through ``_export_markdown``).
    """

    def export_to_markdown(self):
        """Return a fixed Markdown sample: one heading followed by one table."""
        return "# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 |"
class DoclingParserTests(unittest.TestCase):
    """Tests for ``_export_markdown`` and ``normalize_docling_markdown``.

    Both tests use the same small Markdown sample: a level-1 heading followed
    by a two-column table with a single data row.
    """

    # Shared sample so the expected export and the normalizer input stay in
    # sync within this test case.
    SAMPLE_MARKDOWN = "# Report\n\n| A | B |\n| --- | --- |\n| 1 | 2 |"

    def test_export_markdown_uses_docling_method(self):
        """The exported text equals what the fake's ``export_to_markdown`` returns."""
        exported = _export_markdown(FakeDoclingDocument())
        self.assertEqual(exported, self.SAMPLE_MARKDOWN)

    def test_normalize_docling_markdown_emits_schema(self):
        """Normalizing the sample yields a candidate tagged as coming from docling."""
        profile = DocumentProfile(
            doc_id="d1",
            source_path="sample.pdf",
            file_type="pdf",
            page_count=1,
            extension=".pdf",
            pages=[PageProfile(page_num=1, digital_text_chars=20)],
        )

        candidate = normalize_docling_markdown(
            markdown=self.SAMPLE_MARKDOWN,
            profile=profile,
            source_path="sample.pdf",
        )

        self.assertEqual(candidate.parser_name, "docling")
        # Presumably the two elements are the heading and the table, with the
        # table also listed under ``tables`` -- confirm against the normalizer.
        self.assertEqual(len(candidate.elements), 2)
        self.assertEqual(len(candidate.tables), 1)
        self.assertEqual(candidate.pages[0]["source_parser"], "docling")
if __name__ == "__main__":
    # Run the tests in this module when the file is executed directly.
    unittest.main()