Spaces:
Running on Zero
Running on Zero
| import unittest | |
| from zsgdp.schema import DocumentProfile, Element, PageProfile, ParseCandidate, TableObject | |
| from zsgdp.verify.parser_metrics import candidate_metrics, failure_metrics | |
class ParserMetricsTests(unittest.TestCase):
    """Tests for ``candidate_metrics`` and ``failure_metrics`` from ``zsgdp.verify.parser_metrics``."""

    def test_candidate_metrics_reports_coverage_and_valid_tables(self):
        # Profile side: a one-page markdown document declaring 11 digital text characters.
        page = PageProfile(page_num=1, digital_text_chars=11)
        doc_profile = DocumentProfile(
            doc_id="d1",
            source_path="sample.md",
            file_type="markdown",
            page_count=1,
            extension=".md",
            pages=[page],
        )

        # Candidate side: one paragraph whose text ("hello world") is also 11
        # characters long and carries a bbox, plus one small markdown table.
        paragraph = Element(
            "e1", "d1", 1, "paragraph", text="hello world", bbox=(0, 0, 10, 10)
        )
        table = TableObject(
            table_id="t1",
            page_nums=[1],
            markdown="| A | B |\n| --- | --- |\n| 1 | 2 |",
        )
        parsed = ParseCandidate(
            parser_name="test",
            doc_id="d1",
            source_path="sample.md",
            file_type="markdown",
            pages=[{"page_num": 1}],
            elements=[paragraph],
            tables=[table],
            confidence=0.9,
        )

        result = candidate_metrics(parsed, doc_profile, elapsed_seconds=0.25)

        # Checked in this order: parser name, then both ratios, then the bbox flag.
        expected_values = (
            ("parser", "test"),
            ("text_coverage_ratio", 1.0),
            ("valid_table_ratio", 1.0),
        )
        for key, expected in expected_values:
            self.assertEqual(result[key], expected)
        self.assertTrue(result["has_bboxes"])

    def test_failure_metrics_records_error(self):
        # A positionally-built PDF profile is enough here; no candidate exists
        # because the record under test describes a failed parser run.
        doc_profile = DocumentProfile("d1", "sample.pdf", "pdf", 1, ".pdf")

        result = failure_metrics("docling", doc_profile, "boom", elapsed_seconds=1.5)

        self.assertTrue(result["failed"])
        # The error message and timing passed in must be reported back unchanged.
        for key, expected in (("error", "boom"), ("elapsed_seconds", 1.5)):
            self.assertEqual(result[key], expected)
# Run the test suite only when this file is executed as a script,
# not when it is imported by a test runner.
if __name__ == "__main__":
    unittest.main()