# zeroshotGPU / tests /test_parser_metrics.py
# Arjunvir Singh
# Initial commit: zeroshotGPU MVP with full eval surface
# db06ffa
import unittest
from zsgdp.schema import DocumentProfile, Element, PageProfile, ParseCandidate, TableObject
from zsgdp.verify.parser_metrics import candidate_metrics, failure_metrics
class ParserMetricsTests(unittest.TestCase):
    """Checks on the metric mappings returned by ``candidate_metrics`` and ``failure_metrics``."""

    def test_candidate_metrics_reports_coverage_and_valid_tables(self):
        """A one-page candidate with one paragraph and one table reports full ratios."""
        # The profiled page declares 11 digital text characters, which equals
        # len("hello world") -- the text of the only element in the candidate.
        page_profile = PageProfile(page_num=1, digital_text_chars=11)
        doc_profile = DocumentProfile(
            doc_id="d1",
            source_path="sample.md",
            file_type="markdown",
            page_count=1,
            extension=".md",
            pages=[page_profile],
        )

        paragraph = Element("e1", "d1", 1, "paragraph", text="hello world", bbox=(0, 0, 10, 10))
        table = TableObject(
            table_id="t1",
            page_nums=[1],
            markdown="| A | B |\n| --- | --- |\n| 1 | 2 |",
        )
        parsed = ParseCandidate(
            parser_name="test",
            doc_id="d1",
            source_path="sample.md",
            file_type="markdown",
            pages=[{"page_num": 1}],
            elements=[paragraph],
            tables=[table],
            confidence=0.9,
        )

        result = candidate_metrics(parsed, doc_profile, elapsed_seconds=0.25)

        # Exact-equality expectations, checked in the same order as listed.
        expected_values = (
            ("parser", "test"),
            ("text_coverage_ratio", 1.0),
            ("valid_table_ratio", 1.0),
        )
        for key, expected in expected_values:
            self.assertEqual(result[key], expected)
        self.assertTrue(result["has_bboxes"])

    def test_failure_metrics_records_error(self):
        """A failure record is flagged as failed and echoes the error and elapsed time."""
        doc_profile = DocumentProfile("d1", "sample.pdf", "pdf", 1, ".pdf")

        result = failure_metrics("docling", doc_profile, "boom", elapsed_seconds=1.5)

        self.assertTrue(result["failed"])
        self.assertEqual(result["error"], "boom")
        self.assertEqual(result["elapsed_seconds"], 1.5)
# Run this module's tests via unittest when the file is executed directly
# as a script; nothing happens here on a plain import.
if __name__ == "__main__":
    unittest.main()