from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


class Tasks(Enum):
    easy_levenshtein = Task("easy_levenshtein", "score", "Easy Lev.")
    easy_bleu = Task("easy_bleu", "score", "Easy BLEU")
    hard_levenshtein = Task("hard_levenshtein", "score", "Hard Lev.")
    hard_bleu = Task("hard_bleu", "score", "Hard BLEU")
TITLE = """<h1 align="center" id="space-title">Critical Apparatus OCR Leaderboard</h1>"""
INTRODUCTION_TEXT = """
This benchmark measures OCR quality on a Greek critical edition page from Lloyd-Jones' *Sophocles*.

Systems must emit two JSON files:

- `text.json`: the main text lines keyed by line number
- `apparatus.json`: the critical apparatus keyed by line number or range

There are two task variants:

- `Easy`: the input is already split into `text` and `apparatus` crops
- `Hard`: the input is the full page image, and the system must separate the two outputs itself

Each variant is scored with normalized Levenshtein similarity and BLEU. The leaderboard average is the mean of those four scores.
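As a sketch, with made-up scores purely for illustration, the leaderboard average is computed as:

```python
# Hypothetical scores for illustration; real values come from the evaluator.
scores = {
    "easy_levenshtein": 0.90,
    "easy_bleu": 0.70,
    "hard_levenshtein": 0.80,
    "hard_bleu": 0.60,
}

# Mean of the four variant scores.
average = sum(scores.values()) / len(scores)
```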
"""
LLM_BENCHMARKS_TEXT = """
## How it works

- Gold data lives in `data/lloyd-jones-soph-170/ocr/`
- The Hard input is `data/lloyd-jones-soph-170/png/lloyd-jones-fullpage.png`
- The Easy inputs are `data/lloyd-jones-soph-170/png/lloyd-jones-text.png` and `data/lloyd-jones-soph-170/png/lloyd-jones-apparatus.png`

The expected output schema is a JSON object mapping line numbers or ranges to OCR strings, matching the gold files already in the repo.
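For illustration only (the keys and strings below are placeholders, not real gold data), the two outputs follow this shape:

```python
# Placeholder content; the real files contain the OCR'd Greek text.
text = {
    "1": "first main-text line",
    "2": "second main-text line",
}
apparatus = {
    "1-2": "apparatus entry covering lines 1 to 2",
}
```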
## Metrics

- `Levenshtein`: character-level similarity after flattening each JSON file into a deterministic text representation
- `BLEU`: token-level overlap score on the same flattened representation
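A minimal sketch of the normalized Levenshtein similarity (not necessarily the evaluator's exact implementation):

```python
def levenshtein_similarity(a: str, b: str) -> float:
    # Classic dynamic-programming edit distance over characters.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        curr = [i]
        for j, cb in enumerate(b, start=1):
            curr.append(min(
                prev[j] + 1,               # deletion
                curr[j - 1] + 1,           # insertion
                prev[j - 1] + (ca != cb),  # substitution
            ))
        prev = curr
    # Normalize the distance into a similarity in [0, 1].
    return 1 - prev[-1] / max(len(a), len(b), 1)
```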
## First seeded submission

The queue is pre-seeded with `ibm-granite/granite-vision-3.3-2b`. Once a real result JSON is added for that model, it will appear in the leaderboard automatically.
"""
EVALUATION_QUEUE_TEXT = """
Submit the Hugging Face model repo and revision you want evaluated on this OCR task.

The evaluator should produce four scores in its result JSON:

- `easy_levenshtein.score`
- `easy_bleu.score`
- `hard_levenshtein.score`
- `hard_bleu.score`
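A result file matching those keys might look like this sketch (the scores are placeholders, not real results):

```python
import json

# Placeholder scores for illustration only.
result = {
    "easy_levenshtein": {"score": 0.0},
    "easy_bleu": {"score": 0.0},
    "hard_levenshtein": {"score": 0.0},
    "hard_bleu": {"score": 0.0},
}
payload = json.dumps(result, indent=2)
```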
The queue shown below is local-first, so the Space can be previewed before the dedicated backend datasets are configured.
"""
CITATION_BUTTON_LABEL = "Citation snippet"
CITATION_BUTTON_TEXT = r"""
"""