"""Cross-encoder input formatting for CodeBERT.""" from __future__ import annotations QUESTION_TAG = "QUESTION:" SCHEMA_TAG = "SCHEMA:" STUDENT_TAG = "STUDENT_SQL:" CORRECT_TAG = "CORRECT_SQL:" def format_cross_encoder_input( question: str, schema: str, student_sql: str, correct_sql: str, ) -> str: """ Concatenate all fields into a single CodeBERT input sequence. The model attends jointly across question intent, schema, student SQL, and the reference solution — cross-encoder style in one forward pass. """ return ( f"{QUESTION_TAG}\n{question.strip()}\n\n" f"{SCHEMA_TAG}\n{schema.strip()}\n\n" f"{STUDENT_TAG}\n{student_sql.strip()}\n\n" f"{CORRECT_TAG}\n{correct_sql.strip()}" )