File size: 769 Bytes
9b2cded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""Cross-encoder input formatting for CodeBERT."""

from __future__ import annotations

QUESTION_TAG = "QUESTION:"
SCHEMA_TAG = "SCHEMA:"
STUDENT_TAG = "STUDENT_SQL:"
CORRECT_TAG = "CORRECT_SQL:"


def format_cross_encoder_input(
    question: str,
    schema: str,
    student_sql: str,
    correct_sql: str,
) -> str:
    """
    Concatenate all fields into a single CodeBERT input sequence.

    The model attends jointly across question intent, schema, student SQL,
    and the reference solution — cross-encoder style in one forward pass.
    """
    return (
        f"{QUESTION_TAG}\n{question.strip()}\n\n"
        f"{SCHEMA_TAG}\n{schema.strip()}\n\n"
        f"{STUDENT_TAG}\n{student_sql.strip()}\n\n"
        f"{CORRECT_TAG}\n{correct_sql.strip()}"
    )