# Space: wesslen/prodigy-rlhf-ranking (revision 9bb55ac, file size 1,881 bytes)

import prodigy
import itertools as it
from prodigy.util import set_hashes
from prodigy import get_stream


@prodigy.recipe(
    "rlhf.ranking",
    dataset=("Dataset to save answers to", "positional", None, str),
    source=("Datafile to load", "positional", None, str),
)
def ranking(dataset, source):
    """Serve pairwise comparison tasks built from each example's options.

    Every example read from ``source`` is expected to provide an
    ``instruction`` value and an ``options`` sequence whose items have a
    ``response`` key. One ``choice`` task is emitted for every unordered
    pair of options; examples with fewer than two options emit nothing.

    Args:
        dataset: Name of the dataset the answers are saved to.
        source: Data file handed to ``get_stream``.

    Returns:
        The recipe components dict (``dataset``, ``view_id``, ``stream``,
        ``config``).

    Raises:
        KeyError: Lazily, while the stream is consumed, if an example lacks
            ``instruction`` or ``options``, or an option lacks ``response``.
    """
    # Load your own streams from anywhere you want
    stream = get_stream(source)

    def prep_stream(stream):
        for ex in stream:
            # Keep every other field (meta, etc.) but drop the two keys that
            # are rewritten for the choice interface.
            base = {
                key: value
                for key, value in ex.items()
                if key not in ("instruction", "options")
            }
            base["text"] = ex["instruction"]
            for c1, c2 in it.combinations(ex["options"], 2):
                # Build a fresh dict per pair: yielding one shared, mutated
                # dict would make queued tasks alias the same object, so they
                # would all end up showing the last pair's options.
                task = dict(base)
                task["options"] = [
                    {"id": 0, "text": c1["response"]},
                    {"id": 1, "text": c2["response"]},
                ]
                # overwrite=True: any hashes copied over from the loaded
                # example must be recomputed, otherwise every pair would keep
                # the same task hash and could be treated as a duplicate.
                yield set_hashes(task, overwrite=True)

    return {
        "dataset": dataset,
        "view_id": "choice",
        "stream": prep_stream(stream),
        "config": {
            "global_css": ".prodigy-option{font-size: 15px;}"
        },
    }


@prodigy.recipe(
    "rlhf.respond",
    dataset=("Dataset to save answers to", "positional", None, str),
    source=("Datafile to load", "positional", None, str),
)
def ranking(dataset, source):
    """Show each instruction next to a free-text field for writing a response.

    NOTE(review): this function reuses the Python name ``ranking`` already
    used by the ``rlhf.ranking`` recipe in this file, so the module-level
    name ends up bound to this one. The recipes are declared under distinct
    recipe names; consider giving this function its own name.

    Args:
        dataset: Name of the dataset the answers are saved to.
        source: Data file handed to ``get_stream``.

    Returns:
        The recipe components dict (``dataset``, ``view_id``, ``stream``,
        ``config``).
    """
    examples = get_stream(source)

    def as_prompts(tasks):
        # Move the instruction under the "text" key, which the "text" block
        # below is presumably rendering; the example is modified in place.
        for task in tasks:
            task['text'] = task.pop('instruction')
            yield task

    text_block = {"view_id": "text"}
    input_block = {
        "view_id": "text_input",
        "field_autofocus": True,
        "field_rows": 4,
        "field_placeholder": "Try to use 2-3 sentences to answer the question.",
    }
    config = {
        "global_css": ".prodigy-option{font-size: 15px;}",
        "blocks": [text_block, input_block],
    }

    return {
        "dataset": dataset,
        "view_id": "blocks",
        "stream": as_prompts(examples),
        "config": config,
    }