# prodigy-rlhf-ranking / rlhf_ranking.py
# wesslen's picture
# rlhf-ranking (#1)
# 0daa081
import prodigy
import itertools as it
from prodigy.util import set_hashes
from prodigy import get_stream
@prodigy.recipe(
    "rlhf.ranking",
    dataset=("Dataset to save answers to", "positional", None, str),
    source=("Datafile to load", "positional", None, str),
)
def ranking(dataset, source):
    """Prodigy recipe ``rlhf.ranking``: pairwise comparison of responses.

    Every incoming example must carry an ``instruction`` key and an
    ``options`` list whose items each have a ``response`` key. For every
    unordered pair of options one task is emitted for the ``choice``
    interface, with the instruction as ``text`` and the two responses as
    the selectable options (ids ``0`` and ``1``).

    Args:
        dataset: Name of the Prodigy dataset the answers are saved to.
        source: Data source handed to ``get_stream``.

    Returns:
        The recipe components dict (``dataset``, ``view_id``, ``stream``,
        ``config``).
    """
    # Load your own streams from anywhere you want
    stream = get_stream(source)

    def prep_stream(stream):
        """Yield one independent ``choice`` task per pair of responses."""
        for ex in stream:
            # Everything except the fields that are rewritten per pair.
            base = {
                key: value
                for key, value in ex.items()
                if key not in ("instruction", "options")
            }
            base["text"] = ex["instruction"]
            for c1, c2 in it.combinations(ex["options"], 2):
                # Build a fresh dict per pair: re-yielding one mutated dict
                # would make every buffered task show the last pair.
                task = dict(base)
                task["options"] = [
                    {"id": 0, "text": c1["response"]},
                    {"id": 1, "text": c2["response"]},
                ]
                # overwrite=True so hashes are recomputed for each pair
                # rather than reusing hashes already present on the example,
                # which would make the pairs look like duplicates.
                yield set_hashes(task, overwrite=True)

    return {
        "dataset": dataset,
        "view_id": "choice",
        "stream": prep_stream(stream),
        "config": {
            "global_css": ".prodigy-option{font-size: 15px;}"
        },
    }
@prodigy.recipe(
    "rlhf.respond",
    dataset=("Dataset to save answers to", "positional", None, str),
    source=("Datafile to load", "positional", None, str),
)
def ranking(dataset, source):
    """Prodigy recipe ``rlhf.respond``: collect a free-text response.

    Each example's ``instruction`` value is moved to ``text`` and shown in
    a ``blocks`` interface made of a ``text`` block followed by a
    ``text_input`` block.

    NOTE(review): this function shares the Python name ``ranking`` with the
    ``rlhf.ranking`` recipe function defined earlier in this file, so it
    rebinds that module-level name. Presumably only the recipe names matter
    to Prodigy -- confirm before renaming.

    Args:
        dataset: Name of the Prodigy dataset the answers are saved to.
        source: Data source handed to ``get_stream``.

    Returns:
        The recipe components dict (``dataset``, ``view_id``, ``stream``,
        ``config``).
    """
    examples = get_stream(source)

    def rename_instruction(tasks):
        """Move ``instruction`` to ``text`` on each task, in place."""
        for task in tasks:
            task["text"] = task.pop("instruction")
            yield task

    text_block = {"view_id": "text"}
    input_block = {
        "view_id": "text_input",
        "field_autofocus": True,
        "field_rows": 4,
        "field_placeholder": "Try to use 2-3 sentences to answer the question.",
    }
    components = {"dataset": dataset, "view_id": "blocks"}
    components["stream"] = rename_instruction(examples)
    components["config"] = {
        "global_css": ".prodigy-option{font-size: 15px;}",
        "blocks": [text_block, input_block],
    }
    return components