import itertools as it

import prodigy
from prodigy import get_stream
from prodigy.util import set_hashes


@prodigy.recipe(
    "rlhf.ranking",
    dataset=("Dataset to save answers to", "positional", None, str),
    source=("Datafile to load", "positional", None, str),
)
def ranking(dataset, source):
    """Show pairs of candidate responses to an instruction in a choice UI.

    Each incoming example is read for an ``instruction`` string and an
    ``options`` list whose items carry a ``response`` string. One task is
    emitted for every unordered pair of options.

    Args:
        dataset: Name of the Prodigy dataset to save answers to.
        source: Data file to load; passed straight to ``get_stream``.

    Returns:
        The recipe components dict (dataset, view_id, stream, config).
    """
    # Load your own streams from anywhere you want
    stream = get_stream(source)

    def prep_stream(stream):
        """Yield one freshly hashed choice task per pair of responses."""
        for ex in stream:
            # Fields shared by every pair generated from this example.
            # "instruction" is renamed to "text"; "options" is rebuilt below.
            shared = {
                key: value
                for key, value in ex.items()
                if key not in ("instruction", "options")
            }
            shared["text"] = ex["instruction"]

            # Examples with fewer than two options produce no pairs.
            for first, second in it.combinations(ex["options"], 2):
                # Build a *new* dict for each pair. Re-using and mutating a
                # single dict would make every task already handed out
                # change to the latest pair.
                task = {
                    **shared,
                    "options": [
                        {"id": 0, "text": first["response"]},
                        {"id": 1, "text": second["response"]},
                    ],
                }
                # overwrite=True so hashes carried over from the source
                # example cannot give every pair the same task hash.
                yield set_hashes(task, overwrite=True)

    return {
        "dataset": dataset,
        "view_id": "choice",
        "stream": prep_stream(stream),
        "config": {
            "global_css": ".prodigy-option{font-size: 15px;}",
        },
    }


@prodigy.recipe(
    "rlhf.respond",
    dataset=("Dataset to save answers to", "positional", None, str),
    source=("Datafile to load", "positional", None, str),
)
def respond(dataset, source):
    """Show an instruction with a free-text input for writing a response.

    Each incoming example is read for an ``instruction`` string, which is
    moved to the ``text`` field and rendered above a text input block.

    Args:
        dataset: Name of the Prodigy dataset to save answers to.
        source: Data file to load; passed straight to ``get_stream``.

    Returns:
        The recipe components dict (dataset, view_id, stream, config).
    """
    # Load your own streams from anywhere you want
    stream = get_stream(source)

    def prep_stream(stream):
        """Rename each example's "instruction" field to "text"."""
        for ex in stream:
            ex["text"] = ex["instruction"]
            del ex["instruction"]
            yield ex

    return {
        "dataset": dataset,
        "view_id": "blocks",
        "stream": prep_stream(stream),
        "config": {
            "global_css": ".prodigy-option{font-size: 15px;}",
            "blocks": [
                {"view_id": "text"},
                {
                    "view_id": "text_input",
                    "field_autofocus": True,
                    "field_rows": 4,
                    "field_placeholder": "Try to use 2-3 sentences to answer the question.",
                },
            ],
        },
    }