Spaces:

wesslen
/

prodigy-rlhf-ranking

Sleeping

App Files Files Community

wesslen committed on May 23, 2023

Commit

0daa081

•

1 Parent(s): 72983bb

rlhf-ranking (#1)

Browse files

- add rlhf_ranking (9bb55ac992cf7fad2827328dcd3152ac748339b5)

Files changed (4) hide show

Dockerfile +3 -1
data/dataset.jsonl +0 -0
prodigy.sh +1 -0
rlhf_ranking.py +64 -0

Dockerfile CHANGED Viewed

@@ -25,6 +25,8 @@ RUN chmod 777 .
 COPY prodigy.json .
 COPY data ./data/
 ENV PRODIGY_HOME /app
 ENV PRODIGY_LOGGING "verbose"
@@ -32,4 +34,4 @@ ENV PRODIGY_ALLOWED_SESSIONS "user1,user2"
 EXPOSE 7860
-CMD python -m prodigy ner.manual ner_news en_core_web_sm ./data/dataset.jsonl --label PERSON,ORG,PRODUCT

 COPY prodigy.json .
 COPY data ./data/
+COPY rlhf_ranking.py .
+COPY prodigy.sh .
 ENV PRODIGY_HOME /app
 ENV PRODIGY_LOGGING "verbose"
 EXPOSE 7860
+CMD ["bash","prodigy.sh"]

data/dataset.jsonl CHANGED Viewed

The diff for this file is too large to render. See raw diff

prodigy.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ python -m prodigy rlhf.ranking rlhf_data data/dataset.jsonl -F rlhf_ranking.py

rlhf_ranking.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import prodigy
+import itertools as it
+from prodigy.util import set_hashes
+from prodigy import get_stream
+@prodigy.recipe(
+    "rlhf.ranking",
+    dataset=("Dataset to save answers to", "positional", None, str),
+    source=("Datafile to load", "positional", None, str),
+)
+def ranking(dataset, source):
+    # Load your own streams from anywhere you want
+    stream = get_stream(source)
+    def prep_stream(stream):
+        for ex in stream:
+            ex['text'] = ex['instruction']
+            del ex['instruction']
+            for c1, c2 in it.combinations(ex['options'], 2):
+                ex['options'] = [
+                    {"id": 0, "text": c1['response']},
+                    {"id": 1, "text": c2['response']}
+                ]
+                yield set_hashes(ex)
+    return {
+        "dataset": dataset,
+        "view_id": "choice",
+        "stream": prep_stream(stream),
+        "config":{
+            "global_css": ".prodigy-option{font-size: 15px;}"
+        }
+    }
+@prodigy.recipe(
+    "rlhf.respond",
+    dataset=("Dataset to save answers to", "positional", None, str),
+    source=("Datafile to load", "positional", None, str),
+)
+def ranking(dataset, source):
+    # Load your own streams from anywhere you want
+    stream = get_stream(source)
+    def prep_stream(stream):
+        for ex in stream:
+            ex['text'] = ex['instruction']
+            del ex['instruction']
+            yield ex
+    return {
+        "dataset": dataset,
+        "view_id": "blocks",
+        "stream": prep_stream(stream),
+        "config":{
+            "global_css": ".prodigy-option{font-size: 15px;}",
+            "blocks":[
+                {"view_id": "text"},
+                {"view_id": "text_input", "field_autofocus": True, "field_rows": 4, "field_placeholder": "Try to use 2-3 sentences to answer the question."},
+            ],
+        }
+    }