wesslen committed on
Commit
9bb55ac
·
1 Parent(s): 72983bb

add rlhf_ranking

Browse files
Files changed (4) hide show
  1. Dockerfile +3 -1
  2. data/dataset.jsonl +0 -0
  3. prodigy.sh +1 -0
  4. rlhf_ranking.py +64 -0
Dockerfile CHANGED
@@ -25,6 +25,8 @@ RUN chmod 777 .
25
 
26
  COPY prodigy.json .
27
  COPY data ./data/
 
 
28
 
29
  ENV PRODIGY_HOME /app
30
  ENV PRODIGY_LOGGING "verbose"
@@ -32,4 +34,4 @@ ENV PRODIGY_ALLOWED_SESSIONS "user1,user2"
32
 
33
  EXPOSE 7860
34
 
35
- CMD python -m prodigy ner.manual ner_news en_core_web_sm ./data/dataset.jsonl --label PERSON,ORG,PRODUCT
 
25
 
26
  COPY prodigy.json .
27
  COPY data ./data/
28
+ COPY rlhf_ranking.py .
29
+ COPY prodigy.sh .
30
 
31
  ENV PRODIGY_HOME /app
32
  ENV PRODIGY_LOGGING "verbose"
 
34
 
35
  EXPOSE 7860
36
 
37
+ CMD ["bash","prodigy.sh"]
data/dataset.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
prodigy.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ python -m prodigy rlhf.ranking rlhf_data data/dataset.jsonl -F rlhf_ranking.py
rlhf_ranking.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import prodigy
2
+ import itertools as it
3
+ from prodigy.util import set_hashes
4
+ from prodigy import get_stream
5
+
6
+
7
@prodigy.recipe(
    "rlhf.ranking",
    dataset=("Dataset to save answers to", "positional", None, str),
    source=("Datafile to load", "positional", None, str),
)
def ranking(dataset, source):
    """Serve one pairwise ``choice`` task per pair of candidate responses.

    Each record read from ``source`` must provide an ``instruction`` value
    and an ``options`` list whose items each carry a ``response``. For every
    unordered pair of options a task is emitted with the instruction as its
    ``text`` and the two responses as options with ids ``0`` and ``1``.
    Records with fewer than two options produce no tasks.

    Args:
        dataset: Name of the dataset the answers are saved to.
        source: Data file to load the records from.

    Returns:
        dict: The recipe components (dataset, view id, stream and config).
    """
    stream = get_stream(source)

    def prep_stream(stream):
        """Expand every record into one independent task per option pair."""
        for ex in stream:
            # Everything except the two fields that get rewritten is carried
            # over unchanged; the incoming record itself is left untouched.
            base = {
                key: value
                for key, value in ex.items()
                if key not in ("instruction", "options")
            }
            base["text"] = ex["instruction"]
            for c1, c2 in it.combinations(ex["options"], 2):
                # Build a fresh dict per pair: re-yielding one mutated dict
                # would hand every consumer the same object, and hashes
                # already stored on it would not be refreshed for later
                # pairs, making distinct pairs look like duplicates.
                task = dict(base)
                task["options"] = [
                    {"id": 0, "text": c1["response"]},
                    {"id": 1, "text": c2["response"]},
                ]
                yield set_hashes(task, overwrite=True)

    return {
        "dataset": dataset,
        "view_id": "choice",
        "stream": prep_stream(stream),
        "config": {
            "global_css": ".prodigy-option{font-size: 15px;}",
        },
    }
36
+
37
+
38
@prodigy.recipe(
    "rlhf.respond",
    dataset=("Dataset to save answers to", "positional", None, str),
    source=("Datafile to load", "positional", None, str),
)
def ranking(dataset, source):
    """Show each instruction with a free-text field for writing a response.

    NOTE(review): this function reuses the name ``ranking`` of the recipe
    function defined earlier in the file; the two are told apart only by the
    recipe name passed to the decorator.

    Args:
        dataset: Name of the dataset the answers are saved to.
        source: Data file to load the records from.

    Returns:
        dict: The recipe components (dataset, view id, stream and config).
    """

    def prep_stream(records):
        """Move each record's ``instruction`` value to the ``text`` key."""
        for record in records:
            # Same in-place rename as before: the record is modified and
            # then passed on as the task.
            record["text"] = record.pop("instruction")
            yield record

    # The interface stacks the plain text above a multi-line input field.
    answer_field = {
        "view_id": "text_input",
        "field_autofocus": True,
        "field_rows": 4,
        "field_placeholder": "Try to use 2-3 sentences to answer the question.",
    }
    ui_config = {
        "global_css": ".prodigy-option{font-size: 15px;}",
        "blocks": [{"view_id": "text"}, answer_field],
    }

    tasks = prep_stream(get_stream(source))
    return {
        "dataset": dataset,
        "view_id": "blocks",
        "stream": tasks,
        "config": ui_config,
    }