Spaces:
Runtime error
Runtime error
DSXiangLi
committed on
Commit
•
8bfb070
1
Parent(s):
1820e5d
- self/generate.py +6 -2
- self/prompt.py +1 -1
self/generate.py
CHANGED
@@ -35,7 +35,8 @@ class SELF(object):
|
|
35 |
def __init__(self, seed_file, openai_key, n_human, n_machine, n_instruct, prompt):
|
36 |
self.llm = OpenAI(openai_api_key=openai_key, temperature=1,
|
37 |
stop=[f'\n{n_instruct}', '{n_instruct}', '{n_instruct}.'], # 当已生成足够的指令则停止
|
38 |
-
logit_bias={'50259': -100}
|
|
|
39 |
) # 默认davinci-003
|
40 |
self.n_human, self.n_machine, self.n_instruct = n_human, n_machine, n_instruct
|
41 |
self.n_gen, self.n_keep = 0, 0
|
@@ -97,6 +98,8 @@ class SELF(object):
|
|
97 |
def decode_response(self, response):
|
98 |
if response is None:
|
99 |
return []
|
|
|
|
|
100 |
raw_instruct = SELF.prefix.format(id=self.first_id) + response['text']
|
101 |
raw_instruct = raw_instruct.split('###')
|
102 |
instruction_data = []
|
@@ -153,7 +156,8 @@ class SELF(object):
|
|
153 |
return keep_instruction
|
154 |
|
155 |
def step(self):
|
156 |
-
|
|
|
157 |
keep_instruct_data = self.sim_filter(new_instruct_data)
|
158 |
self.n_gen += len(new_instruct_data)
|
159 |
self.n_keep += len(keep_instruct_data)
|
|
|
35 |
def __init__(self, seed_file, openai_key, n_human, n_machine, n_instruct, prompt):
|
36 |
self.llm = OpenAI(openai_api_key=openai_key, temperature=1,
|
37 |
stop=[f'\n{n_instruct}', '{n_instruct}', '{n_instruct}.'], # 当已生成足够的指令则停止
|
38 |
+
logit_bias={'50259': -100}, # 不生成最后的停止符#
|
39 |
+
max_tokens=-1
|
40 |
) # 默认davinci-003
|
41 |
self.n_human, self.n_machine, self.n_instruct = n_human, n_machine, n_instruct
|
42 |
self.n_gen, self.n_keep = 0, 0
|
|
|
98 |
def decode_response(self, response):
|
99 |
if response is None:
|
100 |
return []
|
101 |
+
if '###' not in response['text']:
|
102 |
+
return []
|
103 |
raw_instruct = SELF.prefix.format(id=self.first_id) + response['text']
|
104 |
raw_instruct = raw_instruct.split('###')
|
105 |
instruction_data = []
|
|
|
156 |
return keep_instruction
|
157 |
|
158 |
def step(self):
|
159 |
+
response = self.generate()
|
160 |
+
new_instruct_data = self.decode_response(response)
|
161 |
keep_instruct_data = self.sim_filter(new_instruct_data)
|
162 |
self.n_gen += len(new_instruct_data)
|
163 |
self.n_keep += len(keep_instruct_data)
|
self/prompt.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
import re
|
3 |
|
4 |
#20个简化成5个
|
5 |
-
self_prompt = """你需要想出{n_instruct}
|
6 |
以下是要求:
|
7 |
1. 尽量不要在每个指令中重复使用动词,以最大化多样性
|
8 |
2. 指令的表达形式需要多样化。例如你可以把问题和祈使句结合起来
|
|
|
2 |
import re
|
3 |
|
4 |
#20个简化成5个
|
5 |
+
self_prompt = """你需要想出{n_instruct}个医学相关不同的任务指令。这些任务指令将输入GPT模型,我们将评估GPT模型完成指令的情况。
|
6 |
以下是要求:
|
7 |
1. 尽量不要在每个指令中重复使用动词,以最大化多样性
|
8 |
2. 指令的表达形式需要多样化。例如你可以把问题和祈使句结合起来
|