DSXiangLi committed on
Commit
8bfb070
1 Parent(s): 1820e5d
Files changed (2) hide show
  1. self/generate.py +6 -2
  2. self/prompt.py +1 -1
self/generate.py CHANGED
@@ -35,7 +35,8 @@ class SELF(object):
35
  def __init__(self, seed_file, openai_key, n_human, n_machine, n_instruct, prompt):
36
  self.llm = OpenAI(openai_api_key=openai_key, temperature=1,
37
  stop=[f'\n{n_instruct}', '{n_instruct}', '{n_instruct}.'], # 当已生成足够的指令则停止
38
- logit_bias={'50259': -100} # 不生成最后的停止符#
 
39
  ) # 默认davinci-003
40
  self.n_human, self.n_machine, self.n_instruct = n_human, n_machine, n_instruct
41
  self.n_gen, self.n_keep = 0, 0
@@ -97,6 +98,8 @@ class SELF(object):
97
  def decode_response(self, response):
98
  if response is None:
99
  return []
 
 
100
  raw_instruct = SELF.prefix.format(id=self.first_id) + response['text']
101
  raw_instruct = raw_instruct.split('###')
102
  instruction_data = []
@@ -153,7 +156,8 @@ class SELF(object):
153
  return keep_instruction
154
 
155
  def step(self):
156
- new_instruct_data = self.generate()
 
157
  keep_instruct_data = self.sim_filter(new_instruct_data)
158
  self.n_gen += len(new_instruct_data)
159
  self.n_keep += len(keep_instruct_data)
 
35
  def __init__(self, seed_file, openai_key, n_human, n_machine, n_instruct, prompt):
36
  self.llm = OpenAI(openai_api_key=openai_key, temperature=1,
37
  stop=[f'\n{n_instruct}', '{n_instruct}', '{n_instruct}.'], # 当已生成足够的指令则停止
38
+ logit_bias={'50259': -100}, # 不生成最后的停止符#
39
+ max_tokens=-1
40
  ) # 默认davinci-003
41
  self.n_human, self.n_machine, self.n_instruct = n_human, n_machine, n_instruct
42
  self.n_gen, self.n_keep = 0, 0
 
98
  def decode_response(self, response):
99
  if response is None:
100
  return []
101
+ if '###' not in response['text']:
102
+ return []
103
  raw_instruct = SELF.prefix.format(id=self.first_id) + response['text']
104
  raw_instruct = raw_instruct.split('###')
105
  instruction_data = []
 
156
  return keep_instruction
157
 
158
  def step(self):
159
+ response = self.generate()
160
+ new_instruct_data = self.decode_response(response)
161
  keep_instruct_data = self.sim_filter(new_instruct_data)
162
  self.n_gen += len(new_instruct_data)
163
  self.n_keep += len(keep_instruct_data)
self/prompt.py CHANGED
@@ -2,7 +2,7 @@
2
  import re
3
 
4
  #20个简化成5个
5
- self_prompt = """你需要想出{n_instruct}个不同的任务指令。这些任务指令将输入GPT模型,我们将评估GPT模型完成指令的情况。
6
  以下是要求:
7
  1. 尽量不要在每个指令中重复使用动词,以最大化多样性
8
  2. 指令的表达形式需要多样化。例如你可以把问题和祈使句结合起来
 
2
  import re
3
 
4
  #20个简化成5个
5
+ self_prompt = """你需要想出{n_instruct}个医学相关不同的任务指令。这些任务指令将输入GPT模型,我们将评估GPT模型完成指令的情况。
6
  以下是要求:
7
  1. 尽量不要在每个指令中重复使用动词,以最大化多样性
8
  2. 指令的表达形式需要多样化。例如你可以把问题和祈使句结合起来