DSXiangLi committed
Commit 15a824f
1 Parent(s): a8e5b87
ape/__pycache__/ape.cpython-38.pyc ADDED
Binary file (1.69 kB).
 
ape/__pycache__/instance.cpython-38.pyc CHANGED
Binary files a/ape/__pycache__/instance.cpython-38.pyc and b/ape/__pycache__/instance.cpython-38.pyc differ
 
ape/__pycache__/llm.cpython-38.pyc ADDED
Binary file (2.32 kB).
 
ape/__pycache__/prompt.cpython-38.pyc CHANGED
Binary files a/ape/__pycache__/prompt.cpython-38.pyc and b/ape/__pycache__/prompt.cpython-38.pyc differ
 
ape/ape.py CHANGED
@@ -1,24 +1,49 @@
 # -*-coding:utf-8 -*-
 
-from ape.instance import Instance, LoadFactory
+from ape.instance import Instance, LoadFactory, upload_file
+from ape.llm import LLMGPT
 from functools import partial
+from itertools import chain
+LLM = None
 
 
 def load_task(task, file):
     global instance
     if task:
         loader = LoadFactory[task]
-        print(loader)
     else:
+        file = [i.name for i in file]
         print(file)
-        loader = partial(LoadFactory['upload'], file=[i.name for i in file][0])
+        loader = partial(upload_file, file=file[0])
     instance = Instance.from_file(loader)
     print(instance.samples[0])
-    return instance
+    return instance, f'{instance.n_sample} Data Loaded'
 
 
 def sample_data(instance, n_train, n_few_shot, n_eval):
     instance.sample(n_train, n_few_shot, n_eval)
     train_str = instance.display(instance.train_samples)
     eval_str = instance.display(instance.eval_samples)
-    return train_str, eval_str, instance
+    return train_str, eval_str, instance, 'Sample Done'
+
+
+def estimate_cost(instance):
+    train_text = ''.join(chain(*instance.train_samples))
+    eval_text = ''.join(chain(*instance.eval_samples))
+    train_cost = LLMGPT.confirm_cost(train_text, 'train')
+    eval_cost = LLMGPT.confirm_cost(eval_text, 'eval')
+    return f'Train={train_cost} Eval={eval_cost}'
+
+
+def generate(instance, openai_key, n_instruct):
+    global LLM
+    if LLM is None:
+        LLM = LLMGPT(openai_key, n_instruct)
+
+    instructions = []
+    train_iter = instance.get_train_iter()
+    for few_shot in train_iter:
+        instruction = LLM.generate_instruction(few_shot)
+        print(instruction)
+        instructions.append(instruction)  # collect results so the joined return value is not empty
+    return '\n'.join(instructions)
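Taken together, the new ape.py exposes the four callbacks the Gradio app wires up: load_task, sample_data, estimate_cost and generate. A minimal sketch of calling them outside the UI, assuming the built-in 'paraphase' task key from LoadFactory and a placeholder OpenAI key; estimate_cost only counts tokens against the module's Cost table, so it should not need a key:

# Smoke-test the ape.py callbacks outside the UI (task key and API key are placeholders).
from ape.ape import load_task, sample_data, estimate_cost, generate

instance, load_flag = load_task('paraphase', None)        # built-in loader branch, no upload needed
train_str, eval_str, instance, sample_flag = sample_data(instance, n_train=2, n_few_shot=3, n_eval=5)
print(load_flag, '|', sample_flag)
print(estimate_cost(instance))                            # token-cost estimate before spending API quota
print(generate(instance, openai_key='sk-...', n_instruct=2))  # generated instructions joined by newlines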
ape/instance.py CHANGED
@@ -11,10 +11,21 @@ class Instance(object):
 
     def __init__(self, loader=None):
         self.samples = loader()
+        self.n_few_shot = 0
+        self.n_train = 0
+        self.n_eval = 0
+        self.train_iter = None
         self.train_samples = []
         self.eval_samples = []
 
+    @property
+    def n_sample(self):
+        return len(self.samples)
+
     def sample(self, n_train, n_few_shot, n_eval):
+        self.n_train = n_train
+        self.n_few_shot = n_few_shot
+        self.n_eval = n_eval
         n_train = n_train * n_few_shot
         if n_train + n_eval > len(self.samples):
             raise ValueError(f'Train + Eval > total samples {len(self.samples)}, decrease them')
@@ -24,6 +35,10 @@ class Instance(object):
         self.train_samples = [self.samples[i] for i in train_index]
         self.eval_samples = [self.samples[i] for i in train_index]
 
+    def get_train_iter(self):
+        for i in range(self.n_train):
+            yield self.train_samples[i * self.n_few_shot:(i + 1) * self.n_few_shot]
+
     @staticmethod
     def display(samples):
         s = ""
@@ -80,8 +95,7 @@ def upload_file(file):
 LoadFactory = {
     'paraphase': load_paraphase,
     'event_extract': load_event_extraction,
-    'search_intent': load_intent,
-    'upload': upload_file
+    'search_intent': load_intent
 }
 
 if __name__ == '__main__':
@@ -100,3 +114,5 @@ if __name__ == '__main__':
                               ('deliberately', 'accidentally'), ('off', 'on')])
     instance2.sample(n_train, few_shot, n_eval)
     print(instance2.display(instance2.train_samples))
+    train_iter = instance2.get_train_iter()
+    print(next(train_iter))
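The new get_train_iter walks the sampled train_samples in n_train chunks of n_few_shot pairs. A standalone illustration of that slicing, on toy data invented for the example:

# Reproduce the get_train_iter() slicing: n_train groups of n_few_shot pairs each.
samples = [(f'input_{i}', f'output_{i}') for i in range(6)]
n_train, n_few_shot = 2, 3
groups = [samples[i * n_few_shot:(i + 1) * n_few_shot] for i in range(n_train)]
print(groups[0])  # first 3 pairs  -> one few-shot block for instruction generation
print(groups[1])  # next 3 pairs   -> a second, independent few-shot block
# Instance.sample() enforces n_train * n_few_shot + n_eval <= len(samples),
# so the slices never run past the available data.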
ape/llm.py CHANGED
@@ -19,9 +19,9 @@ Cost = {
 
 
 class LLMGPT(object):
-    def __init__(self, openai_key, max_tokens, n_instruct):
-        self.gen_llm = ChatOpenAI(openai_api_key=openai_key, max_tokens=max_tokens, temperature=0.7, n=n_instruct)
-        self.eval_llm = OpenAI(openai_api_key=openai_key, max_tokens=max_tokens, temperature=0.7, echo=True)
+    def __init__(self, openai_key, n_instruct):
+        self.gen_llm = ChatOpenAI(openai_api_key=openai_key, max_tokens=2000, temperature=0.7, n=n_instruct)
+        self.eval_llm = OpenAI(openai_api_key=openai_key, max_tokens=2000, temperature=0.7, echo=True)
         self.gen_chain = None
         self.eval_chain = None
         self.init()
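The constructor change drops the max_tokens argument and hardcodes 2000, so callers such as ape.generate only pass the key and the number of candidate instructions. Roughly, the two LangChain wrappers it builds look like the sketch below; the import paths depend on the installed LangChain version and the key is a placeholder:

# Approximate equivalent of what LLMGPT.__init__ now constructs.
from langchain.chat_models import ChatOpenAI   # chat model: generates candidate instructions
from langchain.llms import OpenAI              # completion model: scores prompts

gen_llm = ChatOpenAI(openai_api_key='sk-...', max_tokens=2000, temperature=0.7, n=2)    # n = instructions per call
eval_llm = OpenAI(openai_api_key='sk-...', max_tokens=2000, temperature=0.7, echo=True)  # echo=True so log probs cover the prompt text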
ape/prompt.py CHANGED
@@ -5,15 +5,13 @@ few_shot_prompt = "Input: {input}\nOutput: {output}"
 
 gen_user_prompt = '{few_shot}'
 
-gen_sys_prompt = """
-I want you to act as an AI assisted doctor. You are capable of answering anything related to medical. Given
-a list of input-output pairs, you must come up with the correct instruction in medical-related area.
-You must respond in the following format, and always respond in chinese.
-```
-{{'instruction':"$YOUR_INSTRUCTION"}}
-```
-Everything between the ``` must be valid json.
-"""
+gen_sys_prompt = """I want you to act as an AI assisted doctor. You are capable of answering anything related to medical. Given
+a list of input-output pairs, you must come up with the correct instruction in medical-related area.
+You must respond in the following format, and always respond in chinese.
+```
+{{'instruction':"$YOUR_INSTRUCTION"}}
+```
+Everything between the ``` must be valid json. """
 
 eval_prompt = "Instruction: {prompt}\nInput: {input}\nOutput: {output}"
 
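The change here only folds the opening triple quote into the first line of gen_sys_prompt; the placeholders are still filled later, presumably via str.format or LangChain's PromptTemplate, both of which treat the doubled braces around 'instruction' as literal JSON braces. A small illustration with the two single-line templates, using made-up values:

# Fill the prompt templates the way the chains are expected to (values are illustrative).
few_shot_prompt = "Input: {input}\nOutput: {output}"
eval_prompt = "Instruction: {prompt}\nInput: {input}\nOutput: {output}"

shots = [('How are you', 'I am fine'), ('Good morning', 'Good morning to you')]
few_shot = '\n'.join(few_shot_prompt.format(input=i, output=o) for i, o in shots)
print(few_shot)
print(eval_prompt.format(prompt='Paraphrase the input sentence',
                         input='How are you', output='I am fine'))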
app.py CHANGED
@@ -6,13 +6,13 @@ from ape.prompt import MyTemplate
 from ape.ape import *
 
 
-with gr.Blocks(title="Automatic Prompt Engineer", css=None) as demo:
+with gr.Blocks(title="Automatic Prompt Engineer", theme=gr.themes.Glass()) as demo:
     gr.Markdown("# Automatic Prompt Engineer")
-    openai_key = gr.Textbox(type='password', label='输入 API key')
-
     with gr.Row():
         with gr.Column(scale=2):
             gr.Markdown("## Configuration")
+            with gr.Row():
+                openai_key = gr.Textbox(type='password', label='输入 API key')
             with gr.Row():
                 n_train = gr.Slider(label="Number of Train", minimum=1, maximum=20, step=1, value=5)
                 n_few_shot = gr.Slider(label="Number of FewShot", minimum=1, maximum=20, step=1, value=5)
@@ -22,7 +22,7 @@ with gr.Blocks(title="Automatic Prompt Engineer", css=None) as demo:
                 n_instruct = gr.Slider(label="Number of Prompt", minimum=1, maximum=5, step=1, value=2)
 
         with gr.Column(scale=3):
-            gr.Markdown("## 加载数据集")
+            gr.Markdown("## Load Data")
             with gr.Tab("Choose Dataset"):
                 with gr.Row():
                     file = gr.File(label='上传txt文件,input\toutput\n', file_types=['txt'])
@@ -31,7 +31,9 @@ with gr.Blocks(title="Automatic Prompt Engineer", css=None) as demo:
                 with gr.Row():
                     instance = gr.State()
                     load_button = gr.Button("Load Task")
+                    load_flag = gr.Textbox()
                     sample_button = gr.Button('sample Data')
+                    sample_flag = gr.Textbox()
 
             with gr.Tab("Display Sampled Dataset"):
                 with gr.Row():
@@ -39,41 +41,31 @@ with gr.Blocks(title="Automatic Prompt Engineer", css=None) as demo:
                     eval_str = gr.Textbox(max_lines=100, lines=10, label="Data for scoring")
 
     with gr.Row():
-        gr.Markdown("## Run APE")
         with gr.Column(scale=2):
-            with gr.Row():
-                gr.Markdown('1. Generate Prompt')
-                gr.Markdown(MyTemplate['gen_sys_prompt'])
-                gr.Markdown('2. Evaluate Prompt')
-                gr.Markdown(MyTemplate['eval_prompt'])
+            gr.Markdown("## Run APE")
+            gen_prompt = gr.Textbox(max_lines=100, lines=10,
+                                    value=MyTemplate['gen_sys_prompt'], label="Prompt for generation")
+            eval_prompt = gr.Textbox(max_lines=100, lines=10,
+                                     value=MyTemplate['eval_prompt'], label="Prompt for Evaluation")
 
             with gr.Row():
-                basic_cost = gr.Textbox(lines=1, value="", label="Estimated Cost ($)", disabled=True)
-                basic_cost_button = gr.Button("Estimate Cost")
-                basic_ape_button = gr.Button("Run APE")
+                cost = gr.Textbox(lines=1, value="", label="Estimated Cost ($)")
+                cost_button = gr.Button("Estimate Cost")
+                ape_button = gr.Button("Run APE")
 
         with gr.Column(scale=3):
+            gr.Markdown("## Get Result")
             with gr.Tab("APE Results"):
+                all_prompt = gr.Textbox(label='Generated Prompt')
                 # Display all generated prompt with log probs
                 output_df = gr.DataFrame(type='pandas', headers=['Prompt', 'Likelihood'], wrap=True, interactive=False)
 
-            with gr.Tab("Prompt Overview"):
-                with gr.Row():
-                    generation_prompt_sample = gr.Textbox(lines=8, value="",
-                                                          label="Instruction Generation Prompts",
-                                                          disabled=True)
-                    evaluation_prompt_sample = gr.Textbox(lines=8, value="",
-                                                          label="Evaluation Prompts",
-                                                          disabled=True)
-
             with gr.Tab("Test Prompt"):
                 # Test the output of LLM using prompt
                 with gr.Row():
                     with gr.Column(scale=1):
-                        test_prompt = gr.Textbox(lines=4, value="",
-                                                 label="Prompt to test")
-                        test_inputs = gr.Textbox(lines=1, value="",
-                                                 label="Input used to test prompt")
+                        test_prompt = gr.Textbox(lines=4, value="", label="Prompt to test")
+                        test_inputs = gr.Textbox(lines=1, value="", label="Input used to test prompt")
                         answer_button = gr.Button("Test")
                     with gr.Column(scale=1):
                         test_output = gr.Textbox(lines=9, value="", label="Model Output")
@@ -93,16 +85,21 @@ with gr.Blocks(title="Automatic Prompt Engineer", css=None) as demo:
     Callback
     """
    # 1. Pick an existing task or upload a file, then instantiate Instance
-    load_button.click(load_task, [task, file], [instance])
+    load_button.click(load_task, [task, file], [instance, load_flag])
 
    # 2. Sample data per the Configuration to build the train and eval sets and show them in the UI; resampling is supported
-    sample_button.click(sample_data, [instance, n_train, n_few_shot, n_eval], [train_str, eval_str, instance])
+    sample_button.click(sample_data, [instance, n_train, n_few_shot, n_eval], [train_str, eval_str, instance, sample_flag])
 
    # 3. Estimate Cost for train + Eval
+    cost_button.click(estimate_cost, [instance], [cost])
 
-
-    # 4. Run APE -> all instructions, plus the log prob of each instruction
-
-    # 5. Test a single instruction
-
-    # 6. Manually score instructions
+    # 4. Run APE -> all generated instructions
+    ape_button.click(generate, [instance, openai_key, n_instruct], [all_prompt])
+
+    # 5. Evaluate -> log prob of every instruction
+
+    # 6. Test a user-entered instruction
+
+    # 7. Score a user-entered instruction
+
+demo.launch(show_error=True)
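The wiring above relies on the gr.State + Button.click pattern: the state holding the Instance is both an input and an output of each callback, and the new load_flag / sample_flag textboxes surface the status strings the callbacks now return. A stripped-down, self-contained sketch of that pattern (not the app itself):

# Minimal gr.State + Button.click example mirroring the load_task wiring.
import gradio as gr

def load(name):
    data = list(range(5))                 # stand-in for Instance.from_file(...)
    return data, f'{len(data)} Data Loaded'

with gr.Blocks() as demo:
    name = gr.Textbox(label='Dataset name')
    state = gr.State()                    # holds the loaded object between callbacks
    flag = gr.Textbox(label='Status')
    gr.Button('Load').click(load, [name], [state, flag])

demo.launch()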