Ligeng Zhu committed on
Commit
86514f4
1 Parent(s): 313b015
Files changed (2) hide show
  1. app.py +13 -29
  2. data +1 -0
app.py CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import Repository
7
 
8
  import openai
9
 
10
- HF_TOKEN = os.environ.get("TRL_TOKEN", None)
11
  API_URL = os.environ.get("API_URL")
12
 
13
  theme = gr.themes.Monochrome(
@@ -31,8 +31,8 @@ if HF_TOKEN:
31
 
32
  repo = Repository(
33
  local_dir="./data/",
34
- clone_from="trl-lib/stack-llama-prompts",
35
- use_auth_token=HF_TOKEN,
36
  repo_type="dataset",
37
  )
38
  repo.git_pull()
@@ -49,22 +49,7 @@ def save_inputs_and_outputs(inputs, outputs, generate_kwargs):
49
  )
50
  f.write("\n")
51
  commit_url = repo.push_to_hub()
52
-
53
- def generate(
54
- instruction,
55
- temperature=0.9,
56
- max_new_tokens=256,
57
- top_p=0.95,
58
- repetition_penalty=1.0,
59
- do_save=True,
60
- ):
61
- output = instruction + str(temperature)
62
- s = ""
63
- for ch in output:
64
- s += ch
65
- yield s
66
- return s
67
-
68
  example_system_prompt = [
69
  "You are a helpful and precise assistant for checking the quality of the answer."
70
  ]
@@ -75,14 +60,7 @@ examples = [
75
  ["You are a helpful and precise assistant for checking the quality of the answer.", "[Question]\n{question}\n\n[The Start of Assistant's Answer]\n{answer}\n[The End of of Assistant's Answer]\n\nWe would like to request your feedback on the performance of the AI assistant in response to the user question displayed above.\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only the value indicating the scores for the Assistant. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."]
76
  ]
77
 
78
-
79
- def process_example(args):
80
- for x in generate(args):
81
- pass
82
- return x
83
-
84
-
85
- def gpt_eval(system_prompt, prompt, question, answer, openai_key):
86
  if openai_key is None or len(openai_key) <= 10:
87
  yield "Please enter a valid openai API key"
88
  return
@@ -108,7 +86,13 @@ def gpt_eval(system_prompt, prompt, question, answer, openai_key):
108
  # print(content, end="")
109
  output += content
110
  yield input_str + output
111
-
 
 
 
 
 
 
112
 
113
  css = ".generating {visibility: hidden}" # + share_btn_css
114
 
@@ -175,7 +159,7 @@ with gr.Blocks(theme=theme, analytics_enabled=False, css=css) as demo:
175
 
176
  openai_model = gr.Textbox(
177
  value="gpt-3.5-turbo",
178
- label="Model (More opions coming soon) )",
179
  )
180
 
181
  # gr.Examples(
 
7
 
8
  import openai
9
 
10
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
  API_URL = os.environ.get("API_URL")
12
 
13
  theme = gr.themes.Monochrome(
 
31
 
32
  repo = Repository(
33
  local_dir="./data/",
34
+ clone_from="Ligeng-Zhu/gpt-eval-prompts",
35
+ token=HF_TOKEN,
36
  repo_type="dataset",
37
  )
38
  repo.git_pull()
 
49
  )
50
  f.write("\n")
51
  commit_url = repo.push_to_hub()
52
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  example_system_prompt = [
54
  "You are a helpful and precise assistant for checking the quality of the answer."
55
  ]
 
60
  ["You are a helpful and precise assistant for checking the quality of the answer.", "[Question]\n{question}\n\n[The Start of Assistant's Answer]\n{answer}\n[The End of of Assistant's Answer]\n\nWe would like to request your feedback on the performance of the AI assistant in response to the user question displayed above.\nPlease rate the helpfulness, relevance, accuracy, level of details of their responses. Each assistant receives an overall score on a scale of 1 to 10, where a higher score indicates better overall performance.\nPlease first output a single line containing only the value indicating the scores for the Assistant. In the subsequent line, please provide a comprehensive explanation of your evaluation, avoiding any potential bias and ensuring that the order in which the responses were presented does not affect your judgment."]
61
  ]
62
 
63
+ def gpt_eval(system_prompt, prompt, question, answer, openai_key, do_save=True):
 
 
 
 
 
 
 
64
  if openai_key is None or len(openai_key) <= 10:
65
  yield "Please enter a valid openai API key"
66
  return
 
86
  # print(content, end="")
87
  output += content
88
  yield input_str + output
89
+ if do_save and HF_TOKEN:
90
+ save_inputs_and_outputs(
91
+ inputs=system_prompt + "\n" + origin_input,
92
+ outputs=output,
93
+ generate_kwargs={}
94
+ )
95
+ return
96
 
97
  css = ".generating {visibility: hidden}" # + share_btn_css
98
 
 
159
 
160
  openai_model = gr.Textbox(
161
  value="gpt-3.5-turbo",
162
+ label="Model (More opions coming soon)",
163
  )
164
 
165
  # gr.Examples(
data ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 7dca8500183db713b51713e8977d00ff9842e516