Spaces:
Running
Running
Kang Suhyun
committed on
Commit
•
43c8549
1
Parent(s):
2a0aa5a
[#71] Add custom prompt option (#77)
Browse files* [#71] Add custom prompt option
Changes:
- It will look for a summarizationInstruction or a translationInstruction config for each model and use it as a prompt
- A default prompt is used if no instruction is found
- The prompt requests a JSON format response
* Update prompts
* Reorder message content in check_models function
* prompt -> instruction
- app.py +8 -5
- model.py +23 -18
- response.py +17 -16
app.py
CHANGED
@@ -28,7 +28,7 @@ class VoteOptions(enum.Enum):
|
|
28 |
|
29 |
|
30 |
def vote(vote_button, response_a, response_b, model_a_name, model_b_name,
|
31 |
-
|
32 |
doc_id = uuid4().hex
|
33 |
winner = VoteOptions(vote_button).name.lower()
|
34 |
|
@@ -37,7 +37,7 @@ def vote(vote_button, response_a, response_b, model_a_name, model_b_name,
|
|
37 |
|
38 |
doc = {
|
39 |
"id": doc_id,
|
40 |
-
"prompt":
|
41 |
"instruction": instruction,
|
42 |
"model_a": model_a_name,
|
43 |
"model_b": model_b_name,
|
@@ -116,7 +116,7 @@ with gr.Blocks(title="Arena", css=css) as app:
|
|
116 |
model_names = [gr.State(None), gr.State(None)]
|
117 |
response_boxes = [gr.State(None), gr.State(None)]
|
118 |
|
119 |
-
|
120 |
submit = gr.Button()
|
121 |
|
122 |
with gr.Group():
|
@@ -166,7 +166,10 @@ with gr.Blocks(title="Arena", css=css) as app:
|
|
166 |
category_radio, source_language, target_language, submit, vote_row,
|
167 |
model_name_row
|
168 |
]).then(fn=get_responses,
|
169 |
-
inputs=[
|
|
|
|
|
|
|
170 |
outputs=response_boxes + model_names + [instruction_state])
|
171 |
submit_event.success(fn=lambda: gr.Row(visible=True), outputs=vote_row)
|
172 |
submit_event.then(
|
@@ -179,7 +182,7 @@ with gr.Blocks(title="Arena", css=css) as app:
|
|
179 |
outputs=[category_radio, source_language, target_language, submit])
|
180 |
|
181 |
common_inputs = response_boxes + model_names + [
|
182 |
-
|
183 |
target_language
|
184 |
]
|
185 |
common_outputs = [option_a, option_b, tie, model_name_row]
|
|
|
28 |
|
29 |
|
30 |
def vote(vote_button, response_a, response_b, model_a_name, model_b_name,
|
31 |
+
prompt, instruction, category, source_lang, target_lang):
|
32 |
doc_id = uuid4().hex
|
33 |
winner = VoteOptions(vote_button).name.lower()
|
34 |
|
|
|
37 |
|
38 |
doc = {
|
39 |
"id": doc_id,
|
40 |
+
"prompt": prompt,
|
41 |
"instruction": instruction,
|
42 |
"model_a": model_a_name,
|
43 |
"model_b": model_b_name,
|
|
|
116 |
model_names = [gr.State(None), gr.State(None)]
|
117 |
response_boxes = [gr.State(None), gr.State(None)]
|
118 |
|
119 |
+
prompt_textarea = gr.TextArea(label="Prompt", lines=4)
|
120 |
submit = gr.Button()
|
121 |
|
122 |
with gr.Group():
|
|
|
166 |
category_radio, source_language, target_language, submit, vote_row,
|
167 |
model_name_row
|
168 |
]).then(fn=get_responses,
|
169 |
+
inputs=[
|
170 |
+
prompt_textarea, category_radio, source_language,
|
171 |
+
target_language
|
172 |
+
],
|
173 |
outputs=response_boxes + model_names + [instruction_state])
|
174 |
submit_event.success(fn=lambda: gr.Row(visible=True), outputs=vote_row)
|
175 |
submit_event.then(
|
|
|
182 |
outputs=[category_radio, source_language, target_language, submit])
|
183 |
|
184 |
common_inputs = response_boxes + model_names + [
|
185 |
+
prompt_textarea, instruction_state, category_radio, source_language,
|
186 |
target_language
|
187 |
]
|
188 |
common_outputs = [option_a, option_b, tie, model_name_row]
|
model.py
CHANGED
@@ -25,6 +25,9 @@ decoded_secret = models_secret.payload.data.decode("UTF-8")
|
|
25 |
|
26 |
supported_models_json = json.loads(decoded_secret)
|
27 |
|
|
|
|
|
|
|
28 |
|
29 |
class Model:
|
30 |
|
@@ -35,11 +38,25 @@ class Model:
|
|
35 |
# The JSON keys are in camelCase. To unpack these keys into
|
36 |
# Model attributes, we need to use the same camelCase names.
|
37 |
apiKey: str = None, # pylint: disable=invalid-name
|
38 |
-
apiBase: str = None
|
|
|
|
|
39 |
self.name = name
|
40 |
self.provider = provider
|
41 |
self.api_key = apiKey
|
42 |
self.api_base = apiBase
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
|
45 |
supported_models: List[Model] = [
|
@@ -48,27 +65,15 @@ supported_models: List[Model] = [
|
|
48 |
]
|
49 |
|
50 |
|
51 |
-
def completion(model: Model, messages: List, max_tokens: float = None) -> str:
|
52 |
-
response = litellm.completion(model=model.provider + "/" +
|
53 |
-
model.name if model.provider else model.name,
|
54 |
-
api_key=model.api_key,
|
55 |
-
api_base=model.api_base,
|
56 |
-
messages=messages,
|
57 |
-
max_tokens=max_tokens)
|
58 |
-
|
59 |
-
return response.choices[0].message.content
|
60 |
-
|
61 |
-
|
62 |
def check_models(models: List[Model]):
|
63 |
for model in models:
|
64 |
print(f"Checking model {model.name}...")
|
65 |
try:
|
66 |
-
completion(
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
max_tokens=5)
|
72 |
print(f"Model {model.name} is available.")
|
73 |
|
74 |
# This check is designed to verify the availability of the models
|
|
|
25 |
|
26 |
supported_models_json = json.loads(decoded_secret)
|
27 |
|
28 |
+
DEFAULT_SUMMARIZE_INSTRUCTION = "Summarize the following text, maintaining the language of the text." # pylint: disable=line-too-long
|
29 |
+
DEFAULT_TRANSLATE_INSTRUCTION = "Translate the following text from {source_lang} to {target_lang}." # pylint: disable=line-too-long
|
30 |
+
|
31 |
|
32 |
class Model:
|
33 |
|
|
|
38 |
# The JSON keys are in camelCase. To unpack these keys into
|
39 |
# Model attributes, we need to use the same camelCase names.
|
40 |
apiKey: str = None, # pylint: disable=invalid-name
|
41 |
+
apiBase: str = None, # pylint: disable=invalid-name
|
42 |
+
summarizeInstruction: str = None, # pylint: disable=invalid-name
|
43 |
+
translateInstruction: str = None): # pylint: disable=invalid-name
|
44 |
self.name = name
|
45 |
self.provider = provider
|
46 |
self.api_key = apiKey
|
47 |
self.api_base = apiBase
|
48 |
+
self.summarize_instruction = summarizeInstruction or DEFAULT_SUMMARIZE_INSTRUCTION # pylint: disable=line-too-long
|
49 |
+
self.translate_instruction = translateInstruction or DEFAULT_TRANSLATE_INSTRUCTION # pylint: disable=line-too-long
|
50 |
+
|
51 |
+
def completion(self, messages: List, max_tokens: float = None) -> str:
|
52 |
+
response = litellm.completion(model=self.provider + "/" +
|
53 |
+
self.name if self.provider else self.name,
|
54 |
+
api_key=self.api_key,
|
55 |
+
api_base=self.api_base,
|
56 |
+
messages=messages,
|
57 |
+
max_tokens=max_tokens)
|
58 |
+
|
59 |
+
return response.choices[0].message.content
|
60 |
|
61 |
|
62 |
supported_models: List[Model] = [
|
|
|
65 |
]
|
66 |
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
def check_models(models: List[Model]):
|
69 |
for model in models:
|
70 |
print(f"Checking model {model.name}...")
|
71 |
try:
|
72 |
+
model.completion(messages=[{
|
73 |
+
"role": "user",
|
74 |
+
"content": "Hello."
|
75 |
+
}],
|
76 |
+
max_tokens=5)
|
|
|
77 |
print(f"Model {model.name} is available.")
|
78 |
|
79 |
# This check is designed to verify the availability of the models
|
response.py
CHANGED
@@ -11,7 +11,6 @@ from firebase_admin import firestore
|
|
11 |
import gradio as gr
|
12 |
|
13 |
from leaderboard import db
|
14 |
-
from model import completion
|
15 |
from model import Model
|
16 |
from model import supported_models
|
17 |
|
@@ -39,14 +38,18 @@ class Category(enum.Enum):
|
|
39 |
|
40 |
|
41 |
# TODO(#31): Let the model builders set the instruction.
|
42 |
-
def get_instruction(category,
|
|
|
43 |
if category == Category.SUMMARIZE.value:
|
44 |
-
return
|
|
|
45 |
if category == Category.TRANSLATE.value:
|
46 |
-
return
|
|
|
47 |
|
48 |
|
49 |
-
def get_responses(
|
|
|
50 |
if not category:
|
51 |
raise gr.Error("Please select a category.")
|
52 |
|
@@ -55,21 +58,19 @@ def get_responses(user_prompt, category, source_lang, target_lang):
|
|
55 |
raise gr.Error("Please select source and target languages.")
|
56 |
|
57 |
models: List[Model] = sample(list(supported_models), 2)
|
58 |
-
instruction = get_instruction(category, source_lang, target_lang)
|
59 |
-
|
60 |
responses = []
|
61 |
for model in models:
|
|
|
62 |
try:
|
63 |
# TODO(#1): Allow user to set configuration.
|
64 |
-
response = completion(
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
create_history(model.name, instruction, user_prompt, response)
|
73 |
responses.append(response)
|
74 |
|
75 |
# TODO(#1): Narrow down the exception type.
|
|
|
11 |
import gradio as gr
|
12 |
|
13 |
from leaderboard import db
|
|
|
14 |
from model import Model
|
15 |
from model import supported_models
|
16 |
|
|
|
38 |
|
39 |
|
40 |
# TODO(#31): Let the model builders set the instruction.
|
41 |
+
def get_instruction(category: str, model: Model, source_lang: str,
|
42 |
+
target_lang: str):
|
43 |
if category == Category.SUMMARIZE.value:
|
44 |
+
return model.summarize_instruction
|
45 |
+
|
46 |
if category == Category.TRANSLATE.value:
|
47 |
+
return model.translate_instruction.format(source_lang=source_lang,
|
48 |
+
target_lang=target_lang)
|
49 |
|
50 |
|
51 |
+
def get_responses(prompt: str, category: str, source_lang: str,
|
52 |
+
target_lang: str):
|
53 |
if not category:
|
54 |
raise gr.Error("Please select a category.")
|
55 |
|
|
|
58 |
raise gr.Error("Please select source and target languages.")
|
59 |
|
60 |
models: List[Model] = sample(list(supported_models), 2)
|
|
|
|
|
61 |
responses = []
|
62 |
for model in models:
|
63 |
+
instruction = get_instruction(category, model, source_lang, target_lang)
|
64 |
try:
|
65 |
# TODO(#1): Allow user to set configuration.
|
66 |
+
response = model.completion(messages=[{
|
67 |
+
"role": "system",
|
68 |
+
"content": instruction
|
69 |
+
}, {
|
70 |
+
"role": "user",
|
71 |
+
"content": prompt
|
72 |
+
}])
|
73 |
+
create_history(model.name, instruction, prompt, response)
|
|
|
74 |
responses.append(response)
|
75 |
|
76 |
# TODO(#1): Narrow down the exception type.
|