yuchenlin committed
Commit d8f6559 · 1 parent: f45484b

side by side

.gitignore ADDED
@@ -0,0 +1 @@
+__pycache__/
__pycache__/constant.cpython-311.pyc CHANGED
Binary files a/__pycache__/constant.cpython-311.pyc and b/__pycache__/constant.cpython-311.pyc differ
 
__pycache__/utils.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils.cpython-311.pyc and b/__pycache__/utils.cpython-311.pyc differ
 
app.py CHANGED
@@ -3,8 +3,8 @@ import os
 from typing import List
 import logging
 import urllib.request
-from utils import model_name_mapping, urial_template, openai_base_request, DEFAULT_API_KEY
-from constant import js_code_label, HEADER_MD
+from utils import model_name_mapping, urial_template, openai_base_request, chat_template, openai_chat_request
+from constant import js_code_label, HEADER_MD, BASE_TO_ALIGNED, MODELS
 from openai import OpenAI
 import datetime
 # add logging info to console
@@ -19,28 +19,49 @@ STOP_STRS = ['"""', '# Query:', '# Answer:']
 addr_limit_counter = {}
 LAST_UPDATE_TIME = datetime.datetime.now()

+
+models = MODELS
+
+
+# mega_hist = {
+#     "base": [],
+#     "aligned": []
+# }
+
 def respond(
     message,
     history: list[tuple[str, str]],
     max_tokens,
     temperature,
     top_p,
     rp,
     model_name,
-    together_api_key,
+    model_type,
+    api_key,
     request:gr.Request
 ):
     global STOP_STRS, urial_prompt, LAST_UPDATE_TIME, addr_limit_counter
-    rp = 1.0
-    prompt = urial_template(urial_prompt, history, message)
+
+    assert model_type in ["base", "aligned"]
+    # if history:
+    #     if model_type == "base":
+    #         mega_hist["base"] = history
+    #     else:
+    #         mega_hist["aligned"] = history
+
+
+    if model_type == "base":
+        prompt = urial_template(urial_prompt, history, message)
+    else:
+        messages = chat_template(history, message)

     # _model_name = "meta-llama/Llama-3-8b-hf"
     _model_name = model_name_mapping(model_name)

-    if together_api_key and len(together_api_key) == 64:
-        api_key = together_api_key
+    if api_key and len(api_key) == 64:
+        api_key = api_key
     else:
-        api_key = DEFAULT_API_KEY
+        api_key = None

     # headers = request.headers
     # if already 24 hours passed, reset the counter
@@ -53,12 +74,21 @@ def respond(
     if addr_limit_counter[host_addr] > 100:
         return "You have reached the limit of 100 requests for today. Please use your own API key."

-    infer_request = openai_base_request(prompt=prompt, model=_model_name,
-                                        temperature=temperature,
-                                        max_tokens=max_tokens,
-                                        top_p=top_p,
-                                        repetition_penalty=rp,
-                                        stop=STOP_STRS, api_key=api_key)
+    if model_type == "base":
+        infer_request = openai_base_request(prompt=prompt, model=_model_name,
+                                            temperature=temperature,
+                                            max_tokens=max_tokens,
+                                            top_p=top_p,
+                                            repetition_penalty=rp,
+                                            stop=STOP_STRS, api_key=api_key)
+    else:
+        infer_request = openai_chat_request(messages=messages, model=_model_name,
+                                            temperature=temperature,
+                                            max_tokens=max_tokens,
+                                            top_p=top_p,
+                                            repetition_penalty=rp,
+                                            stop=STOP_STRS, api_key=api_key)
+
     addr_limit_counter[host_addr] += 1
     logging.info(f"Requesting chat completion from OpenAI API with model {_model_name}")
     logging.info(f"addr_limit_counter: {addr_limit_counter}; Last update time: {LAST_UPDATE_TIME};")
@@ -66,45 +96,103 @@ def respond(
     response = ""
     for msg in infer_request:
         # print(msg.choices[0].delta.keys())
-        token = msg.choices[0].delta["content"]
-        should_stop = False
-        for _stop in STOP_STRS:
-            if _stop in response + token:
-                should_stop = True
+        if hasattr(msg.choices[0], "delta"):
+            # Note: 'ChoiceDelta' object may or may not be not subscriptable
+            if "content" in msg.choices[0].delta:
+                token = msg.choices[0].delta["content"]
+            else:
+                token = msg.choices[0].delta.content
+        else:
+            token = msg.choices[0].text
+        if model_type == "base":
+            should_stop = False
+            for _stop in STOP_STRS:
+                if _stop in response + token:
+                    should_stop = True
+                    break
+            if should_stop:
                 break
-        if should_stop:
-            break
+        if token is None:
+            continue
         response += token
-        if response.endswith('\n"'):
-            response = response[:-1]
-        elif response.endswith('\n""'):
-            response = response[:-2]
-        yield response
+        if model_type == "base":
+            if response.endswith('\n"'):
+                response = response[:-1]
+            elif response.endswith('\n""'):
+                response = response[:-2]
+        yield history + [(message, response)]
+        # mega_hist[model_type].append((message, response))
+        # yield mega_hist[model_type]
+

-with gr.Blocks(gr.themes.Soft(), js=js_code_label) as demo:
+
+def load_models(base_model_name):
+    print(f"base_model_name={base_model_name}")
+    out_box = [gr.Chatbot(), gr.Chatbot(), gr.Dropdown()]
+    out_box[0] = (gr.update(label=f"Chat with Base LLM: {base_model_name}"))
+    aligned_model_name = BASE_TO_ALIGNED[base_model_name]
+    out_box[1] = (gr.update(label=f"Chat with Aligned LLM: {aligned_model_name}"))
+    out_box[2] = (gr.update(value=aligned_model_name, interactive=False))
+    return out_box[0], out_box[1], out_box[2]
+
+def clear_fn():
+    # mega_hist["base"] = []
+    # mega_hist["aligned"] = []
+    return None, None, None
+
+
+with gr.Blocks(gr.themes.Soft(), js=js_code_label) as demo:
+    api_key = gr.Textbox(label="🔑 APIKey", placeholder="Enter your Together/Hyperbolic API Key. Leave it blank to use our key with limited usage.", type="password", elem_id="api_key", visible=False)
+
+    gr.Markdown(HEADER_MD)
+
     with gr.Row():
-        with gr.Column():
-            gr.Markdown(HEADER_MD)
-            model_name = gr.Radio(["Llama-3-8B", "Llama-3-70B", "Mistral-7B-v0.1",
-                                   "Mixtral-8x22B", "Qwen1.5-72B", "Yi-34B", "Llama-2-7B", "Llama-2-70B", "OLMO"]
-                                  , value="Llama-3-8B", label="Base LLM name")
-        with gr.Column():
-            together_api_key = gr.Textbox(label="🔑 Together APIKey", placeholder="Enter your Together API Key. Leave it blank to use our key with limited usage.", type="password", elem_id="api_key")
-        with gr.Column():
-            with gr.Row():
-                max_tokens = gr.Textbox(value=256, label="Max tokens")
-                temperature = gr.Textbox(value=0.5, label="Temperature")
-                top_p = gr.Textbox(value=0.9, label="Top-p")
-                rp = gr.Textbox(value=1.1, label="Repetition penalty")
-    chat = gr.ChatInterface(
-        respond,
-        additional_inputs=[max_tokens, temperature, top_p, rp, model_name, together_api_key],
-        # additional_inputs_accordion="⚙️ Parameters",
-        # fill_height=True,
-    )
-    chat.chatbot.label="Chat with Base LLMs via URIAL"
-    chat.chatbot.height = 550
-    chat.chatbot.show_copy_button = True
+        chat_a = gr.Chatbot(height=500, label="Chat with Base LLMs via URIAL")
+        chat_b = gr.Chatbot(height=500, label="Chat with Aligned LLMs")

+    with gr.Group():
+        with gr.Row():
+            with gr.Column(scale=2):
+                message = gr.Textbox(label="Prompt", placeholder="Enter your message here")
+        with gr.Row():
+            with gr.Column(scale=2):
+                with gr.Row():
+                    left_model_choice = gr.Dropdown(label="Base Model", choices=models, interactive=True)
+                    right_model_choice = gr.Textbox(label="Aligned Model", placeholder="xxx", visible=True)
+                with gr.Row():
+                    btn = gr.Button("🚀 Chat")
+                    # gr.Markdown("---")
+                with gr.Row():
+                    stop_btn = gr.Button("⏸️ Stop")
+                    clear_btn = gr.Button("🔁 Clear")
+                with gr.Row():
+                    gr.Markdown("We thank for the support from [Hyperbolic AI](https://hyperbolic.xyz/).")
+            with gr.Column(scale=1):
+                with gr.Accordion("⚙️ Params for **Base** LLM", open=True):
+                    with gr.Row():
+                        max_tokens_1 = gr.Slider(label="Max new tokens", value=256, minimum=0, maximum=2048, step=16, interactive=True, visible=True)
+                        temperature_1 = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
+                    with gr.Row():
+                        top_p_1 = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
+                        rp_1 = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.1)
+                with gr.Accordion("⚙️ Params for **Aligned** LLM", open=True):
+                    with gr.Row():
+                        max_tokens_2 = gr.Slider(label="Max new tokens", value=256, minimum=0, maximum=2048, step=16, interactive=True, visible=True)
+                        temperature_2 = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
+                    with gr.Row():
+                        top_p_2 = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
+                        rp_2 = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.0)
+
+    left_model_choice.change(load_models, [left_model_choice], [chat_a, chat_b, right_model_choice])
+
+    model_type_left = gr.Textbox(visible=False, value="base")
+    model_type_right = gr.Textbox(visible=False, value="aligned")
+
+    go1 = btn.click(respond, [message, chat_a, max_tokens_1, temperature_1, top_p_1, rp_1, left_model_choice, model_type_left, api_key], chat_a)
+    go2 = btn.click(respond, [message, chat_b, max_tokens_2, temperature_2, top_p_2, rp_2, right_model_choice, model_type_right, api_key], chat_b)
+
+    stop_btn.click(None, None, None, cancels=[go1, go2])
+    clear_btn.click(clear_fn, None, [message, chat_a, chat_b])
+
 if __name__ == "__main__":
     demo.launch(show_api=False)
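
For context, a minimal sketch (editor's illustration, not part of the commit) of the URIAL-style prompt string that utils.py's urial_template builds for the base-model path above; the history and query values here are made up, and the in-context examples normally come from urial_prompt:

# Illustrative only: shows the '# Query:' / '# Answer:' blocks sent to
# openai_base_request, and why STOP_STRS = ['"""', '# Query:', '# Answer:']
# is used to cut off generation.
history = [("What is URIAL?", "A tuning-free alignment method based on in-context examples.")]  # hypothetical turn
message = "Summarize Gradio in one sentence."  # hypothetical new query

prompt = ""
for user_msg, ai_msg in history:
    prompt += f'# Query:\n"""\n{user_msg}\n"""\n\n# Answer:\n"""\n{ai_msg}\n"""\n\n'
prompt += f'# Query:\n"""\n{message}\n"""\n\n# Answer:\n"""\n'
print(prompt)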
app_single.py ADDED
@@ -0,0 +1,117 @@
+import gradio as gr
+import os
+from typing import List
+import logging
+import urllib.request
+from utils import model_name_mapping, urial_template, openai_base_request
+from constant import js_code_label, HEADER_MD
+from openai import OpenAI
+import datetime
+# add logging info to console
+logging.basicConfig(level=logging.INFO)
+
+URIAL_VERSION = "inst_1k_v4.help"
+URIAL_URL = f"https://raw.githubusercontent.com/Re-Align/URIAL/main/urial_prompts/{URIAL_VERSION}.txt"
+urial_prompt = urllib.request.urlopen(URIAL_URL).read().decode('utf-8')
+urial_prompt = urial_prompt.replace("```", '"""') # new version of URIAL uses """ instead of ```
+STOP_STRS = ['"""', '# Query:', '# Answer:']
+
+addr_limit_counter = {}
+LAST_UPDATE_TIME = datetime.datetime.now()
+
+
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    max_tokens,
+    temperature,
+    top_p,
+    rp,
+    model_name,
+    api_key,
+    request:gr.Request
+):
+    global STOP_STRS, urial_prompt, LAST_UPDATE_TIME, addr_limit_counter
+    rp = 1.0
+    prompt = urial_template(urial_prompt, history, message)
+
+    # _model_name = "meta-llama/Llama-3-8b-hf"
+    _model_name = model_name_mapping(model_name)
+
+    if api_key and len(api_key) == 64:
+        api_key = api_key
+    else:
+        api_key = None
+
+    # headers = request.headers
+    # if already 24 hours passed, reset the counter
+    if datetime.datetime.now() - LAST_UPDATE_TIME > datetime.timedelta(days=1):
+        addr_limit_counter = {}
+        LAST_UPDATE_TIME = datetime.datetime.now()
+    host_addr = request.client.host
+    if host_addr not in addr_limit_counter:
+        addr_limit_counter[host_addr] = 0
+    if addr_limit_counter[host_addr] > 100:
+        return "You have reached the limit of 100 requests for today. Please use your own API key."
+
+    infer_request = openai_base_request(prompt=prompt, model=_model_name,
+                                        temperature=temperature,
+                                        max_tokens=max_tokens,
+                                        top_p=top_p,
+                                        repetition_penalty=rp,
+                                        stop=STOP_STRS, api_key=api_key)
+    addr_limit_counter[host_addr] += 1
+    logging.info(f"Requesting chat completion from OpenAI API with model {_model_name}")
+    logging.info(f"addr_limit_counter: {addr_limit_counter}; Last update time: {LAST_UPDATE_TIME};")
+
+    response = ""
+    for msg in infer_request:
+        # print(msg.choices[0].delta.keys())
+        if hasattr(msg.choices[0], "delta"):
+            token = msg.choices[0].delta["content"]
+        else:
+            token = msg.choices[0].text
+        should_stop = False
+        for _stop in STOP_STRS:
+            if _stop in response + token:
+                should_stop = True
+                break
+        if should_stop:
+            break
+        response += token
+        if response.endswith('\n"'):
+            response = response[:-1]
+        elif response.endswith('\n""'):
+            response = response[:-2]
+        yield response
+
+with gr.Blocks(gr.themes.Soft(), js=js_code_label) as demo:
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown(HEADER_MD)
+            model_name = gr.Radio(["Llama-3.1-405B-FP8", "Llama-3-70B", "Llama-3-8B",
+                                   "Mistral-7B-v0.1",
+                                   "Mixtral-8x22B", "Qwen1.5-72B", "Yi-34B", "Llama-2-7B", "Llama-2-70B", "OLMO"]
+                                  , value="Llama-3.1-405B-FP8", label="Base LLM name")
+        with gr.Column():
+            api_key = gr.Textbox(label="🔑 APIKey", placeholder="Enter your Together/Hyperbolic API Key. Leave it blank to use our key with limited usage.", type="password", elem_id="api_key", visible=False)
+        # with gr.Column():
+        with gr.Accordion("⚙️ Parameters for Base LLM", open=True):
+            with gr.Row():
+                max_tokens = gr.Textbox(value=256, label="Max tokens")
+                temperature = gr.Textbox(value=0.5, label="Temperature")
+                top_p = gr.Textbox(value=0.9, label="Top-p")
+                rp = gr.Textbox(value=1.1, label="Repetition penalty")
+    # with gr.Row():
+    chat = gr.ChatInterface(
+        respond,
+        additional_inputs=[max_tokens, temperature, top_p, rp, model_name, api_key],
+        # additional_inputs_accordion="⚙️ Parameters",
+        # fill_height=True,
+    )
+    chat.chatbot.label="Chat with Base LLMs via URIAL"
+    chat.chatbot.height = 550
+    chat.chatbot.show_copy_button = True
+
+if __name__ == "__main__":
+    demo.launch(show_api=False)
constant.py CHANGED
@@ -33,3 +33,57 @@ function addApiKeyLink() {
 }
 }
 """
+
+
+MODELS = ["Llama-3.1-405B-FP8", "Llama-3-70B", "Llama-3-8B",
+          "Mistral-7B-v0.1",
+          "Mixtral-8x22B", "Qwen1.5-72B", "Yi-34B", "Llama-2-7B", "Llama-2-70B", "OLMo-7B"]
+
+HYPERBOLIC_MODELS = ["meta-llama/Meta-Llama-3.1-405B-FP8", "meta-llama/Meta-Llama-3.1-405B-Instruct"]
+
+BASE_TO_ALIGNED = {
+    "Llama-3-70B": "Llama-3-70B-Instruct",
+    "Llama-3-8B": "Llama-3-8B-Instruct",
+    "Mistral-7B-v0.1": "Mistral-7B-v0.1-Instruct",
+    "Mixtral-8x22B": "Mixtral-8x22B-Instruct",
+    "Qwen1.5-72B": "Qwen1.5-72B-Instruct",
+    "Llama-3.1-405B-FP8": "Llama-3.1-405B-FP8-Instruct",
+    "Yi-34B": "Yi-34B-chat",
+    "Llama-2-7B": "Llama-2-7B-chat",
+    "Llama-2-70B": "Llama-2-70B-chat",
+    "OLMo-7B": "OLMo-7B-Instruct",
+}
+
+
+MODEL_MAPPING = {
+    "Llama-3-8B": "meta-llama/Llama-3-8b-hf",
+    "Llama-3-70B": "meta-llama/Llama-3-70b-hf",
+    "Llama-2-7B": "meta-llama/Llama-2-7b-hf",
+    "Llama-2-70B": "meta-llama/Llama-2-70b-hf",
+    "Mistral-7B-v0.1": "mistralai/Mistral-7B-v0.1",
+    "Mixtral-8x22B": "mistralai/Mixtral-8x22B",
+    "Qwen1.5-72B": "Qwen/Qwen1.5-72B",
+    "Yi-34B": "zero-one-ai/Yi-34B",
+    "Yi-6B": "zero-one-ai/Yi-6B",
+    "OLMo-7B": "allenai/OLMo-7B",
+    "Llama-3.1-405B-FP8": "meta-llama/Meta-Llama-3.1-405B-FP8",
+    # Aligned models below
+    "Llama-3-70B-Instruct": "meta-llama/Meta-Llama-3-70B-Instruct-Lite",
+    "Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct-Lite",
+    "Mistral-7B-v0.1-Instruct": "mistralai/Mistral-7B-Instruct-v0.1",
+    "Mixtral-8x22B-Instruct": "mistralai/Mixtral-8x22B-Instruct-v0.1",
+    "Qwen1.5-72B-Instruct": "Qwen/Qwen2-72B-Instruct",
+    "Yi-34B-chat": "zero-one-ai/Yi-34B-Chat",
+    "Llama-2-7B-chat": "meta-llama/Llama-2-7b-chat-hf",
+    "Llama-2-70B-chat": "meta-llama/Llama-2-70b-chat-hf",
+    "OLMo-7B-Instruct": "allenai/OLMo-7B-Instruct",
+    "Llama-3.1-405B-FP8-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
+}
+
+# import json
+# with open("together_model_ids.json", "r") as f:
+#     TOGETHER_MODEL_IDS = json.load(f)
+
+# for _, model_id in MODEL_MAPPING.items():
+#     if model_id not in TOGETHER_MODEL_IDS + HYPERBOLIC_MODELS:
+#         print(model_id)
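
As a quick illustration (editor's sketch, not part of the commit), the new dictionaries resolve a UI model name to its aligned counterpart and to the provider-side model IDs used by the API helpers; the values in the comments are copied from BASE_TO_ALIGNED and MODEL_MAPPING above:

base_ui_name = "Llama-3-8B"
aligned_ui_name = BASE_TO_ALIGNED[base_ui_name]   # "Llama-3-8B-Instruct"
base_api_id = MODEL_MAPPING[base_ui_name]         # "meta-llama/Llama-3-8b-hf"
aligned_api_id = MODEL_MAPPING[aligned_ui_name]   # "meta-llama/Meta-Llama-3-8B-Instruct-Lite"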
list_models.py ADDED
@@ -0,0 +1,24 @@
+import requests
+import json
+import os
+
+url = "https://api.together.xyz/v1/models"
+
+headers = {
+    "accept": "application/json",
+    "Authorization": f"Bearer {os.getenv('TOGETHER_API_KEY')}"
+}
+
+response = requests.get(url, headers=headers)
+
+data = response.json()
+keywords = ["OLMO"]
+
+model_ids = []
+for item in data:
+    if any(keyword.lower() in item["id"].lower() for keyword in keywords):
+        print(item["id"])
+        model_ids.append(item["id"])
+
+with open("together_model_ids.json", "w") as f:
+    json.dump(model_ids, f, indent=4)
together_model_ids.json ADDED
@@ -0,0 +1,179 @@
+[
+    "Nexusflow/NexusRaven-V2-13B",
+    "bert-base-uncased",
+    "WizardLM/WizardLM-13B-V1.2",
+    "codellama/CodeLlama-34b-Instruct-hf",
+    "google/gemma-7b",
+    "upstage/SOLAR-10.7B-Instruct-v1.0",
+    "zero-one-ai/Yi-34B",
+    "togethercomputer/StripedHyena-Hessian-7B",
+    "meta-llama/Llama-3-70b-chat-hf",
+    "teknium/OpenHermes-2-Mistral-7B",
+    "mistralai/Mixtral-8x7B-v0.1",
+    "WhereIsAI/UAE-Large-V1",
+    "hazyresearch/M2-BERT-2k-Retrieval-Encoder-V1",
+    "togethercomputer/Llama-2-7B-32K-Instruct",
+    "Undi95/ReMM-SLERP-L2-13B",
+    "meta-llama/Meta-Llama-Guard-3-8B",
+    "Undi95/Toppy-M-7B",
+    "Phind/Phind-CodeLlama-34B-v2",
+    "stabilityai/stable-diffusion-2-1",
+    "openchat/openchat-3.5-1210",
+    "Austism/chronos-hermes-13b",
+    "microsoft/phi-2",
+    "Qwen/Qwen1.5-0.5B",
+    "Qwen/Qwen1.5-1.8B",
+    "Qwen/Qwen1.5-4B",
+    "Qwen/Qwen1.5-7B",
+    "togethercomputer/m2-bert-80M-32k-retrieval",
+    "snorkelai/Snorkel-Mistral-PairRM-DPO",
+    "Qwen/Qwen1.5-7B-Chat",
+    "Qwen/Qwen1.5-14B",
+    "Qwen/Qwen1.5-14B-Chat",
+    "Qwen/Qwen1.5-72B",
+    "Qwen/Qwen1.5-1.8B-Chat",
+    "BAAI/bge-base-en-v1.5",
+    "Snowflake/snowflake-arctic-instruct",
+    "codellama/CodeLlama-13b-Python-hf",
+    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
+    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+    "togethercomputer/m2-bert-80M-2k-retrieval",
+    "deepseek-ai/deepseek-coder-33b-instruct",
+    "codellama/CodeLlama-34b-Python-hf",
+    "NousResearch/Nous-Hermes-Llama2-13b",
+    "lmsys/vicuna-13b-v1.5",
+    "Qwen/Qwen1.5-0.5B-Chat",
+    "codellama/CodeLlama-70b-Python-hf",
+    "codellama/CodeLlama-7b-Instruct-hf",
+    "NousResearch/Nous-Hermes-2-Yi-34B",
+    "codellama/CodeLlama-13b-Instruct-hf",
+    "BAAI/bge-large-en-v1.5",
+    "togethercomputer/Llama-3-8b-chat-hf-int4",
+    "meta-llama/Llama-2-13b-hf",
+    "teknium/OpenHermes-2p5-Mistral-7B",
+    "NousResearch/Nous-Capybara-7B-V1p9",
+    "WizardLM/WizardCoder-Python-34B-V1.0",
+    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
+    "togethercomputer/StripedHyena-Nous-7B",
+    "togethercomputer/alpaca-7b",
+    "garage-bAInd/Platypus2-70B-instruct",
+    "google/gemma-2b",
+    "google/gemma-2b-it",
+    "google/gemma-7b-it",
+    "meta-llama/Llama-2-7b-chat-hf",
+    "allenai/OLMo-7B",
+    "allenai/OLMo-7B-Instruct",
+    "Qwen/Qwen1.5-4B-Chat",
+    "stabilityai/stable-diffusion-xl-base-1.0",
+    "Gryphe/MythoMax-L2-13b",
+    "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+    "meta-llama/LlamaGuard-2-8b",
+    "mistralai/Mistral-7B-Instruct-v0.1",
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    "meta-llama/Meta-Llama-3-8B",
+    "mistralai/Mistral-7B-v0.1",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+    "Open-Orca/Mistral-7B-OpenOrca",
+    "Qwen/Qwen1.5-32B",
+    "NousResearch/Nous-Hermes-llama-2-7b",
+    "Qwen/Qwen1.5-32B-Chat",
+    "mistralai/Mixtral-8x22B",
+    "Qwen/Qwen2-72B-Instruct",
+    "Qwen/Qwen1.5-72B-Chat",
+    "meta-llama/Meta-Llama-3-70B",
+    "meta-llama/Llama-3-8b-hf",
+    "deepseek-ai/deepseek-llm-67b-chat",
+    "sentence-transformers/msmarco-bert-base-dot-v5",
+    "zero-one-ai/Yi-6B",
+    "lmsys/vicuna-7b-v1.5",
+    "togethercomputer/m2-bert-80M-8k-retrieval",
+    "microsoft/WizardLM-2-8x22B",
+    "togethercomputer/Llama-3-8b-chat-hf-int8",
+    "wavymulder/Analog-Diffusion",
+    "mistralai/Mistral-7B-Instruct-v0.3",
+    "Qwen/Qwen1.5-110B-Chat",
+    "runwayml/stable-diffusion-v1-5",
+    "prompthero/openjourney",
+    "meta-llama/Llama-2-7b-hf",
+    "SG161222/Realistic_Vision_V3.0_VAE",
+    "meta-llama/Llama-2-13b-chat-hf",
+    "google/gemma-2-27b-it",
+    "zero-one-ai/Yi-34B-Chat",
+    "meta-llama/Meta-Llama-3-70B-Instruct-Turbo",
+    "meta-llama/Meta-Llama-3-70B-Instruct-Lite",
+    "google/gemma-2-9b-it",
+    "google/gemma-2-9b",
+    "meta-llama/Llama-3-8b-chat-hf",
+    "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "codellama/CodeLlama-70b-hf",
+    "togethercomputer/LLaMA-2-7B-32K",
+    "databricks/dbrx-instruct",
+    "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+    "meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+    "cognitivecomputations/dolphin-2.5-mixtral-8x7b",
+    "mistralai/Mixtral-8x22B-Instruct-v0.1",
+    "togethercomputer/evo-1-131k-base",
+    "meta-llama/Llama-2-70b-hf",
+    "codellama/CodeLlama-70b-Instruct-hf",
+    "meta-llama/Meta-Llama-3-8B-Instruct-Lite",
+    "togethercomputer/evo-1-8k-base",
+    "meta-llama/Llama-2-70b-chat-hf",
+    "codellama/CodeLlama-7b-Python-hf",
+    "Meta-Llama/Llama-Guard-7b",
+    "togethercomputer/Koala-7B",
+    "Qwen/Qwen2-1.5B-Instruct",
+    "Qwen/Qwen2-7B-Instruct",
+    "NousResearch/Nous-Hermes-13b",
+    "togethercomputer/guanaco-65b",
+    "togethercomputer/llama-2-7b",
+    "huggyllama/llama-7b",
+    "lmsys/vicuna-7b-v1.3",
+    "Qwen/Qwen2-72B",
+    "Phind/Phind-CodeLlama-34B-Python-v1",
+    "NumbersStation/nsql-llama-2-7B",
+    "NousResearch/Nous-Hermes-Llama2-70b",
+    "WizardLM/WizardLM-70B-V1.0",
+    "huggyllama/llama-65b",
+    "lmsys/vicuna-13b-v1.5-16k",
+    "HuggingFaceH4/zephyr-7b-beta",
+    "togethercomputer/llama-2-13b",
+    "togethercomputer/CodeLlama-7b-Instruct",
+    "togethercomputer/guanaco-13b",
+    "togethercomputer/CodeLlama-34b-Python",
+    "togethercomputer/CodeLlama-34b-Instruct",
+    "togethercomputer/CodeLlama-34b",
+    "togethercomputer/llama-2-70b",
+    "codellama/CodeLlama-13b-hf",
+    "Qwen/Qwen2-7B",
+    "Qwen/Qwen2-1.5B",
+    "togethercomputer/CodeLlama-13b-Instruct",
+    "togethercomputer/llama-2-13b-chat",
+    "lmsys/vicuna-13b-v1.3",
+    "huggyllama/llama-13b",
+    "huggyllama/llama-30b",
+    "togethercomputer/guanaco-33b",
+    "togethercomputer/Koala-13B",
+    "togethercomputer/llama-2-7b-chat",
+    "togethercomputer/SOLAR-10.7B-Instruct-v1.0-int4",
+    "togethercomputer/guanaco-7b",
+    "EleutherAI/llemma_7b",
+    "meta-llama/Meta-Llama-3-8B-Instruct",
+    "codellama/CodeLlama-34b-hf",
+    "meta-llama/Meta-Llama-3-70B-Instruct",
+    "meta-llama/Llama-3-70b-hf",
+    "togethercomputer/CodeLlama-7b-Python",
+    "NousResearch/Hermes-2-Theta-Llama-3-70B",
+    "carson/ml318bit",
+    "togethercomputer/CodeLlama-13b-Python",
+    "codellama/CodeLlama-7b-hf",
+    "togethercomputer/llama-2-70b-chat",
+    "carson/ml31405bit",
+    "carson/ml3170bit",
+    "carson/mlg38b",
+    "carson/ml318br",
+    "meta-llama/Meta-Llama-3.1-8B-Reference",
+    "gradientai/Llama-3-70B-Instruct-Gradient-1048k",
+    "meta-llama/Meta-Llama-3.1-70B-Instruct-Reference",
+    "meta-llama/Meta-Llama-3.1-70B-Reference"
+]
utils.py CHANGED
@@ -3,36 +3,15 @@ from openai import OpenAI
 import logging
 from typing import List
 import os
+from constant import HYPERBOLIC_MODELS, MODEL_MAPPING

-BASE_URL = "https://api.together.xyz/v1"
-DEFAULT_API_KEY = os.getenv("TOGETHER_API_KEY")

 def model_name_mapping(model_name):
-    if model_name == "Llama-3-8B":
-        _model_name = "meta-llama/Llama-3-8b-hf"
-    elif model_name == "Llama-3-70B":
-        _model_name = "meta-llama/Llama-3-70b-hf"
-    elif model_name == "Llama-2-7B":
-        _model_name = "meta-llama/Llama-2-7b-hf"
-    elif model_name == "Llama-2-70B":
-        _model_name = "meta-llama/Llama-2-70b-hf"
-    elif model_name == "Mistral-7B-v0.1":
-        _model_name = "mistralai/Mistral-7B-v0.1"
-    elif model_name == "Mixtral-8x22B":
-        _model_name = "mistralai/Mixtral-8x22B"
-    elif model_name == "Qwen1.5-72B":
-        _model_name = "Qwen/Qwen1.5-72B"
-    elif model_name == "Yi-34B":
-        _model_name = "zero-one-ai/Yi-34B"
-    elif model_name == "Yi-6B":
-        _model_name = "zero-one-ai/Yi-6B"
-    elif model_name == "OLMO":
-        _model_name = "allenai/OLMo-7B"
-    elif model_name == "Qwen1.5-72B":
-        _model_name = "Qwen/Qwen1.5-72B"
+    model_mapping = MODEL_MAPPING
+    if model_name in model_mapping:
+        return model_mapping[model_name]
     else:
-        raise ValueError("Invalid model name")
-    return _model_name
+        raise ValueError("Invalid model name:", model_name)


 def urial_template(urial_prompt, history, message):
@@ -41,7 +20,14 @@ def urial_template(urial_prompt, history, message):
         current_prompt += f'# Query:\n"""\n{user_msg}\n"""\n\n# Answer:\n"""\n{ai_msg}\n"""\n\n'
     current_prompt += f'# Query:\n"""\n{message}\n"""\n\n# Answer:\n"""\n'
     return current_prompt
-
+
+def chat_template(history, message):
+    messages = []
+    for user_msg, ai_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": ai_msg})
+    messages.append({"role": "user", "content": message})
+    return messages

 def openai_base_request(
     model: str=None,
@@ -54,11 +40,18 @@ def openai_base_request(
     stop: List[str]=None,
     api_key: str=None,
     ):
+
+    if model in HYPERBOLIC_MODELS:
+        BASE_URL = "https://api.hyperbolic.xyz/v1"
+        DEFAULT_API_KEY = os.getenv("HYPERBOLIC_API_KEY")
+    else:
+        BASE_URL = "https://api.together.xyz/v1"
+        DEFAULT_API_KEY = os.getenv("TOGETHER_API_KEY")
+
     if api_key is None:
         api_key = DEFAULT_API_KEY
     client = OpenAI(api_key=api_key, base_url=BASE_URL)
-    # print(f"Requesting chat completion from OpenAI API with model {model}")
-    logging.info(f"Requesting chat completion from OpenAI API with model {model}")
+    logging.info(f"Requesting base completion from OpenAI API with model {model}")
     logging.info(f"Prompt: {prompt}")
     logging.info(f"Temperature: {temperature}")
     logging.info(f"Max tokens: {max_tokens}")
@@ -80,3 +73,44 @@ def openai_base_request(

     return request

+
+
+def openai_chat_request(
+    model: str=None,
+    temperature: float=0,
+    max_tokens: int=512,
+    top_p: float=1.0,
+    messages=None,
+    n: int=1,
+    repetition_penalty: float=1.0,
+    stop: List[str]=None,
+    api_key: str=None,
+    ):
+
+    if model in HYPERBOLIC_MODELS:
+        BASE_URL = "https://api.hyperbolic.xyz/v1"
+        DEFAULT_API_KEY = os.getenv("HYPERBOLIC_API_KEY")
+    else:
+        BASE_URL = "https://api.together.xyz/v1"
+        DEFAULT_API_KEY = os.getenv("TOGETHER_API_KEY")
+
+    if api_key is None:
+        api_key = DEFAULT_API_KEY
+
+    logging.info(f"Requesting chat completion from OpenAI API with model {model}")
+
+    client = OpenAI(api_key=api_key, base_url=BASE_URL)
+
+    request = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        temperature=float(temperature),
+        max_tokens=int(max_tokens),
+        top_p=float(top_p),
+        n=n,
+        extra_body={'repetition_penalty': float(repetition_penalty)},
+        stop=stop,
+        stream=True
+    )
+    return request
+
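
A minimal consumption sketch (editor's illustration, not in the commit): both helpers return a streaming iterator because stream=True, and respond() in app.py reads each chunk's token from msg.choices[0].delta.content (or .text for base completions). Assuming TOGETHER_API_KEY is set in the environment, the chat helper can be exercised like this:

from utils import openai_chat_request

# Stream a short reply from an aligned model; the model ID is taken from MODEL_MAPPING.
stream = openai_chat_request(
    model="meta-llama/Meta-Llama-3-8B-Instruct-Lite",
    messages=[{"role": "user", "content": "Say hi in one word."}],
    max_tokens=16,
)
reply = ""
for msg in stream:
    token = msg.choices[0].delta.content
    if token:  # the final chunk's delta may carry no content
        reply += token
print(reply)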