daniloedu committed on
Commit
94a4897
·
1 Parent(s): 8c5a3b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -30
app.py CHANGED
@@ -2,47 +2,37 @@ import os
2
  import requests
3
  import gradio as gr
4
  from dotenv import load_dotenv
5
- from transformers import AutoTokenizer
6
 
7
  load_dotenv()
8
 
9
- model_name = "tiiuae/falcon-7b-instruct"
10
- tokenizer = AutoTokenizer.from_pretrained(model_name)
 
11
 
12
- API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct"
13
  headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}
14
 
15
- def format_chat_prompt(message, instruction):
16
- prompt = f"System:{instruction}\nUser: {message}\nAssistant:"
17
- return prompt
18
-
19
- def query(payload):
20
- response = requests.post(API_URL, headers=headers, json=payload)
21
  return response.json()
22
-
23
- def respond(message, instruction="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers."):
24
- MAX_TOKENS = 1024 # limit for the model
25
- prompt = format_chat_prompt(message, instruction)
26
- # Check if the prompt is too long and, if so, truncate it
27
- num_tokens = len(tokenizer.encode(prompt))
28
- if num_tokens > MAX_TOKENS:
29
- # Truncate the prompt to fit within the token limit
30
- prompt = tokenizer.decode(tokenizer.encode(prompt)[-MAX_TOKENS:])
31
-
32
- response = query({"inputs": prompt})
33
- generated_text = response[0]['generated_text']
34
- assistant_message = generated_text.split("Assistant:")[-1]
35
- assistant_message = assistant_message.split("User:")[0].strip() # Only keep the text before the first "User:"
36
- return assistant_message
37
 
38
  iface = gr.Interface(
39
  respond,
40
- inputs=[
41
- gr.inputs.Textbox(label="Your question"),
42
- gr.inputs.Textbox(label="System message", lines=2, default="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers.")
43
- ],
44
  outputs=[
45
- gr.outputs.Textbox(label="AI's response")
 
 
46
  ],
47
  )
48
 
 
2
  import requests
3
  import gradio as gr
4
  from dotenv import load_dotenv
 
5
 
6
  load_dotenv()
7
 
8
# Hugging Face Inference API endpoints for the three chat models that the
# UI queries side-by-side for comparison.
API_URL_FALCON = "https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct"
API_URL_GUANACO = "https://api-inference.huggingface.co/models/timdettmers/guanaco-33b-merged"
API_URL_PYTHIA = "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"

# Bearer token read from the environment; `load_dotenv()` above makes a local
# .env file visible here. If HF_API_KEY is unset this becomes "Bearer None"
# and the API will reject requests — NOTE(review): consider failing fast.
headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}
13
 
14
def query(api_url, payload, timeout=30):
    """POST `payload` as JSON to `api_url` and return the parsed JSON reply.

    Uses the module-level `headers` for the HF bearer-token auth.

    Args:
        api_url: full Inference API endpoint URL for one model.
        payload: JSON-serializable request body, e.g. {"inputs": prompt}.
        timeout: seconds to wait for the HTTP response. Without a timeout,
            requests can block forever on a stalled endpoint (e.g. while a
            model is cold-loading), freezing the Gradio app.

    Returns:
        The decoded JSON response — a list of generations on success, or an
        error dict such as {"error": ...} from the Inference API.
    """
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    return response.json()
17
+
18
def respond(message):
    """Send the same prompt to all three models and return their completions.

    Args:
        message: the raw user prompt forwarded as {"inputs": message}.

    Returns:
        A 3-tuple of strings: (falcon_text, guanaco_text, pythia_text).
        On an API failure the corresponding slot carries a readable error
        message instead of crashing the interface.
    """

    def _extract_text(response):
        # Successful Inference API calls return [{"generated_text": ...}];
        # failures (model loading, rate limiting, bad token) return
        # {"error": ...}. Indexing a dict with [0] would raise, so branch
        # on the actual shape.
        if isinstance(response, list) and response and 'generated_text' in response[0]:
            return response[0]['generated_text']
        if isinstance(response, dict) and 'error' in response:
            return f"Error: {response['error']}"
        return f"Unexpected response: {response}"

    generated_text_falcon = _extract_text(query(API_URL_FALCON, {"inputs": message}))
    generated_text_guanaco = _extract_text(query(API_URL_GUANACO, {"inputs": message}))
    generated_text_pythia = _extract_text(query(API_URL_PYTHIA, {"inputs": message}))

    return generated_text_falcon, generated_text_guanaco, generated_text_pythia
 
 
 
 
28
 
29
# Build the comparison UI: one prompt box in, one output box per model.
# `gr.inputs.*` / `gr.outputs.*` are the legacy pre-3.x namespaces (removed
# in modern Gradio); the plain `gr.Textbox` component works for both roles.
iface = gr.Interface(
    respond,
    inputs=gr.Textbox(label="Prompt"),
    outputs=[
        gr.Textbox(label="Falcon Response"),
        gr.Textbox(label="Guanaco Response"),
        gr.Textbox(label="Pythia Response"),
    ],
)
38