|
import requests |
|
import os |
|
import gradio as gr |
|
import time |
|
|
|
# Hugging Face access token; raises KeyError at import time if HF_TOKEN is unset.
API_TOKEN = os.environ['HF_TOKEN']



# Hosted inference endpoint for the Phi-3-mini (4k context) instruct model.
API_URL = "https://api-inference.huggingface.co/models/microsoft/Phi-3-mini-4k-instruct"

# Bearer-token auth header sent with every inference request.
headers = {"Authorization": f"Bearer {API_TOKEN}"}
|
|
|
def query(payload):
    """POST *payload* to the Hugging Face Inference API and return the decoded JSON body."""
    resp = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=120,
    )
    return resp.json()
|
|
|
|
|
|
|
|
|
|
|
def chat(message, history):
    """Generate an assistant reply for *message* using the hosted Phi-3 model.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list
        Gradio chat history; each entry is a (user, assistant) pair.
        NOTE(review): only the assistant halves (``turn[1]``) are concatenated
        into the context sent to the model — past user turns are dropped.
        Preserved as-is; confirm this is intentional.

    Returns
    -------
    str
        The model's reply, or a diagnostic string if the API response does
        not have the expected shape.
    """
    # ''.join avoids the quadratic += string build of a manual loop.
    chat_history = ''.join(turn[1] for turn in history)

    # Phi-3 chat template for a single user turn.
    prompt = f"<|user|>\n{message}<|end|>\n<|assistant|>"
    user_input = chat_history + prompt
    inp_dict = {"inputs": user_input,
                "parameters": {"max_new_tokens": 2000}}
    output = query(inp_dict)
    try:
        output_text = output[0]['generated_text']
        # Strip the echoed context and the end-of-turn sentinel.
        formatted_assistant_msg = output_text.replace(chat_history, '').strip().removesuffix('<|end|>')
    except (KeyError, IndexError, TypeError):
        # Narrowed from a bare `except:` — these are the only exceptions the
        # lookup/format chain above can raise on an unexpected API payload.
        if isinstance(output, dict):
            # Error responses from the API arrive as a dict (e.g. {"error": ...}).
            formatted_assistant_msg = (
                f"Error has occured, type of output is {type(output)} "
                f"and keys of output are: {output.keys()}"
            )
        else:
            formatted_assistant_msg = (
                f"Error has occured, type of output is {type(output)} "
                f"and length of output is: {len(output)}"
            )
    return formatted_assistant_msg
|
|
|
# Minimal Gradio chat UI wired to the chat() handler above.
demo = gr.ChatInterface(chat)
|
|
|
if __name__ == '__main__':
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()