fxmarty commited on
Commit
d6e5fcb
1 Parent(s): 384f0b8
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import random
3
+ import time
4
+
5
+ from huggingface_hub import InferenceClient
6
+ from transformers import AutoTokenizer
7
+
8
# Tokenizer is used ONLY to render the chat history into the Llama-3 prompt
# string (apply_chat_template below); generation itself runs on the remote
# endpoint. NOTE(review): downloading this tokenizer requires accepting the
# Meta-Llama license / an authenticated HF token — confirm deploy env has one.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

# Text-generation-inference server reached through an ngrok tunnel.
# NOTE(review): free ngrok URLs are ephemeral — this must be updated whenever
# the tunnel restarts.
client = InferenceClient(model="https://770c-20-63-4-233.ngrok-free.app")

# System message that seeds every conversation (first entry of the dict-format
# history kept in gr.State below).
SYSTEM_COMMAND = {"role": "system", "content": "Context: date: Monday 20th May 2024; location: NYC; running on: 8 AMD Instinct MI300 GPU; model name: Llama 70B. Only provide these information if asked. You are a knowledgeable assistant trained to provide accurate and helpful information. Please respond to the user's queries promptly and politely."}

# Control tokens that must never be shown in the visible transcript. None is
# included defensively in case the stream yields it. NOTE(review): membership
# is an exact-match test, so the prefix "<|reserved_special_token" will not
# match full tokens like "<|reserved_special_token_0|>" — verify intended.
IGNORED_TOKENS = {None, "<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token"}
# Sequences that terminate generation server-side (stop_sequences= below).
STOP_TOKENS = ["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token"]
17
with gr.Blocks() as demo:
    # Conversation in the OpenAI-style dict format expected by
    # tokenizer.apply_chat_template, seeded with the system prompt. Kept in
    # gr.State so each browser session gets its own copy.
    tfs_history = gr.State([SYSTEM_COMMAND])
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history, dict_history):
        """Record the user's message and clear the textbox.

        Appends the message to both the display history (as a new
        [user, None] pair — bot() streams the reply into the None slot)
        and the dict-format history.

        Returns:
            ("", updated chatbot history, updated dict history)
        """
        dict_history.append({"role": "user", "content": user_message})
        return "", history + [[user_message, None]], dict_history

    def bot(history, dict_history):
        """Stream the assistant's reply token-by-token into the chatbot.

        Yields the display history after every streamed token. The assistant
        message is appended to dict_history in a finally block so the prompt
        stays well-formed on the next turn even if streaming fails midway.
        """
        history[-1][1] = ""
        response = {"role": "assistant", "content": ""}
        # Render the whole conversation into the Llama-3 prompt format.
        text_input = tokenizer.apply_chat_template(
            dict_history, tokenize=False, add_generation_prompt=True
        )

        try:
            for token in client.text_generation(
                prompt=text_input,
                max_new_tokens=100,
                stop_sequences=STOP_TOKENS,
                stream=True,
            ):
                # Hide control tokens from the user-visible transcript.
                if token not in IGNORED_TOKENS:
                    history[-1][1] += token
                    response["content"] += token
                yield history
        finally:
            # Best-effort: keep whatever was generated, even on error.
            dict_history.append(response)

    msg.submit(
        user,
        inputs=[msg, chatbot, tfs_history],
        outputs=[msg, chatbot, tfs_history],
        queue=False,
    ).then(
        bot,
        [chatbot, tfs_history],
        chatbot,
    )
    # Reset BOTH the visible chat and the dict-format history. The original
    # only cleared the display, so the model kept receiving the "cleared"
    # conversation on subsequent turns.
    clear.click(
        lambda: (None, [SYSTEM_COMMAND]),
        None,
        [chatbot, tfs_history],
        queue=False,
    )

demo.queue()
demo.launch()