sotirios-slv committed on
Commit
73c11b0
•
1 Parent(s): 0d0810c

Switched template

Files changed (2)
  1. app.py +104 -151
  2. requirements.txt +1 -3
app.py CHANGED
@@ -1,158 +1,111 @@
  import gradio as gr
- import os
- import spaces
- from transformers import GemmaTokenizer, AutoModelForCausalLM
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
- from threading import Thread
-
- # Set an environment variable
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
-
-
- DESCRIPTION = """
- <div>
- <h1 style="text-align: center;">Meta Llama3 8B</h1>
- <p>This Space demonstrates the instruction-tuned model <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct"><b>Meta Llama3 8b Chat</b></a>. Meta Llama3 is the new open LLM and comes in two sizes: 8b and 70b. Feel free to play with it, or duplicate to run privately!</p>
- <p>🔎 For more details about the Llama3 release and how to use the model with <code>transformers</code>, take a look <a href="https://huggingface.co/blog/llama3">at our blog post</a>.</p>
- <p>🦕 Looking for an even more powerful model? Check out the <a href="https://huggingface.co/chat/"><b>Hugging Chat</b></a> integration for Meta Llama 3 70b</p>
- </div>
- """
-
- LICENSE = """
- <p/>
- ---
- Built with Meta Llama 3
- """
-
- PLACEHOLDER = """
- <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
- <img src="./images/val_speaking_transparent.gif" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; ">
- <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Val</h1>
- <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Hi i'm Val, ask me anything about working for VPS...</p>
- </div>
- """
-
-
- css = """
- h1 {
-     text-align: center;
-     display: block;
- }
- #duplicate-button {
-     margin: auto;
-     color: white;
-     background: #1565c0;
-     border-radius: 100vh;
- }
- """
-
- # Load the tokenizer and model
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
- model = AutoModelForCausalLM.from_pretrained(
-     "meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto"
- )  # to("cuda:0")
- terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]
-
-
- @spaces.GPU(duration=120)
- def chat_llama3_8b(
-     message: str, history: list, temperature: float, max_new_tokens: int
- ) -> str:
-     """
-     Generate a streaming response using the llama3-8b model.
-     Args:
-         message (str): The input message.
-         history (list): The conversation history used by ChatInterface.
-         temperature (float): The temperature for generating the response.
-         max_new_tokens (int): The maximum number of new tokens to generate.
-     Returns:
-         str: The generated response.
-     """
-     conversation = []
-     for user, assistant in history:
-         conversation.extend(
-             [
-                 {"role": "user", "content": user},
-                 {"role": "assistant", "content": assistant},
-             ]
-         )
-     conversation.append({"role": "user", "content": message})
-
-     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(
-         model.device
      )
-
-     streamer = TextIteratorStreamer(
-         tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
      )

-     generate_kwargs = dict(
-         input_ids=input_ids,
-         streamer=streamer,
-         max_new_tokens=max_new_tokens,
-         do_sample=True,
-         temperature=temperature,
-         eos_token_id=terminators,
      )
-     # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
-     if temperature == 0:
-         generate_kwargs["do_sample"] = False
-
-     t = Thread(target=model.generate, kwargs=generate_kwargs)
-     t.start()
-
-     outputs = []
-     for text in streamer:
-         outputs.append(text)
-         # print(outputs)
-         yield "".join(outputs)
-
-
- # Gradio block
- chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label="Gradio ChatInterface")
-
- with gr.Blocks(fill_height=True, css=css) as demo:
-
-     gr.Markdown(DESCRIPTION)
-     gr.DuplicateButton(
-         value="Duplicate Space for private use", elem_id="duplicate-button"
      )
-     gr.ChatInterface(
-         fn=chat_llama3_8b,
-         chatbot=chatbot,
-         fill_height=True,
-         additional_inputs_accordion=gr.Accordion(
-             label="⚙️ Parameters", open=False, render=False
-         ),
-         additional_inputs=[
-             gr.Slider(
-                 minimum=0,
-                 maximum=1,
-                 step=0.1,
-                 value=0.95,
-                 label="Temperature",
-                 render=False,
-             ),
-             gr.Slider(
-                 minimum=128,
-                 maximum=4096,
-                 step=1,
-                 value=512,
-                 label="Max new tokens",
-                 render=False,
-             ),
-         ],
-         examples=[
-             ["Where is the nearest ."],
-             ["Tell me about working for the Victorian Public Sector."],
-             ["How do I book leave?"],
-             ["Tell me about my organisations Disability Network"],
-             [""],
-         ],
-         cache_examples=False,
-     )
-
-     gr.Markdown(LICENSE)

- if __name__ == "__main__":
-     demo.launch()
  import gradio as gr
+ import os  # required by os.getenv below
+ from http import HTTPStatus
+ import dashscope
+ from dashscope import Generation
+ from dashscope.api_entities.dashscope_response import Role
+ from typing import List, Optional, Tuple, Dict
+
+ default_system = "You are a helpful assistant."
+
+ YOUR_API_TOKEN = os.getenv("YOUR_API_TOKEN")
+ dashscope.api_key = YOUR_API_TOKEN
+
+ History = List[Tuple[str, str]]
+ Messages = List[Dict[str, str]]
+
+
+ def clear_session() -> Tuple[str, History]:
+     return "", []
+
+
+ def modify_system_session(system: str) -> Tuple[str, str, History]:
+     if system is None or len(system) == 0:
+         system = default_system
+     return system, system, []
+
+
+ def history_to_messages(history: History, system: str) -> Messages:
+     messages = [{"role": Role.SYSTEM, "content": system}]
+     for h in history:
+         messages.append({"role": Role.USER, "content": h[0]})
+         messages.append({"role": Role.ASSISTANT, "content": h[1]})
+     return messages
+
+
+ def messages_to_history(messages: Messages) -> Tuple[str, History]:
+     assert messages[0]["role"] == Role.SYSTEM
+     system = messages[0]["content"]
+     history = []
+     for q, r in zip(messages[1::2], messages[2::2]):
+         history.append([q["content"], r["content"]])
+     return system, history
+
+
+ def model_chat(
+     query: Optional[str], history: Optional[History], system: str
+ ) -> Tuple[str, History, str]:
+     if query is None:
+         query = ""
+     if history is None:
+         history = []
+     messages = history_to_messages(history, system)
+     messages.append({"role": Role.USER, "content": query})
+     gen = Generation.call(
+         model="qwen1.5-72b-chat",
+         messages=messages,
+         result_format="message",
+         stream=True,
      )
+     # Each streamed chunk carries the cumulative reply, so the visible
+     # history is rebuilt from the latest chunk on every iteration.
+     for response in gen:
+         if response.status_code == HTTPStatus.OK:
+             role = response.output.choices[0].message.role
+             content = response.output.choices[0].message.content
+             system, history = messages_to_history(
+                 messages + [{"role": role, "content": content}]
+             )
+             yield "", history, system
+         else:
+             # urllib's HTTPError constructor requires (url, code, msg, hdrs, fp),
+             # so raise a plain RuntimeError with the diagnostic details instead
+             raise RuntimeError(
+                 "Request id: %s, Status code: %s, error code: %s, error message: %s"
+                 % (
+                     response.request_id,
+                     response.status_code,
+                     response.code,
+                     response.message,
+                 )
+             )
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown("""<center><font size=8>Qwen1.5-72B-Chat</center>""")
+     gr.Markdown(
+         """<center><font size=4>Qwen1.5-72B-Chat is the 72-billion-parameter chat model of the Qwen series.</center>"""
      )

+     with gr.Row():
+         with gr.Column(scale=3):
+             system_input = gr.Textbox(value=default_system, lines=1, label="System")
+         with gr.Column(scale=1):
+             modify_system = gr.Button("🛠️ Set system prompt and clear history.", scale=2)
+     system_state = gr.Textbox(value=default_system, visible=False)
+     chatbot = gr.Chatbot(label="Qwen1.5-72B-Chat")
+     textbox = gr.Textbox(lines=2, label="Input")
+
+     with gr.Row():
+         clear_history = gr.Button("🧹 Clear history")
+         submit = gr.Button("🚀 Send")
+
+     submit.click(
+         model_chat,
+         inputs=[textbox, chatbot, system_state],
+         outputs=[textbox, chatbot, system_input],
      )
+     clear_history.click(fn=clear_session, inputs=[], outputs=[textbox, chatbot])
+     modify_system.click(
+         fn=modify_system_session,
+         inputs=[system_input],
+         outputs=[system_state, system_input, chatbot],
      )

+ demo.queue(api_open=False).launch(max_threads=10, height=800, share=False)
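
The new app streams by re-yielding DashScope's cumulative chunks. Below is a self-contained sketch of that call pattern outside Gradio (hypothetical usage, not part of this commit; the key value is a placeholder and it assumes the dashscope package is installed):

from http import HTTPStatus

import dashscope
from dashscope import Generation

dashscope.api_key = "sk-placeholder"  # assumption: substitute a real DashScope key

# stream=True yields partial responses; by default each chunk carries the
# full reply generated so far, which is why app.py rebuilds the chat
# history from the latest chunk on every iteration.
responses = Generation.call(
    model="qwen1.5-72b-chat",
    messages=[{"role": "user", "content": "Hello!"}],
    result_format="message",
    stream=True,
)
for chunk in responses:
    if chunk.status_code == HTTPStatus.OK:
        print(chunk.output.choices[0].message.content)  # cumulative text
    else:
        print("Error:", chunk.request_id, chunk.code, chunk.message)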
 
requirements.txt CHANGED
@@ -1,3 +1 @@
- accelerate
- transformers
- SentencePiece
+ dashscope
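
requirements.txt drops the local transformers stack because inference now goes through the DashScope API, so the Space only works when YOUR_API_TOKEN holds a valid key. A hedged startup guard sketching that dependency (the variable name comes from app.py; the check itself is hypothetical and not in the commit):

import os

# app.py reads the DashScope key via os.getenv("YOUR_API_TOKEN"); on a
# Hugging Face Space this would normally be set as a repository secret.
if not os.getenv("YOUR_API_TOKEN"):
    raise SystemExit("Set YOUR_API_TOKEN to a valid DashScope API key before launching app.py.")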