MaziyarPanahi committed on
Commit
2d8310a
โ€ข
1 Parent(s): dabe47b

Add application file

Browse files
Files changed (2) hide show
  1. README.md +6 -5
  2. app.py +215 -0
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: Chat With Phi 2
3
- emoji: ๐Ÿ˜ป
4
- colorFrom: green
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 4.25.0
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Chat With Phi 2
3
+ emoji: ๐Ÿš€
4
+ colorFrom: red
5
+ colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 4.22.0
8
  app_file: app.py
9
+ pinned: true
10
+ license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ import requests
4
+ import json
5
+ import os
6
+ from urllib3.util.retry import Retry
7
+ from requests.adapters import HTTPAdapter
8
+
9
+ API_URL = os.getenv("API_URL")
10
+ API_KEY = os.getenv("API_KEY")
11
+
12
+ print(f"API_URL: {API_URL}")
13
+ print(f"API_KEY: {API_KEY}")
14
+
15
+ url = f"{API_URL}/v1/chat/completions"
16
+
17
+ # The headers for the HTTP request
18
+ headers = {
19
+ "accept": "application/json",
20
+ "Content-Type": "application/json",
21
+ "Authorization": f"Bearer {API_KEY}",
22
+ }
23
+
24
+
25
+ def is_valid_json(data):
26
+ try:
27
+ parsed_data = json.loads(data)
28
+ return True, parsed_data
29
+ except ValueError as e:
30
+ return False, str(e)
31
+
32
+
33
+ with gr.Blocks() as demo:
34
+
35
+ markup = gr.Markdown(
36
+ """
37
+ # Phi-2
38
+ This is a demo of the Phi-2 quantized model in GGUF (phi-2.Q5_K_M.gguf) hosted on K8s cluster.
39
+
40
+ The original models can be found [MaziyarPanahi/MaziyarPanahi/phi-2-GGUF](https://huggingface.co/MaziyarPanahi/phi-2-GGUF)"""
41
+ )
42
+ chatbot = gr.Chatbot(height=500)
43
+ msg = gr.Textbox(lines=1, label="User Message")
44
+ clear = gr.Button("Clear")
45
+ with gr.Row():
46
+
47
+ with gr.Column(scale=2):
48
+ system_prompt_input = gr.Textbox(
49
+ label="System Prompt",
50
+ placeholder="Type system prompt here...",
51
+ value="You are a helpful assistant.",
52
+ )
53
+ temperature_input = gr.Slider(
54
+ label="Temperature", minimum=0.0, maximum=1.0, value=0.9, step=0.01
55
+ )
56
+ max_new_tokens_input = gr.Slider(
57
+ label="Max New Tokens", minimum=0, maximum=1024, value=256, step=1
58
+ )
59
+
60
+ with gr.Column(scale=2):
61
+ top_p_input = gr.Slider(
62
+ label="Top P", minimum=0.0, maximum=1.0, value=0.95, step=0.01
63
+ )
64
+ top_k_input = gr.Slider(
65
+ label="Top K", minimum=1, maximum=100, value=50, step=1
66
+ )
67
+ repetition_penalty_input = gr.Slider(
68
+ label="Repetition Penalty",
69
+ minimum=1.0,
70
+ maximum=2.0,
71
+ value=1.1,
72
+ step=0.01,
73
+ )
74
+
75
+ def update_globals(
76
+ system_prompt, temperature, max_new_tokens, top_p, top_k, repetition_penalty
77
+ ):
78
+ global global_system_prompt, global_temperature, global_max_new_tokens, global_top_p, global_repetition_penalty, global_top_k
79
+ global_system_prompt = system_prompt
80
+ global_temperature = temperature
81
+ global_max_new_tokens = max_new_tokens
82
+ global_top_p = top_p
83
+ global_top_k = top_k
84
+ global_repetition_penalty = repetition_penalty
85
+
86
+ def user(user_message, history):
87
+ return "", history + [[user_message, None]]
88
+
89
+ def bot(
90
+ history,
91
+ system_prompt,
92
+ temperature,
93
+ max_new_tokens,
94
+ top_p,
95
+ top_k,
96
+ repetition_penalty,
97
+ ):
98
+ print(f"History in bot: {history}")
99
+ print(f"System Prompt: {system_prompt}")
100
+ print(f"Temperature: {temperature}")
101
+ print(f"Max New Tokens: {max_new_tokens}")
102
+ print(f"Top P: {top_p}")
103
+ print(f"Top K: {top_k}")
104
+ print(f"Repetition Penalty: {repetition_penalty}")
105
+
106
+ history_messages = [{"content": h[0], "role": "user"} for h in history if h[0]]
107
+ history[-1][1] = ""
108
+ sys_msg = [
109
+ {
110
+ "content": (
111
+ system_prompt if system_prompt else "You are a helpful assistant."
112
+ ),
113
+ "role": "system",
114
+ }
115
+ ]
116
+ history_messages = sys_msg + history_messages
117
+ print(history_messages)
118
+
119
+ # Create a session object
120
+ session = requests.Session()
121
+
122
+ # Define the retry strategy
123
+ retries = Retry(
124
+ total=5, # Total number of retries to allow
125
+ backoff_factor=1, # A backoff factor to apply between attempts
126
+ status_forcelist=[
127
+ 500,
128
+ 502,
129
+ 503,
130
+ 504,
131
+ ], # A set of HTTP status codes that we should force a retry on
132
+ allowed_methods=[
133
+ "HEAD",
134
+ "GET",
135
+ "OPTIONS",
136
+ "POST",
137
+ ], # HTTP methods to retry on
138
+ )
139
+ data = {
140
+ "messages": history_messages,
141
+ "stream": True,
142
+ "temprature": temperature,
143
+ "top_k": top_k,
144
+ "top_p": top_p,
145
+ "seed": 42,
146
+ "repeat_penalty": repetition_penalty,
147
+ "chat_format": "mistral-instruct",
148
+ "max_tokens": max_new_tokens,
149
+ # "response_format": {
150
+ # "type": "json_object",
151
+ # },
152
+ }
153
+
154
+ # Mount it for http usage
155
+ session.mount("http://", HTTPAdapter(max_retries=retries))
156
+
157
+ # Making the POST request with increased timeout and retry logic
158
+ try:
159
+ response = session.post(
160
+ url,
161
+ headers=headers,
162
+ data=json.dumps(data),
163
+ stream=True,
164
+ timeout=(10, 30),
165
+ )
166
+ if response.status_code == 200:
167
+ for line in response.iter_lines():
168
+ # Filter out keep-alive new lines
169
+ if line:
170
+ data = line.decode("utf-8").lstrip("data: ")
171
+ # Check if the examples are valid
172
+ valid_check = is_valid_json(data)
173
+ if valid_check[0]:
174
+ try:
175
+ # Attempt to parse the JSON dataa
176
+ # json_data = json.loads(data)
177
+ json_data = valid_check[1]
178
+
179
+ delta_content = (
180
+ json_data.get("choices", [{}])[0]
181
+ .get("delta", {})
182
+ .get("content", "")
183
+ )
184
+
185
+ if delta_content: # Ensure there's content to print
186
+ history[-1][1] += delta_content
187
+ time.sleep(0.05)
188
+ yield history
189
+ except json.JSONDecodeError as e:
190
+ print(f"Error decoding JSON: {e} date: {data}")
191
+ except requests.exceptions.RequestException as e:
192
+ print(f"An error occurred: {e}")
193
+
194
+ msg.submit(
195
+ user, [msg, chatbot], [msg, chatbot], queue=True, concurrency_limit=10
196
+ ).then(
197
+ bot,
198
+ inputs=[
199
+ chatbot,
200
+ system_prompt_input,
201
+ temperature_input,
202
+ max_new_tokens_input,
203
+ top_p_input,
204
+ top_k_input,
205
+ repetition_penalty_input,
206
+ ],
207
+ outputs=chatbot,
208
+ )
209
+
210
+ clear.click(lambda: None, None, chatbot, queue=False)
211
+
212
+
213
+ demo.queue(default_concurrency_limit=20, max_size=20, api_open=False)
214
+ if __name__ == "__main__":
215
+ demo.launch(show_api=False, share=False)