wop committed on
Commit
1cb6527
1 Parent(s): b81c54e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -33
app.py CHANGED
@@ -1,63 +1,67 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
  client = InferenceClient("wop/kosmox")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def respond(
11
- message,
12
  history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
  ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
 
28
  response = ""
29
 
 
30
  for message in client.chat_completion(
31
- messages,
32
  max_tokens=max_tokens,
33
  stream=True,
34
  temperature=temperature,
35
  top_p=top_p,
36
  ):
37
  token = message.choices[0].delta.content
38
-
39
  response += token
40
  yield response
41
 
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
  demo = gr.ChatInterface(
46
- respond,
47
  additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
  ],
59
  )
60
 
61
-
62
  if __name__ == "__main__":
63
- demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ from unsloth.chat_templates import get_chat_template
4
 
# Inference API client bound to the hosted model; respond() streams its
# completions through this object.
client = InferenceClient("wop/kosmox")

# Chat-template configuration: the "phi-3" template with a mapping from
# role/content keys to from/value keys (user -> human, assistant -> gpt).
# NOTE(review): tokenizer=None is a placeholder (see the inline comment), and
# the resulting `tokenizer` is not referenced anywhere else in the visible
# file -- confirm whether a real tokenizer should be supplied here.
tokenizer = get_chat_template(
    tokenizer=None,  # Assuming you need to pass an actual tokenizer here
    chat_template="phi-3",
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt",
    },
)
14
+
def format_messages(system_message, history, user_message):
    """Flatten a conversation into one newline-delimited prompt string.

    The prompt opens with the ``<s>`` marker, followed by the system message
    (when non-empty), every non-empty user and assistant turn from
    ``history`` in order, and finally the new user message. Each piece is
    terminated by a newline.

    Args:
        system_message: System prompt; skipped when empty or None.
        history: Iterable of ``(user_msg, assistant_msg)`` pairs. ``None`` is
            treated as an empty conversation.
        user_message: The message being sent now; always included.

    Returns:
        The assembled prompt string.
    """
    # NOTE(review): no role markers are emitted, so speakers are not
    # distinguishable in the prompt. If the phi-3 chat format is intended,
    # confirm the expected tags against the model's template.
    parts = ["<s>"]
    if system_message:
        parts.append(system_message)

    # `history or ()` lets a missing history behave like an empty one instead
    # of raising on iteration.
    for user_msg, assistant_msg in history or ():
        if user_msg:
            parts.append(user_msg)
        if assistant_msg:
            parts.append(assistant_msg)

    parts.append(user_message)
    # Single join instead of repeated string concatenation.
    return "".join(f"{part}\n" for part in parts)
29
 
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream the model's reply to ``message``, yielding the text so far.

    Args:
        message: The new user message.
        history: Earlier ``(user, assistant)`` turns; empty entries are
            skipped.
        system_message: System prompt; omitted from the request when empty.
        max_tokens: Upper bound on generated tokens, forwarded to the client.
        temperature: Sampling temperature, forwarded to the client.
        top_p: Nucleus-sampling threshold, forwarded to the client.

    Yields:
        The accumulated response text after each streamed token, so the
        caller can re-render the growing reply.
    """
    # chat_completion expects a list of role/content dicts rather than a
    # pre-flattened prompt string, so build the conversation explicitly.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""

    # Stream the response from the model. The loop variable is named `chunk`
    # so it does not shadow the `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # A stream chunk may carry no text (content of None or ""); appending
        # None would raise TypeError, so skip those chunks.
        if not token:
            continue
        response += token
        yield response
54
 
# Define the Gradio interface.
# The additional inputs are listed in the same order as respond()'s
# parameters after (message, history): system_message, max_tokens,
# temperature, top_p. Keep the two in sync when adding or reordering.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are AI.",
            label="System message",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()