Shriharsh committed on
Commit 1af4ceb • 1 Parent(s): 37db6a9
Files changed (1)
  1. app.py +97 -0
app.py ADDED
@@ -0,0 +1,97 @@
+ from huggingface_hub import InferenceClient
+ import gradio as gr
+
+ # Inference API client for the hosted BioMistral-7B model
+ client = InferenceClient(
+     "BioMistral/BioMistral-7B"
+ )
+
+ def format_prompt(message, history):
+     # Build a Mistral-style prompt: each past turn is wrapped in [INST] tags,
+     # followed by the bot's reply, then the new user message is appended.
+     prompt = "<s>"
+     for user_prompt, bot_response in history:
+         prompt += f"[INST] {user_prompt} [/INST]"
+         prompt += f" {bot_response}</s> "
+     prompt += f"[INST] {message} [/INST]"
+     return prompt
+
+ def generate(
+     prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
+ ):
+     # Clamp temperature to a small positive value so sampling stays valid
+     temperature = float(temperature)
+     if temperature < 1e-2:
+         temperature = 1e-2
+     top_p = float(top_p)
+
+     generate_kwargs = dict(
+         temperature=temperature,
+         max_new_tokens=max_new_tokens,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+         do_sample=True,
+         seed=42,
+     )
+
+     formatted_prompt = format_prompt(prompt, history)
+
+     # Stream tokens from the Inference API, yielding the accumulated text
+     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+     output = ""
+
+     for response in stream:
+         output += response.token.text
+         yield output
+     return output
+
+
+ additional_inputs = [
+     gr.Slider(
+         label="Temperature",
+         value=0.9,
+         minimum=0.0,
+         maximum=1.0,
+         step=0.05,
+         interactive=True,
+         info="Higher values produce more diverse outputs",
+     ),
+     gr.Slider(
+         label="Max new tokens",
+         value=512,
+         minimum=0,
+         maximum=1048,
+         step=64,
+         interactive=True,
+         info="The maximum number of new tokens",
+     ),
+     gr.Slider(
+         label="Top-p (nucleus sampling)",
+         value=0.90,
+         minimum=0.0,
+         maximum=1.0,
+         step=0.05,
+         interactive=True,
+         info="Higher values sample more low-probability tokens",
+     ),
+     gr.Slider(
+         label="Repetition penalty",
+         value=1.2,
+         minimum=1.0,
+         maximum=2.0,
+         step=0.05,
+         interactive=True,
+         info="Penalize repeated tokens",
+     )
+ ]
+
+ # Create a Chatbot component with the desired height and bubble layout
+ chatbot = gr.Chatbot(height=450,
+                      layout="bubble")
+
+ with gr.Blocks() as demo:
+     gr.HTML("<h1><center>🤖 BioMistral-7B Chat 💬</center></h1>")
+     gr.ChatInterface(
+         generate,
+         chatbot=chatbot,  # use the Chatbot component created above
+         additional_inputs=additional_inputs,
+         examples=[["Give me the code for Binary Search in C++"], ["Explain 'The Grand Inquisitor' chapter from The Brothers Karamazov"]],
+     )
+
+ demo.queue().launch(debug=True)
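
For reference, below is a minimal standalone sketch of the same streaming call the app makes, handy for testing the endpoint outside Gradio. It assumes the Hugging Face Inference API is reachable (and that any required access token is configured, e.g. via the HF_TOKEN environment variable); the question string is a hypothetical example, and the prompt mirrors what format_prompt produces for an empty history.

from huggingface_hub import InferenceClient

# Standalone sketch of the streaming text_generation call used in app.py
client = InferenceClient("BioMistral/BioMistral-7B")

# Single-turn prompt in the same [INST] format that format_prompt builds
prompt = "<s>[INST] What are common symptoms of hypertension? [/INST]"

output = ""
for response in client.text_generation(
    prompt,
    max_new_tokens=128,
    temperature=0.7,
    stream=True,
    details=True,
    return_full_text=False,
):
    output += response.token.text  # each stream event carries one generated token
print(output)

For a multi-turn history, format_prompt would instead produce e.g. "<s>[INST] Hi [/INST] Hello!</s> [INST] follow-up question [/INST]". Running python app.py launches the queued Gradio demo locally; debug=True keeps request logs visible in the console.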