jacobwilsonx commited on
Commit
4bc038a
1 Parent(s): 9b63933

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import InferenceClient
2
+ import gradio as gr
3
+
4
+ model = 'HuggingFaceH4/zephyr-7b-alpha'
5
+
6
+ client = InferenceClient(model)
7
+
8
+ def format_prompt(message, history):
9
+ prompt = "<s>"
10
+ for user_prompt, bot_response in history:
11
+ prompt += f"[INST] {user_prompt} [/INST]"
12
+ prompt += f" {bot_response}</s> "
13
+ prompt += f"[INST] {message} [/INST]"
14
+ return prompt
15
+
16
+ def generate(
17
+ prompt, history, temperature=0.3, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0,
18
+ ):
19
+ temperature = float(temperature)
20
+ if temperature < 1e-2:
21
+ temperature = 1e-2
22
+ top_p = float(top_p)
23
+
24
+ generate_kwargs = dict(
25
+ temperature=temperature,
26
+ max_new_tokens=max_new_tokens,
27
+ top_p=top_p,
28
+ repetition_penalty=repetition_penalty,
29
+ do_sample=True,
30
+ seed=42,
31
+ )
32
+
33
+ formatted_prompt = format_prompt(prompt, history)
34
+
35
+ stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
36
+ output = ""
37
+
38
+ for response in stream:
39
+ output += response.token.text
40
+ yield output
41
+ return output
42
+
43
+ demo = gr.ChatInterface(fn=generate,
44
+ title="Zephyr-7B-Alpha Chat",
45
+ retry_btn=None,
46
+ undo_btn=None,
47
+ clear_btn=None
48
+ )
49
+
50
+ demo.queue().launch()