Tomoniai committed on
Commit 76ce008
1 Parent(s): 51c0647

Create app.py

Files changed (1)
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
+ from huggingface_hub import InferenceClient
+ import gradio as gr
+ import random
+
+ client = InferenceClient("google/gemma-2b-it")
+
+
+ def format_prompt(message, history):
+     # Build a Gemma-format chat prompt from the conversation history.
+     prompt = ""
+     if history:
+         for user_prompt, bot_response in history:
+             prompt += f"<start_of_turn>user\n{user_prompt}<end_of_turn>\n"
+             prompt += f"<start_of_turn>model\n{bot_response}<end_of_turn>\n"
+     prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
+     return prompt
+
+
+ def generate(prompt, history, temperature=0.7, max_new_tokens=1024, top_p=0.90, repetition_penalty=1.0):
+     temperature = float(temperature)
+     if temperature < 1e-2:
+         temperature = 1e-2
+     top_p = float(top_p)
+
+     if not history:
+         history = []
+
+     # Random seed so regenerated answers can differ between runs.
+     rand_seed = random.randint(1, 1111111111111111)
+
+     generate_kwargs = dict(
+         temperature=temperature,
+         max_new_tokens=max_new_tokens,
+         top_p=top_p,
+         repetition_penalty=repetition_penalty,
+         do_sample=True,
+         seed=rand_seed,
+     )
+
+     formatted_prompt = format_prompt(prompt, history)
+
+     # Stream tokens from the Inference API and yield the growing answer.
+     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+     output = ""
+
+     for response in stream:
+         output += response.token.text
+         yield output
+     history.append((prompt, output))
+     return output
+
+
+ mychatbot = gr.Chatbot(
+     avatar_images=["./user.png", "./botgm.png"],
+     bubble_full_width=False,
+     show_label=False,
+     show_copy_button=True,
+     likeable=True,
+ )
+
+ additional_inputs = [
+     gr.Slider(
+         label="Temperature",
+         value=0.7,
+         minimum=0.0,
+         maximum=1.0,
+         step=0.01,
+         interactive=True,
+         info="Higher values generate more diverse outputs",
+     ),
+     gr.Slider(
+         label="Max new tokens",
+         value=6400,
+         minimum=0,
+         maximum=8000,
+         step=64,
+         interactive=True,
+         info="The maximum number of new tokens",
+     ),
+     gr.Slider(
+         label="Top-p",
+         value=0.90,
+         minimum=0.0,
+         maximum=1.0,
+         step=0.01,
+         interactive=True,
+         info="Higher values sample more low-probability tokens",
+     ),
+     gr.Slider(
+         label="Repetition penalty",
+         value=1.0,
+         minimum=0.1,
+         maximum=2.0,
+         step=0.1,
+         interactive=True,
+         info="Penalize repeated tokens",
+     ),
+ ]
+
+ iface = gr.ChatInterface(
+     fn=generate,
+     chatbot=mychatbot,
+     additional_inputs=additional_inputs,
+     retry_btn=None,
+     undo_btn=None,
+ )
+
+ with gr.Blocks() as demo:
+     gr.HTML("<center><h1>Tomoniai's Chat with Google's Gemma</h1></center>")
+     iface.render()
+
+ demo.queue().launch(show_api=False)
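
For reference, below is a minimal standalone sketch of the prompt string that format_prompt builds. The turn markers and newline placement follow Google's published Gemma chat template; the one-turn history and message are made-up examples, not part of the commit.

    # Hypothetical one-turn history plus a new user message.
    history = [("Hi!", "Hello! How can I help you today?")]
    message = "Tell me a joke."

    prompt = ""
    for user_prompt, bot_response in history:
        prompt += f"<start_of_turn>user\n{user_prompt}<end_of_turn>\n"
        prompt += f"<start_of_turn>model\n{bot_response}<end_of_turn>\n"
    prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
    print(prompt)

The final <start_of_turn>model marker is deliberately left unterminated so the model completes the assistant turn from there. Note that app.py also assumes the Space ships the user.png and botgm.png avatar images and a requirements.txt listing gradio and huggingface_hub; none of those files are part of this commit.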