Files changed (1)
  1. app.py +46 -50
app.py CHANGED
@@ -1,41 +1,38 @@
  import gradio as gr
  from huggingface_hub import InferenceClient
  import random
- models=[
      "google/gemma-7b",
      "google/gemma-7b-it",
      "google/gemma-2b",
      "google/gemma-2b-it"
  ]
- clients=[
-     InferenceClient(models[0]),
-     InferenceClient(models[1]),
-     InferenceClient(models[2]),
-     InferenceClient(models[3]),
  ]
  def format_prompt(message, history):
      prompt = ""
      if history:
-         #<start_of_turn>userHow does the brain work?<end_of_turn><start_of_turn>model
          for user_prompt, bot_response in history:
-             prompt += f"<start_of_turn>user{user_prompt}<end_of_turn>"
-             prompt += f"<start_of_turn>model{bot_response}"
-     prompt += f"<start_of_turn>user{message}<end_of_turn><start_of_turn>model"
      return prompt

-
-
- def chat_inf(system_prompt,prompt,history,client_choice,seed,temp,tokens,top_p,rep_p):
-     #token max=8192
-     client=clients[int(client_choice)-1]
      if not history:
          history = []
-         hist_len=0
      if history:
-         hist_len=len(history)
-         print(hist_len)
-
-     #seed = random.randint(1,1111111111111111)
      generate_kwargs = dict(
          temperature=temp,
          max_new_tokens=tokens,
@@ -44,57 +41,56 @@ def chat_inf(system_prompt,prompt,history,client_choice,seed,temp,tokens,top_p,r
          do_sample=True,
          seed=seed,
      )
-     #formatted_prompt=prompt
      formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
      stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
      output = ""
-
      for response in stream:
          output += response.token.text
-         yield [(prompt,output)]
-     history.append((prompt,output))
      yield history

  def clear_fn():
-     return None,None,None
- rand_val=random.randint(1,1111111111111111)
- def check_rand(inp,val):
-     if inp==True:
-         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=random.randint(1,1111111111111111))
-     else:
-         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))


-
  with gr.Blocks() as app:
-     gr.HTML("""<center><h1 style='font-size:xx-large;'>Google Gemma Models</h1><br><h3>running on Huggingface Inference Client</h3><br><h7>EXPERIMENTAL""")
      chat_b = gr.Chatbot(height=500)
      with gr.Group():
          with gr.Row():
              with gr.Column(scale=3):
                  inp = gr.Textbox(label="Prompt")
-                 sys_inp = gr.Textbox(label="System Prompt (optional)")
                  with gr.Row():
                      with gr.Column(scale=2):
-                         btn = gr.Button("Chat")
                      with gr.Column(scale=1):
                          with gr.Group():
-                             stop_btn=gr.Button("Stop")
-                             clear_btn=gr.Button("Clear")
-                 client_choice=gr.Dropdown(label="Models",type='index',choices=[c for c in models],value=models[0],interactive=True)

              with gr.Column(scale=1):
                  with gr.Group():
-                     rand = gr.Checkbox(label="Random Seed", value=True)
-                     seed=gr.Slider(label="Seed", minimum=1, maximum=1111111111111111,step=1, value=rand_val)
-                     tokens = gr.Slider(label="Max new tokens",value=6400,minimum=0,maximum=8000,step=64,interactive=True, visible=True,info="The maximum number of tokens")
-                     temp=gr.Slider(label="Temperature",step=0.01, minimum=0.01, maximum=1.0, value=0.9)
-                     top_p=gr.Slider(label="Top-P",step=0.01, minimum=0.01, maximum=1.0, value=0.9)
-                     rep_p=gr.Slider(label="Repetition Penalty",step=0.1, minimum=0.1, maximum=2.0, value=1.0)
-

-
-     go=btn.click(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,client_choice,seed,temp,tokens,top_p,rep_p],chat_b)
-     stop_btn.click(None,None,None,cancels=go)
-     clear_btn.click(clear_fn,None,[inp,sys_inp,chat_b])
- app.queue(default_concurrency_limit=10).launch()
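A note on the prompt markup in format_prompt: Gemma's instruction-tuned checkpoints expect each turn as <start_of_turn>ROLE, a newline, the text, then <end_of_turn> and a newline (per Google's Gemma formatting documentation). The sketch below shows that format as a standalone helper; the name build_gemma_prompt is illustrative and not part of app.py.

def build_gemma_prompt(history, message):
    # Gemma chat markup, per Google's formatting docs:
    # "<start_of_turn>user\n{text}<end_of_turn>\n" for each turn.
    prompt = ""
    for user_msg, model_msg in history:
        prompt += f"<start_of_turn>user\n{user_msg}<end_of_turn>\n"
        prompt += f"<start_of_turn>model\n{model_msg}<end_of_turn>\n"
    # Leave the final model turn open so generation continues from it.
    prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
    return prompt

print(build_gemma_prompt([("Hi", "Hello!")], "How does the brain work?"))

Without the newlines, the role tag fuses with the message text (<start_of_turn>userHow does the brain work?), which is not the markup the -it models were trained on; the new version of format_prompt below reflects the documented format.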
 
@@ -1,41 +1,38 @@
  import gradio as gr
  from huggingface_hub import InferenceClient
  import random
+
+ models = [
      "google/gemma-7b",
      "google/gemma-7b-it",
      "google/gemma-2b",
      "google/gemma-2b-it"
  ]
+
+ clients = [
+     InferenceClient(models[0]),
+     InferenceClient(models[1]),
+     InferenceClient(models[2]),
+     InferenceClient(models[3]),
  ]
+
  def format_prompt(message, history):
      prompt = ""
      if history:
          for user_prompt, bot_response in history:
+             prompt += f"<start_of_turn>user\n{user_prompt}<end_of_turn>\n"
+             prompt += f"<start_of_turn>model\n{bot_response}<end_of_turn>\n"
+     prompt += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
      return prompt
 
+ def chat_inf(system_prompt, prompt, history, client_choice, seed, temp, tokens, top_p, rep_p):
+     client = clients[int(client_choice)]  # gr.Dropdown(type='index') passes a zero-based index
      if not history:
          history = []
+         hist_len = 0
      if history:
+         hist_len = len(history)
+
      generate_kwargs = dict(
          temperature=temp,
          max_new_tokens=tokens,
@@ -44,57 +41,56 @@ def chat_inf(system_prompt,prompt,history,client_choice,seed,temp,tokens,top_p,r
          do_sample=True,
          seed=seed,
      )
+
      formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
      stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
      output = ""
+
      for response in stream:
          output += response.token.text
+         yield history + [(prompt, output)]  # keep earlier turns visible while the new one streams
+     history.append((prompt, output))
      yield history

  def clear_fn():
+     return None, None, None

+ rand_val = random.randint(1, 1111111111111111)
+
+ def check_rand(inp, val):
+     if inp:
+         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=random.randint(1, 1111111111111111))
+     else:
+         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))

  with gr.Blocks() as app:
+     gr.HTML("""<center><h1 style='font-size:xx-large;'>Google Gemma Models</h1><br><h3>running on the Huggingface Inference Client</h3><br><h7>EXPERIMENTAL""")
      chat_b = gr.Chatbot(height=500)
      with gr.Group():
          with gr.Row():
              with gr.Column(scale=3):
                  inp = gr.Textbox(label="Prompt")
+                 sys_inp = gr.Textbox(label="System Prompt (optional)")
                  with gr.Row():
                      with gr.Column(scale=2):
+                         btn = gr.Button("Chat")
                      with gr.Column(scale=1):
                          with gr.Group():
+                             stop_btn = gr.Button("Stop")
+                             clear_btn = gr.Button("Clear")
+                 client_choice = gr.Dropdown(label="Models", type='index', choices=[c for c in models], value=models[0], interactive=True)

              with gr.Column(scale=1):
                  with gr.Group():
+                     rand = gr.Checkbox(label="Random Seed", value=True)
+                     seed = gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, step=1, value=rand_val)
+                     tokens = gr.Slider(label="Max new tokens", value=6400, minimum=0, maximum=8000, step=64, interactive=True, visible=True, info="The maximum number of tokens")
+                     temp = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
+                     top_p = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
+                     rep_p = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.0)

+     go = btn.click(check_rand, [rand, seed], seed).then(chat_inf, [sys_inp, inp, chat_b, client_choice, seed, temp, tokens, top_p, rep_p], chat_b)
+     stop_btn.click(None, None, None, cancels=go)
+     clear_btn.click(clear_fn, None, [inp, sys_inp, chat_b])
+
+ app.queue(default_concurrency_limit=10).launch()
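One wiring detail worth knowing: gr.Dropdown(type='index') passes the zero-based position of the selected choice to the event handler, so the handler can index the clients list directly; subtracting 1 would map the first choice to the last client. A minimal standalone illustration, with values assumed from the app above:

models = ["google/gemma-7b", "google/gemma-7b-it", "google/gemma-2b", "google/gemma-2b-it"]

def pick_model(client_choice):
    # client_choice arrives as a zero-based int when type='index' is used
    return models[int(client_choice)]

assert pick_model(0) == "google/gemma-7b"
assert pick_model(3) == "google/gemma-2b-it"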