aps19 committed
Commit 389c863
1 parent: 822424b

Update app.py

Files changed (1): app.py (+81, -21)
app.py CHANGED
@@ -1,11 +1,18 @@
 from huggingface_hub import InferenceClient
 import gradio as gr
-from werkzeug.urls import url_unquote
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
-client = InferenceClient("HuggingFaceH4/zephyr-7b-alpha")
+# Initialize the DialoGPT tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large", padding_side="left")
+model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
+
+client = InferenceClient(
+    "HuggingFaceH4/zephyr-7b-alpha"
+)
 
 def format_prompt(message, history):
-    system = "\nYou are a helpful virtual assistant that answers user's questions in a friendly manner.</s>\n"
+    system = "\nYou are a helpful virtual assistant that answers user's questions with easy-to-understand words.</s>\n"
     prompt = ""
     for user_prompt, bot_response in history:
         prompt += f"\n{user_prompt}</s>\n"
@@ -13,7 +20,14 @@ def format_prompt(message, history):
     prompt += f"\n{message}</s>\n"
     return prompt
 
-def generate_bot_response(prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0):
+def generate(
+    prompt,
+    history,
+    temperature=0.9,
+    max_new_tokens=500,
+    top_p=0.95,
+    repetition_penalty=1.0,
+):
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
@@ -30,7 +44,13 @@ def generate_bot_response(prompt, history, temperature=0.9, max_new_tokens=500,
 
     formatted_prompt = format_prompt(prompt, history)
 
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    stream = client.text_generation(
+        formatted_prompt,
+        **generate_kwargs,
+        stream=True,
+        details=True,
+        return_full_text=False,
+    )
     output = ""
 
     for response in stream:
@@ -38,25 +58,65 @@
         yield output
     return output
 
-# Define your agents and prompts here
-agents = [
-    {"role": "convincing", "prompt": "I'm here to assist you. Please feel comfortable sharing your information."},
-    {"role": "extract", "prompt": "Great! Please provide your name."},
-    {"role": "extract", "prompt": "Thank you! Now, please provide your email address."},
-    {"role": "smalltalk", "prompt": "I understand that sharing personal information can be sensitive. Let's talk about something else for a moment."},
+additional_inputs = [
+    gr.Slider(
+        label="Temperature",
+        value=0.9,
+        minimum=0.0,
+        maximum=1.0,
+        step=0.05,
+        interactive=True,
+        info="Higher values produce more diverse outputs",
+    ),
+    gr.Slider(
+        label="Max new tokens",
+        value=256,
+        minimum=0,
+        maximum=1048,
+        step=64,
+        interactive=True,
+        info="The maximum number of new tokens",
+    ),
+    gr.Slider(
+        label="Top-p (nucleus sampling)",
+        value=0.90,
+        minimum=0.0,
+        maximum=1,
+        step=0.05,
+        interactive=True,
+        info="Higher values sample more low-probability tokens",
+    ),
+    gr.Slider(
+        label="Repetition penalty",
+        value=1.2,
+        minimum=1.0,
+        maximum=2.0,
+        step=0.05,
+        interactive=True,
+        info="Penalize repeated tokens",
+    ),
 ]
 
-with gr.Blocks() as inf:
+css = """
+#mkd {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+}
+"""
+
+with gr.Blocks(css=css) as inf:
+    gr.HTML("<h1><center>DialoGPT-large</center></h1>")
+    gr.HTML(
+        "<h3><center>In this demo, you can chat with the <a href='https://huggingface.co/microsoft/DialoGPT-large'>DialoGPT-large</a> model. 💬</center></h3>"
+    )
     gr.ChatInterface(
-        generate_bot_response,
-        inputs=["", []],
-        outputs=["", []],
-        parameters=[
-            gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs"),
-            gr.Slider(label="Max new tokens", value=256, minimum=0, maximum=1048, step=64, interactive=True, info="The maximum number of new tokens"),
-            gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
-            gr.Slider(label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Penalize repeated tokens"),
+        generate,
+        additional_inputs=additional_inputs,
+        examples=[
+            ["Can a squirrel swim?"],
+            ["Write a poem about a squirrel."],
         ],
     )
 
-inf.queue().launch()
+inf.queue().launch(share=True)
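
Review note: in both the old and new `format_prompt`, the `system` string is assigned but never concatenated into `prompt`, so the system instruction never reaches the model, and the turns carry no role markers at all. A minimal sketch of a variant that injects it, assuming the `<|system|>` / `<|user|>` / `<|assistant|>` chat markers documented on the Zephyr model card (verify the exact tokens there before relying on them):

def format_prompt(message, history):
    # Sketch: prepend the system instruction using Zephyr-style chat markers.
    system = "You are a helpful virtual assistant that answers user's questions with easy-to-understand words."
    prompt = f"<|system|>\n{system}</s>\n"
    for user_prompt, bot_response in history:
        prompt += f"<|user|>\n{user_prompt}</s>\n"
        prompt += f"<|assistant|>\n{bot_response}</s>\n"
    # Leave the assistant tag open so the model continues from here.
    prompt += f"<|user|>\n{message}</s>\n<|assistant|>\n"
    return prompt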
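
Review note: the commit loads the DialoGPT-large tokenizer and model (and titles the demo "DialoGPT-large"), but `generate` still streams from the hosted zephyr-7b-alpha endpoint, so `model`, `tokenizer`, and the `torch` import are never used. If the intent is to answer locally with DialoGPT instead, a minimal non-streaming sketch in the style of the DialoGPT model card could look like the following; `local_dialogpt_reply` is a hypothetical helper, not part of this commit, and it reuses the `tokenizer` and `model` defined at the top of app.py:

def local_dialogpt_reply(message, history, max_new_tokens=256):
    # Encode the running conversation with eos_token separating turns,
    # following the pattern on the DialoGPT model card.
    text = ""
    for user_prompt, bot_response in history:
        text += user_prompt + tokenizer.eos_token + bot_response + tokenizer.eos_token
    text += message + tokenizer.eos_token
    input_ids = tokenizer.encode(text, return_tensors="pt")
    output_ids = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens, i.e. the bot's reply.
    return tokenizer.decode(output_ids[0, input_ids.shape[-1]:], skip_special_tokens=True)

Wiring this in would mean passing local_dialogpt_reply (or a streaming wrapper around it) to gr.ChatInterface in place of generate.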