xu song committed on
Commit
f0929ee
·
1 Parent(s): 12a161c
Files changed (3) hide show
  1. app.py +45 -38
  2. app_util.py +5 -3
  3. models/cpp_qwen2.py +10 -0
app.py CHANGED
@@ -12,7 +12,6 @@ system_list = [
12
  "你是一个心理咨询师。",
13
  ]
14
 
15
-
16
  doc = """\
17
  There are maily two types of user simulator:
18
  - prompt-based user-simulator (role-play)
@@ -24,48 +23,55 @@ with gr.Blocks() as demo:
24
  gr.HTML("""<h1 align="center">Distilling the Knowledge through Self Chatting</h1>""")
25
  gr.Markdown(doc, visible=False)
26
  with gr.Row():
27
- gr.Dropdown(
28
- ["Qwen2-0.5B-Instruct", "llama3.1", "gemini"],
29
- value="Qwen2-0.5B-Instruct",
30
- label="model",
31
- interactive=True,
32
- scale=1,
33
- visible=False
34
- )
35
- system = gr.Dropdown(
36
- choices=system_list,
37
- value=system_list[0],
38
- allow_custom_value=True,
39
- interactive=True,
40
- label="System message",
41
- scale=5,
42
- )
43
-
44
- chatbot = gr.Chatbot(avatar_images=("assets/man.png", "assets/bot.png"))
45
- with gr.Row():
46
- with gr.Column():
47
- slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
48
- value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
49
- slider_temperature = gr.Slider(minimum=0.1, maximum=10.0,
50
- value=config.DEFAULT_TEMPERATURE, step=0.1, label="Temperature",
51
- info="Larger temperature increase the randomness")
52
- slider_top_p = gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=config.DEFAULT_TOP_P,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
  )
59
 
60
- with gr.Column(scale=4):
 
61
  generated_text = gr.Textbox(show_label=False, placeholder="...", lines=10, visible=False)
62
  generate_btn = gr.Button("🤔️ Generate", variant="primary")
63
  with gr.Row():
64
- retry_btn = gr.Button("🔄 Regenerate")
65
- undo_btn = gr.Button("↩️ Undo")
66
- clear_btn = gr.Button("🗑️ Clear") # 🧹 Clear History (清除历史)
67
- # stop_btn = gr.Button("停止生成", variant="primary", visible=False)
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
 
71
  ########
@@ -81,8 +87,9 @@ with gr.Blocks() as demo:
81
  undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history])
82
 
83
  slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
84
- slider_top_p.change(set_top_p, inputs=[slider_top_p])
85
  slider_temperature.change(set_temperature, inputs=[slider_temperature])
 
 
86
 
87
  # demo.queue().launch(share=False, server_name="0.0.0.0")
88
  # demo.queue().launch(concurrency_count=1, max_size=5)
 
12
  "你是一个心理咨询师。",
13
  ]
14
 
 
15
  doc = """\
16
  There are maily two types of user simulator:
17
  - prompt-based user-simulator (role-play)
 
23
  gr.HTML("""<h1 align="center">Distilling the Knowledge through Self Chatting</h1>""")
24
  gr.Markdown(doc, visible=False)
25
  with gr.Row():
26
+ with gr.Column(scale=5):
27
+ system = gr.Dropdown(
28
+ choices=system_list,
29
+ value=system_list[0],
30
+ allow_custom_value=True,
31
+ interactive=True,
32
+ label="System message",
33
+ scale=5,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  )
35
 
36
+ chatbot = gr.Chatbot(avatar_images=("assets/man.png", "assets/bot.png"))
37
+
38
  generated_text = gr.Textbox(show_label=False, placeholder="...", lines=10, visible=False)
39
  generate_btn = gr.Button("🤔️ Generate", variant="primary")
40
  with gr.Row():
41
+ retry_btn = gr.Button("🔄 Regenerate", variant="secondary", size="sm", )
42
+ undo_btn = gr.Button("↩️ Undo", variant="secondary", size="sm", )
43
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary", size="sm", ) # 🧹 Clear History (清除历史)
44
+ # stop_btn = gr.Button("停止生成", variant="stop", visible=False)
45
 
46
+ with gr.Column(variant="compact"):
47
+ # with gr.Column():
48
+ model = gr.Dropdown(
49
+ ["Qwen2-0.5B-Instruct", "llama3.1", "gemini"],
50
+ value="Qwen2-0.5B-Instruct",
51
+ label="Model",
52
+ interactive=True,
53
+ # visible=False
54
+ )
55
+ with gr.Accordion(label="Parameters", open=True):
56
+ slider_max_tokens = gr.Slider(minimum=1, maximum=config.MAX_SEQUENCE_LENGTH,
57
+ value=config.DEFAULT_MAX_TOKENS, step=1, label="Max tokens")
58
+ slider_temperature = gr.Slider(minimum=0.1, maximum=10.0,
59
+ value=config.DEFAULT_TEMPERATURE, step=0.1, label="Temperature",
60
+ info="Larger temperature increase the randomness")
61
+ slider_top_p = gr.Slider(
62
+ minimum=0.1,
63
+ maximum=1.0,
64
+ value=config.DEFAULT_TOP_P,
65
+ step=0.05,
66
+ label="Top-p (nucleus sampling)",
67
+ )
68
+ slider_top_k = gr.Slider(
69
+ minimum=1,
70
+ maximum=200,
71
+ value=config.DEFAULT_TOP_K,
72
+ step=1,
73
+ label="Top-k",
74
+ )
75
 
76
 
77
  ########
 
87
  undo_btn.click(undo_generate, [chatbot, history], outputs=[generated_text, chatbot, history])
88
 
89
  slider_max_tokens.change(set_max_tokens, inputs=[slider_max_tokens])
 
90
  slider_temperature.change(set_temperature, inputs=[slider_temperature])
91
+ slider_top_p.change(set_top_p, inputs=[slider_top_p])
92
+ slider_top_k.change(set_top_k, inputs=[slider_top_k])
93
 
94
  # demo.queue().launch(share=False, server_name="0.0.0.0")
95
  # demo.queue().launch(concurrency_count=1, max_size=5)
app_util.py CHANGED
@@ -98,10 +98,12 @@ def reset_state(system):
98
  def set_max_tokens(max_tokens):
99
  bot.generation_kwargs["max_tokens"] = max_tokens
100
 
101
-
 
102
  def set_top_p(top_p):
103
  bot.generation_kwargs["top_p"] = top_p
104
 
 
 
 
105
 
106
- def set_temperature(temperature):
107
- bot.generation_kwargs["temperature"] = temperature
 
98
  def set_max_tokens(max_tokens):
99
  bot.generation_kwargs["max_tokens"] = max_tokens
100
 
101
+ def set_temperature(temperature):
102
+ bot.generation_kwargs["temperature"] = temperature
103
  def set_top_p(top_p):
104
  bot.generation_kwargs["top_p"] = top_p
105
 
106
+ def set_top_k(top_k):
107
+ bot.generation_kwargs["top_k"] = top_k
108
+
109
 
 
 
models/cpp_qwen2.py CHANGED
@@ -13,6 +13,13 @@ python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen1.5-0.5B-C
13
 
14
  ./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "You are a helpful assistant" -cnv
15
 
 
 
 
 
 
 
 
16
  """
17
 
18
  import json
@@ -37,6 +44,7 @@ class Qwen2Simulator(Simulator):
37
  tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
38
  n_ctx=config.MAX_SEQUENCE_LENGTH, #
39
  # n_threads=None, # 默认会根据cpu数来设置 n_threads
 
40
  verbose=False,
41
  )
42
  else:
@@ -45,6 +53,8 @@ class Qwen2Simulator(Simulator):
45
  repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
46
  filename="*fp16.gguf",
47
  tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
 
 
48
  verbose=False,
49
  )
50
  logger.info(f"llm has been initialized: {self.llm}, n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}")
 
13
 
14
  ./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "You are a helpful assistant" -cnv
15
 
16
+
17
+ ## reference
18
+
19
+ - https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/llms/llamacpp.py
20
+ - https://github.com/abetlen/llama-cpp-python/blob/main/examples/gradio_chat/server.py
21
+ - https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/server/app.py
22
+
23
  """
24
 
25
  import json
 
44
  tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
45
  n_ctx=config.MAX_SEQUENCE_LENGTH, #
46
  # n_threads=None, # 默认会根据cpu数来设置 n_threads
47
+ use_mlock=True,
48
  verbose=False,
49
  )
50
  else:
 
53
  repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
54
  filename="*fp16.gguf",
55
  tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
56
+ n_ctx=config.MAX_SEQUENCE_LENGTH,
57
+ use_mlock=True,
58
  verbose=False,
59
  )
60
  logger.info(f"llm has been initialized: {self.llm}, n_threads={self.llm.n_threads}, n_ctx={self.llm.n_ctx}")