wq2012 committed
Commit fcc93c7 · verified · 1 Parent(s): 9b9a3bc

Update app.py

Files changed (1)
  1. app.py +23 -30
app.py CHANGED
@@ -2,22 +2,15 @@ import gradio as gr
 from gpt4all import GPT4All
 from huggingface_hub import hf_hub_download
 
-title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"
+title = "DiarizationLM GGUF inference on CPU"
 
 description = """
-🔎 [Mistral AI's Mistral 7B Instruct v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) [GGUF format model](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF) , 4-bit quantization balanced quality gguf version, running on CPU. English Only (Also support other languages but the quality's not good). Using [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all).
-
-🔨 Running on CPU-Basic free hardware. Suggest duplicating this space to run without a queue.
-
-Mistral does not support system prompt symbol (such as ```<<SYS>>```) now, input your system prompt in the first message if you need. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing).
-"""
-
-"""
-[Model From TheBloke/Mistral-7B-Instruct-v0.1-GGUF](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF)
-[Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
+DiarizationLM GGUF inference on CPU
 """
 
 model_path = "models"
+# model_name = "model-unsloth.Q4_K_M.gguf"
+# hf_hub_download(repo_id="google/DiarizationLM-13b-Fisher-v1", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
 model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
 hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
 
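For reference, the download-then-load step in this hunk can be exercised on its own. A minimal sketch using the same repo, filename, and `GPT4All` arguments as the diff (the commented-out `google/DiarizationLM-13b-Fisher-v1` lines suggest the Mistral GGUF is a temporary stand-in for the DiarizationLM GGUF):

```python
from gpt4all import GPT4All
from huggingface_hub import hf_hub_download

# Fetch the quantized GGUF file into ./models (created if missing).
model_path = "models"
model_name = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    filename=model_name,
    local_dir=model_path,
    local_dir_use_symlinks=False,
)

# allow_download=False makes gpt4all use only the local file;
# device="cpu" matches the CPU-Basic hardware the Space runs on.
model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
print(model.generate("Say hello.", max_tokens=16))
```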
@@ -25,35 +18,37 @@ print("Start the model init process")
 model = model = GPT4All(model_name, model_path, allow_download = False, device="cpu")
 print("Finish the model init process")
 
-model.config["promptTemplate"] = "[INST] {0} [/INST]"
+model.config["promptTemplate"] = "{0} --> "
 model.config["systemPrompt"] = ""
 model._is_chat_session_activated = False
 
 max_new_tokens = 2048
 
+print("Finish the model config process")
+
 def generater(message, history, temperature, top_p, top_k):
-    prompt = "<s>"
-    for user_message, assistant_message in history:
-        prompt += model.config["promptTemplate"].format(user_message)
-        prompt += assistant_message + "</s>"
-    prompt += model.config["promptTemplate"].format(message)
+    prompt = model.config["promptTemplate"].format(message)
     outputs = []
     for token in model.generate(prompt=prompt, temp=temperature, top_k = top_k, top_p = top_p, max_tokens = max_new_tokens, streaming=True):
         outputs.append(token)
         yield "".join(outputs)
 
+
 def vote(data: gr.LikeData):
     if data.liked:
         return
     else:
         return
 
-chatbot = gr.Chatbot(avatar_images=('resourse/user-icon.png', 'resourse/chatbot-icon.png'),bubble_full_width = False)
+print("Create chatbot")
+chatbot = gr.Chatbot()
+print("Created chatbot")
 
+print("Add additional_inputs")
 additional_inputs=[
     gr.Slider(
         label="temperature",
-        value=0.5,
+        value=0.0,
         minimum=0.0,
         maximum=2.0,
         step=0.05,
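The substance of this hunk: the Mistral chat template `[INST] {0} [/INST]` and the multi-turn prompt assembly over `history` are replaced by DiarizationLM's completion-style template `{0} --> `, so each request is now an independent completion rather than a chat turn. A quick illustration of the difference, with a hypothetical input message and no model involved:

```python
# Hypothetical input, for illustration only.
message = "<speaker:1> Hello, how are you <speaker:2> doing today"

# Old prompt (Mistral instruct format, first turn of a chat):
old_prompt = "<s>" + "[INST] {0} [/INST]".format(message)

# New prompt (DiarizationLM completion format, single turn, history unused):
new_prompt = "{0} --> ".format(message)

print(old_prompt)  # <s>[INST] <speaker:1> Hello, ... [/INST]
print(new_prompt)  # <speaker:1> Hello, ... -->
```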
@@ -71,7 +66,7 @@ additional_inputs=[
     ),
     gr.Slider(
         label="top_k",
-        value=40,
+        value=50,
         minimum=0,
         maximum=1000,
         step=1,
@@ -79,9 +74,7 @@ additional_inputs=[
         info="limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates this limit.",
     )
 ]
-
-character = "Sherlock Holmes"
-series = "Arthur Conan Doyle's novel"
+print("Added additional_inputs")
 
 iface = gr.ChatInterface(
     fn = generater,
@@ -90,17 +83,17 @@ iface = gr.ChatInterface(
     chatbot=chatbot,
     additional_inputs=additional_inputs,
     examples=[
-        ["Hello there! How are you doing?"],
-        ["How many hours does it take a man to eat a Helicopter?"],
-        ["You are a helpful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."],
-        ["I want you to act as a spoken English teacher and improver. I will speak to you in English and you will reply to me in English to practice my spoken English. I want you to strictly correct my grammar mistakes, typos, and factual errors. I want you to ask me a question in your reply. Now let's start practicing, you could ask me a question first. Remember, I want you to strictly correct my grammar mistakes, typos, and factual errors."],
-        [f"I want you to act like {character} from {series}. I want you to respond and answer like {character} using the tone, manner and vocabulary {character} would use. Do not write any explanations. Only answer like {character}. You must know all of the knowledge of {character}."]
-    ]
+        ["<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well."],
+    ]
 )
 
-with gr.Blocks(css="resourse/style/custom.css") as demo:
+print("Added iface")
+
+with gr.Blocks() as demo:
     chatbot.like(vote, None, None)
     iface.render()
 
+print("Rendered iface")
+
 if __name__ == "__main__":
     demo.queue(max_size=3).launch()
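For context on the new example input: DiarizationLM takes ASR hypothesis text with inline speaker tags and completes it with corrected speaker attribution. The ` --> ` separator comes from this diff; the corrected-output shape and the `[eod]` end marker below are assumptions based on upstream DiarizationLM conventions, not on anything in this commit:

```python
# What the promptTemplate produces for the UI example (taken from the diff):
prompt = "<speaker:1> Hello, how are you doing <speaker:2> today? I am doing well. --> "

# Illustrative completion: words re-attributed to the correct speakers
# (assumed output format; the app simply streams whatever the model emits).
completion = "<speaker:1> Hello, how are you doing today? <speaker:2> I am doing well. [eod]"
```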
 
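One detail worth keeping in mind when adapting this app: `gr.ChatInterface` streaming replaces the displayed reply with each yielded value, so the generator must yield the cumulative text so far rather than individual tokens. That is why `generater` accumulates into `outputs` and joins on every step. A minimal sketch of that contract with simulated tokens (no model required):

```python
import time

import gradio as gr

def fake_stream(message, history):
    # Yield the full response so far on every step; Gradio overwrites
    # the chatbot bubble with each yielded value.
    tokens = ["Hello", ",", " ", "world", "!"]
    outputs = []
    for token in tokens:
        outputs.append(token)
        time.sleep(0.1)  # simulate per-token latency
        yield "".join(outputs)

if __name__ == "__main__":
    gr.ChatInterface(fn=fake_stream).queue(max_size=3).launch()
```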