Staticaliza committed
Commit d03ac46
1 Parent(s): c932dfe

Update app.py

Files changed (1)
app.py +56 -52
app.py CHANGED
@@ -2,11 +2,12 @@ import gradio as gr
 from gpt4all import GPT4All
 from huggingface_hub import hf_hub_download
 
-model_path = "models"
+repo_id = "TheBloke/openchat_3.5-GGUF"
 model_name = "openchat_3.5.Q4_K_M.gguf"
-hf_hub_download(repo_id="TheBloke/openchat_3.5-GGUF", filename=model_name, local_dir=model_path, local_dir_use_symlinks=False)
 
-model = GPT4All(model_name, model_path, allow_download=False, device="cpu")
+hf_hub_download(repo_id=repo_id, filename=model_name, local_dir="models", local_dir_use_symlinks=False)
+
+model = GPT4All(model_name, "models", allow_download=False, device="cpu")
 
 model.config["promptTemplate"] = "[INST] {0} [/INST]"
 model.config["systemPrompt"] = ""
@@ -14,58 +15,61 @@ model._is_chat_session_activated = False
 
 max_new_tokens = 2048
 
-def generater(message, history, temperature, top_p, top_k):
-    prompt = "<s>"
-    for user_message, assistant_message in history:
-        prompt += model.config["promptTemplate"].format(user_message)
-        prompt += assistant_message + "</s>"
-    prompt += model.config["promptTemplate"].format(message)
-    outputs = []
-    for token in model.generate(prompt=prompt, temp=temperature, top_k=top_k, top_p=top_p, max_tokens=max_new_tokens, streaming=True):
-        outputs.append(token)
-        yield "".join(outputs)
-
-chatbot = gr.Chatbot()
-
-additional_inputs = [
-    gr.Slider(
-        label="temperature",
-        value=0.5,
-        minimum=0.0,
-        maximum=2.0,
-        step=0.05,
-        interactive=True,
-        info="Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.",
-    ),
-    gr.Slider(
-        label="top_p",
-        value=1.0,
-        minimum=0.0,
-        maximum=1.0,
-        step=0.01,
-        interactive=True,
-        info="0.1 means only the tokens comprising the top 10% probability mass are considered. Suggested: leave this at 1 and tune temperature instead; 1 means 100% and disables the filter.",
-    ),
-    gr.Slider(
-        label="top_k",
-        value=40,
-        minimum=0,
-        maximum=1000,
-        step=1,
-        interactive=True,
-        info="Limits candidate tokens to a fixed number after sorting by probability. Setting it higher than the vocabulary size deactivates the limit.",
-    )
-]
-
-iface = gr.ChatInterface(
-    fn=generater,
-    title="AI Demo",
-    chatbot=chatbot,
-    additional_inputs=additional_inputs,
-)
-
-with gr.Blocks() as demo:
-    iface.render()
-
-if __name__ == "__main__":
-    demo.queue(max_size=3).launch()
+def generater(input, instruction, history, temperature, top_p, top_k, rep_p, max_tokens):
+    # NOTE: `instruction` is received from the UI but is not folded into the prompt here
+    history = history or []
+    formatted_input = "<s>"
+
+    for user_message, assistant_message in history:
+        formatted_input += model.config["promptTemplate"].format(user_message)
+        formatted_input += assistant_message + "</s>"
+
+    formatted_input += model.config["promptTemplate"].format(input)
+
+    # GPT4All's generate() accepts temp / top_p / top_k / repeat_penalty / max_tokens
+    result = model.generate(
+        formatted_input,
+        temp=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        repeat_penalty=rep_p,
+        max_tokens=max_tokens,
+        streaming=False
+    )
+
+    history = history + [[input, result]]
+
+    return (result, input, history)
+
+with gr.Blocks() as demo:
+    with gr.Row(variant="panel"):
+        gr.Markdown("An AI model test demo.")
+
+    with gr.Row():
+        with gr.Column():
+            history = gr.Chatbot(label="History", elem_id="chatbot")
+            input = gr.Textbox(label="Input", value="", lines=2)
+            instruction = gr.Textbox(label="Instruction", value="You are an AI chatbot.", lines=4)
+            run = gr.Button("▶")
+            clear = gr.Button("🗑️")
+
+        with gr.Column():
+            temperature = gr.Slider(minimum=0, maximum=2, value=1, step=0.01, interactive=True, label="Temperature")
+            top_p = gr.Slider(minimum=0.01, maximum=0.99, value=0.95, step=0.01, interactive=True, label="Top P")
+            top_k = gr.Slider(minimum=1, maximum=2048, value=50, step=1, interactive=True, label="Top K")
+            rep_p = gr.Slider(minimum=0.01, maximum=2, value=1.2, step=0.01, interactive=True, label="Repetition Penalty")
+            max_tokens = gr.Slider(minimum=1, maximum=2048, value=32, step=64, interactive=True, label="Max New Tokens")
+
+    with gr.Row():
+        with gr.Column():
+            output = gr.Textbox(label="Output", value="", lines=50)
+
+    run.click(generater, inputs=[input, instruction, history, temperature, top_p, top_k, rep_p, max_tokens], outputs=[output, input, history])
+    clear.click(clear_history, [], history)
+    # cloud.click(maintain_cloud, inputs=[], outputs=[input, output])  # no `cloud` button or `maintain_cloud` handler is defined in this file
+
+demo.queue(concurrency_count=500, api_open=True).launch(show_api=True)
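
Note on the new event wiring: `run.click` calls the `generater` handler defined above, but `clear.click` and the commented `cloud.click` line refer to `clear_history`, `cloud`, and `maintain_cloud`, none of which are defined in this version of app.py. Below is a minimal sketch of a `clear_history` callback that would satisfy the `clear.click(clear_history, [], history)` binding; the helper name and its behavior are assumptions for illustration, not part of the commit.

def clear_history():
    # Hypothetical helper (assumption, not in the commit): with no inputs wired,
    # Gradio calls this with no arguments and writes the return value into the
    # gr.Chatbot component, so returning an empty list clears the conversation.
    return []

The `cloud.click(...)` binding would additionally need a `cloud = gr.Button(...)` created inside the `with gr.Blocks()` block before it could be registered.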