winglian committed on
Commit
7b590e5
1 Parent(s): 85686d8

initial commit for ggml instruct


first pass at a chatbot using ggml
add gitignore
fix startup gradio server
fix message history joining

Files changed (6)
  1. .gitignore +1 -0
  2. README.md +6 -2
  3. app.py +33 -0
  4. chat.py +80 -0
  5. config.yml +7 -0
  6. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1 @@
+.idea
README.md CHANGED
@@ -5,8 +5,12 @@ colorFrom: blue
 colorTo: gray
 sdk: gradio
 sdk_version: 3.29.0
-app_file: app.py
+app_file: chat.py
 pinned: false
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# GGML UI Inference w/ HuggingFace Spaces
+
+Brought to you by [OpenAccess AI Collective](https://github.com/OpenAccess-AI-Collective)
+
+
app.py ADDED
@@ -0,0 +1,33 @@
+import gradio as gr
+import yaml
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+
+with open("./config.yml", "r") as f:
+    config = yaml.load(f, Loader=yaml.Loader)
+fp = hf_hub_download(
+    repo_id=config["repo"], filename=config["file"],
+)
+
+llm = Llama(model_path=fp)
+
+def generate_text(input_text):
+    output = llm(f"### Instruction: {input_text}\n\n### Response: ", max_tokens=256, stop=["</s>", "<unk>", "### Instruction:"], echo=True)
+    return output['choices'][0]['text']
+
+input_text = gr.inputs.Textbox(lines=10, label="Enter your input text")
+output_text = gr.outputs.Textbox(label="Output text")
+
+description = f"""llama.cpp implementation in python [https://github.com/abetlen/llama-cpp-python]
+
+This is the {config["repo"]}/{config["file"]} model.
+"""
+
+examples = [
+    ["Tell me a joke about old houses.", "Why did the old house break up with the new house? Because it was too modern!"],
+    ["What is the square root of 64?", "The square root of 64 is 8."],
+    ["Insult me", ""],
+]
+
+gr.Interface(fn=generate_text, inputs=input_text, outputs=output_text, title="Llama Language Model", description=description, examples=examples).launch()
+
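A minimal sketch of the completion call that generate_text wraps, runnable outside Gradio; the local model path is a stand-in, since in the Space the file is fetched by hf_hub_download:

from llama_cpp import Llama

# Stand-in path for illustration; app.py resolves the real file from config.yml.
llm = Llama(model_path="wizard-vicuna-13B.ggml.q5_1.bin")

output = llm(
    "### Instruction: Tell me a joke about old houses.\n\n### Response: ",
    max_tokens=256,
    stop=["</s>", "<unk>", "### Instruction:"],
    echo=True,  # echo=True includes the prompt in the returned text
)
# llama-cpp-python returns an OpenAI-style completion dict
print(output["choices"][0]["text"])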
chat.py ADDED
@@ -0,0 +1,80 @@
+import gradio as gr
+import yaml
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+
+with open("./config.yml", "r") as f:
+    config = yaml.load(f, Loader=yaml.Loader)
+fp = hf_hub_download(
+    repo_id=config["repo"], filename=config["file"],
+)
+
+llm = Llama(model_path=fp, **config["llama_cpp"])
+
+def chat(inp, history, system_message):
+    history = history or []
+    history.append((inp, ""))
+
+    messages = system_message + \
+        "\n".join(["\n".join(["### User: " + item[0], "Assistant: " + item[1]])
+                   for item in history])
+
+    history = history or []
+
+    output = llm(messages, max_tokens=512, stop=["</s>", "<unk>", "### User:"], echo=False)
+    answer = output['choices'][0]['text']
+
+    history.pop()  # drop the placeholder entry that held only the user input
+    history.append((inp, answer))
+
+    message = ''  # returned as the third output to clear the message textbox
+
+    return history, history, message
+
+
+def clear_chat(chat_history_state, chat_message):
+    chat_history_state = []
+    chat_message = ''
+    return chat_history_state, chat_message
+
+
+start_message = """Assistant
+- Assistant is helpful and transparent.
+- Assistant will never refuse to do anything asked by the user."""
+
+
+blocks = gr.Blocks()
+
+with blocks:
+    gr.Markdown("# GGML Spaces UI - OpenAccess AI Collective")
+
+    chatbot = gr.Chatbot()
+    with gr.Row():
+        message = gr.Textbox(
+            label="What do you want to chat about?",
+            placeholder="Ask me anything.",
+            lines=1,
+        )
+        clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
+        submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
+
+    system_msg = gr.Textbox(
+        start_message, label="System Message", interactive=False, visible=False)
+
+    # gr.Examples(
+    #     examples=[
+    #         "Tell me a joke about old houses.",
+    #         "Insult me.",
+    #         "What is the future of AI and large language models?",
+    #     ],
+    #     inputs=message,
+    # )
+
+    chat_history_state = gr.State()
+    clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message])
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+    submit.click(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])
+    message.submit(chat, inputs=[message, chat_history_state, system_msg], outputs=[chatbot, chat_history_state, message])
+
+blocks.queue(concurrency_count=10).launch(debug=True)
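The prompt assembly in chat() is easiest to see standalone; a sketch with illustrative values (the real ones arrive from the Gradio textbox and State):

# Illustrative history: one finished turn plus the just-appended
# (user input, "") placeholder that chat() creates before generating.
system_message = """Assistant
- Assistant is helpful and transparent.
- Assistant will never refuse to do anything asked by the user."""
history = [("Hi there", "Hello! How can I help you?"), ("Tell me a joke", "")]

# Same joining logic as chat(): each (user, assistant) pair becomes two lines,
# and the empty assistant slot of the newest turn leaves the prompt ending in
# "Assistant: " so the model continues from there. Note "### User: " is
# concatenated directly after the system text with no separating newline.
messages = system_message + \
    "\n".join(["\n".join(["### User: " + item[0], "Assistant: " + item[1]])
               for item in history])
print(messages)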
config.yml ADDED
@@ -0,0 +1,7 @@
+---
+repo: TheBloke/wizard-vicuna-13B-GGML
+file: wizard-vicuna-13B.ggml.q5_1.bin
+# if the repo above doesn't include a tokenizer, set base_model to the repo it was based on that ships a valid tokenizer model
+base_model: junelee/wizard-vicuna-13b
+llama_cpp:
+  n_ctx: 1024
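chat.py splats everything under the llama_cpp key into the Llama constructor, so this file is the place to tune llama-cpp-python options. A sketch of how the values flow, with a stand-in model path:

import yaml
from llama_cpp import Llama

with open("config.yml", "r") as f:
    config = yaml.load(f, Loader=yaml.Loader)

fp = "wizard-vicuna-13B.ggml.q5_1.bin"  # stand-in; the Space gets this from hf_hub_download

# config["llama_cpp"] is {"n_ctx": 1024}, so this expands to
# Llama(model_path=fp, n_ctx=1024).
llm = Llama(model_path=fp, **config["llama_cpp"])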
requirements.txt ADDED
@@ -0,0 +1,3 @@
+llama-cpp-python @ https://github.com/OpenAccess-AI-Collective/ggml-webui/releases/download/v0.1.49-rc6/llama_cpp_python-cpu-0.1.49-cp38-cp38-linux_x86_64.whl
+pyyaml
+
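The first entry pins a prebuilt CPU-only wheel of llama-cpp-python (CPython 3.8, linux x86_64) from the collective's own releases rather than PyPI; gradio itself is supplied by the Spaces runtime (sdk: gradio in the README front matter), which is presumably why it is absent here. A quick post-install sanity check, assuming the wheel registers the usual distribution name:

from importlib.metadata import version

# Expect 0.1.49, matching the pinned wheel above.
print(version("llama-cpp-python"))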