Upload 8 files
- Dockerfile-llama-cpp-wheel +1 -1
- README.md +4 -4
- config.yml +3 -5
- tabbed.py +11 -10
Dockerfile-llama-cpp-wheel
CHANGED
@@ -1,6 +1,6 @@
 FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04
 
-ARG LLAMA_CPP_VERSION="0.1.
+ARG LLAMA_CPP_VERSION="0.1.53"
 ARG CMAKE_VERSION=3.26
 ARG CMAKE_VERSION_PATCH=3.26.3
 ARG CMAKE_OS=linux
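The only change here pins the prebuilt CUDA wheel to llama-cpp-python 0.1.53. A minimal sketch, assuming the app should refuse to start when the installed package drifts from the version the wheel was built for (the check itself is hypothetical, not part of this repo):

```python
# Hypothetical startup check: the pinned version comes from the ARG above.
import llama_cpp

EXPECTED_VERSION = "0.1.53"

def check_llama_cpp_version() -> None:
    # llama-cpp-python exposes __version__; fall back gracefully if it ever doesn't.
    installed = getattr(llama_cpp, "__version__", "unknown")
    if installed != EXPECTED_VERSION:
        raise RuntimeError(
            f"llama-cpp-python {installed} installed, but the CUDA wheel from "
            f"Dockerfile-llama-cpp-wheel targets {EXPECTED_VERSION}."
        )

check_llama_cpp_version()
```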
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Llama 2 13B Novel French GGML
+emoji: ππ
+colorFrom: #0055A4
+colorTo: #EF3340
 sdk: gradio
 sdk_version: 3.29.0
 app_file: tabbed.py
config.yml
CHANGED
@@ -1,11 +1,9 @@
 ---
 hub:
-  repo_id:
-  filename:
-  # repo_id: TheBloke/Wizard-Vicuna-13B-Uncensored-GGML
-  # filename: Wizard-Vicuna-13B-Uncensored.ggmlv3.q4_1.bin
+  repo_id: Nekochu/Llama-2-13B-fp16-french
+  filename: llama-2-13b-chat-fp16FR.ggmlv3.q4_K_M.bin
 llama_cpp:
-  n_ctx:
+  n_ctx: 4096
   # n_gpu_layers: 40 # llama 13b has 40 layers
 chat:
   stop:
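For context, a minimal sketch of how these keys are typically consumed: load config.yml, fetch the GGML file from the Hub via `hub.repo_id`/`hub.filename`, and pass the `llama_cpp` section (here just `n_ctx: 4096`) to the model constructor. The exact wiring in tabbed.py may differ.

```python
# Sketch, assuming the config.yml layout shown in this diff.
import yaml
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

with open("config.yml") as f:
    config = yaml.safe_load(f)

# hub.repo_id / hub.filename pick the quantized GGML file to download.
model_path = hf_hub_download(
    repo_id=config["hub"]["repo_id"],
    filename=config["hub"]["filename"],
)

# Everything under llama_cpp is forwarded as keyword arguments to Llama,
# so n_ctx: 4096 becomes Llama(..., n_ctx=4096).
llm = Llama(model_path=model_path, **config["llama_cpp"])
```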
tabbed.py
CHANGED
@@ -114,19 +114,20 @@ instruct_interface = gr.Interface(
     outputs=gr.outputs.Textbox(label="Output text"),
 )
 
+
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
             gr.Markdown(f"""
-            ###
-
-
-
-
-
-
-
-
+            ### This is the [{config["hub"]["repo_id"]}](https://huggingface.co/{config["hub"]["repo_id"]}) quantized model file [{config["hub"]["filename"]}](https://huggingface.co/{config["hub"]["repo_id"]}/blob/main/{config["hub"]["filename"]})
+
+            <details>
+            <summary><a href="https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui?duplicate=true">Duplicate the Space</a> to skip the queue and run in a private space or to use your own GGML models, simply update the <a href="https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui/blob/main/config.yml">config.yml</a></summary>
+            <ul>
+            <li>This Space uses GGML with GPU support, so it can quickly run larger models on smaller GPUs & VRAM. [<a href="https://github.com/OpenAccess-AI-Collective/ggml-webui">Contribute</a>]</li>
+            <li>This is running on a smaller, shared GPU, so it may take a few seconds to respond.</li>
+            </ul>
+            </details>
             """)
     with gr.Tab("Chatbot"):
         gr.Markdown("# GGML Spaces Chatbot Demo")
@@ -177,4 +178,4 @@
     gr.Markdown("# GGML Spaces Instruct Demo")
     instruct_interface.render()
 
-demo.queue(**config["queue"]).launch(debug=True, server_name="0.0.0.0", server_port=7860)
+demo.queue(**config["queue"]).launch(debug=True, server_name="0.0.0.0", server_port=7860)
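The final line spreads whatever sits under a `queue:` key in config.yml into Gradio's queue settings before launching on the host and port Spaces expects. A hedged sketch of that wiring for Gradio 3.x; the queue values below are illustrative assumptions, not taken from this commit:

```python
# Illustrative only: config["queue"] is assumed to hold Gradio 3.x queue kwargs.
import gradio as gr

config = {"queue": {"concurrency_count": 1, "max_size": 16}}

with gr.Blocks() as demo:
    gr.Markdown("queue/launch wiring example")

# Equivalent to demo.queue(concurrency_count=1, max_size=16).launch(...);
# 0.0.0.0:7860 is the interface/port a Hugging Face Space container serves on.
demo.queue(**config["queue"]).launch(
    debug=True, server_name="0.0.0.0", server_port=7860
)
```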