Nekochu committed
Commit 01359d3 · 1 Parent(s): cab2adc

Upload 8 files

Files changed (4)
  1. Dockerfile-llama-cpp-wheel +1 -1
  2. README.md +4 -4
  3. config.yml +3 -5
  4. tabbed.py +11 -10
Dockerfile-llama-cpp-wheel CHANGED
@@ -1,6 +1,6 @@
  FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04

- ARG LLAMA_CPP_VERSION="0.1.52"
+ ARG LLAMA_CPP_VERSION="0.1.53"
  ARG CMAKE_VERSION=3.26
  ARG CMAKE_VERSION_PATCH=3.26.3
  ARG CMAKE_OS=linux
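The only change here is the pinned wheel version. The install step that consumes LLAMA_CPP_VERSION sits outside this hunk, but the resulting image can be sanity-checked at runtime with a minimal sketch, assuming the arg pins the llama-cpp-python distribution from PyPI:

# Sketch only, not part of this commit: confirm the image ships the
# pinned llama-cpp-python wheel (the install step is outside this hunk).
from importlib.metadata import version

EXPECTED = "0.1.53"  # matches ARG LLAMA_CPP_VERSION above
installed = version("llama-cpp-python")
assert installed == EXPECTED, f"expected {EXPECTED}, got {installed}"
print(f"llama-cpp-python {installed} OK")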
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
- title: Vicuna V1.3 GGML
- emoji: 🏃
- colorFrom: blue
- colorTo: gray
+ title: Llama 2 13B Novel French GGML
+ emoji: 📖📚
+ colorFrom: #0055A4
+ colorTo: #EF3340
  sdk: gradio
  sdk_version: 3.29.0
  app_file: tabbed.py
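One caveat with the new frontmatter: in YAML an unquoted # starts a comment, so colorFrom: #0055A4 loads as null, and the Spaces docs list only named colors (red, yellow, green, blue, indigo, purple, pink, gray) for the thumbnail gradient. A minimal sketch to check this, assuming README.md is in the working directory:

# Sketch only, not part of this commit: check the Space frontmatter colors.
# An unquoted '#' begins a YAML comment, so "colorFrom: #0055A4" loads as None.
import re
import yaml

# Named colors documented for Spaces thumbnails.
ALLOWED = {"red", "yellow", "green", "blue", "indigo", "purple", "pink", "gray"}

text = open("README.md", encoding="utf-8").read()
match = re.match(r"^---\n(.*?)\n---", text, re.DOTALL)
meta = yaml.safe_load(match.group(1)) if match else {}

for key in ("colorFrom", "colorTo"):
    value = meta.get(key)
    if value not in ALLOWED:
        print(f"{key} = {value!r} is not a documented named color")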
config.yml CHANGED
@@ -1,11 +1,9 @@
  ---
  hub:
-   repo_id: TheBloke/vicuna-13b-v1.3.0-GGML
-   filename: vicuna-13b-v1.3.0.ggmlv3.q2_K.bin
-   # repo_id: TheBloke/Wizard-Vicuna-13B-Uncensored-GGML
-   # filename: Wizard-Vicuna-13B-Uncensored.ggmlv3.q4_1.bin
+   repo_id: Nekochu/Llama-2-13B-fp16-french
+   filename: llama-2-13b-chat-fp16FR.ggmlv3.q4_K_M.bin
  llama_cpp:
-   n_ctx: 2048
+   n_ctx: 4096
    # n_gpu_layers: 40 # llama 13b has 40 layers
  chat:
    stop:
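Neither hunk shows how these keys are consumed; tabbed.py presumably downloads the file and forwards the llama_cpp block as keyword arguments. A minimal sketch of that flow, under those assumptions:

# Sketch only: how config.yml is plausibly consumed (the real code lives
# in tabbed.py, outside this diff).
import yaml
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

with open("config.yml", encoding="utf-8") as f:
    config = yaml.safe_load(f)

model_path = hf_hub_download(
    repo_id=config["hub"]["repo_id"],    # Nekochu/Llama-2-13B-fp16-french
    filename=config["hub"]["filename"],  # llama-2-13b-chat-fp16FR.ggmlv3.q4_K_M.bin
)
# n_ctx: 4096 and any uncommented options flow straight into the model.
llm = Llama(model_path=model_path, **config["llama_cpp"])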
tabbed.py CHANGED
@@ -114,19 +114,20 @@ instruct_interface = gr.Interface(
      outputs=gr.outputs.Textbox(label="Output text"),
  )

+
  with gr.Blocks() as demo:
      with gr.Row():
          with gr.Column():
              gr.Markdown(f"""
-             ### brought to you by OpenAccess AI Collective
-             - Unquantized model available at https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg
-             - This is the [{config["hub"]["repo_id"]}](https://huggingface.co/{config["hub"]["repo_id"]}) model file [{config["hub"]["filename"]}](https://huggingface.co/{config["hub"]["repo_id"]}/blob/main/{config["hub"]["filename"]})
-             - This Space uses GGML with GPU support, so it can quickly run larger models on smaller GPUs & VRAM.
-             - This is running on a smaller, shared GPU, so it may take a few seconds to respond.
-             - [Duplicate the Space](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui?duplicate=true) to skip the queue and run in a private space or to use your own GGML models.
-             - When using your own models, simply update the [config.yml](https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui/blob/main/config.yml)
-             - Contribute at [https://github.com/OpenAccess-AI-Collective/ggml-webui](https://github.com/OpenAccess-AI-Collective/ggml-webui)
-             - Many thanks to [TheBloke](https://huggingface.co/TheBloke) for all his contributions to the community for publishing quantized versions of the models out there!
+             ### This is the [{config["hub"]["repo_id"]}](https://huggingface.co/{config["hub"]["repo_id"]}) quantized model file [{config["hub"]["filename"]}](https://huggingface.co/{config["hub"]["repo_id"]}/blob/main/{config["hub"]["filename"]})
+
+             <details>
+             <summary><a href="https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui?duplicate=true">Duplicate the Space</a> to skip the queue and run in a private space or to use your own GGML models, simply update the <a href="https://huggingface.co/spaces/openaccess-ai-collective/ggml-ui/blob/main/config.yml">config.yml</a></summary>
+             <ul>
+             <li>This Space uses GGML with GPU support, so it can quickly run larger models on smaller GPUs & VRAM. [<a href="https://github.com/OpenAccess-AI-Collective/ggml-webui">Contribute</a>]</li>
+             <li>This is running on a smaller, shared GPU, so it may take a few seconds to respond.</li>
+             </ul>
+             </details>
              """)
      with gr.Tab("Chatbot"):
          gr.Markdown("# GGML Spaces Chatbot Demo")
@@ -177,4 +178,4 @@ with gr.Blocks() as demo:
      gr.Markdown("# GGML Spaces Instruct Demo")
      instruct_interface.render()

- demo.queue(**config["queue"]).launch(debug=True, server_name="0.0.0.0", server_port=7860)
+ demo.queue(**config["queue"]).launch(debug=True, server_name="0.0.0.0", server_port=7860)
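The final line is unchanged in substance; config["queue"] is defined in config.yml rather than in this diff. Assuming Gradio 3.29-era queue kwargs, it expands to something like this sketch (the concurrency_count and max_size values are illustrative, not from the repo):

# Sketch only: illustrative expansion of demo.queue(**config["queue"]).
demo.queue(
    concurrency_count=1,  # assumed: number of workers serving requests
    max_size=16,          # assumed: queue length before new requests are rejected
).launch(
    debug=True,
    server_name="0.0.0.0",  # bind all interfaces inside the Space container
    server_port=7860,       # standard Hugging Face Spaces port
)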