Commit 89b26fd by ffreemt · Parent(s): bd9d50e

Update remove mpt-20 reference
app.py CHANGED

@@ -148,7 +148,7 @@ class GenerationConfig:
 
 
 def format_prompt(system_prompt: str, user_prompt: str):
-    """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
+    """Format prompt based on: https://huggingface.co/spaces/mosaicml/m pt-30b-chat/blob/main/app.py."""
     # TODO im_start/im_end possible fix for WizardCoder
 
     system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
@@ -176,22 +176,22 @@ def generate(
 
 
 logger.info("start dl")
-_ = """full url: https://huggingface.co/TheBloke/mpt-30B-chat-GGML/blob/main/mpt-30b-chat.ggmlv0.q4_1.bin"""
+_ = """full url: https://huggingface.co/TheBloke/m pt-30B-chat-GGML/blob/main/m pt-30b-chat.ggmlv0.q4_1.bin"""
 
-# https://huggingface.co/TheBloke/mpt-30B-chat-GGML
+# https://huggingface.co/TheBloke/m pt-30B-chat-GGML
 _ = """
-mpt-30b-chat.ggmlv0.q4_0.bin  q4_0  4  16.85 GB  19.35 GB  4-bit.
-mpt-30b-chat.ggmlv0.q4_1.bin  q4_1  4  18.73 GB  21.23 GB  4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
-mpt-30b-chat.ggmlv0.q5_0.bin  q5_0  5  20.60 GB  23.10 GB
-mpt-30b-chat.ggmlv0.q5_1.bin  q5_1  5  22.47 GB  24.97 GB
-mpt-30b-chat.ggmlv0.q8_0.bin  q8_0  8  31.83 GB  34.33 GB
+m pt-30b-chat.ggmlv0.q4_0.bin  q4_0  4  16.85 GB  19.35 GB  4-bit.
+m pt-30b-chat.ggmlv0.q4_1.bin  q4_1  4  18.73 GB  21.23 GB  4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
+m pt-30b-chat.ggmlv0.q5_0.bin  q5_0  5  20.60 GB  23.10 GB
+m pt-30b-chat.ggmlv0.q5_1.bin  q5_1  5  22.47 GB  24.97 GB
+m pt-30b-chat.ggmlv0.q8_0.bin  q8_0  8  31.83 GB  34.33 GB
 """
 MODEL_FILENAME = "m pt-30b-chat.ggmlv0.q4_1.bin"
 MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin"  # 10.7G
 MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
 DESTINATION_FOLDER = "models"
 
-REPO_ID = "TheBloke/mpt-30B-chat-GGML"
+REPO_ID = "TheBloke/m pt-30B-chat-GGML"
 if "WizardCoder" in MODEL_FILENAME:
     REPO_ID = "TheBloke/WizardCoder-15B-1.0-GGML"
 
@@ -200,7 +200,7 @@ download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
 logger.info("done dl")
 
 # if "mpt" in model_filename:
-#     config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
+#     config = AutoConfig.from_pretrained("mosaicml/m pt-30b-cha t", context_length=8192)
 #     llm = AutoModelForCausalLM.from_pretrained(
 #         os.path.abspath(f"models/{model_filename}"),
 #         model_type="mpt",
@@ -264,14 +264,14 @@ css = """
 """
 
 with gr.Blocks(
-    # title="mpt-30b-chat-ggml",
+    # title="m pt-30b-chat-ggml",
     title=f"{MODEL_FILENAME}",
     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
     css=css,
 ) as block:
     with gr.Accordion("🎈 Info", open=False):
         # gr.HTML(
-        #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
+        #     """<center><a href="https://huggingface.co/spaces/mikeee/m pt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
         # )
         gr.Markdown(
             f"""<h4><center>{MODEL_FILENAME}</center></h4>
|