Commit 89b26fd by ffreemt · Parent(s): bd9d50e

Update remove mpt-20 reference
app.py CHANGED

@@ -148,7 +148,7 @@ class GenerationConfig:
 
 
 def format_prompt(system_prompt: str, user_prompt: str):
-    """Format prompt based on: https://huggingface.co/spaces/mosaicml/mpt-30b-chat/blob/main/app.py."""
+    """Format prompt based on: https://huggingface.co/spaces/mosaicml/m pt-30b-chat/blob/main/app.py."""
     # TODO im_start/im_end possible fix for WizardCoder
 
     system_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
@@ -176,22 +176,22 @@ def generate(
 
 
 logger.info("start dl")
-_ = """full url: https://huggingface.co/TheBloke/mpt-30B-chat-GGML/blob/main/mpt-30b-chat.ggmlv0.q4_1.bin"""
+_ = """full url: https://huggingface.co/TheBloke/m pt-30B-chat-GGML/blob/main/m pt-30b-chat.ggmlv0.q4_1.bin"""
 
-# https://huggingface.co/TheBloke/mpt-30B-chat-GGML
+# https://huggingface.co/TheBloke/m pt-30B-chat-GGML
 _ = """
-mpt-30b-chat.ggmlv0.q4_0.bin  q4_0  4  16.85 GB  19.35 GB  4-bit.
-mpt-30b-chat.ggmlv0.q4_1.bin  q4_1  4  18.73 GB  21.23 GB  4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
-mpt-30b-chat.ggmlv0.q5_0.bin  q5_0  5  20.60 GB  23.10 GB
-mpt-30b-chat.ggmlv0.q5_1.bin  q5_1  5  22.47 GB  24.97 GB
-mpt-30b-chat.ggmlv0.q8_0.bin  q8_0  8  31.83 GB  34.33 GB
+m pt-30b-chat.ggmlv0.q4_0.bin  q4_0  4  16.85 GB  19.35 GB  4-bit.
+m pt-30b-chat.ggmlv0.q4_1.bin  q4_1  4  18.73 GB  21.23 GB  4-bit. Higher accuracy than q4_0 but not as high as q5_0. However has quicker inference than q5 models.
+m pt-30b-chat.ggmlv0.q5_0.bin  q5_0  5  20.60 GB  23.10 GB
+m pt-30b-chat.ggmlv0.q5_1.bin  q5_1  5  22.47 GB  24.97 GB
+m pt-30b-chat.ggmlv0.q8_0.bin  q8_0  8  31.83 GB  34.33 GB
 """
 MODEL_FILENAME = "m pt-30b-chat.ggmlv0.q4_1.bin"
 MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_0.bin"  # 10.7G
 MODEL_FILENAME = "WizardCoder-15B-1.0.ggmlv3.q4_1.bin"  # 11.9G
 DESTINATION_FOLDER = "models"
 
-REPO_ID = "TheBloke/mpt-30B-chat-GGML"
+REPO_ID = "TheBloke/m pt-30B-chat-GGML"
 if "WizardCoder" in MODEL_FILENAME:
     REPO_ID = "TheBloke/WizardCoder-15B-1.0-GGML"
 
@@ -200,7 +200,7 @@ download_quant(DESTINATION_FOLDER, REPO_ID, MODEL_FILENAME)
 logger.info("done dl")
 
 # if "mpt" in model_filename:
-#     config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
+#     config = AutoConfig.from_pretrained("mosaicml/m pt-30b-cha t", context_length=8192)
 #     llm = AutoModelForCausalLM.from_pretrained(
 #         os.path.abspath(f"models/{model_filename}"),
 #         model_type="mpt",
@@ -264,14 +264,14 @@ css = """
 """
 
 with gr.Blocks(
-    # title="mpt-30b-chat-ggml",
+    # title="m pt-30b-chat-ggml",
     title=f"{MODEL_FILENAME}",
     theme=gr.themes.Soft(text_size="sm", spacing_size="sm"),
     css=css,
 ) as block:
     with gr.Accordion("🎈 Info", open=False):
         # gr.HTML(
-        #     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
+        #     """<center><a href="https://huggingface.co/spaces/mikeee/m pt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
         # )
         gr.Markdown(
             f"""<h4><center>{MODEL_FILENAME}</center></h4>
|