IlyaGusev committed
Commit d42be36
Parent: cc2ccba

Update app.py

Files changed (1):
  app.py +23 -30
app.py CHANGED
@@ -26,34 +26,27 @@ def get_system_tokens(model):
     return get_message_tokens(model, **system_message)
 
 
-directory = "."
-model_url = "https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf/resolve/main/model-q4_K.gguf"
-model_name = "model-q4_K.gguf"
-final_model_path = os.path.join(directory, model_name)
-
-print("Downloading all files...")
-rm_files = [os.path.join(directory, f) for f in os.listdir(directory)]
-for f in rm_files:
-    if os.path.isfile(f):
-        os.remove(f)
-    else:
-        shutil.rmtree(f)
-if not os.path.exists(final_model_path):
-    with open(final_model_path, "wb") as f:
-        http_get(model_url, f)
-    os.chmod(final_model_path, 0o777)
-print("Files downloaded!")
-
-model = Llama(
-    model_path=final_model_path,
-    verbose=True,
-    use_mmap=True,
-    use_mlock=False,
-    n_ctx=2000,
-)
-print("Model loaded!")
-
-max_new_tokens = 1500
+def load_model(
+    directory: str = ".",
+    model_name: str = "model-q4_K.gguf",
+    model_url: str = "https://huggingface.co/IlyaGusev/saiga_mistral_7b_gguf/resolve/main/model-q4_K.gguf"
+):
+    final_model_path = os.path.join(directory, model_name)
+
+    print("Downloading all files...")
+    if not os.path.exists(final_model_path):
+        with open(final_model_path, "wb") as f:
+            http_get(model_url, f)
+        os.chmod(final_model_path, 0o777)
+    print("Files downloaded!")
+
+    model = Llama(
+        model_path=final_model_path,
+        n_ctx=2048
+    )
+
+    print("Model loaded!")
+    return model
 
 
 def user(message, history):
@@ -92,7 +85,7 @@ def bot(
 
     partial_text = ""
     for i, token in enumerate(generator):
-        if token == model.token_eos() or (max_new_tokens is not None and i >= max_new_tokens):
+        if token == model.token_eos():
             break
         partial_text += model.detokenize([token]).decode("utf-8", "ignore")
         history[-1][1] = partial_text
@@ -116,7 +109,7 @@ with gr.Blocks(
     with gr.Row():
         with gr.Column(scale=5):
             system_prompt = gr.Textbox(label="Системный промпт", placeholder="", value=SYSTEM_PROMPT, interactive=False)
-            chatbot = gr.Chatbot(label="Диалог").style(height=400)
+            chatbot = gr.Chatbot(label="Диалог", height=400)
         with gr.Column(min_width=80, scale=1):
             with gr.Tab(label="Параметры генерации"):
                 top_p = gr.Slider(
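For context, a minimal sketch of how the new `load_model` would presumably be wired up from the rest of app.py. The actual call site sits outside these hunks, so the module-level `model = load_model()` call, the placeholder prompt, and the sampling values below are assumptions for illustration, not part of the commit; it relies on app.py's existing imports (`llama_cpp.Llama`, `os`, `http_get`).

```python
# Hypothetical usage sketch -- the real call site is outside the hunks above.
model = load_model()  # downloads model-q4_K.gguf on first run, then loads it

# Token-level streaming, mirroring the loop in bot():
tokens = model.tokenize("Привет!".encode("utf-8"))  # placeholder prompt
partial_text = ""
for token in model.generate(tokens, top_k=30, top_p=0.9, temp=0.2):
    if token == model.token_eos():  # the new loop stops only at end-of-sequence
        break
    partial_text += model.detokenize([token]).decode("utf-8", "ignore")
print(partial_text)
```

Two details worth noting in the diff itself: with the `max_new_tokens` cap removed, generation now runs until EOS (or until the caller stops consuming the generator), and `gr.Chatbot(..., height=400)` reflects newer Gradio releases, where the deprecated `.style()` method's arguments moved into the component constructors.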