SergeyHakim committed
Commit e8d9a2a · verified · 1 Parent(s): 2db6d6a

Upload 4 files
Files changed (4)
  1. README.md +12 -0
  2. app.py +179 -0
  3. gitattributes +35 -0
  4. requirements.txt +11 -0
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Open Source LLM Chatbot
+ emoji: 🤖
+ colorFrom: indigo
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 4.14.0
+ app_file: app.py
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,179 @@
+ from huggingface_hub import hf_hub_download
+ import gradio as gr
+ from llama_index.llms import LlamaCPP
+ from llama_index.llms.llama_utils import (
+     messages_to_prompt,
+     completion_to_prompt,
+ )
+
+ # Local cache directory for the downloaded GGUF model files.
+ MODELS_PATH = "./models"
+
+ # Hub repository and file name for each supported model.
+ model_info = {
+     "Llama-2-13B-chat": {
+         "repo_id": "TheBloke/Llama-2-13B-chat-GGUF",
+         "filename": "llama-2-13b-chat.Q4_K_M.gguf",
+     },
+     "Mistral-7B-Instruct-v0.2": {
+         "repo_id": "TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+         "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+     },
+     "zephyr-7B-beta": {
+         "repo_id": "TheBloke/zephyr-7B-beta-GGUF",
+         "filename": "zephyr-7b-beta.Q4_K_M.gguf",
+     },
+     "vicuna-7B-v1.5": {
+         "repo_id": "TheBloke/vicuna-7B-v1.5-GGUF",
+         "filename": "vicuna-7b-v1.5.Q4_K_M.gguf",
+     },
+     "CodeLlama-7B": {
+         "repo_id": "TheBloke/CodeLlama-7B-GGUF",
+         "filename": "codellama-7b.Q4_K_M.gguf",
+     },
+     # "Falcon-7B-Instruct": {
+     #     "repo_id": "TheBloke/Falcon-7B-Instruct-GGML",
+     #     "filename": "falcon-7b-instruct.ggccv1.q4_1.bin",
+     # },
+ }
+ models = list(model_info.keys())
+
+
+ def download(model):
+     """Download a model's GGUF file from the Hub and return its local path."""
+     repo_id = model_info[model]["repo_id"]
+     filename = model_info[model]["filename"]
+
+     model_path = hf_hub_download(
+         repo_id=repo_id,
+         filename=filename,
+         resume_download=True,
+         cache_dir=MODELS_PATH,
+     )
+
+     return model_path
+
+
+ # Pre-download every supported model at startup so switching models is fast.
+ for model_name in models:
+     download(model_name)
+
+ # Default model: Mistral-7B-Instruct-v0.2.
+ mistral_model_path = hf_hub_download(
+     repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
+     filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
+     resume_download=True,
+     cache_dir=MODELS_PATH,
+ )
+
+ # If you run on a GPU, set n_gpu_layers to at least 1 (-1 offloads all layers);
+ # increase or decrease it based on your GPU's memory and performance.
+ llm = LlamaCPP(
+     # Path to a pre-downloaded model (a model_url can be passed instead).
+     model_path=mistral_model_path,
+     temperature=0.1,
+     max_new_tokens=256,
+     # Llama 2 has a 4096-token context window; keep some headroom.
+     context_window=3900,
+     # kwargs to pass to __call__()
+     generate_kwargs={},
+     # kwargs to pass to __init__(); -1 offloads all layers to the GPU.
+     model_kwargs={"n_gpu_layers": -1},
+     # Transform inputs into the Llama 2 chat format.
+     messages_to_prompt=messages_to_prompt,
+     completion_to_prompt=completion_to_prompt,
+     verbose=True,
+ )
+
+
+ def model_initialization(model):
+     """Download the selected model and rebuild the global LlamaCPP instance."""
+     global llm
+
+     if model == "" or model not in model_info:
+         gr.Warning("Please select a model")
+         return "No model selected; the previous model is still active"
+
+     gr.Info("Model download and configuration started, please wait...")
+     model_path = hf_hub_download(
+         repo_id=model_info[model]["repo_id"],
+         filename=model_info[model]["filename"],
+         resume_download=True,
+         cache_dir=MODELS_PATH,
+     )
+
+     llm = LlamaCPP(
+         model_path=model_path,
+         temperature=0.1,
+         max_new_tokens=256,
+         context_window=3900,
+         generate_kwargs={},
+         # Set n_gpu_layers to at least 1 to use the GPU; -1 offloads all layers.
+         model_kwargs={"n_gpu_layers": -1},
+         messages_to_prompt=messages_to_prompt,
+         completion_to_prompt=completion_to_prompt,
+         verbose=True,
+     )
+     gr.Info("The model is configured and ready to chat")
+     return "The model is configured and ready to chat; your current model is " + model
+
+
+ def predict(message, history):
+     """Stream the model's completion back to the chat UI token by token."""
+     answer = []
+     response = llm.stream_complete(message)
+     for bot_response in response:
+         token = bot_response.delta
+         answer.append(token)
+         final_answer = "".join(answer)
+         yield final_answer
+
+
+ with gr.Blocks() as UI:
+     model_dropdown = gr.Dropdown(
+         ["CodeLlama-7B", "Llama-2-13B-chat", "Mistral-7B-Instruct-v0.2",
+          "zephyr-7B-beta", "vicuna-7B-v1.5"],
+         value="Mistral-7B-Instruct-v0.2",
+         label="Select a model",
+         info="The default model is Mistral-7B-Instruct-v0.2",
+     )
+     textInfo = gr.Textbox(value="Current model is Mistral-7B-Instruct-v0.2", label="Model Status")
+     # Chatbot interface
+     chatUI = gr.ChatInterface(
+         predict,
+         title="Open Source LLM ChatBot",
+         description="Ask any question",
+         theme="soft",
+         examples=[
+             "Hello",
+             "Are you an LLM?",
+             "How can I fine-tune a pre-trained LLM?",
+             "How can I build a chatbot using a local open-source LLM?",
+         ],
+         cache_examples=False,
+         submit_btn="Send Message",
+         retry_btn=None,
+         undo_btn="Delete Previous",
+         clear_btn="Clear",
+     )
+
+     # Re-initialize the LLM whenever a different model is selected.
+     model_dropdown.change(fn=model_initialization, inputs=[model_dropdown], outputs=[textInfo])
+
+ if __name__ == "__main__":
+     UI.launch(debug=True)
gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ # Gradio UI
+ gradio
+
+ # Python bindings for llama.cpp (runs the local GGUF models)
+ llama-cpp-python
+
+ # Hugging Face Hub client, used to download the models
+ huggingface_hub
+
+ # LlamaIndex (provides the LlamaCPP wrapper and prompt utilities)
+ llama-index