Mahadih534 committed
Commit 5c8fa79
1 Parent(s): a843b1b

file removed

Files changed (1)
app.py +0 -130
app.py DELETED
@@ -1,130 +0,0 @@
- from huggingface_hub import hf_hub_download
- import logging
- import sys
- import gradio as gr
- from llama_index.llms import LlamaCPP
- from llama_index.llms.llama_utils import (
-     messages_to_prompt,
-     completion_to_prompt,
- )
-
- MODELS_PATH = "./models"
-
- mistral_model_path = hf_hub_download(
-     repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
-     filename="mistral-7b-instruct-v0.2.Q4_K_M.gguf",
-     resume_download=True,
-     cache_dir=MODELS_PATH,)
-
-
- llm = LlamaCPP(
-     # You can pass in the URL to a GGML model to download it automatically
-     # model_url=model_url,
-     # optionally, you can set the path to a pre-downloaded model instead of model_url
-     model_path=mistral_model_path,
-     temperature=0.1,
-     max_new_tokens=256,
-     # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
-     context_window=3900,
-     # kwargs to pass to __call__()
-     generate_kwargs={},
-     # kwargs to pass to __init__()
-     # set to at least 1 to use GPU
-     model_kwargs={"n_gpu_layers": -1},
-     # transform inputs into Llama2 format
-     messages_to_prompt=messages_to_prompt,
-     completion_to_prompt=completion_to_prompt,
-     verbose=True,
- )
-
-
- def model_initialization(model):
-     global llm  # rebind the module-level llm so predict() uses the newly selected model
-     if model != "":
-         gr.Info("Model download and configuration has started, please wait...")
-     MODELS_PATH = "./models"
-     repo_id = ""
-     filename = ""
-     if model == "Llama-2-13B-chat":
-         repo_id = "TheBloke/Llama-2-13B-chat-GGUF"
-         filename = "llama-2-13b-chat.Q4_K_M.gguf"
-     elif model == "Mistral-7B-Instruct-v0.2":
-         repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
-         filename = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
-     elif model == "zephyr-7B-beta":
-         repo_id = "TheBloke/zephyr-7B-beta-GGUF"
-         filename = "zephyr-7b-beta.Q4_K_M.gguf"
-     elif model == "vicuna-7B-v1.5":
-         repo_id = "TheBloke/vicuna-7B-v1.5-GGUF"
-         filename = "vicuna-7b-v1.5.Q4_K_M.gguf"
-     elif model == "Falcon-7B-Instruct":
-         repo_id = "TheBloke/Falcon-7B-Instruct-GGML"
-         filename = "falcon-7b-instruct.ggccv1.q4_1.bin"
-     elif model == "CodeLlama-7B":
-         repo_id = "TheBloke/CodeLlama-7B-GGUF"
-         filename = "codellama-7b.Q4_K_M.gguf"
-     else:
-         gr.Warning("please select a model")
-
-
-     mistral_model_path = hf_hub_download(
-         repo_id=repo_id,
-         filename=filename,
-         resume_download=True,
-         cache_dir=MODELS_PATH,)
-
-     llm = LlamaCPP(
-         # You can pass in the URL to a GGML model to download it automatically
-         # model_url=model_url,
-         # optionally, you can set the path to a pre-downloaded model instead of model_url
-         model_path=mistral_model_path,
-         temperature=0.1,
-         max_new_tokens=256,
-         # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
-         context_window=3900,
-         # kwargs to pass to __call__()
-         generate_kwargs={},
-         # set to at least 1 to use GPU
-         model_kwargs={"n_gpu_layers": -1},
-         # transform inputs into Llama2 format
-         messages_to_prompt=messages_to_prompt,
-         completion_to_prompt=completion_to_prompt,
-         verbose=True,
-     )
-     gr.Info("Model has been configured and is ready to chat")
-     return "Model has been configured and is ready to chat; your current model is " + model
-
- def predict(message, history):
-     messages = []
-     answer = []
-     response = llm.stream_complete(message)
-     for bot_response in response:
-         token = bot_response.delta
-         answer.append(token)
-         final_answer = "".join(answer)
-         yield final_answer
-
- with gr.Blocks() as UI:
-
-     models = gr.Dropdown(
-         ["CodeLlama-7B", "Llama-2-13B-chat", "Falcon-7B-Instruct", "Mistral-7B-Instruct-v0.2", "zephyr-7B-beta", "vicuna-7B-v1.5"],
-         value="Mistral-7B-Instruct-v0.2", label="please select a model", info="default model is Mistral-7B-Instruct-v0.2")
-     textInfo = gr.Textbox(value="current model is Mistral-7B-Instruct-v0.2", label="Model Status")
-     # Chatbot interface
-     chatUI = gr.ChatInterface(
-         predict,
-         title="Open Source LLM ChatBot",
-         description="Ask any question",
-         theme="soft",
-         examples=["Hello", "Are you an LLM model?", "How can I fine-tune a pre-trained LLM model?", "How can I build a chatbot using a local open-source LLM?"],
-         cache_examples=False,
-         submit_btn="Send Message",
-         retry_btn=None,
-         undo_btn="Delete Previous",
-         clear_btn="Clear",
-     )
-
-     models.change(fn=model_initialization, inputs=[models], outputs=[textInfo])
-
- if __name__ == "__main__":
-     UI.launch(debug=True)  # launch the app
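
If the removed app.py is ever needed again, it can still be fetched from the parent revision (a843b1b) with the same hf_hub_download helper the app itself used. A minimal sketch, assuming a hypothetical Space id "Mahadih534/LLM-ChatBot" (the actual Space name is not shown in this commit):

from huggingface_hub import hf_hub_download

# Download app.py as it existed at the parent commit of this deletion.
# The repo_id below is a placeholder; substitute the real Space id.
old_app = hf_hub_download(
    repo_id="Mahadih534/LLM-ChatBot",  # hypothetical Space id
    repo_type="space",                 # the file lives in a Space, not a model repo
    filename="app.py",
    revision="a843b1b",                # parent commit; use the full SHA if the short hash does not resolve
)
print(old_app)  # local cache path of the recovered file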