Spaces: Runtime error

theekshana committed
Commit 2bb1521 · Parent(s): 7bba285

local llama2
Files changed:
- __pycache__/config.cpython-311.pyc +0 -0
- __pycache__/qaPipeline.cpython-311.pyc +0 -0
- app.py +4 -1
- config.py +1 -0
- qaPipeline.py +39 -1
- requirements.txt +4 -1
__pycache__/config.cpython-311.pyc
CHANGED
Binary files a/__pycache__/config.cpython-311.pyc and b/__pycache__/config.cpython-311.pyc differ
__pycache__/qaPipeline.cpython-311.pyc
CHANGED
Binary files a/__pycache__/qaPipeline.cpython-311.pyc and b/__pycache__/qaPipeline.cpython-311.pyc differ
app.py
CHANGED
@@ -3,6 +3,8 @@ Python Backend API to chat with private data
 
 08/16/2023
 D.M. Theekshana Samaradiwakara
+
+python -m streamlit run app.py
 """
 
 import os
@@ -229,7 +231,8 @@ def handle_userinput(query):
 
     except Exception as e:
         # logger.error(f"Answer retrieval failed with {e}")
-        st.error(f"
+        st.error(f"Error occurred! See log info for more details.") #, icon=":books:")
+        print(f"Streamlit handle_userinput Error : {e}")
         return
 
 
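For context, a minimal sketch of the handler this hunk patches, under assumptions: only handle_userinput, st.error, and the except block appear in the diff; the pipeline call and response display below are hypothetical placeholders.

import streamlit as st

def handle_userinput(query):
    try:
        # hypothetical: however app.py actually queries its QA pipeline
        response = qa_pipeline.run(query)
        st.write(response)
    except Exception as e:
        # the commit replaces a truncated st.error(f" with a full message
        # plus a console print of the underlying exception
        st.error("Error occurred! See log info for more details.")
        print(f"Streamlit handle_userinput Error : {e}")
        return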
config.py
CHANGED
@@ -6,6 +6,7 @@ MODELS={
     "openai gpt-3.5":"openai",
     # "Deci/DeciLM-6b-instruct":"Deci/DeciLM-6b-instruct",
     # "Deci/DeciLM-6b":"Deci/DeciLM-6b",
+    "local/LLAMA2":"local/LLAMA2",
 
 }
 
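The added entry maps a UI label to the model_type string that set_model in qaPipeline.py switches on. A minimal sketch of the assumed lookup path (the selection wiring is hypothetical; MODELS and set_model come from this commit):

from config import MODELS

choice = "local/LLAMA2"            # e.g. picked from the app's model selector
model_type = MODELS[choice]        # -> "local/LLAMA2"
qa_pipeline.set_model(model_type)  # dispatches to get_local_LLAMA2() in qaPipeline.py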
qaPipeline.py
CHANGED
@@ -69,6 +69,41 @@ class Singleton:
         else:
             Singleton.__instance = QAPipeline()
 
+def get_local_LLAMA2():
+    import torch
+    from transformers import AutoTokenizer, AutoModelForCausalLM
+
+    tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-13b-chat-hf",
+        # use_auth_token=True,
+    )
+
+    model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-13b-chat-hf",
+        device_map='auto',
+        torch_dtype=torch.float16,
+        use_auth_token=True,
+        # load_in_8bit=True,
+        # load_in_4bit=True
+    )
+    from transformers import pipeline
+
+    pipe = pipeline("text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        max_new_tokens=512,
+        do_sample=True,
+        top_k=30,
+        num_return_sequences=1,
+        eos_token_id=tokenizer.eos_token_id
+    )
+
+    from langchain import HuggingFacePipeline
+    LLAMA2 = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
+    print(f"\n\n> torch.cuda.is_available(): {torch.cuda.is_available()}")
+    print("\n\n> local LLAMA2 loaded")
+    return LLAMA2
+
 class QAPipeline:
 
     def __init__(self):
@@ -134,7 +169,6 @@ class QAPipeline:
         return
 
 
-
     def set_model(self,model_type):
         if model_type != self.llm_name:
             match model_type:
@@ -152,12 +186,16 @@ class QAPipeline:
                     self.llm = ChatOpenAI(model_name="Deci/DeciLM-6b-instruct", temperature=0)
                 case "Deci/DeciLM-6b":
                     self.llm = ChatOpenAI(model_name="Deci/DeciLM-6b", temperature=0)
+                case "local/LLAMA2":
+                    self.llm = get_local_LLAMA2()
                 case _default:
                     # raise exception if model_type is not supported
                     raise Exception(f"Model type {model_type} is not supported. Please choose a valid one")
 
             self.llm_name = model_type
 
+
+
     def set_vectorstore(self, dataset):
         if dataset != self.dataset_name:
             # self.vectorstore = load_store(dataset)
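A usage sketch for the new code path, assuming the Space has a GPU large enough for a 13B model in float16 (roughly 26 GB of weights); the direct qa.llm(...) call is illustrative, not how the app invokes the pipeline:

from qaPipeline import QAPipeline

qa = QAPipeline()
qa.set_model("local/LLAMA2")  # first call downloads and loads the model
print(qa.llm("Summarize retrieval augmented generation in one sentence."))

Note that NousResearch/Llama-2-13b-chat-hf is an ungated mirror of Meta's weights, so the use_auth_token=True on the model load is likely unnecessary.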
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-langchain == 0.0.287
+# langchain == 0.0.287
 # openai == 0.27.8
 # streamlit == 1.25.0
 
@@ -21,3 +21,6 @@ huggingface-hub
 sentence_transformers
 
 python-dotenv
+
+#local llama2
+accelerate