Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -28,11 +28,11 @@ def initialize_llm(hf_token):
|
|
28 |
return HuggingFaceLLM(
|
29 |
model_name = model_name, #meta-llama/Meta-Llama-3-8B-Instruct meta-llama/Llama-2-7b-chat-hf #google/gemma-7b-it #HuggingFaceH4/zephyr-7b-beta #'GeneZC/MiniChat-2-3B'
|
30 |
tokenizer_name = model_name,
|
31 |
-
context_window=
|
32 |
# model_kwargs={"token": hf_token, "quantization_config": quantization_config},
|
33 |
model_kwargs={"token": hf_token},
|
34 |
tokenizer_kwargs={"token": hf_token},
|
35 |
-
max_new_tokens=
|
36 |
device_map="auto",
|
37 |
)
|
38 |
|
@@ -72,7 +72,7 @@ def handle_query(user_prompt, llm):
|
|
72 |
verbose=False,
|
73 |
)
|
74 |
|
75 |
-
MEMORY_THRESHOLD =
|
76 |
|
77 |
if 'memory' not in st.session_state:
|
78 |
st.session_state.memory = ChatMemoryBuffer.from_defaults(token_limit=MEMORY_THRESHOLD)
|
@@ -125,7 +125,7 @@ def main():
|
|
125 |
|
126 |
if 'index' not in st.session_state:
|
127 |
# Load or create index
|
128 |
-
index, all_nodes = load_or_create_index(embed_model, directories, persist_dir)
|
129 |
st.session_state.index = index
|
130 |
st.session_state.all_nodes_dict = {n.node_id: n for n in all_nodes}
|
131 |
|
@@ -147,7 +147,7 @@ def main():
|
|
147 |
user_prompt = st.chat_input("Ask me anything:")
|
148 |
if user_prompt:
|
149 |
st.session_state.messages.append({'role': 'user', "content": user_prompt})
|
150 |
-
response = handle_query(user_prompt, llm)
|
151 |
response = response.response
|
152 |
st.session_state.messages.append({'role': 'assistant', "content": response})
|
153 |
|
|
|
28 |
return HuggingFaceLLM(
|
29 |
model_name = model_name, #meta-llama/Meta-Llama-3-8B-Instruct meta-llama/Llama-2-7b-chat-hf #google/gemma-7b-it #HuggingFaceH4/zephyr-7b-beta #'GeneZC/MiniChat-2-3B'
|
30 |
tokenizer_name = model_name,
|
31 |
+
context_window=1900,
|
32 |
# model_kwargs={"token": hf_token, "quantization_config": quantization_config},
|
33 |
model_kwargs={"token": hf_token},
|
34 |
tokenizer_kwargs={"token": hf_token},
|
35 |
+
max_new_tokens=300,
|
36 |
device_map="auto",
|
37 |
)
|
38 |
|
|
|
72 |
verbose=False,
|
73 |
)
|
74 |
|
75 |
+
MEMORY_THRESHOLD = 1900
|
76 |
|
77 |
if 'memory' not in st.session_state:
|
78 |
st.session_state.memory = ChatMemoryBuffer.from_defaults(token_limit=MEMORY_THRESHOLD)
|
|
|
125 |
|
126 |
if 'index' not in st.session_state:
|
127 |
# Load or create index
|
128 |
+
index, all_nodes = load_or_create_index(st.session_state.embed_model, directories, persist_dir)
|
129 |
st.session_state.index = index
|
130 |
st.session_state.all_nodes_dict = {n.node_id: n for n in all_nodes}
|
131 |
|
|
|
147 |
user_prompt = st.chat_input("Ask me anything:")
|
148 |
if user_prompt:
|
149 |
st.session_state.messages.append({'role': 'user', "content": user_prompt})
|
150 |
+
response = handle_query(user_prompt, st.session_state.llm)
|
151 |
response = response.response
|
152 |
st.session_state.messages.append({'role': 'assistant', "content": response})
|
153 |
|