Roger Condori committed
Commit 85c5bca
1 Parent(s): 55c1635

change default model and add limits for demo

Files changed (1):
  conversadocs/bones.py  +7 -4
conversadocs/bones.py CHANGED
@@ -100,7 +100,7 @@ class DocChat(param.Parameterized):
         super(DocChat, self).__init__( **params)
         self.loaded_file = ["demo_docs/demo.txt"]
         self.db = load_db(self.loaded_file)
-        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q5_1.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
+        self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
         self.qa = q_a(self.db, "stuff", self.k_value, self.llm)
 
 
@@ -133,7 +133,7 @@ class DocChat(param.Parameterized):
             result = self.qa({"question": query, "chat_history": self.chat_history})
         except:
             print("Error not get response from model, reloaded default llama-2 7B config")
-            self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q5_1.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
+            self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
             self.qa = q_a(self.db, "stuff", k_max, self.llm)
             result = self.qa({"question": query, "chat_history": self.chat_history})
 
@@ -145,6 +145,9 @@ class DocChat(param.Parameterized):
 
     def summarize(self, chunk_size=2000, chunk_overlap=100):
         # load docs
+        if "SET_LIMIT" == os.getenv("DEMO"):
+            return "Since the space only uses the CPU, the summarization function cannot be used."
+
         documents = []
         for file in self.loaded_file:
             ext = "." + file.rsplit(".", 1)[-1]
@@ -196,7 +199,7 @@ class DocChat(param.Parameterized):
                 self.k_value = k
                 return f"Loaded {file_} [GPU INFERENCE]"
             except:
-                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q5_1.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
+                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
                 return "No valid model | Reloaded Reloaded default llama-2 7B config"
         else:
             try:
@@ -222,7 +225,7 @@ class DocChat(param.Parameterized):
                 self.k_value = k
                 return f"Loaded {file_} [CPU INFERENCE SLOW]"
             except:
-                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q5_1.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
+                self.change_llm("TheBloke/Llama-2-7B-Chat-GGML", "llama-2-7b-chat.ggmlv3.q2_K.bin", max_tokens=256, temperature=0.2, top_p=0.95, top_k=50, repeat_penalty=1.2, k=3)
                 return "No valid model | Reloaded default llama-2 7B config"
 
     def default_falcon_model(self, HF_TOKEN):
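
For readers who want the two patterns this commit applies shown in isolation, below is a minimal, self-contained sketch rather than code from the repository: an environment-variable gate like the new DEMO check in summarize, and an except-branch fallback to the small q2_K default quant like the change_llm calls above. load_model, DEFAULT_REPO, and DEFAULT_FILE are hypothetical stand-ins; only the repo id, filename, and the DEMO=SET_LIMIT convention come from the diff.

```python
import os

# Hypothetical stand-ins; the real bones.py passes these values to change_llm() as shown in the diff.
DEFAULT_REPO = "TheBloke/Llama-2-7B-Chat-GGML"
DEFAULT_FILE = "llama-2-7b-chat.ggmlv3.q2_K.bin"  # small quant chosen for the CPU-only demo Space


def load_model(repo_id: str, filename: str) -> str:
    """Placeholder loader; the real project loads a GGML model through its own LLM wrapper."""
    if not filename.endswith(".bin"):
        raise ValueError(f"unexpected model file: {filename}")
    return f"{repo_id}/{filename}"


def change_llm(repo_id: str, filename: str) -> str:
    """Try the requested model; on any failure fall back to the default llama-2 7B quant,
    mirroring the except: branches in bones.py."""
    try:
        return load_model(repo_id, filename)
    except Exception:
        return load_model(DEFAULT_REPO, DEFAULT_FILE)


def summarize(loaded_files: list) -> str:
    """Feature gate: when the Space sets DEMO=SET_LIMIT, skip CPU-heavy summarization."""
    if os.getenv("DEMO") == "SET_LIMIT":
        return "Since the space only uses the CPU, the summarization function cannot be used."
    return f"summarizing {len(loaded_files)} file(s)..."


if __name__ == "__main__":
    os.environ["DEMO"] = "SET_LIMIT"
    print(change_llm("some/other-model", "model.safetensors"))  # falls back to the default quant
    print(summarize(["demo_docs/demo.txt"]))
```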