Daniel Marques committed
Commit
2084d31
1 Parent(s): d7147ea

feat: add websocket

Files changed (3)
  1. constants.py +2 -2
  2. load_models.py +2 -0
  3. prompt_template_utils.py +6 -7
constants.py CHANGED

@@ -32,13 +32,13 @@ CHROMA_SETTINGS = Settings(
 )
 
 # Context Window and Max New Tokens
-CONTEXT_WINDOW_SIZE = 4096
+CONTEXT_WINDOW_SIZE = 2048
 MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)
 
 #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
 
 N_GPU_LAYERS = 40 # Llama-2-70B has 83 layers
-N_BATCH = 512
+N_BATCH = 1024
 
 ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
 # N_GPU_LAYERS = 20
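
To see where these constants end up, here is a minimal sketch, assuming LangChain's LlamaCpp wrapper as used in load_models.py (the model path below is hypothetical; only the constant names and values come from this diff). Per the comment in the hunk, halving N_BATCH and then N_GPU_LAYERS is the remedy for the "not enough space in the buffer" error:

from langchain.llms import LlamaCpp

from constants import CONTEXT_WINDOW_SIZE, MAX_NEW_TOKENS, N_GPU_LAYERS, N_BATCH

llm = LlamaCpp(
    model_path="models/llama-2-7b-chat.Q4_K_M.gguf",  # hypothetical local path
    n_ctx=CONTEXT_WINDOW_SIZE,    # 2048 after this commit
    max_tokens=MAX_NEW_TOKENS,    # set equal to the context window here
    n_gpu_layers=N_GPU_LAYERS,    # 40; halve this if the buffer error persists
    n_batch=N_BATCH,              # 1024 after this commit; halve this first
)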
load_models.py CHANGED

@@ -58,6 +58,8 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, logging):
         "model_path": model_path,
         "n_ctx": CONTEXT_WINDOW_SIZE,
         "max_tokens": MAX_NEW_TOKENS,
+        "n_batch": MAX_NEW_TOKENS,
+
         # set this based on your GPU & CPU RAM
     }
     if device_type.lower() == "mps":
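
The new "n_batch" entry sets the prompt-evaluation batch size from MAX_NEW_TOKENS rather than the N_BATCH constant. A rough sketch of how the surrounding function plausibly consumes this dict; the helper name build_llm, the mps branch body, and the final call are assumptions, since only the dict entries and the mps check are visible above:

from langchain.llms import LlamaCpp

from constants import CONTEXT_WINDOW_SIZE, MAX_NEW_TOKENS

def build_llm(model_path: str, device_type: str) -> LlamaCpp:
    # Hypothetical stand-in for the tail of load_quantized_model_gguf_ggml.
    kwargs = {
        "model_path": model_path,
        "n_ctx": CONTEXT_WINDOW_SIZE,   # total context window (2048)
        "max_tokens": MAX_NEW_TOKENS,   # generation budget
        "n_batch": MAX_NEW_TOKENS,      # tokens evaluated per prompt batch

        # set this based on your GPU & CPU RAM
    }
    if device_type.lower() == "mps":
        kwargs["n_gpu_layers"] = 1      # assumption: minimal offload on Apple Silicon
    return LlamaCpp(**kwargs)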
prompt_template_utils.py CHANGED

@@ -9,15 +9,14 @@ from langchain.prompts import PromptTemplate
 
 # this is specific to Llama-2.
 
-# system_prompt = """You are a helpful assistant, you will use the context and documents provided in the training to answer users questions.
-# Read the context provided before answering questions and think step by step. If you can't answer a user's question based on the
-# context provided, inform the user. Don't use any other information to answer the user."""
+system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
+Read the given context before answering questions and think step by step. If you can not answer a user question based on
+the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""
 
 # system_prompt = """You are a helpful assistant, and you will use the context and documents provided in the training to answer users' questions. Please read the context provided carefully before responding to questions and follow a step-by-step thought process. If you cannot answer a user's question based on the provided context, please inform the user. Do not use any other information to answer the user. Provide a detailed response based on the content of locally trained documents."""
 
-system_prompt = """It's a useful assistant that will use the context and documents provided in the training to answer users' questions.
-Read the context provided before answering the questions and think step by step. Your answer cannot be more than 2000 words long.
-If you can't answer, just say "I don't know" and don't try to work out an answer to respond to the user."""
+# system_prompt = """It's a useful assistant that will use the context and documents provided in the training to answer users' questions.
+# Read the context provided before answering the questions and think step by step. If you can't answer, just say "I don't know" and don't try to work out an answer to respond to the user."""
 
 def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False):
     if promptTemplate_type == "llama":
@@ -85,7 +84,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False):
     )
     prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
 
-    memory = ConversationBufferMemory(input_key="question", memory_key="history", max_token_limit=10)
+    memory = ConversationBufferMemory(input_key="question", memory_key="history")
 
     return (
         prompt,
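
Finally, a usage sketch for the (prompt, memory) pair this function returns, wired into a retrieval chain; the RetrievalQA setup, llm, and db are illustrative assumptions, not part of this diff. Dropping max_token_limit is consistent with LangChain's API: that field belongs to the token-limited memory variants such as ConversationTokenBufferMemory, not to ConversationBufferMemory, which keeps the full history:

from langchain.chains import RetrievalQA

from prompt_template_utils import get_prompt_template

prompt, memory = get_prompt_template(promptTemplate_type="llama", history=True)

qa = RetrievalQA.from_chain_type(
    llm=llm,                      # e.g. the LlamaCpp instance from load_models.py
    chain_type="stuff",           # stuff retrieved chunks into the prompt's {context}
    retriever=db.as_retriever(),  # hypothetical Chroma vector-store retriever
    chain_type_kwargs={"prompt": prompt, "memory": memory},
)
answer = qa("What does the provided context say?")["result"]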