Daniel Marques committed on
Commit 61d38da
1 Parent(s): 415fe69

feat: add ministral model

Files changed (2):
  1. constants.py +5 -5
  2. prompt_template_utils.py +4 -4
constants.py CHANGED
@@ -32,7 +32,7 @@ CHROMA_SETTINGS = Settings(
  )
 
  # Context Window and Max New Tokens
- CONTEXT_WINDOW_SIZE = 2048
+ CONTEXT_WINDOW_SIZE = 3000
  MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)
 
  #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
@@ -98,14 +98,14 @@ EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Ac
  #### (FOR GGUF MODELS)
  ####
 
- MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
- MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
+ # MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
+ # MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
 
  # MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
  # MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"
 
- # MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
- # MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
+ MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+ MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
 
  # MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
  # MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
prompt_template_utils.py CHANGED
@@ -8,9 +8,9 @@ from langchain.memory import ConversationBufferMemory
  from langchain.prompts import PromptTemplate
  from langchain.memory.chat_message_histories import RedisChatMessageHistory
 
- message_history = RedisChatMessageHistory(
-     url="redis://localhost:6379/1", ttl=600, session_id="my-session"
- )
+ # message_history = RedisChatMessageHistory(
+ #     url="redis://localhost:6379/1", ttl=600, session_id="my-session"
+ # )
 
  # this is specific to Llama-2.
 
@@ -89,7 +89,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
  )
  prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
 
- memory = ConversationBufferMemory(input_key="question", memory_key="history", chat_memory=message_history)
+ memory = ConversationBufferMemory(input_key="question", memory_key="history")
 
  return (
      prompt,
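
Editor's note: the memory change above drops the Redis-backed history, so ConversationBufferMemory falls back to its default in-process chat history (lost on restart, not shared across processes). A minimal sketch of the before/after behavior, assuming the legacy langchain.memory API this file imports; the example strings are illustrative.

# Minimal sketch of the before/after behavior (legacy langchain.memory API,
# as imported in prompt_template_utils.py; example strings are illustrative).
from langchain.memory import ConversationBufferMemory

# After this commit: history lives in process memory only.
memory = ConversationBufferMemory(input_key="question", memory_key="history")
memory.save_context({"question": "What is localGPT?"}, {"output": "A local RAG app."})
print(memory.load_memory_variables({})["history"])

# Before this commit, the same buffer was backed by Redis (persistent,
# shared across processes, expiring after ttl seconds):
# from langchain.memory.chat_message_histories import RedisChatMessageHistory
# message_history = RedisChatMessageHistory(
#     url="redis://localhost:6379/1", ttl=600, session_id="my-session"
# )
# memory = ConversationBufferMemory(
#     input_key="question", memory_key="history", chat_memory=message_history
# )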