pragneshbarik committed • Commit a9fbba5
1 Parent(s): 1ae5964
fixed errors
Files changed:
- components/generate_chat_stream.py +4 -10
- config.yaml +2 -0
- middlewares/chat_client.py +15 -13
components/generate_chat_stream.py CHANGED
@@ -3,7 +3,7 @@ from middlewares.utils import gen_augmented_prompt_via_websearch
 from middlewares.chat_client import chat
 
 
-def generate_chat_stream(session_state, prompt, config):
+def generate_chat_stream(session_state, query, config):
     # 1. augments prompt according to the template
     # 2. returns chat_stream and source links
     # 3. chat_stream and source links are used by stream_handler and show_source
@@ -11,8 +11,8 @@ def generate_chat_stream(session_state, prompt, config):
     links = []
     if session_state.rag_enabled:
         with st.spinner("Fetching relevent documents from Web...."):
-            prompt, links = gen_augmented_prompt_via_websearch(
-                prompt=prompt,
+            query, links = gen_augmented_prompt_via_websearch(
+                prompt=query,
                 pre_context=session_state.pre_context,
                 post_context=session_state.post_context,
                 pre_prompt=session_state.pre_prompt,
@@ -25,12 +25,6 @@ def generate_chat_stream(session_state, prompt, config):
         )
 
     with st.spinner("Generating response..."):
-        chat_stream = chat(
-            prompt,
-            session_state.history,
-            chat_client=chat_bot_dict[session_state.chat_bot],
-            temperature=session_state.temp,
-            max_new_tokens=session_state.max_tokens,
-        )
+        chat_stream = chat(session_state, query, config)
 
     return chat_stream, links
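For context, a minimal sketch of how the reworked generate_chat_stream could be called from the Streamlit app; the config loading, the chat_input prompt, and the downstream consumer names (stream_handler, show_source) are assumptions based on the comments above, not part of this commit.

import streamlit as st
import yaml

from components.generate_chat_stream import generate_chat_stream

# load the YAML config once; chat() now reads CHAT_BOTS from it
with open("config.yaml") as f:
    config = yaml.safe_load(f)

query = st.chat_input("Ask something")
if query:
    # returns the token stream plus any web-search source links
    chat_stream, links = generate_chat_stream(st.session_state, query, config)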
config.yaml CHANGED
@@ -17,4 +17,6 @@ CHAT_BOTS:
   Mistral 7B v0.1: mistralai/Mistral-7B-Instruct-v0.1
   Mistral 7B v0.2: mistralai/Mistral-7B-Instruct-v0.2
 
+CROSS_ENCODERS:
+
 COST_PER_1000_TOKENS_USD: 0.001737375
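A small sketch of how the updated config might be read with PyYAML (an assumption; the repo's own loader is not shown in this diff). The new CROSS_ENCODERS key is left empty, which PyYAML parses as None, so lookups should guard against that.

import yaml

with open("config.yaml") as f:
    config = yaml.safe_load(f)

chat_bot_dict = config["CHAT_BOTS"]                   # display name -> model id mapping
cross_encoders = config.get("CROSS_ENCODERS") or {}   # empty key loads as None
cost_per_1k = config["COST_PER_1000_TOKENS_USD"]      # 0.001737375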
middlewares/chat_client.py CHANGED
@@ -9,7 +9,7 @@ API_TOKEN = os.getenv("HF_TOKEN")
 
 
 
-def format_prompt(session_state,query, history, chat_client):
+def format_prompt(session_state ,query, history, chat_client):
     if chat_client=="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO" :
         model_input = f"""<|im_start|>system
 {session_state.system_instruction}
@@ -37,22 +37,23 @@ def format_prompt(session_state,query, history, chat_client):
     return model_input
 
 
-def chat(
-    prompt,
-    history,
-    chat_client="mistralai/Mistral-7B-Instruct-v0.1",
-    temperature=0.9,
-    max_new_tokens=256,
-    top_p=0.95,
-    repetition_penalty=1.0,
-    truncate = False
-):
+def chat(session_state, query, config):
 
+
+
+    chat_bot_dict = config["CHAT_BOTS"]
+    chat_client = chat_bot_dict[session_state.chat_bot]
+    temperature = session_state.temp
+    max_new_tokens = session_state.max_tokens
+    repetition_penalty = session_state.repetition_penalty
+    history = session_state.history
+
+
     client = InferenceClient(chat_client, token=API_TOKEN)
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
-    top_p = float(top_p)
+    top_p = float(0.95)
 
     generate_kwargs = dict(
         temperature=temperature,
@@ -63,7 +64,7 @@ def chat(
         seed=42,
     )
 
-    formatted_prompt = format_prompt(
+    formatted_prompt = format_prompt(session_state, query, history, chat_client)
 
     stream = client.text_generation(
         formatted_prompt,
@@ -71,6 +72,7 @@ def chat(
         stream=True,
         details=True,
         return_full_text=False,
+        truncate = 32000
     )
 
     return stream
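For reference, a minimal sketch of consuming the stream returned by chat(); the stream_handler name comes from the comments in generate_chat_stream.py, but its body and the use of st.empty() for incremental rendering are assumptions.

import streamlit as st

def stream_handler(chat_stream):
    # with stream=True and details=True, InferenceClient.text_generation
    # yields chunks whose generated token sits under chunk.token
    placeholder = st.empty()
    text = ""
    for chunk in chat_stream:
        if not chunk.token.special:
            text += chunk.token.text
            placeholder.markdown(text)  # render incrementally in the UI
    return text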