LVKinyanjui committed
Commit 71c54ff · 1 Parent(s): 8790464

Implemented LLM chat history; modified model inference module to try to resolve import errors

Files changed (2):
  1. inference_main.py +8 -2
  2. modules/inference/instruct.py +20 -22
inference_main.py CHANGED
@@ -1,10 +1,16 @@
 import streamlit as st
-from modules.inference.instruct import infer
+from modules.inference.instruct import infer, load_model
 
 st.write("## Ask your Local LLM")
 text_input = st.text_input("Query", value="Why is the sky Blue")
 submit = st.button("Submit")
 
+@st.cache_resource
+def load_model_cached():
+    return load_model()
+
+model = load_model_cached()
+
 if submit:
-    response = infer(text_input)
+    response = infer(model, text_input)
     response
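For context on the caching change above: Streamlit reruns the entire script on every widget interaction, so without @st.cache_resource the pipeline would be rebuilt on each Submit click. Below is a minimal, self-contained sketch of the same pattern; the dict stand-in and the time.sleep placeholder for the slow load_model() call are illustrative assumptions, not code from this repo.

# Minimal sketch of the @st.cache_resource pattern used in the new inference_main.py.
# st.cache_resource memoizes the return value once per server process, so the
# expensive resource is built on the first run and reused on every rerun.
import time
import streamlit as st

@st.cache_resource
def load_model_cached():
    time.sleep(5)                    # stand-in for the slow transformers pipeline load
    return {"name": "stub-model"}    # stand-in for the real pipeline object

model = load_model_cached()  # slow on the first run, instant on every rerun
st.write(model["name"])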
modules/inference/instruct.py CHANGED
@@ -41,46 +41,44 @@ def load_model():
     )
     return pipeline
 
-pipeline = load_model()
 
-message_store_path = "messages.jsonl"
-
-messages: list[dict] = [
-    {"role": "system", "content": SYSTEM_MESSAGE},
-]
-
-if os.path.exists(message_store_path):
-    with open(message_store_path, "r", encoding="utf-8") as f:
-        messages = [json.loads(line) for line in f]
-        print(messages)
-
-def infer(message: str):
+def infer(model, message: str, n_output_tokens=256, message_store_path: str = "messages.jsonl"):
     """
     Params:
         message: Most recent query to the llm.
+        messages: Chat history up to current point properly formatted like
+            {"role": "user", "content": "What is your name?"}
     """
+    if os.path.exists(message_store_path):
+        with open(message_store_path, "r", encoding="utf-8") as f:
+            messages = [json.loads(line) for line in f]
+    else:
+        messages = [
+            {"role": "system", "content": SYSTEM_MESSAGE},
+        ]
     messages.append({"role": "user", "content": message})
+    print(messages)
 
     # Perform inference
-    output = pipeline(
+    outputs = model(
         messages,
-        max_new_tokens=MAX_NEW_TOKENS)
-
-    output_text = output[-1]['generated_text'][-1]['content']
-
+        max_new_tokens=n_output_tokens)
+    output: list = outputs[0]["generated_text"]
+
     # Save the newly updated messages object
-    with open(message_store_path, "w", encoding="utf-8") as f:
+    with open(message_store_path, "a", encoding="utf-8") as f:
         for line in output:
             json.dump(line, f)
             f.write("\n")
-
-    return output_text
+
+    return output[-1]['content']
 
 if __name__ == "__main__":
+    model = load_model()
     while True:
         print("Press Ctrl + C to exit.")
        message = input("Ask a question.")
-        print(infer(message))
+        print(infer(model, message))
 
         print("---------------------------------------")
         print("\n\n")