Christopher Capobianco committed on
Commit
27cd57b
·
1 Parent(s): 963c420

Enable llama-cpp-python to load fine-tuned and quantized LLM

Browse files
Files changed (1) hide show
  1. projects/07_LLM_Fine_Tuned.py +24 -24
projects/07_LLM_Fine_Tuned.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- # from llama_cpp import Llama
3
  import re
4
 
5
  st.header('Fine Tuned LLM', divider='green')
@@ -29,36 +29,36 @@ def chat_action(prompt):
29
  st.session_state["messages"].append({"role": "user", "content": prompt})
30
  st.chat_message("user").write(prompt)
31
 
32
- # with st.spinner(f"Generating response"):
33
- # response = llm.create_chat_completion(
34
- # messages=st.session_state.messages,
35
- # temperature = 0.7,
36
- # repeat_penalty = 1.1,
37
- # stop = "[/INST]"
38
- # )
39
- # msg = response['choices'][0]['message']['content']
40
- # msg = re.sub(r'(<<|\[)*(INST|SYS)(>>|\])*', '', msg)
41
- # st.session_state["messages"].append({"role": "assistant", "content": msg})
42
- # st.chat_message("assistant").write(msg)
43
 
44
- # @st.cache_resource
45
- # def load_llm():
46
- # #### Import Model from Huggingface
47
- # llm = Llama.from_pretrained(
48
- # repo_id="ccapo/llama-3.1-8b-chat-math-teacher-GGUF",
49
- # filename="*Q4_K_M.gguf",
50
- # verbose=False,
51
- # n_ctx=2048
52
- # )
53
- # return llm
54
 
55
  for msg in st.session_state.messages:
56
  if msg["role"] != "system":
57
  with st.chat_message(name=msg["role"]):
58
  st.write(msg["content"])
59
 
60
- # with st.spinner(f"Loading LLM"):
61
- # llm = load_llm()
62
 
63
  if prompt := st.chat_input():
64
  chat_action(prompt)
 
1
  import streamlit as st
2
+ from llama_cpp import Llama
3
  import re
4
 
5
  st.header('Fine Tuned LLM', divider='green')
 
29
  st.session_state["messages"].append({"role": "user", "content": prompt})
30
  st.chat_message("user").write(prompt)
31
 
32
+ with st.spinner(f"Generating response"):
33
+ response = llm.create_chat_completion(
34
+ messages=st.session_state.messages,
35
+ temperature = 0.7,
36
+ repeat_penalty = 1.1,
37
+ stop = "[/INST]"
38
+ )
39
+ msg = response['choices'][0]['message']['content']
40
+ msg = re.sub(r'(<<|\[)*(INST|SYS)(>>|\])*', '', msg)
41
+ st.session_state["messages"].append({"role": "assistant", "content": msg})
42
+ st.chat_message("assistant").write(msg)
43
 
44
+ @st.cache_resource
45
+ def load_llm():
46
+ #### Import Model from Huggingface
47
+ llm = Llama.from_pretrained(
48
+ repo_id="ccapo/llama-3.1-8b-chat-math-teacher-GGUF",
49
+ filename="*Q4_K_M.gguf",
50
+ verbose=False,
51
+ n_ctx=2048
52
+ )
53
+ return llm
54
 
55
  for msg in st.session_state.messages:
56
  if msg["role"] != "system":
57
  with st.chat_message(name=msg["role"]):
58
  st.write(msg["content"])
59
 
60
+ with st.spinner(f"Loading LLM"):
61
+ llm = load_llm()
62
 
63
  if prompt := st.chat_input():
64
  chat_action(prompt)