Christopher Capobianco committed
Commit 27cd57b · 1 Parent(s): 963c420
Enable llama-cpp-python to load fine tuned and quantized LLM

projects/07_LLM_Fine_Tuned.py  +24 -24
projects/07_LLM_Fine_Tuned.py CHANGED

@@ -1,5 +1,5 @@
 import streamlit as st
-
+from llama_cpp import Llama
 import re
 
 st.header('Fine Tuned LLM', divider='green')
@@ -29,36 +29,36 @@ def chat_action(prompt):
     st.session_state["messages"].append({"role": "user", "content": prompt})
     st.chat_message("user").write(prompt)
 
-
-
-
-
-
-
-
-
-
-
-
+    with st.spinner(f"Generating response"):
+        response = llm.create_chat_completion(
+            messages=st.session_state.messages,
+            temperature = 0.7,
+            repeat_penalty = 1.1,
+            stop = "[/INST]"
+        )
+        msg = response['choices'][0]['message']['content']
+        msg = re.sub(r'(<<|\[)*(INST|SYS)(>>|\])*', '', msg)
+        st.session_state["messages"].append({"role": "assistant", "content": msg})
+        st.chat_message("assistant").write(msg)
 
-
-
-
-
-
-
-
-
-
-
+@st.cache_resource
+def load_llm():
+    #### Import Model from Huggingface
+    llm = Llama.from_pretrained(
+        repo_id="ccapo/llama-3.1-8b-chat-math-teacher-GGUF",
+        filename="*Q4_K_M.gguf",
+        verbose=False,
+        n_ctx=2048
+    )
+    return llm
 
 for msg in st.session_state.messages:
     if msg["role"] != "system":
         with st.chat_message(name=msg["role"]):
             st.write(msg["content"])
 
-
-
+with st.spinner(f"Loading LLM"):
+    llm = load_llm()
 
 if prompt := st.chat_input():
     chat_action(prompt)
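For reference, a minimal standalone sketch of the llama-cpp-python calls this commit introduces, run outside Streamlit. The repo_id, filename, sampling parameters, and stop token are taken from the diff; the single user message is a hypothetical stand-in, since the app builds its real history in st.session_state.messages.

from llama_cpp import Llama

# Download the matching GGUF file from the Hugging Face Hub and load it.
# Llama.from_pretrained requires the huggingface-hub package.
llm = Llama.from_pretrained(
    repo_id="ccapo/llama-3.1-8b-chat-math-teacher-GGUF",
    filename="*Q4_K_M.gguf",  # glob selecting the 4-bit Q4_K_M quantization
    verbose=False,
    n_ctx=2048,               # context window in tokens
)

response = llm.create_chat_completion(
    messages=[
        # Hypothetical message; the app passes st.session_state.messages here.
        {"role": "user", "content": "What is the derivative of x**2?"},
    ],
    temperature=0.7,
    repeat_penalty=1.1,
    stop="[/INST]",  # halt on the Llama-2-style instruction close tag
)
print(response["choices"][0]["message"]["content"])

Because Llama.from_pretrained both downloads and loads the model, the @st.cache_resource wrapper in the app matters: it keeps one loaded model per process instead of reloading on every Streamlit script rerun.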
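The regular expression used to clean the model output can also be checked in isolation; a quick sketch of what the pattern strips, and what it misses:

import re

# Pattern from the commit: removes Llama-2-style prompt markers
# such as [INST] and <<SYS>> from generated text.
pattern = r'(<<|\[)*(INST|SYS)(>>|\])*'

print(re.sub(pattern, '', '<<SYS>>You are a math teacher.'))  # You are a math teacher.
print(re.sub(pattern, '', '[INST]What is 2+2?'))              # What is 2+2?

# Caveat: the pattern has no '/' alternative, so closing tags are only
# partially stripped: '[/INST]' leaves '[/' behind.
print(re.sub(pattern, '', 'Answer[/INST]'))                   # Answer[/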