Christopher Capobianco committed on
Commit
27cd57b
·
1 Parent(s): 963c420

Enable llama-cpp-python to load fine-tuned and quantized LLM

Browse files
Files changed (1) hide show
  1. projects/07_LLM_Fine_Tuned.py +24 -24
projects/07_LLM_Fine_Tuned.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- # from llama_cpp import Llama
3
  import re
4
 
5
  st.header('Fine Tuned LLM', divider='green')
@@ -29,36 +29,36 @@ def chat_action(prompt):
29
  st.session_state["messages"].append({"role": "user", "content": prompt})
30
  st.chat_message("user").write(prompt)
31
 
32
- # with st.spinner(f"Generating response"):
33
- # response = llm.create_chat_completion(
34
- # messages=st.session_state.messages,
35
- # temperature = 0.7,
36
- # repeat_penalty = 1.1,
37
- # stop = "[/INST]"
38
- # )
39
- # msg = response['choices'][0]['message']['content']
40
- # msg = re.sub(r'(<<|\[)*(INST|SYS)(>>|\])*', '', msg)
41
- # st.session_state["messages"].append({"role": "assistant", "content": msg})
42
- # st.chat_message("assistant").write(msg)
43
 
44
- # @st.cache_resource
45
- # def load_llm():
46
- # #### Import Model from Huggingface
47
- # llm = Llama.from_pretrained(
48
- # repo_id="ccapo/llama-3.1-8b-chat-math-teacher-GGUF",
49
- # filename="*Q4_K_M.gguf",
50
- # verbose=False,
51
- # n_ctx=2048
52
- # )
53
- # return llm
54
 
55
  for msg in st.session_state.messages:
56
  if msg["role"] != "system":
57
  with st.chat_message(name=msg["role"]):
58
  st.write(msg["content"])
59
 
60
- # with st.spinner(f"Loading LLM"):
61
- # llm = load_llm()
62
 
63
  if prompt := st.chat_input():
64
  chat_action(prompt)
 
1
  import streamlit as st
2
+ from llama_cpp import Llama
3
  import re
4
 
5
  st.header('Fine Tuned LLM', divider='green')
 
29
  st.session_state["messages"].append({"role": "user", "content": prompt})
30
  st.chat_message("user").write(prompt)
31
 
32
+ with st.spinner(f"Generating response"):
33
+ response = llm.create_chat_completion(
34
+ messages=st.session_state.messages,
35
+ temperature = 0.7,
36
+ repeat_penalty = 1.1,
37
+ stop = "[/INST]"
38
+ )
39
+ msg = response['choices'][0]['message']['content']
40
+ msg = re.sub(r'(<<|\[)*(INST|SYS)(>>|\])*', '', msg)
41
+ st.session_state["messages"].append({"role": "assistant", "content": msg})
42
+ st.chat_message("assistant").write(msg)
43
 
44
+ @st.cache_resource
45
+ def load_llm():
46
+ #### Import Model from Huggingface
47
+ llm = Llama.from_pretrained(
48
+ repo_id="ccapo/llama-3.1-8b-chat-math-teacher-GGUF",
49
+ filename="*Q4_K_M.gguf",
50
+ verbose=False,
51
+ n_ctx=2048
52
+ )
53
+ return llm
54
 
55
  for msg in st.session_state.messages:
56
  if msg["role"] != "system":
57
  with st.chat_message(name=msg["role"]):
58
  st.write(msg["content"])
59
 
60
+ with st.spinner(f"Loading LLM"):
61
+ llm = load_llm()
62
 
63
  if prompt := st.chat_input():
64
  chat_action(prompt)