lalanikarim committed
Commit
5f3a384
1 Parent(s): 784a3bb

rearranged code. added inline comments.

Files changed (2)
  1. .gitignore +1 -0
  2. main.py +96 -29
.gitignore CHANGED
@@ -1,3 +1,4 @@
+models/**
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
main.py CHANGED
@@ -3,56 +3,123 @@ from langchain.llms import LlamaCpp
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
 from langchain.callbacks.manager import CallbackManager
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-
-
+from langchain.callbacks.base import BaseCallbackHandler
+
+
+# StreamHandler to intercept streaming output from the LLM.
+# This makes it appear that the Language Model is "typing"
+# in realtime.
+class StreamHandler(BaseCallbackHandler):
+    def __init__(self, container, initial_text=""):
+        self.container = container
+        self.text = initial_text
+
+    def on_llm_new_token(self, token: str, **kwargs) -> None:
+        self.text += token
+        self.container.markdown(self.text)
+
+
+# The main loop of the Streamlit application. This is not a typical main()
+# function. Streamlit runs this code in its entirety every time any input
+# changes on the webpage.
+#
+# P.S.: Initializing the LLM and the LangChain chain in here seems
+# counterproductive. Hopefully there is a better prescribed way to initialize
+# and manage expensive resources and reference them from within here. But for
+# the sake of example, let's not worry about that now.
 def main():
+
+    # Set the webpage title
     st.set_page_config(
         page_title="Your own Chat!"
     )
+
+    # Create a header element
     st.header("Your own Chat!")
 
+    # This sets the LLM's personality.
+    # The initial personality provided is basic.
+    # Try something interesting and notice how the LLM's responses are affected.
+    system_prompt = st.text_area(
+        label="System Prompt",
+        value="You are a helpful AI assistant who answers questions in short sentences.",
+        key="system_prompt")
+
+    # We store the conversation in the session state.
+    # This will be used to render the chat conversation.
+    # We initialize it with the first message we want to be greeted with.
     if "messages" not in st.session_state:
-        st.session_state.messages = []
+        st.session_state.messages = [
+            {"role": "assistant", "content": "How may I help you today?"}
+        ]
 
+    # We loop through each message in the session state and render it as
+    # a chat message.
     for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.markdown(message["content"])
 
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-
-    llm = LlamaCpp(
-        model_path="mistral-7b-instruct-v0.1.Q4_0.gguf",
-        temperature=0,
-        max_tokens=512,
-        top_p=1,
-        callback_manager=callback_manager,
-        verbose=True,
-    )
-
-    template = """
-    You are a funny AI bot who answers questions in a couple of lines.
-
-    {question}
-    """
-
-    prompt = PromptTemplate(template=template, input_variables=["question"])
-
-    llm_chain = LLMChain(prompt=prompt, llm=llm)
-
-    if prompt := st.chat_input("Your message here", key="user_input"):
+    # We take questions/instructions from the chat input to pass to the LLM
+    if user_prompt := st.chat_input("Your message here", key="user_input"):
+
+        # Add our input to the session state
         st.session_state.messages.append(
-            {"role": "user", "content": prompt}
+            {"role": "user", "content": user_prompt}
        )
 
+        # Add our input to the chat window
         with st.chat_message("user"):
-            st.markdown(prompt)
-
-        response = llm_chain.run(prompt)
+            st.markdown(user_prompt)
+        # A stream handler to direct streaming output on the chat screen.
+        # This will need to be handled somewhat differently.
+        # But it demonstrates what potential it carries.
+        stream_handler = StreamHandler(st.empty())
+
+        # Callback manager is a way to intercept streaming output from the
+        # LLM and take some action on it. Here we are giving it our custom
+        # stream handler to make it appear as if the LLM is typing the
+        # responses in real time.
+        callback_manager = CallbackManager([stream_handler])
+
+        # We initialize the quantized LLM from a local path.
+        # Currently most parameters are fixed, but we can make them
+        # configurable.
+        llm = LlamaCpp(
+            model_path="models/mistral-7b-instruct-v0.1.Q4_0.gguf",
+            temperature=0,
+            max_tokens=512,
+            top_p=1,
+            callback_manager=callback_manager,
+            verbose=True,
+        )
+
+        # Template for the prompt. I am still trying to figure out what exactly
+        # is needed here and whether we need parameters etc. This may
+        # ultimately be dictated by the model you use.
+        template = """
+        {}
+
+        {}
+        """.format(system_prompt, "{question}")
+
+        # We create a prompt from the template so we can use it with LangChain
+        prompt = PromptTemplate(template=template, input_variables=["question"])
+
+        # We create an LLM chain with our LLM and prompt
+        llm_chain = LLMChain(prompt=prompt, llm=llm)
+
+        # Pass our input to the LLM chain and capture the final response.
+        # It is worth noting that the stream handler is already receiving the
+        # streaming response while the LLM is generating. We get our response
+        # here once the LLM has finished generating the complete response.
+        response = llm_chain.run(user_prompt)
+
+        # Add the response to the session state
         st.session_state.messages.append(
             {"role": "assistant", "content": response}
         )
 
+        # Add the response to the chat window
         with st.chat_message("assistant"):
             st.markdown(response)
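
The commit's own P.S. comment notes that building the LlamaCpp model inside main() means it is reloaded on every Streamlit rerun. One possible refinement is sketched below; it assumes Streamlit's st.cache_resource decorator and LangChain's per-call callbacks argument on Chain.run, and load_llm/answer are hypothetical helpers that are not part of this commit.

# Sketch only (not part of this commit): cache the model across Streamlit
# reruns so the GGUF file is loaded once, and attach the per-message stream
# handler at call time instead of baking it into the cached LLM.
import streamlit as st
from langchain.llms import LlamaCpp
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate


@st.cache_resource  # Streamlit keeps a single LlamaCpp instance across reruns
def load_llm():
    # Hypothetical helper; same model path and sampling parameters as main.py
    return LlamaCpp(
        model_path="models/mistral-7b-instruct-v0.1.Q4_0.gguf",
        temperature=0,
        max_tokens=512,
        top_p=1,
        verbose=True,
    )


def answer(question: str, prompt: PromptTemplate, handler) -> str:
    # Hypothetical helper; callbacks passed per call reach the stream handler
    # while the model generates, without rebuilding the model itself.
    chain = LLMChain(prompt=prompt, llm=load_llm())
    return chain.run(question, callbacks=[handler])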
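
On the template question raised in the comments ("this may ultimately be dictated by the model you use"): Mistral-7B-Instruct was fine-tuned on an [INST] ... [/INST] instruction format, so a variant along the following lines may be worth trying. This is an untested sketch that mirrors the commit's use of str.format, not the committed code.

# Sketch of an instruction-formatted template for Mistral-7B-Instruct;
# the placeholders mirror main.py: the system prompt first, then the
# LangChain {question} variable, wrapped in [INST] ... [/INST] tags.
template = """[INST] {}

{} [/INST]""".format(system_prompt, "{question}")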