Spaces:

Doogie-Kim
/

LLM101

Sleeping

App Files Files Community

LLM101 / streamlit_app.py

Doogie-Kim

Update streamlit_app.py

a7788f7 verified 7 months ago

raw

history blame contribute delete

3.72 kB

	import streamlit as st
	from langchain_openai import ChatOpenAI, OpenAIEmbeddings
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.document_loaders import TextLoader
	from langchain_core.vectorstores import InMemoryVectorStore
	from langchain.chains import create_retrieval_chain
	from langchain.chains.combine_documents import create_stuff_documents_chain
	from langchain_core.prompts import ChatPromptTemplate

	# Page heading shown at the top of the app.
	st.title("Ask Anything About KSEA members")

	# Logo banner rendered right under the heading.
	# NOTE(review): `use_column_width` is reported as deprecated in newer
	# Streamlit releases -- confirm against the Streamlit version pinned for
	# this Space before changing it.
	st.image(
	    "assets/LLM logo.png",
	    caption="LLM 101",
	    use_column_width=True,
	)

	@st.cache_resource
	def get_models():
	    """Create the embedding model and the chat LLM used by the app.

	    Both clients authenticate with the ``LITELLM_KEY`` Streamlit secret and
	    talk to the same base URL. The function is wrapped in
	    ``st.cache_resource``.

	    Returns:
	        tuple: ``(embeddings_model, llm)`` -- an ``OpenAIEmbeddings``
	        instance followed by a ``ChatOpenAI`` instance.
	    """
	    # Connection settings shared by both clients.
	    connection = {
	        "api_key": st.secrets["LITELLM_KEY"],
	        "base_url": "https://llm.nrp-nautilus.io",
	    }

	    embedder = OpenAIEmbeddings(model="embed-mistral", **connection)

	    # temperature=0 is passed explicitly for the chat model only.
	    chat_model = ChatOpenAI(model="llama3", temperature=0, **connection)

	    return embedder, chat_model

	# Initialize models: bind the embedding model and the chat LLM returned by
	# get_models() to module-level names. They are used further down when the
	# vector store and the question-answering chain are built.
	embeddings_model, llm = get_models()

	@st.cache_resource
	def _build_retriever(file_path, chunk_size, chunk_overlap):
	    """Load ``file_path``, split it into chunks, embed them and return a retriever.

	    This is the cached part of the work. It deliberately lets exceptions
	    propagate: ``st.cache_resource`` stores whatever the function returns,
	    so turning a failure into a ``None`` return value here would keep that
	    failure in the cache for later runs.

	    Args:
	        file_path: Path of the UTF-8 text file to index.
	        chunk_size: Maximum chunk length, measured with ``len``.
	        chunk_overlap: Number of characters shared by neighbouring chunks.

	    Returns:
	        The retriever produced by ``InMemoryVectorStore.as_retriever()``.
	    """
	    loader = TextLoader(file_path, encoding='utf-8')
	    document = loader.load()

	    text_splitter = RecursiveCharacterTextSplitter(
	        chunk_size=chunk_size,
	        chunk_overlap=chunk_overlap,
	        length_function=len,
	        separators=["\n\n", "\n", " ", ""],
	    )
	    chunks = text_splitter.split_documents(document)

	    # Uses the module-level embeddings_model created by get_models().
	    vectorstore = InMemoryVectorStore.from_documents(
	        documents=chunks,
	        embedding=embeddings_model,
	    )
	    return vectorstore.as_retriever()


	def initialize_retriever(file_path, chunk_size=1000, chunk_overlap=200):
	    """Return a retriever over ``file_path``, or ``None`` if it cannot be built.

	    Successful results are cached by ``_build_retriever``. Failures are
	    reported with ``st.error`` and are not cached, so the next run of the
	    script tries again instead of reusing a stored ``None``.

	    Args:
	        file_path: Path of the UTF-8 text file to index.
	        chunk_size: Maximum chunk length passed to the text splitter.
	        chunk_overlap: Overlap between consecutive chunks.

	    Returns:
	        A retriever on success, ``None`` when loading, splitting or
	        embedding raised an exception.
	    """
	    try:
	        return _build_retriever(file_path, chunk_size, chunk_overlap)
	    except Exception as e:
	        # UI boundary: show the problem to the user rather than crashing
	        # the whole page; callers must handle the None result.
	        st.error(f"Error processing file: {str(e)}")
	        return None

	def create_rag_chain(retriever):
	    """Build the retrieval-augmented generation chain for the chat UI.

	    The system prompt sets the assistant's persona and answer rules and
	    ends with the ``{context}`` placeholder; the user's question is
	    supplied through the ``{input}`` placeholder of the human message.
	    The chain uses the module-level ``llm``.

	    Args:
	        retriever: Retriever that supplies the documents for the chain.

	    Returns:
	        The chain returned by ``create_retrieval_chain``.
	    """
	    # The sensitive-topics rule below previously lacked its negation and
	    # told the model to make such jokes, contradicting "Keep it
	    # professional"; it now starts with "Never".
	    system_prompt = """
	You are a witty assistant who helps people navigate the KSEA member directory, but with a comedic twist! Think of yourself as a sassy yearbook editor who knows all the gossip (but keeps it professional).

	When answering questions:
	- Maximum five sentences, keep it snappy!
	- Must include at least one light-hearted joke or playful comment
	- If someone asks about 회장 (president), just say "Julia Kim and Emily Park are the co-presidents" with a fun twist
	- For leadership questions, only use the official position info from the top of the file
	- If you don't know something, admit it with humor
	- Be extra playful when describing positions/roles
	- Keep it professional but fun
	- Never make jokes about sensitive topics or personal characteristics

	Remember: You're here to inform AND entertain! Think "Parks & Recreation's Leslie Knope meets Korean Student Association"

	{context}
	"""

	    prompt = ChatPromptTemplate.from_messages([
	        ("system", system_prompt),
	        ("human", "{input}"),
	    ])

	    question_answer_chain = create_stuff_documents_chain(llm, prompt)
	    return create_retrieval_chain(retriever, question_answer_chain)

	# Initialize retriever and create RAG chain
	retriever = initialize_retriever("revised_text.txt")
	if retriever is None:
	    # initialize_retriever() has already reported the error. Without a
	    # retriever there is no chain to query, so end this script run here
	    # instead of reaching the chat code with rag_chain undefined.
	    st.stop()

	rag_chain = create_rag_chain(retriever)

	# Chat interface
	if prompt := st.chat_input("Ask a question about the document:"):
	    with st.chat_message("user"):
	        st.markdown(prompt)
	    with st.chat_message("assistant"):
	        results = rag_chain.invoke({"input": prompt})
	        st.write(results['answer'])
	        matched_docs = results['context']
	        with st.expander("See context matched"):
	            if matched_docs:
	                # Only the first retrieved document is displayed.
	                st.write(matched_docs[0].page_content)
	                st.write(matched_docs[0].metadata)
	            else:
	                # Guard against an empty retrieval result, which would
	                # otherwise raise IndexError on matched_docs[0].
	                st.write("No matching context was retrieved.")