Spaces:
Sleeping
Sleeping
import streamlit as st | |
from langchain_openai import ChatOpenAI, OpenAIEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.document_loaders import TextLoader | |
from langchain_core.vectorstores import InMemoryVectorStore | |
from langchain.chains import create_retrieval_chain | |
from langchain.chains.combine_documents import create_stuff_documents_chain | |
from langchain_core.prompts import ChatPromptTemplate | |
# Page header: the title is rendered first, then the logo banner beneath it.
st.title("Ask Anything About KSEA members")

# Add the image
# NOTE(review): `use_column_width` is reported as deprecated in newer
# Streamlit releases — confirm against the deployed Streamlit version.
st.image(
    "assets/LLM logo.png",
    caption="LLM 101",
    use_column_width=True,
)
def get_models():
    """Build the embedding model and the chat model used by the app.

    Both clients are pointed at the same base URL and authenticate with the
    ``LITELLM_KEY`` entry read from ``st.secrets``.

    Returns:
        tuple: ``(embeddings_model, llm)`` where the first item is an
        ``OpenAIEmbeddings`` configured for the ``embed-mistral`` model and
        the second is a ``ChatOpenAI`` configured for the ``llama3`` model
        with ``temperature=0``.
    """
    # Connection settings shared by both clients.
    endpoint = {
        "api_key": st.secrets["LITELLM_KEY"],
        "base_url": "https://llm.nrp-nautilus.io",
    }

    embedder = OpenAIEmbeddings(model="embed-mistral", **endpoint)
    chat_model = ChatOpenAI(model="llama3", temperature=0, **endpoint)
    return embedder, chat_model
# Create the embedding model and the chat LLM once at module level; the
# functions defined below read them as globals.
embeddings_model, llm = get_models()
@st.cache_resource
def _build_retriever(file_path, chunk_size, chunk_overlap):
    """Load, split and embed ``file_path`` and return a retriever over it.

    Cached with ``st.cache_resource`` so the (remote) embedding work is done
    once per distinct ``(file_path, chunk_size, chunk_overlap)`` instead of on
    every Streamlit script re-run. The cache key is the argument values only:
    edits to the file's contents are not picked up until the cache is cleared
    or the app restarts.

    Exceptions propagate to the caller on purpose — a failed build must not be
    cached as a result.
    """
    loader = TextLoader(file_path, encoding="utf-8")
    document = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        # Try paragraph breaks first, then lines, then words, then characters.
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = text_splitter.split_documents(document)

    vectorstore = InMemoryVectorStore.from_documents(
        documents=chunks,
        embedding=embeddings_model,
    )
    return vectorstore.as_retriever()


def initialize_retriever(file_path, chunk_size=1000, chunk_overlap=200):
    """Return a retriever over the text file at ``file_path``.

    The file is read as UTF-8, split into overlapping chunks, embedded with
    the module-level ``embeddings_model`` and stored in an in-memory vector
    store. The built retriever is cached across script re-runs (see
    ``_build_retriever``), so asking a new question does not re-embed the
    whole document.

    Args:
        file_path: Path of the text file to index.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared by neighbouring chunks.

    Returns:
        The retriever, or ``None`` if anything failed; in that case the error
        is shown in the UI via ``st.error``.
    """
    try:
        return _build_retriever(file_path, chunk_size, chunk_overlap)
    except Exception as e:  # UI boundary: report the failure, don't crash the page.
        st.error(f"Error processing file: {e}")
        return None
def create_rag_chain(retriever):
    """Build the retrieval-augmented QA chain for the KSEA member directory.

    The chain retrieves documents with ``retriever``, stuffs them into the
    ``{context}`` slot of the system prompt and sends the user's ``{input}``
    to the module-level ``llm``.

    Args:
        retriever: Retriever used to fetch the context documents.

    Returns:
        The chain produced by ``create_retrieval_chain``. The caller invokes
        it with ``{"input": question}`` and reads the ``"answer"`` and
        ``"context"`` keys of the result.
    """
    # Two fixes relative to the previous prompt text:
    #   * the Korean word for "president" had been mis-decoded into Greek
    #     mojibake; it is restored to "회장";
    #   * the rule about sensitive topics had lost its negation and was
    #     telling the model to make such jokes.
    system_prompt = """
You are a witty assistant who helps people navigate the KSEA member directory, but with a comedic twist! Think of yourself as a sassy yearbook editor who knows all the gossip (but keeps it professional).
When answering questions:
- Maximum five sentences, keep it snappy!
- Must include at least one light-hearted joke or playful comment
- If someone asks about 회장 (president), just say "Julia Kim and Emily Park are the co-presidents" with a fun twist
- For leadership questions, only use the official position info from the top of the file
- If you don't know something, admit it with humor
- Be extra playful when describing positions/roles
- Keep it professional but fun
- Never make jokes about sensitive topics or personal characteristics
Remember: You're here to inform AND entertain! Think "Parks & Recreation's Leslie Knope meets Korean Student Association"
{context}
"""
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "{input}"),
    ])
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    return create_retrieval_chain(retriever, question_answer_chain)
# Initialize retriever and create RAG chain
retriever = initialize_retriever("revised_text.txt")

# initialize_retriever() returns None (after showing an error) when the file
# could not be indexed; the chat UI is only offered when a retriever exists,
# so rag_chain is never referenced without having been built.
if retriever is not None:
    rag_chain = create_rag_chain(retriever)

    # Chat interface
    if prompt := st.chat_input("Ask a question about the document:"):
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            results = rag_chain.invoke({"input": prompt})
            st.write(results['answer'])

            with st.expander("See context matched"):
                matched_docs = results['context']
                if matched_docs:
                    # Show the first retrieved chunk and where it came from.
                    st.write(matched_docs[0].page_content)
                    st.write(matched_docs[0].metadata)
                else:
                    # Retrieval can legitimately return nothing; indexing
                    # [0] unconditionally would raise IndexError here.
                    st.write("No matching context was retrieved.")