CongresoRAG / app.py
UlascanAkbulut's picture
Update app.py
1ba75df
# Import
import urllib.parse
import streamlit as st
from RAG_public import RAG
from congreso import congreso as c
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage
# Separate page_content and metadata
def get_pagecontent_metadata(data):
    """
    Separates page content and metadata of the given document.

    The input dict is left untouched: a cleaned copy is built instead, so
    calling this function twice on the same record does not prefix and
    percent-encode "pdf_url" a second time.

    Parameters
    ---------
    data: dict
        Document that has various features such as "id", "mensaje" and "texto"...

    Returns
    -------
    pagecontent_metadata: dict
        Dict with two keys:
        "metadata" holds every key/value pair of the document except "texto"
        (None values are replaced by empty strings, and a non-empty "pdf_url"
        is percent-encoded and prefixed with the congreso.es base URL);
        "page_content" holds the value of "texto", or an empty string when
        "texto" is missing or None.
    """
    # Work on a copy so the caller's record is not modified,
    # and normalise None values to empty strings
    cleaned = {key: ("" if value is None else value) for key, value in data.items()}

    # A non-empty "pdf_url" is treated as a path relative to the site root.
    # A missing "pdf_url" is tolerated and simply left out of the metadata.
    search_base_url = "https://www.congreso.es"
    pdf_url = cleaned.get("pdf_url", "")
    if pdf_url != "":
        cleaned["pdf_url"] = search_base_url + urllib.parse.quote(pdf_url)

    # Defines pagecontent and metadata information
    pagecontent_metadata = {
        "metadata": {key: value for key, value in cleaned.items() if key != "texto"},
        "page_content": cleaned.get("texto", ""),
    }
    return pagecontent_metadata
# Load data
def read_data(max_documents=100):
    """
    Returns list of documents after reading each document. Uses get_pagecontent_metadata function
    to separate content from metadata.

    The list starts with two documents built from the README text files under
    "About_CongresoRAG/", followed by up to `max_documents` records of term "XV"
    loaded through `c.load_jsons`.

    Parameters
    ----------
    max_documents: int, optional
        Upper bound on the number of "XV" records to load (default 100).
        If fewer records are available, all of them are loaded.

    Returns
    ----------
    docs: list
        Document from langchain.schema.document inside a docs list
    """
    # Reads Readme txt files to get information about Congreso RAG and Dataset.
    # The encoding is given explicitly so decoding does not depend on the platform locale.
    # NOTE(review): newlines are removed without inserting a space, so words at
    # line boundaries are joined together - confirm this is intended.
    with open("About_CongresoRAG/CongresoRAG-README.txt", encoding="utf-8") as file:
        CongresoRAG_readme = file.read().replace("\n", "")
    with open("About_CongresoRAG/Dataset-README.txt", encoding="utf-8") as file:
        Dataset_readme = file.read().replace("\n", "")

    # Put page_content and metadata of these txt file into Document format
    doc_CongresoRAG = Document(page_content=CongresoRAG_readme, metadata={"pdf_url":"https://huggingface.co/spaces/IIIACSIC/CongresoRAG/blob/main/About_CongresoRAG/CongresoRAG-README.txt"})
    doc_Dataset = Document(page_content=Dataset_readme, metadata={"pdf_url":"https://zenodo.org/records/11195944"})

    # Creates docs list to store each documents
    docs = [doc_CongresoRAG, doc_Dataset]

    terms = ["XV"]
    t = c.load_jsons(terms)
    records = t["XV"]

    # Never index past the available records (the fixed range(0, 100) raised
    # IndexError when the term held fewer than 100 records)
    for i in range(min(max_documents, len(records))):
        pagecontent_metadata = get_pagecontent_metadata(records[i])
        document = Document(page_content=pagecontent_metadata["page_content"], metadata=pagecontent_metadata["metadata"])
        docs.append(document)
    return docs
# UI (User Interface)
def main():
    """
    Builds the CongresoRAG chat page.

    Steps, in order:
    - sets the page configuration, title and credit line
    - loads the documents into the session state on first run
    - builds the RAG object from those documents on first run and calls its model() method
    - creates an empty chat history on first run
    - shows the chat input box and replays the stored chat history
    - if a query was entered, shows it, passes it to the RAG chain,
      shows the response and stores both messages in the chat history
    """
    # Page header
    st.set_page_config(page_title="CongresoRAG", page_icon="shark")
    st.title("CongresoRAG")
    st.markdown("<small><i style='color: grey;'>Designed by IIIA-CSIC</i></small>", unsafe_allow_html=True)

    # One-time, per-session initialisation
    state = st.session_state
    if "documents" not in state:
        state.documents = read_data()
    if "rag" not in state:
        state.rag = RAG(document=state.documents)
        state.rag.model()
    if "chat_history" not in state:
        state.chat_history = []

    # Query typed by the user in this run (None when nothing was submitted)
    user_query = st.chat_input("Message CongresoRAG")

    # Replay earlier messages, each in the chat bubble of its author
    for past_message in state.chat_history:
        role = "human" if isinstance(past_message, HumanMessage) else "ai"
        with st.chat_message(role):
            st.markdown(past_message.content)

    # Nothing more to do without a non-empty query
    if not user_query:
        return

    # Show and store the new question
    state.chat_history.append(HumanMessage(user_query))
    with st.chat_message("human"):
        st.markdown(user_query)

    # Ask the RAG chain, then show and store its response
    with st.chat_message("ai"):
        answer_text, extra_parts = state.rag.conversational_rag_chain(user_query)
        joined_extras = "\n\n".join(extra_parts)
        ai_response = answer_text + "\n\n" + joined_extras
        state.chat_history.append(AIMessage(ai_response))
        st.markdown(ai_response)
# Run the app only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()