# Imports
import urllib.parse

import streamlit as st
from RAG_public import RAG
from congreso import congreso as c
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage
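# Note: RAG_public and congreso are local project modules, not PyPI packages.
# Based on how they are used below, RAG wraps the retrieval-augmented generation
# chain over the provided documents, and congreso.load_jsons(terms) returns a
# dict keyed by parliamentary term (e.g. "XV") holding a list of intervention records.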
# Separate page_content and metadata
def get_pagecontent_metadata(data):
    """
    Separates the page content and the metadata of the given document.

    Parameters
    ----------
    data : dict
        Document with fields such as "id", "mensaje" and "texto".

    Returns
    -------
    pagecontent_metadata : dict
        Dictionary with "page_content" and "metadata" keys.
        "texto" is used as the page content; the remaining fields become metadata.
    """
    # Replace None values with empty strings so the metadata only contains strings
    for key in data.keys():
        if data[key] is None:
            data[key] = ""
    # Turn the relative PDF path into an absolute URL on the Congreso site
    search_base_url = "https://www.congreso.es"
    if data.get("pdf_url", "") != "":
        data["pdf_url"] = search_base_url + urllib.parse.quote(data["pdf_url"])
    # Define the page content and metadata information
    pagecontent_metadata = {
        "metadata": {key: value for key, value in data.items() if key != "texto"},
        "page_content": data["texto"],
    }
    return pagecontent_metadata
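# Illustrative example (the record shape is assumed from the XV-term JSONs):
#   get_pagecontent_metadata({"id": "1", "texto": "Texto de la intervención",
#                             "mensaje": None, "pdf_url": "/docs/D1.PDF"})
#   -> {"metadata": {"id": "1", "mensaje": "",
#                    "pdf_url": "https://www.congreso.es/docs/D1.PDF"},
#       "page_content": "Texto de la intervención"}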
# Load data
def read_data():
    """
    Returns the list of documents after reading each one. Uses the
    get_pagecontent_metadata function to separate content from metadata.

    Returns
    -------
    docs : list
        List of langchain_core.documents.Document objects.
    """
    # Read the README text files describing CongresoRAG and the dataset
    with open("About_CongresoRAG/CongresoRAG-README.txt") as file:
        CongresoRAG_readme = file.read().replace("\n", "")
    with open("About_CongresoRAG/Dataset-README.txt") as file:
        Dataset_readme = file.read().replace("\n", "")
    # Put the page_content and metadata of these text files into Document format
    doc_CongresoRAG = Document(
        page_content=CongresoRAG_readme,
        metadata={"pdf_url": "https://huggingface.co/spaces/IIIACSIC/CongresoRAG/blob/main/About_CongresoRAG/CongresoRAG-README.txt"},
    )
    doc_Dataset = Document(
        page_content=Dataset_readme,
        metadata={"pdf_url": "https://zenodo.org/records/11195944"},
    )
    # Create the docs list that stores every document
    docs = [doc_CongresoRAG, doc_Dataset]
    # Load the interventions of the XV parliamentary term and keep the first 100
    terms = ["XV"]
    t = c.load_jsons(terms)
    for i in range(100):
        pagecontent_metadata = get_pagecontent_metadata(t["XV"][i])
        document = Document(
            page_content=pagecontent_metadata["page_content"],
            metadata=pagecontent_metadata["metadata"],
        )
        docs.append(document)
    return docs
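# Illustrative usage (hypothetical, run from the project root so the relative
# README paths and the term JSON files resolve):
#   docs = read_data()
#   len(docs)            # 102: the two README documents plus 100 interventions
#   docs[1].metadata     # {"pdf_url": "https://zenodo.org/records/11195944"}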
# UI (User Interface)
def main():
    """
    Sets the page configuration and title.
    Reads the documents if they have not been read yet.
    Builds the RAG model if it has not been built yet.
    Creates the chat history if it does not exist yet.
    Displays the previous messages stored in the chat history.
    Takes the user query and passes it to the RAG model.
    Gets the response from the RAG model and displays it on the screen.
    """
    # Set the page configuration
    st.set_page_config(page_title="CongresoRAG", page_icon="shark")
    st.title("CongresoRAG")
    st.markdown("<small><i style='color: grey;'>Designed by IIIA-CSIC</i></small>", unsafe_allow_html=True)
    # Read the documents if they have not been loaded yet
    if "documents" not in st.session_state:
        st.session_state.documents = read_data()
    # Build the RAG model if it has not been created yet
    if "rag" not in st.session_state:
        st.session_state.rag = RAG(document=st.session_state.documents)
        st.session_state.rag.model()
    # Create the chat history that stores previous questions and answers
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    # Read the user query when it is entered
    user_query = st.chat_input("Message CongresoRAG")
    # Display the previous messages stored in the chat history
    for message in st.session_state.chat_history:
        if isinstance(message, HumanMessage):
            with st.chat_message("human"):
                st.markdown(message.content)
        else:
            with st.chat_message("ai"):
                st.markdown(message.content)
    # Take the user query, get the response from the RAG model,
    # and store both in the chat history
    if user_query is not None and user_query != "":
        st.session_state.chat_history.append(HumanMessage(user_query))
        with st.chat_message("human"):
            st.markdown(user_query)
        with st.chat_message("ai"):
            ai_response0, ai_response1 = st.session_state.rag.conversational_rag_chain(user_query)
            ai_response = ai_response0 + "\n\n" + "\n\n".join(ai_response1)
            st.session_state.chat_history.append(AIMessage(ai_response))
            st.markdown(ai_response)

# Call the main function
if __name__ == "__main__":
    main()
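# To run the app locally (assuming this script is saved as app.py and the
# About_CongresoRAG files plus the term JSONs are available next to it):
#   streamlit run app.py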