# Imports
import urllib.parse

import streamlit as st
from RAG_public import RAG
from congreso import congreso as c
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage
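# Note: RAG_public and congreso are local project modules, not PyPI packages.
# Based on how they are used below, RAG wraps the retrieval-augmented generation
# chain over the provided documents, and congreso.load_jsons(terms) returns a
# dict keyed by parliamentary term (e.g. "XV") holding a list of intervention records.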
# Separate page_content and metadata
def get_pagecontent_metadata(data):
    """
    Separates the page content and the metadata of the given document.

    Parameters
    ----------
    data : dict
        Document with fields such as "id", "mensaje" and "texto".

    Returns
    -------
    pagecontent_metadata : dict
        Dictionary with "page_content" and "metadata" keys.
        "texto" is used as the page content; the remaining fields become metadata.
    """
    # Replace None values with empty strings so the metadata only contains strings
    for key in data.keys():
        if data[key] is None:
            data[key] = ""
    # Turn the relative PDF path into an absolute URL on the Congreso site
    search_base_url = "https://www.congreso.es"
    if data.get("pdf_url", "") != "":
        data["pdf_url"] = search_base_url + urllib.parse.quote(data["pdf_url"])
    # Define the page content and metadata information
    pagecontent_metadata = {
        "metadata": {key: value for key, value in data.items() if key != "texto"},
        "page_content": data["texto"],
    }
    return pagecontent_metadata
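# Illustrative example (the record shape is assumed from the XV-term JSONs):
#   get_pagecontent_metadata({"id": "1", "texto": "Texto de la intervención",
#                             "mensaje": None, "pdf_url": "/docs/D1.PDF"})
#   -> {"metadata": {"id": "1", "mensaje": "",
#                    "pdf_url": "https://www.congreso.es/docs/D1.PDF"},
#       "page_content": "Texto de la intervención"}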
# Load data
def read_data():
    """
    Returns the list of documents after reading each one. Uses the
    get_pagecontent_metadata function to separate content from metadata.

    Returns
    -------
    docs : list
        List of langchain_core.documents.Document objects.
    """
    # Read the README text files describing CongresoRAG and the dataset
    with open("About_CongresoRAG/CongresoRAG-README.txt") as file:
        CongresoRAG_readme = file.read().replace("\n", "")
    with open("About_CongresoRAG/Dataset-README.txt") as file:
        Dataset_readme = file.read().replace("\n", "")
    # Put the page_content and metadata of these text files into Document format
    doc_CongresoRAG = Document(
        page_content=CongresoRAG_readme,
        metadata={"pdf_url": "https://huggingface.co/spaces/IIIACSIC/CongresoRAG/blob/main/About_CongresoRAG/CongresoRAG-README.txt"},
    )
    doc_Dataset = Document(
        page_content=Dataset_readme,
        metadata={"pdf_url": "https://zenodo.org/records/11195944"},
    )
    # Create the docs list that stores every document
    docs = [doc_CongresoRAG, doc_Dataset]
    # Load the interventions of the XV parliamentary term and keep the first 100
    terms = ["XV"]
    t = c.load_jsons(terms)
    for i in range(100):
        pagecontent_metadata = get_pagecontent_metadata(t["XV"][i])
        document = Document(
            page_content=pagecontent_metadata["page_content"],
            metadata=pagecontent_metadata["metadata"],
        )
        docs.append(document)
    return docs
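# Illustrative usage (hypothetical, run from the project root so the relative
# README paths and the term JSON files resolve):
#   docs = read_data()
#   len(docs)            # 102: the two README documents plus 100 interventions
#   docs[1].metadata     # {"pdf_url": "https://zenodo.org/records/11195944"}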
# UI (User Interface)
def main():
    """
    Sets the page configuration and title.
    Reads the documents if they have not been read yet.
    Builds the RAG model if it has not been built yet.
    Creates the chat history if it does not exist yet.
    Displays the previous messages stored in the chat history.
    Takes the user query and passes it to the RAG model.
    Gets the response from the RAG model and displays it on the screen.
    """
    # Set the page configuration
    st.set_page_config(page_title="CongresoRAG", page_icon="shark")
    st.title("CongresoRAG")
    st.markdown("<small><i style='color: grey;'>Designed by IIIA-CSIC</i></small>", unsafe_allow_html=True)
    # Read the documents if they have not been loaded yet
    if "documents" not in st.session_state:
        st.session_state.documents = read_data()
    # Build the RAG model if it has not been created yet
    if "rag" not in st.session_state:
        st.session_state.rag = RAG(document=st.session_state.documents)
        st.session_state.rag.model()
    # Create the chat history that stores previous questions and answers
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    # Read the user query when it is entered
    user_query = st.chat_input("Message CongresoRAG")
    # Display the previous messages stored in the chat history
    for message in st.session_state.chat_history:
        if isinstance(message, HumanMessage):
            with st.chat_message("human"):
                st.markdown(message.content)
        else:
            with st.chat_message("ai"):
                st.markdown(message.content)
    # Take the user query, get the response from the RAG model,
    # and store both in the chat history
    if user_query is not None and user_query != "":
        st.session_state.chat_history.append(HumanMessage(user_query))
        with st.chat_message("human"):
            st.markdown(user_query)
        with st.chat_message("ai"):
            ai_response0, ai_response1 = st.session_state.rag.conversational_rag_chain(user_query)
            ai_response = ai_response0 + "\n\n" + "\n\n".join(ai_response1)
            st.session_state.chat_history.append(AIMessage(ai_response))
            st.markdown(ai_response)

# Call the main function
if __name__ == "__main__":
    main()
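# To run the app locally (assuming this script is saved as app.py and the
# About_CongresoRAG files plus the term JSONs are available next to it):
#   streamlit run app.py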