arman77mxx committed on
Commit
87ea4cf
1 Parent(s): c09131e
Files changed (1)
  1. app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
+ # Streamlit app: chat with uploaded PDFs via a LangChain retrieval chain.
+ import streamlit as st
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_community.vectorstores import Chroma
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain_openai import ChatOpenAI
+ from langchain.chains import ConversationalRetrievalChain
+ import tempfile
+ import os
+
+ st.set_page_config(layout="wide")
+
+ if 'chat_history' not in st.session_state:
+     st.session_state.chat_history = []
+ if 'vector_store' not in st.session_state:
+     st.session_state.vector_store = None
+ if 'selected_llm' not in st.session_state:
+     st.session_state.selected_llm = None
+ if 'api_key' not in st.session_state:
+     st.session_state.api_key = ""
+
+ def process_pdf(pdf_file):
+     # Write the uploaded file to a temporary path so PyPDFLoader can read it.
+     with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+         tmp_file.write(pdf_file.getvalue())
+         tmp_file_path = tmp_file.name
+
+     loader = PyPDFLoader(tmp_file_path)
+     pages = loader.load_and_split()
+
+     # Split the pages into overlapping chunks for retrieval.
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+     chunks = text_splitter.split_documents(pages)
+
+     os.unlink(tmp_file_path)
+     return chunks
+
+ def setup_rag(chunks, selected_llm, api_key):
+     # Embed the chunks and index them in an in-memory Chroma store.
+     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+     vector_store = Chroma.from_documents(chunks, embeddings)
+
+     if selected_llm == "Gemini 1.5 Pro":
+         llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0, google_api_key=api_key)
+     else:
+         llm = ChatOpenAI(model="gpt-4", temperature=0, api_key=api_key)
+
+     # Retrieve the 3 most similar chunks for each question.
+     retriever = vector_store.as_retriever(search_kwargs={"k": 3})
+     chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)
+
+     return chain
+
+ col1, col2 = st.columns([7, 3])
+
+ with col2:
+     st.header("Configuración")
+
+     st.session_state.api_key = st.text_input("API Key", type="password", value=st.session_state.api_key)
+     st.session_state.selected_llm = st.selectbox("Seleccionar LLM", ["Gemini 1.5 Pro", "GPT-4"], index=0 if st.session_state.selected_llm == "Gemini 1.5 Pro" else 1)
+
+     st.header("Cargar PDFs")
+     uploaded_files = st.file_uploader("Selecciona los archivos PDF", accept_multiple_files=True, type=['pdf'])
+
+     if uploaded_files and st.session_state.api_key and st.session_state.selected_llm:
+         if st.button("Procesar PDFs"):
+             all_chunks = []
+             for pdf_file in uploaded_files:
+                 chunks = process_pdf(pdf_file)
+                 all_chunks.extend(chunks)
+
+             # Despite the key name, this stores the conversational chain, not the raw vector store.
+             st.session_state.vector_store = setup_rag(all_chunks, st.session_state.selected_llm, st.session_state.api_key)
+             st.success(f"Se han procesado {len(uploaded_files)} archivos PDF.")
+     else:
+         st.warning("Por favor, asegúrate de proporcionar la API Key, seleccionar un LLM y cargar al menos un archivo PDF.")
+
+ with col1:
+     st.header("Chat")
+
+     # Replay the conversation so far.
+     for message in st.session_state.chat_history:
+         with st.chat_message(message["role"]):
+             st.markdown(message["content"])
+
+     if query := st.chat_input("Haz una pregunta sobre los documentos"):
+         st.session_state.chat_history.append({"role": "user", "content": query})
+         with st.chat_message("user"):
+             st.markdown(query)
+
+         if st.session_state.vector_store:
+             with st.chat_message("assistant"):
+                 # ConversationalRetrievalChain expects chat_history as (human, ai) string
+                 # pairs, so pair up the earlier turns, excluding the question just asked.
+                 past = st.session_state.chat_history[:-1]
+                 pairs = [(past[i]["content"], past[i + 1]["content"]) for i in range(0, len(past) - 1, 2)]
+                 response = st.session_state.vector_store({"question": query, "chat_history": pairs})
+                 st.write(f"Respuesta de {st.session_state.selected_llm}:")
+                 st.write(response['answer'])
+
+                 st.session_state.chat_history.append({"role": "assistant", "content": response['answer']})
+         else:
+             with st.chat_message("assistant"):
+                 st.write("Por favor, carga y procesa algunos archivos PDF primero.")
+             st.session_state.chat_history.append({"role": "assistant", "content": "Por favor, carga y procesa algunos archivos PDF primero."})
+
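
One subtlety worth noting: `ConversationalRetrievalChain` takes `chat_history` as a list of `(human_message, ai_message)` string pairs, not the role-tagged dicts kept in `st.session_state.chat_history`. A minimal sketch of the calling convention, assuming `chain` is the object returned by `setup_rag` above; the question text is illustrative, not part of the commit:

```python
# Sketch only: `chain` is assumed to be the ConversationalRetrievalChain
# returned by setup_rag; the question strings are placeholders.
history = []  # list of (human_message, ai_message) tuples

result = chain({"question": "¿Qué dice el documento sobre plazos?", "chat_history": history})
print(result["answer"])

# Store the completed turn as a pair so the next question has context.
history.append(("¿Qué dice el documento sobre plazos?", result["answer"]))
```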