File size: 4,255 Bytes
87ea4cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
Aquí tienes el código Python sin comentarios:

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
import tempfile
import os

st.set_page_config(layout="wide")

if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'vector_store' not in st.session_state:
    st.session_state.vector_store = None
if 'selected_llm' not in st.session_state:
    st.session_state.selected_llm = None
if 'api_key' not in st.session_state:
    st.session_state.api_key = ""

def process_pdf(pdf_file):
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_file_path = tmp_file.name

    loader = PyPDFLoader(tmp_file_path)
    pages = loader.load_and_split()
    
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(pages)
    
    os.unlink(tmp_file_path)
    return chunks

def setup_rag(chunks, selected_llm, api_key):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = Chroma.from_documents(chunks, embeddings)
    
    if selected_llm == "Gemini 1.5 Pro":
        llm = ChatGoogleGenerativeAI(model_name="gemini-1.5-pro-latest", temperature=0, google_api_key=api_key)
    else:
        llm = ChatOpenAI(model_name="gpt-4", temperature=0, api_key=api_key)
    
    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
    chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)
    
    return chain

col1, col2 = st.columns([7, 3])

with col2:
    st.header("Configuración")
    
    st.session_state.api_key = st.text_input("API Key", type="password", value=st.session_state.api_key)
    st.session_state.selected_llm = st.selectbox("Seleccionar LLM", ["Gemini 1.5 Pro", "GPT-4"], index=0 if st.session_state.selected_llm == "Gemini 1.5 Pro" else 1)
    
    st.header("Cargar PDFs")
    uploaded_files = st.file_uploader("Selecciona los archivos PDF", accept_multiple_files=True, type=['pdf'])
    
    if uploaded_files and st.session_state.api_key and st.session_state.selected_llm:
        if st.button("Procesar PDFs"):
            all_chunks = []
            for pdf_file in uploaded_files:
                chunks = process_pdf(pdf_file)
                all_chunks.extend(chunks)
            
            st.session_state.vector_store = setup_rag(all_chunks, st.session_state.selected_llm, st.session_state.api_key)
            st.success(f"Se han procesado {len(uploaded_files)} archivos PDF.")
    else:
        st.warning("Por favor, asegúrate de proporcionar la API Key, seleccionar un LLM y cargar al menos un archivo PDF.")

with col1:
    st.header("Chat")
    
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    
    if query := st.chat_input("Haz una pregunta sobre los documentos"):
        st.session_state.chat_history.append({"role": "user", "content": query})
        with st.chat_message("user"):
            st.markdown(query)
        
        if st.session_state.vector_store:
            with st.chat_message("assistant"):
                response = st.session_state.vector_store({"question": query, "chat_history": [(msg["role"], msg["content"]) for msg in st.session_state.chat_history]})
                st.write(f"Respuesta de {st.session_state.selected_llm}:")
                st.write(response['answer'])
            
            st.session_state.chat_history.append({"role": "assistant", "content": response['answer']})
        else:
            with st.chat_message("assistant"):
                st.write("Por favor, carga y procesa algunos archivos PDF primero.")
            st.session_state.chat_history.append({"role": "assistant", "content": "Por favor, carga y procesa algunos archivos PDF primero."})