import os
import tempfile

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
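# Streamlit RAG chat app: upload PDFs, index them with MiniLM embeddings in a
# Chroma vector store, and answer questions with Gemini 1.5 Pro or GPT-4
# through a LangChain ConversationalRetrievalChain.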
st.set_page_config(layout="wide")

# Initialize per-session state on first load.
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'qa_chain' not in st.session_state:
    st.session_state.qa_chain = None
if 'selected_llm' not in st.session_state:
    st.session_state.selected_llm = None
if 'api_key' not in st.session_state:
    st.session_state.api_key = ""
def process_pdf(pdf_file):
    """Write the uploaded PDF to a temp file, load it, and split it into chunks."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_file_path = tmp_file.name
    loader = PyPDFLoader(tmp_file_path)
    pages = loader.load_and_split()
    # Overlapping chunks preserve context across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(pages)
    os.unlink(tmp_file_path)
    return chunks
def setup_rag(chunks, selected_llm, api_key):
    """Embed the chunks into a Chroma store and build a conversational RAG chain."""
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = Chroma.from_documents(chunks, embeddings)
    if selected_llm == "Gemini 1.5 Pro":
        # ChatGoogleGenerativeAI takes the model id via `model`, not `model_name`.
        llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0, google_api_key=api_key)
    else:
        llm = ChatOpenAI(model="gpt-4", temperature=0, api_key=api_key)
    # Retrieve the 3 most similar chunks for each question.
    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
    chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)
    return chain
# Two-column layout: chat on the left, configuration on the right.
col1, col2 = st.columns([7, 3])

with col2:
    st.header("Configuration")
    st.session_state.api_key = st.text_input("API Key", type="password", value=st.session_state.api_key)
    st.session_state.selected_llm = st.selectbox(
        "Select LLM",
        ["Gemini 1.5 Pro", "GPT-4"],
        index=1 if st.session_state.selected_llm == "GPT-4" else 0,
    )

    st.header("Upload PDFs")
    uploaded_files = st.file_uploader("Select PDF files", accept_multiple_files=True, type=['pdf'])

    if uploaded_files and st.session_state.api_key and st.session_state.selected_llm:
        if st.button("Process PDFs"):
            all_chunks = []
            for pdf_file in uploaded_files:
                chunks = process_pdf(pdf_file)
                all_chunks.extend(chunks)
            # The session stores the ready-to-use RAG chain, not the raw vector store.
            st.session_state.qa_chain = setup_rag(all_chunks, st.session_state.selected_llm, st.session_state.api_key)
            st.success(f"Processed {len(uploaded_files)} PDF files.")
    else:
        st.warning("Please provide an API key, select an LLM, and upload at least one PDF file.")
with col1:
    st.header("Chat")
    # Replay the conversation so far.
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if query := st.chat_input("Ask a question about the documents"):
        st.session_state.chat_history.append({"role": "user", "content": query})
        with st.chat_message("user"):
            st.markdown(query)
        if st.session_state.qa_chain:
            with st.chat_message("assistant"):
                # ConversationalRetrievalChain expects chat_history as
                # (human_message, ai_message) pairs; rebuild them from past
                # turns, excluding the question that was just appended.
                past = st.session_state.chat_history[:-1]
                history = [
                    (past[i]["content"], past[i + 1]["content"])
                    for i in range(0, len(past) - 1, 2)
                    if past[i]["role"] == "user" and past[i + 1]["role"] == "assistant"
                ]
                response = st.session_state.qa_chain.invoke({"question": query, "chat_history": history})
                st.write(f"Answer from {st.session_state.selected_llm}:")
                st.write(response['answer'])
                st.session_state.chat_history.append({"role": "assistant", "content": response['answer']})
        else:
            with st.chat_message("assistant"):
                notice = "Please upload and process some PDF files first."
                st.write(notice)
                st.session_state.chat_history.append({"role": "assistant", "content": notice})
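# To run locally (the package names below are the usual distributions for
# these imports, not pinned by this file, and "app.py" assumes that is the
# name this file is saved under):
#   pip install streamlit langchain langchain-community langchain-openai \
#       langchain-google-genai chromadb sentence-transformers pypdf
#   streamlit run app.py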