Here is the Python code:
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
import tempfile
import os
st.set_page_config(layout="wide")

# Initialize session state so values survive Streamlit reruns.
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'vector_store' not in st.session_state:
    st.session_state.vector_store = None
if 'selected_llm' not in st.session_state:
    st.session_state.selected_llm = None
if 'api_key' not in st.session_state:
    st.session_state.api_key = ""
def process_pdf(pdf_file):
    # Persist the uploaded file to a temporary path so PyPDFLoader can read it from disk.
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(pdf_file.getvalue())
        tmp_file_path = tmp_file.name
    loader = PyPDFLoader(tmp_file_path)
    pages = loader.load_and_split()
    # Split the pages into overlapping chunks for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(pages)
    os.unlink(tmp_file_path)
    return chunks
def setup_rag(chunks, selected_llm, api_key):
    # Embed the chunks and index them in an in-memory Chroma vector store.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = Chroma.from_documents(chunks, embeddings)
    # Instantiate the chat model the user selected; ChatGoogleGenerativeAI takes `model`, not `model_name`.
    if selected_llm == "Gemini 1.5 Pro":
        llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0, google_api_key=api_key)
    else:
        llm = ChatOpenAI(model="gpt-4", temperature=0, api_key=api_key)
    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
    chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)
    return chain
col1, col2 = st.columns([7, 3])
with col2:
    st.header("Settings")
    st.session_state.api_key = st.text_input("API Key", type="password", value=st.session_state.api_key)
    st.session_state.selected_llm = st.selectbox(
        "Select LLM",
        ["Gemini 1.5 Pro", "GPT-4"],
        index=0 if st.session_state.selected_llm == "Gemini 1.5 Pro" else 1,
    )
    st.header("Upload PDFs")
    uploaded_files = st.file_uploader("Select the PDF files", accept_multiple_files=True, type=['pdf'])
    if uploaded_files and st.session_state.api_key and st.session_state.selected_llm:
        if st.button("Process PDFs"):
            all_chunks = []
            for pdf_file in uploaded_files:
                chunks = process_pdf(pdf_file)
                all_chunks.extend(chunks)
            # Despite the key name, this stores the ConversationalRetrievalChain, not the raw vector store.
            st.session_state.vector_store = setup_rag(all_chunks, st.session_state.selected_llm, st.session_state.api_key)
            st.success(f"Processed {len(uploaded_files)} PDF files.")
    else:
        st.warning("Please provide the API key, select an LLM, and upload at least one PDF file.")
with col1:
    st.header("Chat")
    # Replay the conversation so far on every rerun.
    for message in st.session_state.chat_history:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])
    if query := st.chat_input("Ask a question about the documents"):
        st.session_state.chat_history.append({"role": "user", "content": query})
        with st.chat_message("user"):
            st.markdown(query)
        if st.session_state.vector_store:
            with st.chat_message("assistant"):
                # ConversationalRetrievalChain expects chat_history as (question, answer) pairs,
                # so pair up the earlier user/assistant turns (excluding the message just appended).
                past_turns = st.session_state.chat_history[:-1]
                history_pairs = [
                    (past_turns[i]["content"], past_turns[i + 1]["content"])
                    for i in range(0, len(past_turns) - 1, 2)
                ]
                response = st.session_state.vector_store({"question": query, "chat_history": history_pairs})
                st.write(f"Answer from {st.session_state.selected_llm}:")
                st.write(response['answer'])
                st.session_state.chat_history.append({"role": "assistant", "content": response['answer']})
        else:
            with st.chat_message("assistant"):
                st.write("Please upload and process some PDF files first.")
                st.session_state.chat_history.append({"role": "assistant", "content": "Please upload and process some PDF files first."})
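
If you want to try this yourself, a minimal sketch of the dependencies implied by the imports (the exact package list is my assumption, not taken from the original Space) would be a requirements.txt along these lines:

streamlit
langchain
langchain-community
langchain-openai
langchain-google-genai
chromadb
pypdf
sentence-transformers

You would then launch it with streamlit run app.py, assuming the script is saved as app.py (the usual entry point for a Streamlit Space).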