Spaces:
Runtime error
Runtime error
import streamlit as st | |
import tempfile | |
import torch | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.text_splitter import SentenceTransformersTokenTextSplitter | |
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings | |
from langchain.vectorstores import FAISS | |
# Single source of truth for the sentence-embedding model: the splitter must
# count tokens with the same tokenizer the embedder uses.
EMBEDDING_MODEL_NAME = 'dangvantuan/sentence-camembert-large'

# Token-based splitter. The per-chunk token budget is set with
# ``tokens_per_chunk``; a ``chunk_size`` keyword is only forwarded to the base
# TextSplitter and is not consulted by the token-splitting logic, so passing
# it left the chunks at the model's maximum sequence length instead of 380.
splitter = SentenceTransformersTokenTextSplitter(
    model_name=EMBEDDING_MODEL_NAME,
    tokens_per_chunk=380,
    chunk_overlap=100,
)

# Embedding function handed to the vector store to embed chunks and queries.
embeddings_fun = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
def read_pdf(file):
    """Load an uploaded PDF into LangChain documents.

    The upload is only available as in-memory bytes, while ``PyPDFLoader``
    is constructed from a filesystem path, so the bytes are written to a
    temporary file first. The temporary file is always removed before
    returning.

    Args:
        file: Uploaded-file object exposing ``getvalue()``, which returns
            the raw PDF bytes.

    Returns:
        The result of ``PyPDFLoader(path).load()`` (presumably one document
        per page -- confirm against the LangChain version in use).
    """
    import os  # local import so this block does not depend on file-level imports

    # delete=False: the file must survive closing the handle so the loader
    # can reopen it by name; it is removed explicitly in the finally below.
    temp = tempfile.NamedTemporaryFile(delete=False)
    try:
        # Closing the handle flushes every byte to disk before the loader
        # reads the path.
        with temp:
            temp.write(file.getvalue())
        return PyPDFLoader(temp.name).load()
    finally:
        # Without this, every call leaves a stray file in the temp directory.
        os.unlink(temp.name)
# --- Streamlit page: upload a PDF, ask a question, show the closest chunk ---
st.title('PDF Text Extractor')
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
query = st.text_input("Entrer une question")
st.text('La reponse à votre question:')

if uploaded_file is None:
    st.write("file not uploaded correctly")
elif not query.strip():
    # The text input is empty until the user types something; searching on
    # an empty query would only pay for parsing and embedding the whole PDF
    # to return an arbitrary chunk.
    st.write("Please enter a question to search the document.")
else:
    raw_documents = read_pdf(uploaded_file)
    documents = splitter.split_documents(raw_documents)
    if not documents:
        # e.g. a scanned/image-only PDF: there is nothing to embed, and
        # building an index from an empty list would fail.
        st.write("No text could be extracted from this PDF.")
    else:
        faiss_db = FAISS.from_documents(documents, embeddings_fun)
        docs = faiss_db.similarity_search(query)
        if docs:
            # Results are ordered by similarity; show the best match only.
            st.write(docs[0].page_content)
        else:
            st.write("No matching passage found.")