# Hugging Face Space: medelharchaoui/SemanticSearch
# Space status shown on the page at capture time: Runtime error
# File size: 1,253 bytes
# Commit hashes shown in the page's blame gutter:
#   46d9645, fd18c44, 0f14a07, 26ba5f9, f9b3ca4

import streamlit as st
import tempfile
import torch
from langchain.document_loaders import PyPDFLoader

from langchain.text_splitter import SentenceTransformersTokenTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS



def read_pdf(file):
    """Load an uploaded PDF into a list of LangChain documents.

    ``PyPDFLoader`` is constructed from a filesystem path, so the upload's
    bytes are first written to a temporary file, which is removed again once
    loading has finished.

    Args:
        file: Uploaded file object exposing ``getvalue()`` that returns the
            raw PDF bytes (here, the object returned by ``st.file_uploader``).

    Returns:
        The documents returned by ``PyPDFLoader(...).load()``.
    """
    import os  # local import: only needed for temp-file cleanup

    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp:
        temp.write(file.getvalue())
        temp_path = temp.name

    # The handle is closed (and therefore flushed) before the loader opens
    # the path: reading while the writer is still open can see a truncated
    # file, and on Windows the file cannot be reopened by name at all.
    try:
        return PyPDFLoader(temp_path).load()
    finally:
        # delete=False means nobody else removes the file; without this a
        # temp file would leak on every upload.
        try:
            os.remove(temp_path)
        except OSError:
            # Best-effort cleanup: a failed delete must not mask the result
            # (or the original exception) of the load itself.
            pass

# --- Streamlit page: upload a PDF, index its chunks, answer one question ---
# Streamlit re-executes this script from the top on every interaction, so
# each step below must cope with "no input yet".

MODEL_NAME = 'dangvantuan/sentence-camembert-large'

st.title('PDF Text Extractor')

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
if uploaded_file is None:
    # Nothing uploaded yet: halt this run here. Continuing would hit a
    # NameError because ``raw_documents`` is only bound after an upload.
    st.stop()

raw_documents = read_pdf(uploaded_file)

# Split the loaded documents into token-bounded chunks using the same
# model that is later used to embed them.
splitter = SentenceTransformersTokenTextSplitter(
    model_name=MODEL_NAME,
    chunk_overlap=50,
)
documents = splitter.split_documents(raw_documents)
if not documents:
    # No chunks means there is nothing to index or search.
    st.warning("No text could be extracted from this PDF.")
    st.stop()

embeddings_fun = HuggingFaceEmbeddings(model_name=MODEL_NAME)
faiss_db = FAISS.from_documents(documents, embeddings_fun)

query = st.text_input("Entrer une question")
if not query.strip():
    # The input box is rendered but still empty: wait for a real question
    # instead of searching for the empty string.
    st.stop()

docs = faiss_db.similarity_search(query)

st.text('La reponse à votre question:')
if docs:
    # Show only the best-matching chunk.
    st.write(docs[0].page_content)
else:
    st.warning("No matching passage found.")