Spaces:

medelharchaoui
/

SemanticSearch

Runtime error

medelharchaoui committed on Jun 26, 2023

Commit

973762f

•

1 Parent(s): e198633

fix chunk_size

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,13 +9,11 @@ from langchain.vectorstores import FAISS
 splitter = SentenceTransformersTokenTextSplitter(model_name='dangvantuan/sentence-camembert-large',
-    chunk_overlap=50
-    )
 embeddings_fun = HuggingFaceEmbeddings(model_name='dangvantuan/sentence-camembert-large')
-query = st.text_input("Entrer une question")
-st.text('La reponse à votre question:')
 def read_pdf(file):
     with tempfile.NamedTemporaryFile(delete=False) as temp:
@@ -27,6 +25,11 @@ def read_pdf(file):
 st.title('PDF Text Extractor')
 uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
 if uploaded_file is not None:
     raw_documents = read_pdf(uploaded_file)
     documents = splitter.split_documents(raw_documents)

 splitter = SentenceTransformersTokenTextSplitter(model_name='dangvantuan/sentence-camembert-large',
+                                                 chunk_size=380,
+                                                 chunk_overlap=100
+                                                )
 embeddings_fun = HuggingFaceEmbeddings(model_name='dangvantuan/sentence-camembert-large')
 def read_pdf(file):
     with tempfile.NamedTemporaryFile(delete=False) as temp:
 st.title('PDF Text Extractor')
 uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
+query = st.text_input("Entrer une question")
+st.text('La reponse à votre question:')
 if uploaded_file is not None:
     raw_documents = read_pdf(uploaded_file)
     documents = splitter.split_documents(raw_documents)