medelharchaoui committed on
Commit
973762f
1 Parent(s): e198633

fix chunk_size

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -9,13 +9,11 @@ from langchain.vectorstores import FAISS
9
 
10
 
11
  splitter = SentenceTransformersTokenTextSplitter(model_name='dangvantuan/sentence-camembert-large',
12
- chunk_overlap=50
13
- )
 
14
  embeddings_fun = HuggingFaceEmbeddings(model_name='dangvantuan/sentence-camembert-large')
15
 
16
- query = st.text_input("Entrer une question")
17
-
18
- st.text('La reponse à votre question:')
19
 
20
  def read_pdf(file):
21
  with tempfile.NamedTemporaryFile(delete=False) as temp:
@@ -27,6 +25,11 @@ def read_pdf(file):
27
  st.title('PDF Text Extractor')
28
 
29
  uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
 
 
 
 
 
30
  if uploaded_file is not None:
31
  raw_documents = read_pdf(uploaded_file)
32
  documents = splitter.split_documents(raw_documents)
 
9
 
10
 
11
  splitter = SentenceTransformersTokenTextSplitter(model_name='dangvantuan/sentence-camembert-large',
12
+ chunk_size=380,
13
+ chunk_overlap=100
14
+ )
15
  embeddings_fun = HuggingFaceEmbeddings(model_name='dangvantuan/sentence-camembert-large')
16
 
 
 
 
17
 
18
  def read_pdf(file):
19
  with tempfile.NamedTemporaryFile(delete=False) as temp:
 
25
  st.title('PDF Text Extractor')
26
 
27
  uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
28
+
29
+ query = st.text_input("Entrer une question")
30
+
31
+ st.text('La reponse à votre question:')
32
+
33
  if uploaded_file is not None:
34
  raw_documents = read_pdf(uploaded_file)
35
  documents = splitter.split_documents(raw_documents)