medelharchaoui committed on
Commit
e198633
1 Parent(s): 20dfb46

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -21
app.py CHANGED
@@ -8,6 +8,14 @@ from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbed
8
  from langchain.vectorstores import FAISS
9
 
10
 
 
 
 
 
 
 
 
 
11
 
12
  def read_pdf(file):
13
  with tempfile.NamedTemporaryFile(delete=False) as temp:
@@ -20,24 +28,15 @@ st.title('PDF Text Extractor')
20
 
21
  uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
22
  if uploaded_file is not None:
23
- raw_documents = read_pdf(uploaded_file)
24
-
25
-
26
- splitter = SentenceTransformersTokenTextSplitter(model_name='dangvantuan/sentence-camembert-large',
27
- chunk_overlap=50
28
- )
29
-
30
- documents = splitter.split_documents(raw_documents)
31
-
32
-
33
- embeddings_fun = HuggingFaceEmbeddings(model_name='dangvantuan/sentence-camembert-large')
34
-
35
- # embeddings_text = embeddings_fun.embed_documents(documents)
36
-
37
- faiss_db = FAISS.from_documents(documents, embeddings_fun)
38
-
39
- query = st.text_input("Entrer une question")
40
- docs = faiss_db.similarity_search(query)
41
-
42
- st.text('La reponse à votre question:')
43
- st.write(docs[0].page_content)
 
8
  from langchain.vectorstores import FAISS
9
 
10
 
11
+ splitter = SentenceTransformersTokenTextSplitter(model_name='dangvantuan/sentence-camembert-large',
12
+ chunk_overlap=50
13
+ )
14
+ embeddings_fun = HuggingFaceEmbeddings(model_name='dangvantuan/sentence-camembert-large')
15
+
16
+ query = st.text_input("Entrer une question")
17
+
18
+ st.text('La reponse à votre question:')
19
 
20
  def read_pdf(file):
21
  with tempfile.NamedTemporaryFile(delete=False) as temp:
 
28
 
29
  uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
30
  if uploaded_file is not None:
31
+ raw_documents = read_pdf(uploaded_file)
32
+ documents = splitter.split_documents(raw_documents)
33
+
34
+ # embeddings_text = embeddings_fun.embed_documents(documents)
35
+
36
+ faiss_db = FAISS.from_documents(documents, embeddings_fun)
37
+
38
+ docs = faiss_db.similarity_search(query)
39
+
40
+ st.write(docs[0].page_content)
41
+ else:
42
+ st.write("file not uploaded correctly")