Spaces:

rushi29
/

AIP_pdf

Runtime error

App Files Files Community

rushi29 committed on Jul 18, 2022

Commit

6e53c46

•

1 Parent(s): e669c16

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -47

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ st.image(url)
 st.markdown('_Welecome to Question Answering System 🧠 🤖_')
-a = st.sidebar.radio("SELECT -", ['PDF', 'Website'])
 ## webscrap function
 def my_web():
@@ -53,9 +53,7 @@ def my_web():
     st.write(total_lines[j])
-if a == 'PDF' :
   uploaded_files = st.file_uploader("Upload files - ", accept_multiple_files=True ,
                   type = ['pdf', 'docx' , 'txt'] )
@@ -64,53 +62,28 @@ if a == 'PDF' :
   quer = st.text_input('ask me anything!', placeholder = 'ex - what is AI?')
   st.write('Your query is - ', quer)
-  if st.button("Process"):
-    for uploaded_file in uploaded_files:
-      if uploaded_file is not None:
-        file_details = {"Filename":uploaded_file.name,"FileType":uploaded_file.type,"FileSize":uploaded_file.size}
-        #st.write(file_details)
-        if uploaded_file.type == "text/plain":
-          raw_text = str(uploaded_file.read(),"utf-8")
-          st.write(raw_text)
-        elif uploaded_file.type == "application/pdf" :
-          reader = PdfReader(uploaded_file)
-          text = ""
-          for page in reader.pages:
-            text += page.extract_text() + "\n"
-          #st.write(text)
-          data_lines =  tokenize.sent_tokenize(text)
-          #st.write(data_lines)
-          seq = embeddings.similarity(quer, data_lines)
-          three_most = seq[0:3]
-          indexes = []
-          for i in three_most:
-            indexes.append(i[0])
-          for j in indexes:
-            st.write(data_lines[j])
-          #total_lines = []
-          #for i in data_lines:
-            #total_lines += i
-          #st.write(data_lines)
-          #try:
-            #with pdfplumber.open(uploaded_file) as pdf:
-              #pages = pdf.pages[0]
-              #st.write(pages.extract_text())
-          #except:
-            #st.write("None")
-        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" :
-          raw_text = docx2txt.process(uploaded_file)
-          st.write(raw_text)
 ## web
 else:
   number = st.number_input('Insert a number of Links -',value =1, step =1)

 st.markdown('_Welecome to Question Answering System 🧠 🤖_')
+a = st.sidebar.radio("SELECT -", ['File Upload', 'Website'])
 ## webscrap function
 def my_web():
     st.write(total_lines[j])
+if a == 'File Upload' :
   uploaded_files = st.file_uploader("Upload files - ", accept_multiple_files=True ,
                   type = ['pdf', 'docx' , 'txt'] )
   quer = st.text_input('ask me anything!', placeholder = 'ex - what is AI?')
   st.write('Your query is - ', quer)
+  if st.button("Confirm!"):
+    text_raw = ""
+    for i in uploaded_files:
+      if i.type == "application/pdf" :
+        reader = PdfReader(i)
+      # print(reader.numPages)
+        pageObj = reader.getPage(0)
+      # print(pageObj.extractText())
+        text_raw += pageObj.extract_text() + "\n"
+    all_tokens = tokenize.sent_tokenize(text_raw)
+    seq = embeddings.similarity(quer, all_tokens)
+    three_most = seq[0:3]
+    indexes = []
+    for i in three_most:
+      indexes.append(i[0])
+      # print(indexes)
+    for j in indexes:
+      st.write(all_tokens[j])
 ## web
 else:
   number = st.number_input('Insert a number of Links -',value =1, step =1)