Rushi2903 committed on
Commit
6412936
1 Parent(s): 1b063d0

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +77 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import PyPDF2
4
+ import nltk
5
+ nltk.download('punkt')
6
+ from nltk.tokenize import sent_tokenize
7
+ from sentence_transformers import SentenceTransformer
8
+ import numpy as np
9
+ from numpy.linalg import norm
10
+
11
# Banner image shown under the page title.
url = "https://www.independentschoolparent.com/wp-content/uploads/2018/01/AI.jpg"

# Number of best-matching sentences shown for each question.
TOP_N = 3


def _get_model():
    """Return the sentence encoder, loading it at most once per session.

    The model is kept in ``st.session_state`` so that the reruns Streamlit
    performs on every widget interaction do not construct it again.
    """
    if "model" not in st.session_state:
        st.session_state["model"] = SentenceTransformer(
            'sentence-transformers/all-MiniLM-L6-v2'
        )
    return st.session_state["model"]


def _extract_sentences(files):
    """Return every sentence found in the uploaded PDFs, newlines flattened.

    Files whose reported MIME type is not ``application/pdf`` are skipped.
    Pages that yield no text contribute no sentences.
    """
    sentences = []
    for pdf_file in files:
        if pdf_file.type != "application/pdf":
            continue
        reader = PyPDF2.PdfReader(pdf_file)
        for page in reader.pages:
            # Guard against pages with no extractable text.
            page_text = page.extract_text() or ""
            sentences.extend(
                sentence.replace('\n', ' ')
                for sentence in sent_tokenize(page_text)
            )
    return sentences


def _rank_sentences(query_embedding, embeddings, top_n):
    """Return indexes of the ``top_n`` rows most cosine-similar to the query.

    Ties keep their original (document) order, matching a stable
    descending sort.
    """
    scores = np.dot(embeddings, query_embedding) / (
        norm(embeddings, axis=1) * norm(query_embedding)
    )
    return np.argsort(-scores, kind="stable")[:top_n]


# ---- Page header -----------------------------------------------------------
st.title("AIP-S³")
st.write("AI Powered Smart Search System")
st.image(url)

st.markdown('_Welcome to Question Answering System 🧠 🤖_')

# Input source selector; only the 'PDF' choice has a branch in this script.
source = st.sidebar.radio("SELECT -", ['PDF', 'Website'])

if source == 'PDF':
    uploaded_files = st.file_uploader(
        "Upload files - ", accept_multiple_files=True, type=['pdf']
    )

    # Streamlit re-executes this script on every interaction, and a button
    # is only True during the run caused by its own click. Whatever
    # "Confirm!" needs from "Process!" therefore has to live in
    # st.session_state; plain local variables would be undefined on the
    # rerun triggered by "Confirm!".
    if st.button("Process!"):
        sentences = _extract_sentences(uploaded_files or [])
        if sentences:
            st.session_state["sentences"] = sentences
            st.session_state["embeddings"] = _get_model().encode(sentences)
            st.write("Process Completed")
        else:
            # Drop any stale index so a later query cannot hit old documents.
            st.session_state.pop("sentences", None)
            st.session_state.pop("embeddings", None)
            st.warning("No text could be extracted from the uploaded files.")

    query = st.text_input('Ask me anything!', placeholder='Type.....')
    if st.button("Confirm!"):
        sentences = st.session_state.get("sentences")
        embeddings = st.session_state.get("embeddings")

        if not sentences or embeddings is None:
            st.warning('Upload at least one PDF and press "Process!" first.')
        elif not query.strip():
            st.warning("Please type a question first.")
        else:
            query_embedding = _get_model().encode(query)
            for index in _rank_sentences(query_embedding, embeddings, TOP_N):
                st.write(sentences[index])
                st.write("")
74
+
75
+
76
+
77
+
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ PyPDF2
2
+ nltk==3.7
3
+ numpy==1.21.6
4
+ sentence_transformers==2.2.2