Upload 2 files
Browse files- app.py +77 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
import PyPDF2
|
4 |
+
import nltk
|
5 |
+
nltk.download('punkt')
|
6 |
+
from nltk.tokenize import sent_tokenize
|
7 |
+
from sentence_transformers import SentenceTransformer
|
8 |
+
import numpy as np
|
9 |
+
from numpy.linalg import norm
|
10 |
+
|
11 |
+
# Banner image shown under the page title.
url = "https://www.independentschoolparent.com/wp-content/uploads/2018/01/AI.jpg"

# Sentence-embedding model used for both the document sentences and the query.
_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'

# Number of best-matching sentences shown for a query.
_TOP_N = 3


def _get_model():
    """Return the embedding model, loading it at most once per session.

    Streamlit re-executes this script on every widget interaction, so the
    model is kept in ``st.session_state`` instead of being rebuilt each run.
    """
    if "model" not in st.session_state:
        st.session_state["model"] = SentenceTransformer(_MODEL_NAME)
    return st.session_state["model"]


def _extract_sentences(pdf_files):
    """Return every sentence found in the given uploaded PDF files.

    Text is extracted page by page, split into sentences with
    ``sent_tokenize``, and newlines inside a sentence are replaced by spaces.
    Uploads whose MIME type is not ``application/pdf`` are skipped.
    """
    sentences = []
    for pdf_file in pdf_files or []:
        if pdf_file.type != "application/pdf":
            continue
        reader = PyPDF2.PdfReader(pdf_file)
        for page in reader.pages:
            # Guard against pages that yield no text (e.g. scanned images).
            page_text = page.extract_text() or ""
            sentences.extend(
                sentence.replace('\n', ' ')
                for sentence in sent_tokenize(page_text)
            )
    return sentences


def _top_matches(query_embedding, sentence_embeddings, limit=_TOP_N):
    """Return indexes of the ``limit`` sentences most similar to the query.

    Similarity is the cosine between the query vector and each row of
    ``sentence_embeddings``. Ties keep their original order, and zero-length
    vectors score 0 instead of triggering a division by zero.
    """
    matrix = np.asarray(sentence_embeddings)
    if matrix.size == 0:
        return []
    query_vector = np.asarray(query_embedding)
    denominators = norm(matrix, axis=1) * norm(query_vector)
    # Where a norm is zero, divide by 1 instead; the dot product is 0 there,
    # so the resulting similarity is 0.
    safe_denominators = np.where(denominators == 0, 1.0, denominators)
    scores = (matrix @ query_vector) / safe_denominators
    # Stable sort on the negated scores == descending order, ties in place.
    return np.argsort(-scores, kind="stable")[:limit].tolist()


st.title("AIP-S³")
st.write("AI Powered Smart Search System")
st.image(url)

st.markdown('_Welcome to Question Answering System 🧠 🤖_')

a = st.sidebar.radio("SELECT -", ['PDF', 'Website'])

if a == 'PDF':

    uploaded_files = st.file_uploader("Upload files - ", accept_multiple_files=True,
                                      type=['pdf'])

    if st.button("Process!"):
        processed_sentences = _extract_sentences(uploaded_files)
        if processed_sentences:
            # Persist results: a button is only True on the run triggered by
            # its own click, so plain local variables would be gone by the
            # time "Confirm!" is pressed.
            st.session_state["sentences"] = processed_sentences
            st.session_state["embeddings"] = _get_model().encode(processed_sentences)
            st.write("Process Completed")
        else:
            st.warning("No text could be extracted from the uploaded files.")

    query = st.text_input('Ask me anything!', placeholder='Type.....')
    if st.button("Confirm!"):
        if "sentences" not in st.session_state or "embeddings" not in st.session_state:
            st.warning("Please upload and process at least one PDF first.")
        elif not query.strip():
            st.warning("Please type a question first.")
        else:
            stored_sentences = st.session_state["sentences"]
            query_embedding = _get_model().encode(query)
            best_indexes = _top_matches(query_embedding, st.session_state["embeddings"])

            for index in best_indexes:
                st.write(stored_sentences[index])
                st.write("")
|
74 |
+
|
75 |
+
|
76 |
+
|
77 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
PyPDF2
|
2 |
+
nltk==3.7
|
3 |
+
numpy==1.21.6
|
4 |
+
sentence_transformers==2.2.2
|