# AIPP_DEMO / app.py
# Rushi2903's picture
# Update app.py
# b4bc79e
import streamlit as st
import os
import PyPDF2
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer
import numpy as np
from numpy.linalg import norm
# --- Page header -----------------------------------------------------------
# Remote image shown under the title via st.image() below.
url = "https://www.independentschoolparent.com/wp-content/uploads/2018/01/AI.jpg"
# The calls below are issued in the order they should appear on the page.
st.title("AIP-S³")
st.write("AI Powered Smart Search System")
st.image(url)
st.markdown('_Welcome to Question Answering System 🧠 🤖_')
# Sidebar source selector. The selected label is stored in `a` and compared
# against 'PDF' further down; no branch for 'Website' is visible in this file.
a = st.sidebar.radio("SELECT -", ['PDF', 'Website'])
# Sentence-embedding model, loaded at module level.
# NOTE(review): Streamlit re-executes the script on each widget interaction,
# so this load presumably repeats on every rerun - consider caching; confirm.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
#filt1_list = []
class PDFProcessor:
    """Extract sentences from uploaded PDF files.

    Sentences from every processed file are accumulated in ``filt1_list``
    in file and page order, with embedded newlines replaced by spaces.
    """

    def __init__(self):
        # Cleaned sentences collected so far, across all process_files() calls.
        self.filt1_list = []

    def process_files(self, uploaded_files):
        """Append the sentences of each uploaded PDF to ``self.filt1_list``.

        Args:
            uploaded_files: Iterable of uploaded file objects that expose a
                ``type`` MIME string and can be read by ``PyPDF2.PdfReader``
                (presumably Streamlit ``UploadedFile`` objects - confirm
                against the caller). ``None`` or an empty iterable is a
                no-op.

        Entries whose ``type`` is not ``"application/pdf"`` are skipped.
        """
        for uploaded_file in uploaded_files or []:
            if uploaded_file.type != "application/pdf":
                continue
            reader = PyPDF2.PdfReader(uploaded_file)
            for page in reader.pages:
                # Guard against a falsy extraction result (e.g. a page with
                # no text layer) so sent_tokenize always receives a string.
                page_text = page.extract_text() or ""
                # PDF text extraction leaves hard line breaks inside
                # sentences; flatten them so each sentence is one line.
                self.filt1_list.extend(
                    sentence.replace('\n', ' ')
                    for sentence in sent_tokenize(page_text)
                )
def _top_matches(query_embedding, embeddings, n):
    """Return the indices of the ``n`` rows of ``embeddings`` closest to the query.

    Closeness is cosine similarity. Ties keep their original (document)
    order, and zero-norm vectors score 0 instead of producing NaN.
    """
    matrix = np.asarray(embeddings)
    denominators = norm(matrix, axis=1) * norm(query_embedding)
    # A zero vector also has a zero dot product, so dividing by 1 scores it 0.
    denominators = np.where(denominators == 0, 1.0, denominators)
    scores = (matrix @ query_embedding / denominators).tolist()
    return sorted(range(len(scores)), key=scores.__getitem__, reverse=True)[:n]


if a == 'PDF':
    # Streamlit re-executes this script from the top on every widget
    # interaction, so anything that must survive from the "Process!" click
    # to a later "Confirm!" click is kept in st.session_state rather than
    # in plain variables.
    pdf_processor = PDFProcessor()
    uploaded_files = st.file_uploader("Upload files - ", accept_multiple_files=True, type=['pdf'])
    if st.button("Process!"):
        pdf_processor.process_files(uploaded_files)
        filt1_list = pdf_processor.filt1_list
        if filt1_list:
            st.session_state["filt1_list"] = filt1_list
            # Encode the corpus once here instead of on every query.
            st.session_state["embeddings"] = model.encode(filt1_list)
            st.write("Process Completed")
        else:
            st.session_state.pop("filt1_list", None)
            st.session_state.pop("embeddings", None)
            st.warning("No text could be extracted from the uploaded files.")
    query = st.text_input('Ask me anything!', placeholder = 'Type.....')
    if st.button("Confirm!"):
        filt1_list = st.session_state.get("filt1_list", [])
        embeddings = st.session_state.get("embeddings")
        if not filt1_list or embeddings is None:
            st.warning("Please upload and process a PDF first.")
        elif not query.strip():
            st.warning("Please type a question first.")
        else:
            # Reuse the module-level model; reloading it per query is slow.
            query_embedding = model.encode(query)
            N = 3  # number of best-matching sentences to display
            for i in _top_matches(query_embedding, embeddings, N):
                st.write(filt1_list[i])
                st.write("")
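# Legacy inline version of the PDF flow (written before PDFProcessor existed),
# kept commented out for reference. Note that it also computed
# `embeddings = model.encode(filt1_list)` after extracting the sentences.
# if a == 'PDF' :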
# uploaded_files = st.file_uploader("Upload files - ", accept_multiple_files=True ,
# type = ['pdf'] )
# if st.button("Process!"):
# for i in uploaded_files:
# if i.type == "application/pdf" :
# reader = PyPDF2.PdfReader(i)
# text_ext = []
# for i in range(len(reader.pages)):
# pageObj = reader.pages[i]
# # extracting text from page
# text_ext.append(pageObj.extract_text())
# sent_toks = []
# for i in text_ext:
# sent_toks.append(sent_tokenize(i))
# concat_list = [j for i in sent_toks for j in i]
# filt1_list = []
# for i in concat_list:
# a = (i.replace('\n', ' '))
# filt1_list.append(a)
# #model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# embeddings = model.encode(filt1_list)
# st.write("Process Completed")
# '''