rushi29 committed on
Commit
50bf7dc
1 Parent(s): dbced9a

Create app.py

Files changed (1)
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
+ import streamlit as st
+ from txtai.pipeline import Textractor
+ from txtai.embeddings import Embeddings
+ import nltk
+ nltk.download('punkt')
+ # Web scraping
+ import bs4 as bs
+ import urllib.request
+ import re
+ # Create embeddings model, backed by sentence-transformers & transformers
+ embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})
+
+ url = "https://cdn.pixabay.com/photo/2022/02/25/09/23/background-7033808_1280.jpg"
+
+ st.title("AIP-S³")
+ st.write("AI Powered Smart Search System")
+ st.image(url)
+
+ st.markdown('_Welcome to the Question Answering System 🧠 🤖_')
+
+ a = st.sidebar.radio("SELECT -", ['PDF', 'Website'])
+
+ def my_function_pdf():
+     # Split each PDF into sentences with txtai's Textractor
+     textract = Textractor(sentences=True)
+
+     data_lines = []
+     for i in locations_max:
+         lines = textract(i)
+         data_lines.append(lines)
+
+     # Flatten the per-file sentence lists into one list
+     total_lines = []
+     for i in data_lines:
+         total_lines += i
+
+     # similarity() returns (index, score) pairs ranked best-first; keep the top three
+     seq = embeddings.similarity(quer, total_lines)
+     three_most = seq[0:3]
+     indexes = []
+     for i in three_most:
+         indexes.append(i[0])
+     for j in indexes:
+         st.write(total_lines[j])
+
+ ## web scraping function
+ def my_web():
+     # Split each web page into sentences with txtai's Textractor
+     textract = Textractor(sentences=True)
+     data_lines = []
+     article_text = " "
+     for i in locations_max:
+         # Scrape the raw paragraph text of the page with BeautifulSoup
+         scraped_data = urllib.request.urlopen(i)
+         article = scraped_data.read()
+         parsed_article = bs.BeautifulSoup(article, 'lxml')
+         paragraphs = parsed_article.find_all('p')
+         for p in paragraphs:
+             article_text += p.text
+         # Sentence-level extraction of the same URL for ranking
+         lines = textract(i)
+         data_lines.append(lines)
+
+     # Flatten the per-page sentence lists into one list
+     total_lines = []
+     for i in data_lines:
+         total_lines += i
+
+     # similarity() returns (index, score) pairs ranked best-first; keep the top three
+     seq = embeddings.similarity(quer, total_lines)
+     three_most = seq[0:3]
+     indexes = []
+     for i in three_most:
+         indexes.append(i[0])
+     for j in indexes:
+         st.write(total_lines[j])
+
+
+ ##
+
+ if a == 'PDF':
+     number = st.number_input('Insert a number of files -', value=1, step=1)
+     st.write('Number of PDF files - ', number)
+     st.markdown("---")
+     locations_max = []
+     for i in range(number):
+         loc = st.text_input('Enter the PDF path :', placeholder='ex- /content/drive/MyDrive/', key=i)
+         locations_max.append(loc)
+
+     # for query
+     quer = st.text_input('ask me anything!', placeholder='ex - what is AI?')
+     st.write('Your query is - ', quer)
+
+     # for text extraction
+     if st.button('Confirm!'):
+         st.write('Confirmed')
+         my_function_pdf()
+     else:
+         st.write('')
+ ## web
+ else:
+     number = st.number_input('Insert a number of Links -', value=1, step=1)
+     st.write('Number of web pages - ', number)
+     st.markdown("---")
+     locations_max = []
+     for i in range(number):
+         loc = st.text_input('Enter the URL :', placeholder='ex- https://', key=i)
+         locations_max.append(loc)
+
+     # for query
+     quer = st.text_input('ask me anything!', placeholder='ex - what is AI?')
+     st.write('Your query is - ', quer)
+
+     if st.button('Confirm!'):
+         st.write('Confirmed')
+         my_web()
+     else:
+         st.write('')
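
For reference, a minimal standalone sketch of the ranking step that both helper functions rely on. The model path and the similarity() call mirror the file above; the sample sentences and query are hypothetical stand-ins for the extracted PDF/web text.

    from txtai.embeddings import Embeddings

    embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})

    # Hypothetical candidate sentences standing in for the extracted content
    sentences = [
        "Artificial intelligence simulates human reasoning in machines.",
        "The weather was sunny all weekend.",
    ]

    # similarity() returns (index, score) pairs ranked best-first;
    # the app keeps the top three and writes the matching sentences
    for index, score in embeddings.similarity("what is AI?", sentences)[:3]:
        print(sentences[index], score)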