rushi29 commited on
Commit
7d6e246
1 Parent(s): b35b1e4

Create new file

Browse files
Files changed (1) hide show
  1. app.py +112 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
from txtai.pipeline import Textractor
from txtai.embeddings import Embeddings
import nltk
# Sentence tokenizer data used by txtai's Textractor(sentences=True).
nltk.download('punkt')
# Web scraping
import bs4 as bs
import urllib.request
import re

# Create embeddings model, backed by sentence-transformers & transformers.
# Shared by both the PDF and the Website search paths below.
embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2"})

# Decorative header image for the page.
url = "https://cdn.pixabay.com/photo/2022/02/25/09/23/background-7033808_1280.jpg"

st.title("AIP-S³")
st.write("AI Powered Smart Search System")
st.image(url)

# Fixed typo: "Welecome" -> "Welcome".
st.markdown('_Welcome to Question Answering System 🧠 🤖_')

# Sidebar switch between the two input sources handled further down.
a = st.sidebar.radio("SELECT -", ['PDF', 'Website'])
22
+
23
def my_function_pdf():
    """Answer the user's query against the uploaded PDF documents.

    Reads the module-level globals set by the PDF branch of the UI:
    `locations_max` (document locations) and `quer` (the question).
    Extracts sentences from every document with txtai's Textractor,
    ranks them against the query with the shared `embeddings` model,
    and writes the three best-matching sentences to the page.
    """
    textract = Textractor(sentences=True)

    # Flatten the sentences of every document into a single list.
    total_lines = []
    for location in locations_max:
        total_lines.extend(textract(location))

    # Guard: similarity() on an empty candidate list is pointless
    # (and the original would silently show nothing or error out).
    if not total_lines:
        st.write('No text could be extracted from the selected files.')
        return

    # similarity() returns (index, score) pairs sorted best-first,
    # so the first three entries are the top matches.
    for index, _score in embeddings.similarity(quer, total_lines)[:3]:
        st.write(total_lines[index])
40
+
41
+
42
## webscrap function
def my_web():
    """Answer the user's query against the entered web pages.

    Reads the module-level globals set by the Website branch of the UI:
    `locations_max` (page URLs) and `quer` (the question). Textractor
    accepts URLs directly, so each URL is passed straight to it; the
    extracted sentences are ranked against the query with the shared
    `embeddings` model and the three best matches are written to the page.

    Fixes over the original: removed the dead BeautifulSoup scraping
    (its `article_text` accumulator was never read anywhere — ranking
    always used `textract(url)`), removed the duplicate initialisation
    of the sentence list, and dropped the redundant function-local
    `Textractor` import (already imported at module level).
    """
    textract = Textractor(sentences=True)

    # Flatten the sentences of every page into a single list.
    total_lines = []
    for link in locations_max:
        total_lines.extend(textract(link))

    # Guard: nothing extracted means nothing to rank.
    if not total_lines:
        st.write('No text could be extracted from the given URLs.')
        return

    # similarity() returns (index, score) pairs sorted best-first,
    # so the first three entries are the top matches.
    for index, _score in embeddings.similarity(quer, total_lines)[:3]:
        st.write(total_lines[index])
69
+
70
+
71
+
72
if a == 'PDF':
    # Fixed misleading label: this uploader takes PDF documents, not CSVs.
    uploaded_files = st.file_uploader("Choose PDF file(s)", accept_multiple_files=True)
    # NOTE(review): `.name` is only the client-side file name; Streamlit
    # uploads live in memory, so Textractor may not find this path on the
    # server's filesystem — confirm, or persist each upload to a temp file.
    locations_max = [uploaded_file.name for uploaded_file in uploaded_files]

    # Query input.
    quer = st.text_input('ask me anything!', placeholder='ex - what is AI?')
    st.write('Your query is - ', quer)

    # Run extraction + ranking only after explicit confirmation.
    if st.button('Confirm!'):
        st.write('Confirmed')
        my_function_pdf()
    else:
        st.write('')

## web
else:
    number = st.number_input('Insert a number of Links -', value=1, step=1)
    st.write('Number of web pages - ', number)
    st.markdown("---")

    # One text input per requested link; `key=i` keeps the widgets distinct.
    locations_max = []
    for i in range(int(number)):
        # Fixed placeholder: the original 'https:\\' rendered as 'https:\',
        # which is not a valid URL prefix.
        loc = st.text_input('Enter the URL :', placeholder='ex- https://', key=i)
        locations_max.append(loc)

    # Query input.
    quer = st.text_input('ask me anything!', placeholder='ex - what is AI?')
    st.write('Your query is - ', quer)

    # Run extraction + ranking only after explicit confirmation.
    if st.button('Confirm!'):
        st.write('Confirmed')
        my_web()
    else:
        st.write('')