Commit e0ed1f1 · Parent: b2b6846 · hf

Files changed:
- app.py +41 -22
- requirements.txt +1 -1
app.py
CHANGED
@@ -37,12 +37,20 @@ from transformers import pipeline

#@st.cache_resource()
@st.cache(allow_output_mutation=True)
-def
+def get_sentiment_model():
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
    return tokenizer,model

-
+tokenizer_sentiment,model_sentiment = get_sentiment_model()
+
+@st.cache(allow_output_mutation=True)
+def get_emotion_model():
+    tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
+    model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
+    return tokenizer,model
+
+tokenizer_emotion,model_emotion = get_emotion_model()

def extract_text_from_pdf(path):
    text=''
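Both loaders are wrapped in @st.cache(allow_output_mutation=True), so each checkpoint is downloaded and instantiated once per session rather than on every Streamlit rerun. The commented #@st.cache_resource() line points at the newer caching API; a minimal sketch of the same loader under st.cache_resource (available in Streamlit releases newer than the 1.17.0 pinned below) could look like this:

import streamlit as st
from transformers import AutoModelForSequenceClassification, AutoTokenizer

@st.cache_resource  # newer replacement for @st.cache(allow_output_mutation=True)
def get_sentiment_model():
    # Load the FinBERT checkpoint once per process and reuse it across reruns.
    tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
    model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
    return tokenizer, model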
@@ -69,6 +77,9 @@ def download_html():
    st.download_button(label="Download Report", data=html, file_name=file_name, mime=mime_type)
    st.stop()

+if 'filename_key' not in st.session_state:
+    st.session_state.filename_key = ''
+
st.write("""
# Sentiment Analysis Tool
""")
@@ -76,22 +87,29 @@ st.write("""
#uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False, type=['pdf'])
uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=True, type=['pdf'])
#if uploaded_file is not None:
-if len(uploaded_file)>0:
+if len(uploaded_file)==0:
+    #print('none')
+    st.session_state.filename_key = ''
+elif len(uploaded_file)>0:
    import time
-
    # Wait for 5 seconds
    time.sleep(5)
-
+
    pdf_reader = PyPDF2.PdfReader(uploaded_file[0])
-    # Get the number of pages in the PDF file
    num_pages = len(pdf_reader.pages)
+    file_name = uploaded_file[0].name

+    # st.write(st.session_state.filename_key)
+    # print(file_name)
+    # st.write("Filename:", file_name)
    if num_pages > 20:
        st.error("Pages in PDF file should be less than 20.")
    # Check that only one file was uploaded
    #elif isinstance(uploaded_file, list):
    elif len(uploaded_file) > 1:
        st.error("Please upload only one PDF file at a time.")
+    elif st.session_state.filename_key == file_name:
+        st.write("Report downloaded successfully")
    else:
        #uploaded_file = uploaded_file[0]
        # Check that the file is a PDF
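The added filename_key branch is a rerun guard: st.session_state survives Streamlit reruns, so once a report has been generated for a file the app can show a confirmation instead of reprocessing the same PDF (for example after the download button triggers a rerun). A minimal sketch of the pattern, with illustrative names:

import streamlit as st

if "last_processed" not in st.session_state:
    st.session_state.last_processed = ""

uploaded = st.file_uploader("Choose a PDF file", accept_multiple_files=True, type=["pdf"])

if len(uploaded) == 0:
    st.session_state.last_processed = ""                  # reset once the file is removed
elif st.session_state.last_processed == uploaded[0].name:
    st.write("Report downloaded successfully")            # already handled on a previous run
else:
    # ... extract text, classify, build and offer the report ...
    st.session_state.last_processed = uploaded[0].name    # remember this file for the next rerun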
@@ -132,14 +150,23 @@ if len(uploaded_file)>0:

    with st.spinner('Processing please wait...'):

+        tokenizer = tokenizer_sentiment
+        model = model_sentiment
        pipe = pipeline(model="ProsusAI/finbert")
-
        classifier = pipeline(model="ProsusAI/finbert")
        output = classifier(useful_sentence)

+        tokenizer = tokenizer_emotion
+        model = model_emotion
+        classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
+        output_emotion = classifier(useful_sentence)
+        #print(output_emotion[0])
+
        df = pd.DataFrame.from_dict(output)
        df['Sentence']= pd.Series(useful_sentence)

+        ############################ 3. Processing ############################
+
        labels = ['neutral', 'positive', 'negative']
        values = df.label.value_counts().to_list()

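Note that the added emotion classifier is built from the checkpoint name, so the tokenizer/model variables assigned just above it are not what the pipeline call consumes. A hedged sketch of constructing it from the cached objects instead (illustrative only, not what the commit does); useful_sentence is assumed to be the list of sentences extracted from the PDF:

from transformers import pipeline

# Build the emotion classifier from the objects cached by get_emotion_model(),
# so the checkpoint is not resolved by name a second time.
emotion_classifier = pipeline(
    "text-classification",
    model=model_emotion,
    tokenizer=tokenizer_emotion,
    top_k=1,   # keep only the highest-scoring emotion per sentence
)
output_emotion = emotion_classifier(useful_sentence)   # e.g. [[{'label': 'joy', 'score': 0.93}], ...]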
@@ -178,6 +205,8 @@ if len(uploaded_file)>0:
        df_temp = pd.concat([df_temp, pos_df])


+        ############################ 4. Plotting ############################
+
        fig = make_subplots(
            rows=26, cols=6,
            specs=[ [None, None, None, None, None, None],
@@ -279,31 +308,21 @@ if len(uploaded_file)>0:
        # Add HTML tags to force line breaks in the title text
        wrapped_title = "<br>".join(wrapped_title.split("\n"))

-        fig.update_layout(height=
+        fig.update_layout(height=1500, showlegend=False, title={'text': f"<b>{wrapped_title} - Sentiment Analysis Report</b>", 'x': 0.5, 'xanchor': 'center','font': {'size': 32}})

        #pyo.plot(fig, filename='report.html')

+        ############################## 5. Download Report ##############################
+
        buffer = io.StringIO()
        fig.write_html(buffer, include_plotlyjs='cdn')
        html_bytes = buffer.getvalue().encode()

        st.download_button(
-            label='Download
+            label='Download Report',
            data=html_bytes,
            file_name='report.html',
            mime='text/html'
        )

-
-        # import base64
-
-        # # Convert the figure to HTML format
-        # fig_html = pio.to_html(fig, full_html=False)
-        # b64 = base64.b64encode(fig_html.encode()).decode()
-
-        # # Generate a download link
-        # filename = "figure.html"
-        # href = f'<a href="data:file/html;base64,{b64}" download="{filename}">Download Report</a>'
-
-        # # Display the link
-        # st.markdown(href, unsafe_allow_html=True)
+        st.session_state.filename_key = file_name
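The export path replaces the commented-out base64 link: the Plotly figure is serialized to HTML in memory and the bytes are handed to st.download_button. A self-contained sketch of the same flow with a placeholder figure:

import io

import plotly.graph_objects as go
import streamlit as st

fig = go.Figure(go.Bar(x=["negative", "neutral", "positive"], y=[3, 5, 2]))

buffer = io.StringIO()
fig.write_html(buffer, include_plotlyjs="cdn")   # load plotly.js from a CDN to keep the file small
html_bytes = buffer.getvalue().encode()          # st.download_button accepts str or bytes

st.download_button(
    label="Download Report",
    data=html_bytes,
    file_name="report.html",
    mime="text/html",
)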
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-streamlit
+streamlit==1.17.0
 transformers
 torch
 PyPDF2