darthPanda committed on
Commit
e0ed1f1
1 Parent(s): b2b6846
Files changed (2) hide show
  1. app.py +41 -22
  2. requirements.txt +1 -1
app.py CHANGED
@@ -37,12 +37,20 @@ from transformers import pipeline
37
 
38
  #@st.cache_resource()
39
  @st.cache(allow_output_mutation=True)
40
- def get_model():
41
  tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
42
  model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
43
  return tokenizer,model
44
 
45
- tokenizer,model = get_model()
 
 
 
 
 
 
 
 
46
 
47
  def extract_text_from_pdf(path):
48
  text=''
@@ -69,6 +77,9 @@ def download_html():
69
  st.download_button(label="Download Report", data=html, file_name=file_name, mime=mime_type)
70
  st.stop()
71
 
 
 
 
72
  st.write("""
73
  # Sentiment Analysis Tool
74
  """)
@@ -76,22 +87,29 @@ st.write("""
76
  #uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False, type=['pdf'])
77
  uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=True, type=['pdf'])
78
  #if uploaded_file is not None:
79
- if len(uploaded_file)>0:
 
 
 
80
  import time
81
-
82
  # Wait for 5 seconds
83
  time.sleep(5)
84
- #print('gone')
85
  pdf_reader = PyPDF2.PdfReader(uploaded_file[0])
86
- # Get the number of pages in the PDF file
87
  num_pages = len(pdf_reader.pages)
 
88
 
 
 
 
89
  if num_pages > 20:
90
  st.error("Pages in PDF file should be less than 20.")
91
  # Check that only one file was uploaded
92
  #elif isinstance(uploaded_file, list):
93
  elif len(uploaded_file) > 1:
94
  st.error("Please upload only one PDF file at a time.")
 
 
95
  else:
96
  #uploaded_file = uploaded_file[0]
97
  # Check that the file is a PDF
@@ -132,14 +150,23 @@ if len(uploaded_file)>0:
132
 
133
  with st.spinner('Processing please wait...'):
134
 
 
 
135
  pipe = pipeline(model="ProsusAI/finbert")
136
-
137
  classifier = pipeline(model="ProsusAI/finbert")
138
  output = classifier(useful_sentence)
139
 
 
 
 
 
 
 
140
  df = pd.DataFrame.from_dict(output)
141
  df['Sentence']= pd.Series(useful_sentence)
142
 
 
 
143
  labels = ['neutral', 'positive', 'negative']
144
  values = df.label.value_counts().to_list()
145
 
@@ -178,6 +205,8 @@ if len(uploaded_file)>0:
178
  df_temp = pd.concat([df_temp, pos_df])
179
 
180
 
 
 
181
  fig = make_subplots(
182
  rows=26, cols=6,
183
  specs=[ [None, None, None, None, None, None],
@@ -279,31 +308,21 @@ if len(uploaded_file)>0:
279
  # Add HTML tags to force line breaks in the title text
280
  wrapped_title = "<br>".join(wrapped_title.split("\n"))
281
 
282
- fig.update_layout(height=700, showlegend=False, title={'text': f"<b>{wrapped_title} - Sentiment Analysis Report</b>", 'x': 0.5, 'xanchor': 'center','font': {'size': 32}})
283
 
284
  #pyo.plot(fig, filename='report.html')
285
 
 
 
286
  buffer = io.StringIO()
287
  fig.write_html(buffer, include_plotlyjs='cdn')
288
  html_bytes = buffer.getvalue().encode()
289
 
290
  st.download_button(
291
- label='Download HTML',
292
  data=html_bytes,
293
  file_name='report.html',
294
  mime='text/html'
295
  )
296
 
297
-
298
- # import base64
299
-
300
- # # Convert the figure to HTML format
301
- # fig_html = pio.to_html(fig, full_html=False)
302
- # b64 = base64.b64encode(fig_html.encode()).decode()
303
-
304
- # # Generate a download link
305
- # filename = "figure.html"
306
- # href = f'<a href="data:file/html;base64,{b64}" download="{filename}">Download Report</a>'
307
-
308
- # # Display the link
309
- # st.markdown(href, unsafe_allow_html=True)
 
37
 
38
  #@st.cache_resource()
39
  @st.cache(allow_output_mutation=True)
40
+ def get_sentiment_model():
41
  tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
42
  model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
43
  return tokenizer,model
44
 
45
+ tokenizer_sentiment,model_sentiment = get_sentiment_model()
46
+
47
+ @st.cache(allow_output_mutation=True)
48
+ def get_emotion_model():
49
+ tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
50
+ model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base")
51
+ return tokenizer,model
52
+
53
+ tokenizer_emotion,model_emotion = get_emotion_model()
54
 
55
  def extract_text_from_pdf(path):
56
  text=''
 
77
  st.download_button(label="Download Report", data=html, file_name=file_name, mime=mime_type)
78
  st.stop()
79
 
80
+ if 'filename_key' not in st.session_state:
81
+ st.session_state.filename_key = ''
82
+
83
  st.write("""
84
  # Sentiment Analysis Tool
85
  """)
 
87
  #uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False, type=['pdf'])
88
  uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=True, type=['pdf'])
89
  #if uploaded_file is not None:
90
+ if len(uploaded_file)==0:
91
+ #print('none')
92
+ st.session_state.filename_key = ''
93
+ elif len(uploaded_file)>0:
94
  import time
 
95
  # Wait for 5 seconds
96
  time.sleep(5)
97
+
98
  pdf_reader = PyPDF2.PdfReader(uploaded_file[0])
 
99
  num_pages = len(pdf_reader.pages)
100
+ file_name = uploaded_file[0].name
101
 
102
+ # st.write(st.session_state.filename_key)
103
+ # print(file_name)
104
+ # st.write("Filename:", file_name)
105
  if num_pages > 20:
106
  st.error("Pages in PDF file should be less than 20.")
107
  # Check that only one file was uploaded
108
  #elif isinstance(uploaded_file, list):
109
  elif len(uploaded_file) > 1:
110
  st.error("Please upload only one PDF file at a time.")
111
+ elif st.session_state.filename_key == file_name:
112
+ st.write("Report downloaded successfully")
113
  else:
114
  #uploaded_file = uploaded_file[0]
115
  # Check that the file is a PDF
 
150
 
151
  with st.spinner('Processing please wait...'):
152
 
153
+ tokenizer = tokenizer_sentiment
154
+ model = model_sentiment
155
  pipe = pipeline(model="ProsusAI/finbert")
 
156
  classifier = pipeline(model="ProsusAI/finbert")
157
  output = classifier(useful_sentence)
158
 
159
+ tokenizer = tokenizer_emotion
160
+ model = model_emotion
161
+ classifier = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=1)
162
+ output_emotion = classifier(useful_sentence)
163
+ #print(output_emotion[0])
164
+
165
  df = pd.DataFrame.from_dict(output)
166
  df['Sentence']= pd.Series(useful_sentence)
167
 
168
+ ############################ 3. Processing ############################
169
+
170
  labels = ['neutral', 'positive', 'negative']
171
  values = df.label.value_counts().to_list()
172
 
 
205
  df_temp = pd.concat([df_temp, pos_df])
206
 
207
 
208
+ ############################ 4. Plotting ############################
209
+
210
  fig = make_subplots(
211
  rows=26, cols=6,
212
  specs=[ [None, None, None, None, None, None],
 
308
  # Add HTML tags to force line breaks in the title text
309
  wrapped_title = "<br>".join(wrapped_title.split("\n"))
310
 
311
+ fig.update_layout(height=1500, showlegend=False, title={'text': f"<b>{wrapped_title} - Sentiment Analysis Report</b>", 'x': 0.5, 'xanchor': 'center','font': {'size': 32}})
312
 
313
  #pyo.plot(fig, filename='report.html')
314
 
315
+ ############################## 5. Download Report ##############################
316
+
317
  buffer = io.StringIO()
318
  fig.write_html(buffer, include_plotlyjs='cdn')
319
  html_bytes = buffer.getvalue().encode()
320
 
321
  st.download_button(
322
+ label='Download Report',
323
  data=html_bytes,
324
  file_name='report.html',
325
  mime='text/html'
326
  )
327
 
328
+ st.session_state.filename_key = file_name
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- streamlit
2
  transformers
3
  torch
4
  PyPDF2
 
1
+ streamlit==1.17.0
2
  transformers
3
  torch
4
  PyPDF2