Cachoups committed on
Commit
9995b35
·
verified ·
1 Parent(s): cbcd9d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -10
app.py CHANGED
@@ -25,6 +25,7 @@ def make_spans(text, results):
25
  summarizer = pipeline("summarization", model="human-centered-summarization/financial-summarization-pegasus")
26
  fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
27
  fin_model_bis = pipeline("sentiment-analysis", model='ProsusAI/finbert', tokenizer='ProsusAI/finbert')
 
28
 
29
  def summarize_text(text):
30
  resp = summarizer(text)
@@ -81,6 +82,7 @@ def get_sheet_names(file):
81
  xls = pd.ExcelFile(os.path.join(PDF_FOLDER, file))
82
  return gr.update(choices=xls.sheet_names)
83
 
 
84
  def process_and_compare(file1, sheet1, file2, sheet2):
85
  def process_file(file_path, sheet_name):
86
  # Extract year from file name
@@ -106,20 +108,15 @@ def process_and_compare(file1, sheet1, file2, sheet2):
106
  df.columns = new_columns
107
  else:
108
  raise ValueError(f"Expected {len(new_columns)} columns, but found {len(df.columns)} columns in the data.")
109
-
110
- return df
111
 
112
  # Process both files
113
- df1 = process_file(file1, sheet1)
114
- df2 = process_file(file2, sheet2)
 
115
  year1 = int(re.search(r'(\d{4})', file1).group(1))
116
  year2 = int(re.search(r'(\d{4})', file2).group(1))
117
- # Calculate the differences
118
- # historical_col1 = f'Historical {int(year1) - 1}'
119
- # historical_col2 = f'Historical {int(year2) - 1}'
120
-
121
- # df1['Historical vs Adverse'] = df1[historical_col1] - df1['Adverse Cumulative']
122
- # df2['Historical vs Adverse'] = df2[historical_col2] - df2['Adverse Cumulative']
123
 
124
  # Merge dataframes on 'Country'
125
  merged_df = pd.merge(df2, df1, on='Country', suffixes=(f'_{year1}', f'_{year2}'))
@@ -197,10 +194,48 @@ def process_pdfs_and_analyze_sentiment(file1, file2, sheet):
197
  result_pdf2 = fin_ext_bis(text_pdf2)
198
 
199
  return result_pdf1, result_pdf2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  stored_paragraphs_1 = []
202
  stored_paragraphs_2 = []
203
 
 
 
 
204
  with gr.Blocks() as demo:
205
  with gr.Tab("Financial Report Text Analysis"):
206
  gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
@@ -283,6 +318,11 @@ with gr.Blocks() as demo:
283
  with gr.Row():
284
  with gr.Column():
285
  sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
 
 
 
 
 
286
  with gr.Column():
287
  sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
288
 
 
25
  summarizer = pipeline("summarization", model="human-centered-summarization/financial-summarization-pegasus")
26
  fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
27
  fin_model_bis = pipeline("sentiment-analysis", model='ProsusAI/finbert', tokenizer='ProsusAI/finbert')
28
+ table_to_text = pipeline('text2text-generation', model='google/flan-t5-large')
29
 
30
  def summarize_text(text):
31
  resp = summarizer(text)
 
82
  xls = pd.ExcelFile(os.path.join(PDF_FOLDER, file))
83
  return gr.update(choices=xls.sheet_names)
84
 
85
+
86
  def process_and_compare(file1, sheet1, file2, sheet2):
87
  def process_file(file_path, sheet_name):
88
  # Extract year from file name
 
108
  df.columns = new_columns
109
  else:
110
  raise ValueError(f"Expected {len(new_columns)} columns, but found {len(df.columns)} columns in the data.")
111
+ columns = ['Country', f'Adverse {year}', f'Adverse {year+1}', f'Adverse {year+2}', 'Adverse Cumulative']
112
+ return df, df[columns]
113
 
114
  # Process both files
115
+ global stored_df1, stored_df2
116
+ df1, stored_df1 = process_file(file1, sheet1)
117
+ df2, stored_df2 = process_file(file2, sheet2)
118
  year1 = int(re.search(r'(\d{4})', file1).group(1))
119
  year2 = int(re.search(r'(\d{4})', file2).group(1))
 
 
 
 
 
 
120
 
121
  # Merge dataframes on 'Country'
122
  merged_df = pd.merge(df2, df1, on='Country', suffixes=(f'_{year1}', f'_{year2}'))
 
194
  result_pdf2 = fin_ext_bis(text_pdf2)
195
 
196
  return result_pdf1, result_pdf2
197
+ def change_choices(df):
198
+ return gr.Dropdown.update(choices=df.Country.values.tolist())
199
+
200
+ def generate_text(df, country, theme):
201
+ # Filter the dataframe based on the country
202
+ row = df[df['Country'] == country].iloc[0]
203
+
204
+ # Convert the row to a string format for prompt
205
+ row_str = row.to_string(index=True)
206
+
207
+ # Create the prompt
208
+ prompt = f"""
209
+ Here is an example:
210
+ A table from France country:
211
+ Country France
212
+ Adverse 2020 -0.427975
213
+ Adverse 2021 -1.987167
214
+ Adverse 2022 -1.195906
215
+ Adverse Cumulative -3.573762
216
 
217
+ The theme is GDP
218
+
219
+ The output:
220
+ In adverse scenario, the growth for GDP in France is -0.427975% in 2018 and isn't getting better in 2019 with -1.98767% and -1.195906% in 2022.
221
+
222
+ Here is another table:
223
+ {row_str}
224
+
225
+ Summarize the adverse scenario growth for {theme} in {country} based on the data above, following a similar pattern to the example for France.
226
+ """
227
+
228
+ # Generate the descriptive text using the model
229
+ result = table_to_text(prompt, max_length=200)[0]['generated_text']
230
+
231
+ return result
232
+ # Global variable
233
  stored_paragraphs_1 = []
234
  stored_paragraphs_2 = []
235
 
236
+ stored_df1 = []
237
+ stored_df2 = []
238
+
239
  with gr.Blocks() as demo:
240
  with gr.Tab("Financial Report Text Analysis"):
241
  gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
 
318
  with gr.Row():
319
  with gr.Column():
320
  sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
321
+ country_1_dropdown = gr.Dropdown(label="Select Country from Excel File 1")
322
+ country_1_dropdown.change(fn =change_choices, inputs= stored_df1, outputs= paragraph_1_dropdown)
323
+ summarize_btn1_country = gr.Button("Summary for the selected country")
324
+ text_result_df1 = gr.Textbox(label="Sentence for excel file 1", lines=2)
325
+ summarize_btn1_country.click(fn= generate_text, inputs = [stored_df1, country_1_dropdown, sheet], outputs = text_result_df1)
326
  with gr.Column():
327
  sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
328