Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -25,6 +25,7 @@ def make_spans(text, results):
|
|
25 |
summarizer = pipeline("summarization", model="human-centered-summarization/financial-summarization-pegasus")
|
26 |
fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
|
27 |
fin_model_bis = pipeline("sentiment-analysis", model='ProsusAI/finbert', tokenizer='ProsusAI/finbert')
|
|
|
28 |
|
29 |
def summarize_text(text):
|
30 |
resp = summarizer(text)
|
@@ -81,6 +82,7 @@ def get_sheet_names(file):
|
|
81 |
xls = pd.ExcelFile(os.path.join(PDF_FOLDER, file))
|
82 |
return gr.update(choices=xls.sheet_names)
|
83 |
|
|
|
84 |
def process_and_compare(file1, sheet1, file2, sheet2):
|
85 |
def process_file(file_path, sheet_name):
|
86 |
# Extract year from file name
|
@@ -106,20 +108,15 @@ def process_and_compare(file1, sheet1, file2, sheet2):
|
|
106 |
df.columns = new_columns
|
107 |
else:
|
108 |
raise ValueError(f"Expected {len(new_columns)} columns, but found {len(df.columns)} columns in the data.")
|
109 |
-
|
110 |
-
return df
|
111 |
|
112 |
# Process both files
|
113 |
-
|
114 |
-
|
|
|
115 |
year1 = int(re.search(r'(\d{4})', file1).group(1))
|
116 |
year2 = int(re.search(r'(\d{4})', file2).group(1))
|
117 |
-
# Calculate the differences
|
118 |
-
# historical_col1 = f'Historical {int(year1) - 1}'
|
119 |
-
# historical_col2 = f'Historical {int(year2) - 1}'
|
120 |
-
|
121 |
-
# df1['Historical vs Adverse'] = df1[historical_col1] - df1['Adverse Cumulative']
|
122 |
-
# df2['Historical vs Adverse'] = df2[historical_col2] - df2['Adverse Cumulative']
|
123 |
|
124 |
# Merge dataframes on 'Country'
|
125 |
merged_df = pd.merge(df2, df1, on='Country', suffixes=(f'_{year1}', f'_{year2}'))
|
@@ -197,10 +194,48 @@ def process_pdfs_and_analyze_sentiment(file1, file2, sheet):
|
|
197 |
result_pdf2 = fin_ext_bis(text_pdf2)
|
198 |
|
199 |
return result_pdf1, result_pdf2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
stored_paragraphs_1 = []
|
202 |
stored_paragraphs_2 = []
|
203 |
|
|
|
|
|
|
|
204 |
with gr.Blocks() as demo:
|
205 |
with gr.Tab("Financial Report Text Analysis"):
|
206 |
gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
|
@@ -283,6 +318,11 @@ with gr.Blocks() as demo:
|
|
283 |
with gr.Row():
|
284 |
with gr.Column():
|
285 |
sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
|
|
|
|
|
|
|
|
|
|
|
286 |
with gr.Column():
|
287 |
sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
|
288 |
|
|
|
25 |
summarizer = pipeline("summarization", model="human-centered-summarization/financial-summarization-pegasus")
|
26 |
fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
|
27 |
fin_model_bis = pipeline("sentiment-analysis", model='ProsusAI/finbert', tokenizer='ProsusAI/finbert')
|
28 |
+
table_to_text = pipeline('text2text-generation', model='google/flan-t5-large')
|
29 |
|
30 |
def summarize_text(text):
|
31 |
resp = summarizer(text)
|
|
|
82 |
xls = pd.ExcelFile(os.path.join(PDF_FOLDER, file))
|
83 |
return gr.update(choices=xls.sheet_names)
|
84 |
|
85 |
+
|
86 |
def process_and_compare(file1, sheet1, file2, sheet2):
|
87 |
def process_file(file_path, sheet_name):
|
88 |
# Extract year from file name
|
|
|
108 |
df.columns = new_columns
|
109 |
else:
|
110 |
raise ValueError(f"Expected {len(new_columns)} columns, but found {len(df.columns)} columns in the data.")
|
111 |
+
columns = ['Country', f'Adverse {year}', f'Adverse {year+1}', f'Adverse {year+2}', 'Adverse Cumulative']
|
112 |
+
return df, df[columns]
|
113 |
|
114 |
# Process both files
|
115 |
+
global stored_df1, stored_df2
|
116 |
+
df1, stored_df1 = process_file(file1, sheet1)
|
117 |
+
df2, stored_df2 = process_file(file2, sheet2)
|
118 |
year1 = int(re.search(r'(\d{4})', file1).group(1))
|
119 |
year2 = int(re.search(r'(\d{4})', file2).group(1))
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
|
121 |
# Merge dataframes on 'Country'
|
122 |
merged_df = pd.merge(df2, df1, on='Country', suffixes=(f'_{year1}', f'_{year2}'))
|
|
|
194 |
result_pdf2 = fin_ext_bis(text_pdf2)
|
195 |
|
196 |
return result_pdf1, result_pdf2
|
197 |
+
def change_choices(df):
    """Build a Gradio update that replaces a dropdown's choices with the countries in ``df``.

    Uses the generic ``gr.update`` helper, matching ``get_sheet_names`` in
    this file, instead of the per-component ``gr.Dropdown.update`` classmethod
    (which is not available in newer Gradio releases).

    Args:
        df: Table exposing a ``Country`` column. Presumably one of the
            per-file frames produced by ``process_and_compare`` -- confirm
            against the caller.

    Returns:
        The object returned by ``gr.update(choices=...)``, where the choices
        are the values of the ``Country`` column in row order.
    """
    return gr.update(choices=df['Country'].tolist())
|
199 |
+
|
200 |
+
def generate_text(df, country, theme):
    """Describe one country's adverse-scenario figures in a sentence.

    The first row of ``df`` whose ``Country`` equals ``country`` is rendered
    with ``Series.to_string`` and embedded in a one-shot prompt, which is then
    sent to the module-level ``table_to_text`` pipeline.

    Args:
        df: Table with a ``Country`` column; the remaining columns are
            rendered verbatim into the prompt.
        country: Value to look up in the ``Country`` column.
        theme: Name of the indicator the figures refer to (the worked example
            in the prompt uses ``GDP``); inserted verbatim into the prompt.

    Returns:
        The ``generated_text`` field of the first result returned by
        ``table_to_text``.

    Raises:
        IndexError: If no row of ``df`` matches ``country``. The exception
            type is the one ``.iloc[0]`` would raise on an empty selection;
            only the message is made explicit.
    """
    # Filter the dataframe based on the country
    matches = df[df['Country'] == country]
    if matches.empty:
        raise IndexError(f"Country {country!r} not found in the 'Country' column.")
    row = matches.iloc[0]

    # Convert the row to a string format for prompt
    row_str = row.to_string(index=True)

    # Create the prompt. The worked example must agree with its own table
    # (same years, same figures), otherwise the model is shown how to
    # misreport the data. Prompt lines are flush-left so that no source
    # indentation leaks into the text sent to the model.
    prompt = f"""
Here is an example:
A table from France country:
Country France
Adverse 2020 -0.427975
Adverse 2021 -1.987167
Adverse 2022 -1.195906
Adverse Cumulative -3.573762

The theme is GDP

The output:
In adverse scenario, the growth for GDP in France is -0.427975% in 2020 and isn't getting better in 2021 with -1.987167% and -1.195906% in 2022.

Here is another table:
{row_str}

Summarize the adverse scenario growth for {theme} in {country} based on the data above, following a similar pattern to the example for France.
"""

    # Generate the descriptive text using the model
    result = table_to_text(prompt, max_length=200)[0]['generated_text']

    return result
|
232 |
+
# Global variable
|
233 |
stored_paragraphs_1 = []
|
234 |
stored_paragraphs_2 = []
|
235 |
|
236 |
+
stored_df1 = []
|
237 |
+
stored_df2 = []
|
238 |
+
|
239 |
with gr.Blocks() as demo:
|
240 |
with gr.Tab("Financial Report Text Analysis"):
|
241 |
gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
|
|
|
318 |
with gr.Row():
|
319 |
with gr.Column():
|
320 |
sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
|
321 |
+
country_1_dropdown = gr.Dropdown(label="Select Country from Excel File 1")
|
322 |
+
country_1_dropdown.change(fn =change_choices, inputs= stored_df1, outputs= paragraph_1_dropdown)
|
323 |
+
summarize_btn1_country = gr.Button("Summary for the selected country")
|
324 |
+
text_result_df1 = gr.Textbox(label="Sentence for excel file 1", lines=2)
|
325 |
+
summarize_btn1_country.click(fn= generate_text, inputs = [stored_df1, country_1_dropdown, sheet], outputs = text_result_df1)
|
326 |
with gr.Column():
|
327 |
sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
|
328 |
|