Spaces:

Cachoups
/

FinanceReport

Running

App Files Files Community

Cachoups committed on Sep 18, 2024

Commit

9995b35

verified ·

1 Parent(s): cbcd9d2

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -10

app.py CHANGED Viewed

@@ -25,6 +25,7 @@ def make_spans(text, results):
 summarizer = pipeline("summarization", model="human-centered-summarization/financial-summarization-pegasus")
 fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
 fin_model_bis = pipeline("sentiment-analysis", model='ProsusAI/finbert', tokenizer='ProsusAI/finbert')
 def summarize_text(text):
     resp = summarizer(text)
@@ -81,6 +82,7 @@ def get_sheet_names(file):
     xls = pd.ExcelFile(os.path.join(PDF_FOLDER, file))
     return gr.update(choices=xls.sheet_names)
 def process_and_compare(file1, sheet1, file2, sheet2):
     def process_file(file_path, sheet_name):
         # Extract year from file name
@@ -106,20 +108,15 @@ def process_and_compare(file1, sheet1, file2, sheet2):
             df.columns = new_columns
         else:
             raise ValueError(f"Expected {len(new_columns)} columns, but found {len(df.columns)} columns in the data.")
-        return df
     # Process both files
-    df1 = process_file(file1, sheet1)
-    df2 = process_file(file2, sheet2)
     year1 = int(re.search(r'(\d{4})', file1).group(1))
     year2 = int(re.search(r'(\d{4})', file2).group(1))
-    # Calculate the differences
-    # historical_col1 = f'Historical {int(year1) - 1}'
-    # historical_col2 = f'Historical {int(year2) - 1}'
-    # df1['Historical vs Adverse'] = df1[historical_col1] - df1['Adverse Cumulative']
-    # df2['Historical vs Adverse'] = df2[historical_col2] - df2['Adverse Cumulative']
     # Merge dataframes on 'Country'
     merged_df = pd.merge(df2, df1, on='Country', suffixes=(f'_{year1}', f'_{year2}'))
@@ -197,10 +194,48 @@ def process_pdfs_and_analyze_sentiment(file1, file2, sheet):
     result_pdf2 = fin_ext_bis(text_pdf2)
     return result_pdf1, result_pdf2
 stored_paragraphs_1 = []
 stored_paragraphs_2 = []
 with gr.Blocks() as demo:
     with gr.Tab("Financial Report Text Analysis"):
         gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
@@ -283,6 +318,11 @@ with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column():
                 sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
             with gr.Column():
                 sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")

 summarizer = pipeline("summarization", model="human-centered-summarization/financial-summarization-pegasus")
 fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
 fin_model_bis = pipeline("sentiment-analysis", model='ProsusAI/finbert', tokenizer='ProsusAI/finbert')
+table_to_text = pipeline('text2text-generation', model='google/flan-t5-large')
 def summarize_text(text):
     resp = summarizer(text)
     xls = pd.ExcelFile(os.path.join(PDF_FOLDER, file))
     return gr.update(choices=xls.sheet_names)
 def process_and_compare(file1, sheet1, file2, sheet2):
     def process_file(file_path, sheet_name):
         # Extract year from file name
             df.columns = new_columns
         else:
             raise ValueError(f"Expected {len(new_columns)} columns, but found {len(df.columns)} columns in the data.")
+        columns = ['Country', f'Adverse {year}', f'Adverse {year+1}', f'Adverse {year+2}', 'Adverse Cumulative']
+        return df, df[columns]
     # Process both files
+    global stored_df1, stored_df2
+    df1, stored_df1 = process_file(file1, sheet1)
+    df2, stored_df2 = process_file(file2, sheet2)
     year1 = int(re.search(r'(\d{4})', file1).group(1))
     year2 = int(re.search(r'(\d{4})', file2).group(1))
     # Merge dataframes on 'Country'
     merged_df = pd.merge(df2, df1, on='Country', suffixes=(f'_{year1}', f'_{year2}'))
     result_pdf2 = fin_ext_bis(text_pdf2)
     return result_pdf1, result_pdf2
def change_choices(df):
    """Build a Gradio update that sets a dropdown's choices from ``df``.

    Args:
        df: Table exposing a ``Country`` column; its values become the
            dropdown choices, in their original order.

    Returns:
        A Gradio update payload carrying only the new ``choices`` list.
    """
    # Use the generic gr.update helper, as get_sheet_names in this file does:
    # the per-component Dropdown.update classmethod is gone in Gradio 4.
    return gr.update(choices=df["Country"].tolist())
def generate_text(df, country, theme):
    """Describe one country's adverse-scenario figures in a sentence.

    Takes the first row of ``df`` whose ``Country`` column equals
    ``country``, renders it as text, and asks the module-level
    ``table_to_text`` pipeline to summarise it, using a one-shot example
    for France as the pattern to follow.

    Args:
        df: Table with a ``Country`` column; the matching row is passed to
            the model as rendered by ``to_string``.
        country: Value to look up in the ``Country`` column.
        theme: Indicator name inserted into the instruction (the example
            uses "GDP").

    Returns:
        The ``generated_text`` field of the first result from the pipeline.

    Raises:
        IndexError: If no row of ``df`` matches ``country``.
    """
    matches = df[df['Country'] == country]
    if matches.empty:
        # Same exception type a bare ``.iloc[0]`` would raise, but with a
        # message that names the country that could not be found.
        raise IndexError(f"No row found for country {country!r}.")

    # Render the selected row as "label   value" lines for the prompt.
    row_str = matches.iloc[0].to_string(index=True)

    # The prompt is assembled line by line so that the example table and the
    # interpolated table share the same (zero) indentation; a triple-quoted
    # literal would indent the example but not the continuation lines of
    # ``row_str``. The example sentence quotes exactly the years and values
    # shown in the example table so the model is not taught to invent them.
    prompt = "\n".join([
        "Here is an example:",
        "A table from France country:",
        "Country                 France",
        "Adverse 2020         -0.427975",
        "Adverse 2021         -1.987167",
        "Adverse 2022         -1.195906",
        "Adverse Cumulative   -3.573762",
        "The theme is GDP",
        "The output:",
        "In adverse scenario, the growth for GDP in France is -0.427975% in 2020 "
        "and isn't getting better in 2021 with -1.987167% and -1.195906% in 2022.",
        "Here is another table:",
        row_str,
        f"Summarize the adverse scenario growth for {theme} in {country} based on "
        "the data above, following a similar pattern to the example for France.",
    ])

    # Generate the descriptive text; the pipeline returns a list of dicts.
    result = table_to_text(prompt, max_length=200)[0]['generated_text']
    return result
+# Global variable
 stored_paragraphs_1 = []
 stored_paragraphs_2 = []
+stored_df1 = []
+stored_df2 = []
 with gr.Blocks() as demo:
     with gr.Tab("Financial Report Text Analysis"):
         gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
         with gr.Row():
             with gr.Column():
                 sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
+                country_1_dropdown = gr.Dropdown(label="Select Country from Excel File 1")
+                country_1_dropdown.change(fn =change_choices, inputs= stored_df1, outputs= paragraph_1_dropdown)
+                summarize_btn1_country = gr.Button("Summary for the selected country")
+                text_result_df1 = gr.Textbox(label="Sentence for excel file 1", lines=2)
+                summarize_btn1_country.click(fn= generate_text, inputs = [stored_df1, country_1_dropdown, sheet], outputs = text_result_df1)
             with gr.Column():
                 sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")