financial_sentiment_analysis

Running on CPU Upgrade

App Files Files Community

mobrown committed on May 30

Commit

dcb533f

•

1 Parent(s): 7702cd1

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -111

app.py CHANGED Viewed

@@ -1,114 +1,98 @@
-import numpy as np
-from sklearn.decomposition import PCA
-import gensim.downloader as api
 import gradio as gr
-import plotly.graph_objects as go
-# Load the Word2Vec model
-model = api.load("word2vec-google-news-300")
-def gensim_analogy(model, word1, word2, word3):
-    try:
-        result = model.most_similar(positive=[word2, word3], negative=[word1], topn=1)
-        return result[0][0]  # Return the word
-    except KeyError as e:
-        return str(e)
-def plot_words_plotly(model, words):
-    vectors = np.array([model[word] for word in words if word in model.key_to_index])
-    # Reduce dimensions to 2D for plotting
-    pca = PCA(n_components=2)
-    vectors_2d = pca.fit_transform(vectors)
-    # Create a scatter plot
-    fig = go.Figure()
-    # Add scatter points for each word vector
-    for word, vec in zip(words, vectors_2d):
-        fig.add_trace(go.Scatter(x=[vec[0]], y=[vec[1]],
-                                 text=[word], mode='markers+text',
-                                 textposition="bottom center",
-                                 name=word))
-    fig.update_layout(title="Visualization of Word Vectors",
-                      xaxis_title="PCA 1",
-                      yaxis_title="PCA 2",
-                      showlegend=True,
-                      width=600,  # Adjust width as needed
-                      height=400)  # Adjust height as needed
-    return fig
-def gradio_interface(choice, custom_input):
-    if choice == "Custom":
-        if not custom_input or len(custom_input.split(", ")) != 3:
-            return "Invalid input. Please enter exactly three words, separated by commas.", None, {
-                "error": "Invalid input"}
-        words = custom_input.split(", ")
-    else:
-        if not choice:
-            return "Invalid input. Please select or enter words.", None, {
-                "error": "Invalid input"}
-        words = choice.split(", ")
-    word1, word2, word3 = words
-    word4 = gensim_analogy(model, word1, word2, word3)
-    plot_fig = plot_words_plotly(model, [word1, word2, word3, word4])
-    if word4 in model.key_to_index:
-        vector = model[word4]
-        vector_display = f"{word4}: {np.round(vector, 2).tolist()}"
-    else:
-        vector_display = "Vector not available for the resulting word"
-    return word4, plot_fig, vector_display
-choices = [
-    "man, king, woman",
-    "Paris, France, London",
-    "strong, stronger, weak",
-    "pork, pig, beef",
-    "Custom"
 ]
-def clear_inputs():
-    return "", "", "", "", None
-# Define the layout using Rows and Columns
-with gr.Blocks() as iface:
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("# Word Analogy and Vector Visualization")
-            gr.Markdown(
-                "Select a predefined triplet of words or choose 'Custom' and enter your own (comma-separated) to find a fourth word by analogy, and see their vectors plotted with Plotly.")
-            radio = gr.Radio(choices=choices, label="Choose predefined words or enter custom words")
-            custom_words = gr.Textbox(
-                label="Custom words (comma-separated, required for custom choice; use only if 'Custom' is selected)",
-                placeholder="Enter 3 words separated by commas")
-            with gr.Row():
-                clear_btn = gr.Button("Clear")
-                submit_btn = gr.Button("Submit")
-            output_word = gr.Textbox(label="Output Word")
-        word_plot = gr.Plot(label="Word Vectors Visualization")
-    with gr.Row():
-        word_vectorization = gr.Textbox(label="Vectorization of the Output Word", lines=4, max_lines=4)
-    clear_btn.click(fn=clear_inputs, inputs=None,
-                    outputs=[radio, custom_words, output_word, word_vectorization, word_plot])
-    submit_btn.click(fn=gradio_interface, inputs=[radio, custom_words],
-                     outputs=[output_word, word_plot, word_vectorization])
-iface.launch(share=True)

 import gradio as gr
+from transformers import pipeline
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.svm import SVC
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.pipeline import make_pipeline
+from sklearn.model_selection import train_test_split
+from sklearn import metrics
+import pandas as pd
+# Load the provided dataset
+file_path = 'data.csv'
+df = pd.read_csv(file_path)
+# Split data into training and test sets
+X_train, X_test, y_train, y_test = train_test_split(df['Sentence'], df['Sentiment'], test_size=0.2, random_state=42)
+# Define models
+nb_model = make_pipeline(TfidfVectorizer(), MultinomialNB())
+svm_model = make_pipeline(TfidfVectorizer(), SVC(probability=True))
+rf_model = make_pipeline(TfidfVectorizer(), RandomForestClassifier())
+# Train models
+nb_model.fit(X_train, y_train)
+svm_model.fit(X_train, y_train)
+rf_model.fit(X_train, y_train)
+# Define sentences to choose from
+sentences = [
+    "The announced restructuring will significantly decrease the company's indebtedness.",
+    "UPM-Kymmene upgraded to `in-line' from `underperform' by Goldman Sachs.",
+    "$AAPL shares are breaking out of the recent resistance level.",
+    "Profitability (in EBIT %) was 13.6%, compared to 14.3% in Q2 2009.",
+    "The Finnish bank has issued a profit warning.",
+    "TeliaSonera's underlying results however included 457 mln SKr in positive one-offs, hence the adjusted underlying EBITDA actually amounts to 7.309 bln SKr, clearly below expectations, analysts said."
 ]
+# Function to map BERT labels
+def map_bert_label(label):
+    """Collapse a star-rating label into a three-way sentiment label.
+
+    "1 star" and "2 stars" map to "negative", "3 stars" maps to "neutral",
+    and "4 stars" and "5 stars" map to "positive". Any other label falls
+    through every group and the function returns None.
+    """
+    star_groups = (
+        ("negative", ["1 star", "2 stars"]),
+        ("neutral", ["3 stars"]),
+        ("positive", ["4 stars", "5 stars"]),
+    )
+    for sentiment, star_labels in star_groups:
+        if label in star_labels:
+            return sentiment
+    return None
+# Function to map RoBERTa labels
+def map_roberta_label(label):
+    """Translate a generic ``LABEL_<n>`` class id into a sentiment name.
+
+    LABEL_0, LABEL_1 and LABEL_2 map to "negative", "neutral" and
+    "positive" respectively. Any other label raises KeyError.
+    """
+    sentiment_by_class_id = dict(
+        LABEL_0="negative",
+        LABEL_1="neutral",
+        LABEL_2="positive",
+    )
+    return sentiment_by_class_id[label]
+# Transformer checkpoints compared by analyze_sentiment, keyed by display name.
+_MODEL_PATHS = {
+    "FinBert": "ProsusAI/finbert",
+    "BERT": "nlptown/bert-base-multilingual-uncased-sentiment",
+    "RoBERTa": "cardiffnlp/twitter-roberta-base-sentiment"
+}
+# Pipelines are built on first use and then reused, so the models are not
+# reloaded on every request.
+_PIPELINE_CACHE = {}
+def _get_sentiment_pipeline(model_path):
+    """Return the cached pipeline for model_path, building it on first use."""
+    if model_path not in _PIPELINE_CACHE:
+        _PIPELINE_CACHE[model_path] = pipeline("sentiment-analysis", model=model_path)
+    return _PIPELINE_CACHE[model_path]
+def _sklearn_result(model, sentence):
+    """Return {"label", "score"} for one sentence from a fitted sklearn pipeline."""
+    label = model.predict([sentence])[0]
+    # NumPy scalars expose .item(); convert so the result holds only built-in
+    # Python types regardless of how the output component treats NumPy values.
+    if hasattr(label, "item"):
+        label = label.item()
+    score = float(model.predict_proba([sentence]).max())
+    return {"label": label, "score": score}
+# Function to analyze sentiment
+def analyze_sentiment(sentence):
+    """Score one sentence with every transformer and scikit-learn model.
+
+    Args:
+        sentence: Text to classify. None or an empty string (for example a
+            dropdown with nothing selected) is accepted.
+
+    Returns:
+        A ``(sentence, results)`` tuple. ``results`` maps each model's
+        display name to ``{"label": ..., "score": ...}``. When there is no
+        sentence, ``("", {})`` is returned without running any model.
+    """
+    if not sentence:
+        # Nothing to classify; slicing None below would raise TypeError.
+        return "", {}
+    # Analyze sentiment using transformers models
+    results = {}
+    for model_name, model_path in _MODEL_PATHS.items():
+        sentiment_analyzer = _get_sentiment_pipeline(model_path)
+        # Only the first 512 characters are passed on. This is a character
+        # cut, not a token-count limit.
+        result = sentiment_analyzer(sentence[:512])[0]
+        if model_name == "BERT":
+            result['label'] = map_bert_label(result['label'])
+        elif model_name == "RoBERTa":
+            result['label'] = map_roberta_label(result['label'])
+        results[model_name] = result
+    # Analyze sentiment using sklearn models
+    results["Naive Bayes"] = _sklearn_result(nb_model, sentence)
+    results["SVM"] = _sklearn_result(svm_model, sentence)
+    results["Random Forest"] = _sklearn_result(rf_model, sentence)
+    return sentence, results
+# Create Gradio interface
+dropdown = gr.Dropdown(choices=sentences, label="Select Sentence")
+text_output = gr.Textbox(label="Selected Sentence", lines=2)
+sentiment_output = gr.JSON(label="Sentiment Scores")
+gr.Interface(
+    fn=analyze_sentiment,
+    inputs=[dropdown],
+    outputs=[text_output, sentiment_output],
+    title="Compare Sentiment Analysis Across Models",
+    description="Select a sentence to see sentiment analysis results from multiple models."
+).launch(share=True)