Spaces:

Cloudy1225
/

stackoverflow-sentiment-analysis

Sleeping

App Files Files Community

Cloudy1225 committed on Nov 10, 2024

Commit

23ddf94

verified ·

1 Parent(s): 5ac8a66

Update app.py

Browse files

Files changed (1) hide show

app.py +200 -200

app.py CHANGED Viewed

@@ -1,200 +1,200 @@
-import csv
-import gradio as gr
-import pandas as pd
-from sentiment_analyser import RandomAnalyser, RoBERTaAnalyser, ChatGPTAnalyser
-import matplotlib.pyplot as plt
-from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
-def plot_bar(value_counts):
-    fig, ax = plt.subplots(figsize=(6, 6))
-    value_counts.plot.barh(ax=ax)
-    ax.bar_label(ax.containers[0])
-    plt.title('Frequency of Predictions')
-    return fig
-def plot_confusion_matrix(y_pred, y_true):
-    cm = confusion_matrix(y_true, y_pred, normalize='true')
-    fig, ax = plt.subplots(figsize=(6, 6))
-    labels = []
-    for label in SENTI_MAPPING.keys():
-        if (label in y_pred.values) or (label in y_true.values):
-            labels.append(label)
-    disp = ConfusionMatrixDisplay(confusion_matrix=cm,
-                                  display_labels=labels)
-    disp.plot(cmap="Blues", values_format=".2f", ax=ax, colorbar=False)
-    plt.title("Normalized Confusion Matrix")
-    return fig
-def classify(num: int):
-    samples_df = df.sample(num)
-    X = samples_df['Text'].tolist()
-    y = samples_df['Label']
-    roberta = MODEL_MAPPING[OUR_MODEL]
-    y_pred = pd.Series(roberta.predict(X), index=samples_df.index)
-    samples_df['Predict'] = y_pred
-    bar = plot_bar(y_pred.value_counts())
-    cm = plot_confusion_matrix(y_pred, y)
-    plt.close()
-    return samples_df, bar, cm
-def analysis(Text):
-    keys = []
-    values = []
-    for name, model in MODEL_MAPPING.items():
-        keys.append(name)
-        values.append(SENTI_MAPPING[model.predict([Text])[0]])
-    return pd.DataFrame([values], columns=keys)
-def analyse_file(file):
-    output_name = 'output.csv'
-    with open(output_name, mode='w', newline='') as output:
-        writer = csv.writer(output)
-        header = ['Text', 'Label']
-        writer.writerow(header)
-        model = MODEL_MAPPING[OUR_MODEL]
-        with open(file.name) as f:
-            for line in f:
-                text = line[:-1]
-                sentiment = model.predict([text])
-                writer.writerow([text, sentiment[0]])
-    return output_name
-MODEL_MAPPING = {
-    'Random': RandomAnalyser(),
-    'RoBERTa': RoBERTaAnalyser(),
-    'ChatGPT': RandomAnalyser(),
-}
-OUR_MODEL = 'RoBERTa'
-SENTI_MAPPING = {
-    'negative': '😭',
-    'neutral': '😶',
-    'positive': '🥰'
-}
-TITLE = "Sentiment Analysis on Software Engineer Texts"
-DESCRIPTION = {
-    'en': (
-        "This is the demo page for our model: "
-        "[Cloudy1225/stackoverflow-roberta-base-sentiment]"
-        "(https://huggingface.co/Cloudy1225/stackoverflow-roberta-base-sentiment)."
-    ),
-    'zh': (
-        "这里是第16组“睿王和他的五个小跟班”软工三迭代三模型演示页面。"
-        "模型链接：[Cloudy1225/stackoverflow-roberta-base-sentiment]"
-        "(https://huggingface.co/Cloudy1225/stackoverflow-roberta-base-sentiment)."
-    )
-}
-PROMPT1 = {
-    'en': (
-        "Enter text in the left text box and press Enter, and the sentiment analysis results will be output on the right. "
-        "Here, we present three types of results, which come from random, our model, and ChatGPT."
-    ),
-    'zh': (
-        "在左侧文本框中输入文本并按回车键，右侧将输出情感分析结果。"
-        "这里我们展示了三种结果，分别是随机结果、模型结果和 ChatGPT 结果。"
-    )
-}
-PROMPT2 = {
-    'en': (
-        "Upload a txt/csv file in the left file box, and the model will perform sentiment analysis on each line of the input text. "
-        "You can download the output file on the right. "
-        "The output file will be in CSV format with two columns: the original text, and the classification results."
-    ),
-    'zh': (
-        "在左侧文件框中上传 txt/csv 文件，模型会对输入文本的每一行当作一个文本进行情感分析。"
-        "可以在右侧下载输出文件，输出文件为两列 csv 格式，第一列为原始文本，第二列为分类结果。"
-    )
-}
-PROMPT3 = {
-    'en': (
-        "Here we evaluate our model on the StackOverflow4423 dataset. "
-        "Sliding the slider will sample a specified number of samples from the StackOverflow4423 dataset and predict their sentiment labels. "
-        "Based on the prediction results, a label distribution chart and a confusion matrix will be plotted."
-    ),
-    'zh': (
-        "这里是在 StackOverflow4423 数据集上评估我们的模型。"
-        "滑动 Slider，将会从 StackOverflow4423 数据集中抽样出指定数量的样本，预测其情感标签。"
-        "并根据预测结果绘制标签分布图和混淆矩阵。"
-    )
-}
-DEFAULT_LANG = 'en'
-MAX_SAMPLES = 64
-df = pd.read_csv('./SOF4423.csv')
-def set_language(lang):
-    return DESCRIPTION[lang], PROMPT1[lang], PROMPT2[lang], PROMPT3[lang]
-with gr.Blocks(title=TITLE) as demo:
-    with gr.Row():
-        with gr.Column():
-            gr.HTML(f"<H1>{TITLE}</H1>")
-        with gr.Column():
-            language_selector = gr.Radio(
-                ['en', 'zh'], label="Select Language", value=DEFAULT_LANG,
-                interactive=True, show_label=False, container=False
-            )
-    description = gr.Markdown(DESCRIPTION[DEFAULT_LANG])
-    gr.HTML("<H2>Model Inference</H2>")
-    prompt1 = gr.Markdown(PROMPT1[DEFAULT_LANG])
-    with gr.Row():
-        with gr.Column():
-            text_input = gr.Textbox(label='Input',
-                                    placeholder="Enter a positive or negative sentence here...")
-        with gr.Column():
-            senti_output = gr.Dataframe(type="pandas", value=[['😋', '😋', '😋']],
-                                        headers=list(MODEL_MAPPING.keys()), interactive=False)
-    text_input.submit(analysis, inputs=text_input, outputs=senti_output, show_progress='full')
-    prompt2 = gr.Markdown(PROMPT2[DEFAULT_LANG])
-    with gr.Row():
-        with gr.Column():
-            file_input = gr.File(label='File',
-                                 file_types=['.txt', '.csv'])
-        with gr.Column():
-            file_output = gr.File(label='Output')
-    file_input.upload(analyse_file, inputs=file_input, outputs=file_output)
-    gr.HTML("<H2>Model Evaluation</H2>")
-    prompt3 = gr.Markdown(PROMPT3[DEFAULT_LANG])
-    input_models = list(MODEL_MAPPING)
-    input_n_samples = gr.Slider(
-        minimum=4,
-        maximum=MAX_SAMPLES,
-        value=8,
-        step=4,
-        label='Number of samples'
-    )
-    with gr.Row():
-        with gr.Column():
-            bar_plot = gr.Plot(label='Predictions Frequency')
-        with gr.Column():
-            cm_plot = gr.Plot(label='Confusion Matrix')
-    with gr.Row():
-        dataframe = gr.Dataframe(type="pandas", wrap=True, headers=['Text', 'Label', 'Predict'])
-    input_n_samples.change(fn=classify, inputs=input_n_samples, outputs=[dataframe, bar_plot, cm_plot])
-    language_selector.change(fn=set_language, inputs=language_selector,
-                             outputs=[description, prompt1, prompt2, prompt3])
-    demo.launch()

+import csv
+import gradio as gr
+import pandas as pd
+from sentiment_analyser import RandomAnalyser, RoBERTaAnalyser, ChatGPTAnalyser
+import matplotlib.pyplot as plt
+from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
+def plot_bar(value_counts):
+    fig, ax = plt.subplots(figsize=(6, 6))
+    value_counts.plot.barh(ax=ax)
+    ax.bar_label(ax.containers[0])
+    plt.title('Frequency of Predictions')
+    return fig
+def plot_confusion_matrix(y_pred, y_true):
+    cm = confusion_matrix(y_true, y_pred, normalize='true')
+    fig, ax = plt.subplots(figsize=(6, 6))
+    labels = []
+    for label in SENTI_MAPPING.keys():
+        if (label in y_pred.values) or (label in y_true.values):
+            labels.append(label)
+    disp = ConfusionMatrixDisplay(confusion_matrix=cm,
+                                  display_labels=labels)
+    disp.plot(cmap="Blues", values_format=".2f", ax=ax, colorbar=False)
+    plt.title("Normalized Confusion Matrix")
+    return fig
+def classify(num: int):
+    samples_df = df.sample(num)
+    X = samples_df['Text'].tolist()
+    y = samples_df['Label']
+    roberta = MODEL_MAPPING[OUR_MODEL]
+    y_pred = pd.Series(roberta.predict(X), index=samples_df.index)
+    samples_df['Predict'] = y_pred
+    bar = plot_bar(y_pred.value_counts())
+    cm = plot_confusion_matrix(y_pred, y)
+    plt.close()
+    return samples_df, bar, cm
+def analysis(Text):
+    keys = []
+    values = []
+    for name, model in MODEL_MAPPING.items():
+        keys.append(name)
+        values.append(SENTI_MAPPING[model.predict([Text])[0]])
+    return pd.DataFrame([values], columns=keys)
+def analyse_file(file):
+    output_name = 'output.csv'
+    with open(output_name, mode='w', newline='') as output:
+        writer = csv.writer(output)
+        header = ['Text', 'Label']
+        writer.writerow(header)
+        model = MODEL_MAPPING[OUR_MODEL]
+        with open(file.name) as f:
+            for line in f:
+                text = line[:-1]
+                sentiment = model.predict([text])
+                writer.writerow([text, sentiment[0]])
+    return output_name
+MODEL_MAPPING = {
+    'Random': RandomAnalyser(),
+    'RoBERTa': RoBERTaAnalyser(),
+    'ChatGPT': RandomAnalyser(),
+}
+OUR_MODEL = 'RoBERTa'
+SENTI_MAPPING = {
+    'negative': '😭',
+    'neutral': '😶',
+    'positive': '🥰'
+}
+TITLE = "Sentiment Analysis on Software Engineer Texts"
+DESCRIPTION = {
+    'en': (
+        "This is the demo page for our model: "
+        "[Cloudy1225/stackoverflow-roberta-base-sentiment]"
+        "(https://huggingface.co/Cloudy1225/stackoverflow-roberta-base-sentiment)."
+    ),
+    'zh': (
+        "这里是第16组“睿王和他的五个小跟班”软工三迭代三模型演示页面。"
+        "模型链接：[Cloudy1225/stackoverflow-roberta-base-sentiment]"
+        "(https://huggingface.co/Cloudy1225/stackoverflow-roberta-base-sentiment)."
+    )
+}
+PROMPT1 = {
+    'en': (
+        "Enter text in the left text box and press Enter, and the sentiment analysis results will be output on the right. "
+        "Here, we present three types of results, which come from random, our model, and ChatGPT."
+    ),
+    'zh': (
+        "在左侧文本框中输入文本并按回车键，右侧将输出情感分析结果。"
+        "这里我们展示了三种结果，分别是随机结果、模型结果和 ChatGPT 结果。"
+    )
+}
+PROMPT2 = {
+    'en': (
+        "Upload a txt/csv file in the left file box, and the model will perform sentiment analysis on each line of the input text. "
+        "You can download the output file on the right. "
+        "The output file will be in CSV format with two columns: the original text, and the classification results."
+    ),
+    'zh': (
+        "在左侧文件框中上传 txt/csv 文件，模型会对输入文本的每一行当作一个文本进行情感分析。"
+        "可以在右侧下载输出文件，输出文件为两列 csv 格式，第一列为原始文本，第二列为分类结果。"
+    )
+}
+PROMPT3 = {
+    'en': (
+        "Here we evaluate our model on the StackOverflow4423 dataset. "
+        "Sliding the slider will sample a specified number of samples from the StackOverflow4423 dataset and predict their sentiment labels. "
+        "Based on the prediction results, a label distribution chart and a confusion matrix will be plotted."
+    ),
+    'zh': (
+        "这里是在 StackOverflow4423 数据集上评估我们的模型。"
+        "滑动 Slider，将会从 StackOverflow4423 数据集中抽样出指定数量的样本，预测其情感标签。"
+        "并根据预测结果绘制标签分布图和混淆矩阵。"
+    )
+}
+DEFAULT_LANG = 'en'
+MAX_SAMPLES = 64
+df = pd.read_csv('./SOF4423.csv')
+def set_language(lang):
+    return DESCRIPTION[lang], PROMPT1[lang], PROMPT2[lang], PROMPT3[lang]
+with gr.Blocks(title=TITLE) as demo:
+    with gr.Row():
+        with gr.Column():
+            gr.HTML(f"<H1>{TITLE}</H1>")
+        with gr.Column(min_width=160):
+            language_selector = gr.Radio(
+                ['en', 'zh'], label="Select Language", value=DEFAULT_LANG,
+                interactive=True, show_label=False, container=False
+            )
+    description = gr.Markdown(DESCRIPTION[DEFAULT_LANG])
+    gr.HTML("<H2>Model Inference</H2>")
+    prompt1 = gr.Markdown(PROMPT1[DEFAULT_LANG])
+    with gr.Row():
+        with gr.Column():
+            text_input = gr.Textbox(label='Input',
+                                    placeholder="Enter a positive or negative sentence here...")
+        with gr.Column():
+            senti_output = gr.Dataframe(type="pandas", value=[['😋', '😋', '😋']],
+                                        headers=list(MODEL_MAPPING.keys()), interactive=False)
+    text_input.submit(analysis, inputs=text_input, outputs=senti_output, show_progress='full')
+    prompt2 = gr.Markdown(PROMPT2[DEFAULT_LANG])
+    with gr.Row():
+        with gr.Column():
+            file_input = gr.File(label='File',
+                                 file_types=['.txt', '.csv'])
+        with gr.Column():
+            file_output = gr.File(label='Output')
+    file_input.upload(analyse_file, inputs=file_input, outputs=file_output)
+    gr.HTML("<H2>Model Evaluation</H2>")
+    prompt3 = gr.Markdown(PROMPT3[DEFAULT_LANG])
+    input_models = list(MODEL_MAPPING)
+    input_n_samples = gr.Slider(
+        minimum=4,
+        maximum=MAX_SAMPLES,
+        value=8,
+        step=4,
+        label='Number of samples'
+    )
+    with gr.Row():
+        with gr.Column():
+            bar_plot = gr.Plot(label='Predictions Frequency')
+        with gr.Column():
+            cm_plot = gr.Plot(label='Confusion Matrix')
+    with gr.Row():
+        dataframe = gr.Dataframe(type="pandas", wrap=True, headers=['Text', 'Label', 'Predict'])
+    input_n_samples.change(fn=classify, inputs=input_n_samples, outputs=[dataframe, bar_plot, cm_plot])
+    language_selector.change(fn=set_language, inputs=language_selector,
+                             outputs=[description, prompt1, prompt2, prompt3])
+    demo.launch()