Spaces:

CCCCC5
/

sentiment_analysis

Sleeping

App Files Files Community

CCCCC5 committed on Oct 17, 2023

Commit

2fb89f5

1 Parent(s): 18c1ca9

Upload 3 files

Browse files

Files changed (3) hide show

LoughranMcDonald_MasterDictionary_2020.csv +0 -0
app.py +104 -0
requirements.txt +6 -0

LoughranMcDonald_MasterDictionary_2020.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

app.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import gradio as gr
+from transformers import pipeline
+import torch
+import pandas as pd
+from openprompt.plms import load_plm
+from openprompt import PromptDataLoader
+from openprompt.prompts import ManualVerbalizer
+from openprompt.prompts import ManualTemplate
+from openprompt.data_utils import InputExample
+from openprompt import PromptForClassification
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+def readLMwords():
+    alldata = pd.read_csv("LoughranMcDonald_MasterDictionary_2020.csv")
+    positive = list(alldata[alldata["Positive"]!=0]["Word"].str.lower())
+    negative = list(alldata[alldata["Negative"]!=0]["Word"].str.lower())
+    uncertainty = list(alldata[alldata["Uncertainty"]!=0]["Word"].str.lower())
+    return positive,negative,uncertainty
+def sentiment_analysis(sentence, model_name):
+    model_name = "HYCCC/"+model_name
+    raw_sentences = sentence.strip().split('\n')
+    template = '{"placeholder":"text_a"} Shares are {"mask"}.'
+    classes = ['positive', 'neutral', 'negative']
+    positive,negative,neutral = readLMwords()
+    label_words = {
+        "positive": positive,
+        "neutral": neutral,
+        "negative": negative,
+    }
+    type_dic = {
+        "HYCCC/RoBERTa_Chinese_AnnualReport_tuned":"roberta",
+        "HYCCC/RoBERTa_Chinese_FinancialNews_tuned":"roberta",
+        "HYCCC/RoBERTa_English_AnnualReport_tuned":"roberta",
+        "HYCCC/RoBERTa_English_FinancialNews_tuned":"roberta",
+    }
+    if 'Chinese' in model_name:
+        tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
+        model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
+        translated_tokens = model.generate(
+             **tokenizer(raw_sentences, return_tensors="pt", padding=True)
+        )
+        sentences_translated = []
+        for t in translated_tokens:
+            sentences_translated.append(tokenizer.decode(t, skip_special_tokens=True))
+        sentences = sentences_translated
+    else:
+        sentences = raw_sentences
+    testdata = []
+    for i,sentence in enumerate(sentences):
+        testdata.append(InputExample(guid=i,text_a=sentence,label=0))
+    plm, tokenizer, model_config, WrapperClass = load_plm(type_dic[model_name], model_name)
+    promptTemplate = ManualTemplate(
+        text = template,
+        tokenizer = tokenizer,
+    )
+    promptVerbalizer = ManualVerbalizer(
+        classes = classes,
+        label_words = label_words,
+        tokenizer = tokenizer,
+    )
+    test_dataloader = PromptDataLoader(
+        dataset = testdata,
+        tokenizer = tokenizer,
+        template = promptTemplate,
+        tokenizer_wrapper_class = WrapperClass,
+        batch_size = 4,
+        max_seq_length = 512,
+    )
+    prompt_model = PromptForClassification(
+        plm=plm,
+        template=promptTemplate,
+        verbalizer=promptVerbalizer,
+        freeze_plm=True
+    )
+    result = []
+    for step, inputs in enumerate(test_dataloader):
+        logits = prompt_model(inputs)
+        result.extend(torch.argmax(logits, dim=-1))
+    output = '\n'.join([f"{classes[res]}, {raw_sentences[i]}" for i,res in enumerate(result)])
+    return str(output)
+demo = gr.Interface(fn=sentiment_analysis,
+                    inputs = [gr.TextArea(placeholder="Enter sentence here. If you have multiple sentences, separate them with '\\n'.",
+                                          label="Sentence",lines=5,
+                                          max_lines = 10),
+                            gr.Radio(choices=["RoBERTa_Chinese_AnnualReport_tuned",
+                                              "RoBERTa_Chinese_FinancialNews_tuned",
+                                              "RoBERTa_English_AnnualReport_tuned",
+                                              "RoBERTa_English_FinancialNews_tuned"],
+                                     label="Model Selection")],
+                    outputs=gr.TextArea(label="Sentiment",lines=5, show_copy_button=True, max_lines = 10),
+                    title = "Prompt Learning-Based Disclosure Sentiment Detection"
+        )
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+transformers
+torch
+scikit-learn
+openprompt
+sacremoses