|
|
|
from transformers import BertTokenizer, BertForSequenceClassification |
|
from transformers import pipeline |
|
import gradio as gr |
|
from collections import Counter |
|
import re |
|
import spacy |
|
import pandas as pd |
|
|
|
|
|
# spaCy English pipeline; show_org() reads the named entities it produces.
nlp = spacy.load('en_core_web_sm')

# Both the tokenizer and the classifier weights come from the same checkpoint,
# so the identifier is spelled once.
_FINBERT_CHECKPOINT = 'yiyanghkust/finbert-tone'

tokenizer = BertTokenizer.from_pretrained(_FINBERT_CHECKPOINT)
finbert = BertForSequenceClassification.from_pretrained(
    _FINBERT_CHECKPOINT,
    num_labels=3,
)

# Ready-to-call classifier used by return_sentiment().
sentiment = pipeline(
    task="sentiment-analysis",
    model=finbert,
    tokenizer=tokenizer,
)
|
|
|
|
|
|
|
# Photo credits run from "Photo by" to the end of their line.
_PHOTO_CREDIT_RE = re.compile(r'Photo by.+')

# Upper bound, in tokens, on what is handed to the sentiment model.
_MAX_TOKENS = 512


def _clean_text(text: str) -> str:
    """Strip photo credits from *text* and collapse whitespace to single spaces."""
    # Credits must be removed while line breaks are still present: the pattern
    # stops at the end of a line, so flattening first would delete everything
    # after the first "Photo by".
    text = _PHOTO_CREDIT_RE.sub('', text)
    # split()/join collapses every run of whitespace (newlines, tabs, repeated
    # spaces) into one space and trims both ends in a single pass.
    return " ".join(text.split())


def return_sentiment(text: str) -> str:
    """Classify the tone of *text* and format the top prediction.

    Args:
        text: Raw article text as entered by the user.

    Returns:
        A string of the form ``"<label> ---> <score>"`` built from the first
        result of the module-level ``sentiment`` pipeline.
    """
    cleaned = _clean_text(text)
    # Truncate at the tokenizer (in tokens) rather than slicing characters:
    # a 512-character slice throws away most of a typical article and still
    # does not strictly bound the number of tokens the model receives.
    results = sentiment(cleaned, truncation=True, max_length=_MAX_TOKENS)
    top = results[0]
    return f"{top['label']} ---> {top['score']}"


def show_org(text: str) -> str:
    """Report the organization mentioned most often in *text*.

    Args:
        text: Raw article text as entered by the user.

    Returns:
        ``"Organization: <name>"`` for the most frequent entity labelled
        ``ORG`` by the module-level spaCy pipeline ``nlp``, or
        ``"Organization: None"`` when no such entity is found.
    """
    doc = nlp(_clean_text(text))
    org_counts = Counter(ent.text for ent in doc.ents if ent.label_ == 'ORG')

    # Guard the lookup: most_common(1) is an empty list when nothing was
    # tagged ORG (e.g. empty input), and indexing it would raise IndexError.
    if not org_counts:
        return 'Organization: None'

    top_org, _ = org_counts.most_common(1)[0]
    return f'Organization: {top_org}'
|
|
|
def final_output(text):
    """Run both analyses on *text* for the Gradio interface.

    Returns a 2-tuple: the string from ``return_sentiment`` followed by the
    string from ``show_org``, in that order.
    """
    sentiment_line = return_sentiment(text)
    organization_line = show_org(text)
    return sentiment_line, organization_line
|
|
|
# Build the text components in a way that works across Gradio generations:
# prefer the top-level gr.Textbox when this Gradio version provides it, and
# fall back to the legacy gr.inputs / gr.outputs classes otherwise.
_textbox = getattr(gr, "Textbox", None)
if _textbox is not None:
    _article_box = _textbox(label="Input your news article here")
    _result_boxes = [
        _textbox(label="Sentiment Analysis"),
        _textbox(label="Named Organization"),
    ]
else:
    _article_box = gr.inputs.Textbox(
        label="Input your news article here",
        optional=False,
    )
    _result_boxes = [
        gr.outputs.Textbox(label="Sentiment Analysis"),
        gr.outputs.Textbox(label="Named Organization"),
    ]

# One article text box in; two text boxes out, filled from the 2-tuple
# returned by final_output (sentiment first, organization second).
sentiment_analysis = gr.Interface(
    fn=final_output,
    inputs=_article_box,
    outputs=_result_boxes,
)


if __name__ == "__main__":
    sentiment_analysis.launch(debug=True)