|
import numpy as np |
|
import pandas as pd |
|
from transformers import AutoTokenizer, AutoConfig,AutoModelForSequenceClassification |
|
from scipy.special import softmax |
|
import os |
|
|
|
|
|
|
|
def check_csv(csv_file, data):
    """Append ``data`` to ``csv_file``, writing the header row when needed.

    If ``csv_file`` already exists and has content, the rows of ``data`` are
    appended without a header. If the file is missing or is empty (zero
    bytes), it is (re)written with the column header followed by the rows.
    The index is never written and the file is always encoded as UTF-8.

    Args:
        csv_file: Path of the CSV file to create or extend.
        data: Object exposing a pandas-style ``to_csv`` method (a DataFrame).

    Returns:
        None. The function is used for its side effect on ``csv_file``.
    """
    # An existing but empty file must still get a header; otherwise the first
    # data row would later be misread as the column names.
    has_rows = os.path.isfile(csv_file) and os.path.getsize(csv_file) > 0
    data.to_csv(
        csv_file,
        mode="a" if has_rows else "w",
        header=not has_rows,
        index=False,
        encoding="utf-8",
    )
|
|
|
|
|
def preprocess(text):
    """Mask user mentions and links in ``text`` with placeholder tokens.

    The text is split on single spaces. Each token that starts with ``@`` and
    is longer than one character becomes ``"@user"``; each token that starts
    with ``http`` becomes ``"http"``; every other token is kept unchanged.
    Because the split is on a single space, runs of spaces are preserved.

    Args:
        text: The raw input string.

    Returns:
        The masked string, with tokens re-joined by single spaces.
    """

    def _mask(token):
        # A lone "@" is not a mention, so it is left as-is.
        if token.startswith("@") and len(token) > 1:
            return "@user"
        if token.startswith("http"):
            return "http"
        return token

    return " ".join(_mask(token) for token in text.split(" "))
|
|
|
|
|
def run_sentiment_analysis(text, tokenizer, model):
    """Score ``text`` and return a probability per sentiment label.

    ``tokenizer`` is called with ``text`` and ``return_tensors="pt"``; its
    result is unpacked as keyword arguments into ``model``. Element ``[0][0]``
    of the model output is detached, converted to a NumPy array and passed
    through softmax.

    NOTE(review): the label order below is hard-coded; it presumably matches
    the class order of the model in use — confirm against the model config.

    Args:
        text: Input string to classify.
        tokenizer: Callable producing the keyword inputs for ``model``.
        model: Callable returning an indexable output of raw scores.

    Returns:
        A dict mapping "Negative", "Neutral" and "Positive" to float scores.
    """
    class_names = ("Negative", "Neutral", "Positive")

    model_inputs = tokenizer(text, return_tensors="pt")
    raw_scores = model(**model_inputs)[0][0]
    probabilities = softmax(raw_scores.detach().numpy())

    return dict(zip(class_names, map(float, probabilities)))
|
|
|
|
|
|
|
|
|
|