File size: 2,283 Bytes
397ed79
d66b74a
2b96d10
d66b74a
 
8a02fed
2b96d10
 
 
 
 
 
 
 
 
397ed79
50b8f28
d66b74a
2b96d10
397ed79
2b96d10
397ed79
50b8f28
defc0a9
2b96d10
 
 
 
397ed79
2b96d10
397ed79
 
2b96d10
 
 
 
397ed79
 
2b96d10
397ed79
 
b3fe5ba
 
397ed79
b3fe5ba
397ed79
d66b74a
397ed79
 
b3fe5ba
d66b74a
397ed79
 
2b96d10
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import gradio as gr
import numpy as np
from transformers import AutoTokenizer, AutoModel
from scipy.special import softmax

import gradio as gr
import numpy as np
import pandas as pd
import pickle
import transformers
from transformers import AutoTokenizer, AutoConfig,AutoModelForSequenceClassification,TFAutoModelForSequenceClassification, pipeline
from scipy.special import softmax
from dotenv import load_dotenv, dotenv_values
from huggingface_hub import login
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# Authenticate with the Hugging Face Hub only when a token is configured.
# Calling login() without a token falls back to an interactive prompt, which
# cannot be answered when the app runs unattended.
_access_token = os.getenv("access_token")
if _access_token:
    login(_access_token)


# Requirements
# huggingface_token = ""  # Replace with your actual token
# Hugging Face Hub repository that every artefact below is loaded from.
model_path = "imalexianne/distilbert-base-uncased"

# Configuration, tokenizer and sequence-classification model all come from the
# same repository so that they stay consistent with each other.
config = AutoConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Preprocessing function
def preprocess(text):
    """Mask user mentions and links in ``text``.

    The text is split on single spaces. A token that starts with "@" and is
    longer than one character is replaced by "@user"; a token that then
    starts with "http" is replaced by "http". The tokens are joined back
    together with single spaces.
    """

    def _mask(token):
        # A lone "@" is left untouched; only "@something" counts as a mention.
        if len(token) > 1 and token.startswith("@"):
            token = "@user"
        if token.startswith("http"):
            token = "http"
        return token

    return " ".join(_mask(token) for token in text.split(" "))

# ---- Function to process the input and return prediction
def sentiment_analysis(text):
    """Return sentiment scores for ``text``.

    The text is normalised with ``preprocess``, tokenised with the
    module-level ``tokenizer`` and passed through the module-level ``model``.
    The first row of the model's first output is turned into probabilities
    with softmax.

    Args:
        text: Raw input string.

    Returns:
        A dict pairing the labels "Negative", "Neutral" and "Positive", in
        that order, with the softmax scores as floats.
    """
    text = preprocess(text)

    # truncation=True cuts over-long input down to the tokenizer's configured
    # maximum length instead of letting the model fail on a sequence that is
    # longer than it supports.
    encoded_input = tokenizer(text, return_tensors="pt", truncation=True)  # PyTorch tensors
    output = model(**encoded_input)
    logits = output[0][0].detach().numpy()
    probabilities = softmax(logits)

    # NOTE(review): assumes class indices 0, 1, 2 mean Negative, Neutral,
    # Positive - confirm against the label encoding used for fine-tuning.
    labels = ["Negative", "Neutral", "Positive"]
    return {label: float(score) for label, score in zip(labels, probabilities)}


# ---- Gradio app interface
# Web UI: a single text box whose content is passed to sentiment_analysis,
# with the returned label -> score dict rendered by a "label" output.
app = gr.Interface(
    fn=sentiment_analysis,
    # The hint is passed as a placeholder so it is not pre-filled as real
    # input; the first positional argument of Textbox is the initial value,
    # which would be submitted as-is if the user did not clear it.
    inputs=gr.Textbox(placeholder="Write your text here..."),
    outputs="label",
    title="Sentiment Analysis of Tweets on COVID-19 Vaccines",
    description="Sentiment Analysis of text based on tweets about COVID-19 Vaccines using a fine-tuned 'distilbert-base-uncased' model",
    examples=[["Covid vaccination has no positive impact"]],
)

app.launch()