kobbyeduah committed on
Commit
5436943
1 Parent(s): e979aa9

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +24 -0
  2. app.py +108 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as the base image
FROM python:3.9-slim

# Gradio binds to 127.0.0.1 by default, which is unreachable from outside the
# container. These variables make it listen on every interface on the exposed port.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Set the working directory within the container
WORKDIR /app

# Copy only the requirements first so the dependency layer is cached
# independently of application code changes
COPY ./requirements.txt /app/requirements.txt

# Install the Python dependencies (no pip cache, to keep the image smaller)
RUN pip install --no-cache-dir -r /app/requirements.txt

# Download the NLTK resources in a single layer, before the app code is copied,
# so editing app.py does not trigger a fresh download
RUN python -m nltk.downloader stopwords wordnet

# Copy the Gradio application code into the container
COPY ./app.py /app/app.py

# Expose port 7860 to access the Gradio interface
EXPOSE 7860

# Command to run the Gradio app
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """app.py
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1SKjRNc67_9TZPKUGhtfiYMfcpZuMh6s0
8
+ """
9
+
10
+ # %pip install gradio transformers -q
11
+ # %pip install nltk
12
+
13
+ # Import the key libraries
14
+ import gradio as gr
15
+ import torch
16
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
17
+ from scipy.special import softmax
18
+ import nltk
19
+ import re
20
+ from nltk.corpus import stopwords
21
+ from nltk.stem import WordNetLemmatizer
22
+
23
+ # Download NLTK resources (if not already downloaded)
24
# Fetch the NLTK corpora used by preprocess(): the English stopword list and
# the WordNet data behind the lemmatizer.
for resource in ('stopwords', 'wordnet'):
    nltk.download(resource)

# Load the tokenizer and the sequence-classification model from the same
# Hugging Face checkpoint, once at import time.
model_path = "rasmodev/Covid-19_Sentiment_Analysis_RoBERTa_Model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
31
+
32
+ # Preprocess text (username and link placeholders, and text preprocessing)
33
def preprocess(text):
    """Clean a tweet and substitute username/link placeholders.

    The text is lowercased and split on whitespace. Each token is then handled
    as follows:

    * a token starting with ``@`` (and longer than one character) becomes the
      ``@user`` placeholder;
    * otherwise every non-letter character is removed, English stopwords and
      tokens left empty are dropped, the remainder is lemmatized, and any
      result starting with ``http`` becomes the ``http`` placeholder.

    The mention check runs on the raw token, *before* special characters are
    stripped. Previously the ``@`` was removed first, so the ``@user`` branch
    could never match and the raw handle was passed on to the model.

    Args:
        text: The raw tweet text.

    Returns:
        The surviving tokens joined by single spaces (``''`` if none survive).
    """
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    tokens = []
    for raw in text.lower().split():
        # Detect mentions while the '@' is still present.
        if raw.startswith('@') and len(raw) > 1:
            tokens.append('@user')
            continue

        # Remove special characters and numbers from the token.
        word = re.sub(r'[^a-zA-Z\s]', '', raw)
        if not word or word in stop_words:
            continue

        # Lemmatize to the base form, then collapse links to a placeholder.
        word = lemmatizer.lemmatize(word)
        tokens.append('http' if word.startswith('http') else word)

    return ' '.join(tokens)
60
+
61
+ # Perform sentiment analysis
62
def sentiment_analysis(text):
    """Classify the sentiment of a tweet and render the result as HTML.

    The text is run through ``preprocess``, tokenized, and passed through the
    module-level ``model``. The logits are converted to probabilities with
    softmax and the highest-scoring class is reported.

    Args:
        text: The raw tweet text entered by the user.

    Returns:
        An HTML snippet containing a coloured button labelled with the winning
        class (Negative / Neutral / Positive) and its probability as a
        percentage with two decimals.
    """
    cleaned = preprocess(text)

    # Truncate so that very long inputs cannot exceed the model's position
    # limit and raise during the forward pass.
    # NOTE(review): 512 assumes a RoBERTa-base style checkpoint - confirm.
    inputs = tokenizer(
        cleaned,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )

    # Inference only: no gradients are tracked, so the logits can be
    # converted to NumPy directly.
    with torch.no_grad():
        outputs = model(**inputs)

    # Turn the logits of the single input into probabilities.
    scores_ = softmax(outputs.logits[0].numpy())

    # Labels and their button colours, indexed by class id.
    labels = ['Negative', 'Neutral', 'Positive']
    colors = ['red', 'yellow', 'green']
    font_colors = ['white', 'black', 'white']

    # Index of the most probable class, reused for the label and both colours.
    best = int(scores_.argmax())
    max_label = labels[best]
    max_percentage = scores_.max() * 100

    # Create HTML for the label with the specified style
    label_html = f'<div style="display: flex; justify-content: center;"><button style="text-align: center; font-size: 16px; padding: 10px; border-radius: 15px; background-color: {colors[best]}; color: {font_colors[best]};">{max_label}({max_percentage:.2f}%)</button></div>'

    return label_html
89
+
90
+ # Create a Gradio interface
91
# Build the Gradio UI: one textbox in, one HTML panel out.
# The deprecated `layout` argument (ignored by Gradio 3.x) is no longer passed.
interface = gr.Interface(
    fn=sentiment_analysis,
    inputs=gr.Textbox(placeholder="Write your tweet here..."),
    outputs=gr.HTML(),
    title="COVID-19 Sentiment Analysis App",
    description="This App Analyzes the sentiment of COVID-19 related tweets. Negative: Indicates a negative sentiment, Neutral: Indicates a neutral sentiment, Positive: Indicates a positive sentiment.",
    theme="default",
    examples=[
        ["This vaccine is terrible!"],
        ["I don't have a strong opinion about this vaccines."],
        ["The Vaccine is Good I have had no issues!"],
    ],
)

# Launch the Gradio app. Listen on all interfaces, on the port the Dockerfile
# exposes; the default loopback binding is unreachable from outside a container.
interface.launch(server_name="0.0.0.0", server_port=7860)
108
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==3.44.2
2
+ torch==2.0.1
3
+ transformers==4.33.1
4
+ nltk==3.8.1
5
+ scipy==1.11.2