karpurna2 committed
Commit
127e34a
1 Parent(s): d31bcb3

initial upload

app.py ADDED
@@ -0,0 +1,107 @@
+ import gradio as gr
+ from datetime import datetime
+ import unicodedata
+ from utils.utils import get_label
+
+
+ def sanitize_feedback(feedback):
+     """
+     Remove emojis and other non-text characters from feedback
+     so that only clean text is saved in the feedback file.
+     """
+     sanitized_feedback = ''.join(
+         c if unicodedata.category(c).startswith(('L', 'N', 'P', 'Z')) else '' for c in feedback
+     )
+     return sanitized_feedback
+
+
+ # Capture feedback and save it to a timestamped file
+ def capture_feedback(feedback):
+     # Sanitize feedback so only plain text is saved
+     sanitized_feedback = sanitize_feedback(feedback)
+     # Build a filename from the current timestamp
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     filename = f"{timestamp}.txt"
+     # Write the feedback to the file
+     with open(filename, "w", encoding="utf-8") as file:
+         file.write(sanitized_feedback)
+     return "Thank you for your feedback!"  # Message shown after submission
+
+
+ # Main Gradio interface
+ with gr.Blocks() as demo:
+     # Title and description of the demo
+     gr.Markdown("<h1 style='text-align: center;'>Emotions Classification Demo</h1>")
+     gr.Markdown("""
+     <div style='text-align: center;'>
+     This is a demo page for our classification model. Our model provides a brief description of your image and predicts the emotions the image is most likely to evoke.
+     </div>
+     """)
+
+     # Organize the input and output sections in a row
+     with gr.Row():
+         # Left side: input section with description
+         with gr.Column():
+             gr.Markdown("<b>Upload your image here to get the emotion predictions.</b>")
+             image_input = gr.Image(type="pil", label="Input Image")
+
+             # Submit button for image upload
+             submit_button = gr.Button("Submit")
+
+         # Right side: output section with description
+         with gr.Column():
+             gr.Markdown("<b>Predicted outputs</b>")
+             # Textbox for the generated image description
+             output_text = gr.Textbox(label="Image Description")
+             # Plot for the bar chart
+             output_plot = gr.Plot(label="Emotion Probabilities")
+
+             # Feedback section directly below the chart
+             gr.Markdown("<b>How do you feel about our emotion results?</b>")
+             feedback_choices = [
+                 "😄 Very Satisfied",
+                 "😊 Satisfied",
+                 "😐 Neutral",
+                 "🙁 Dissatisfied",
+                 "😡 Very Dissatisfied"
+             ]
+             feedback = gr.Radio(choices=feedback_choices, label="Your Feedback")
+
+             # Button to submit feedback
+             feedback_button = gr.Button("Submit Feedback")
+
+             # Thank-you message, hidden until feedback is submitted
+             thank_you_message = gr.Markdown("", visible=False)
+
+     # Feedback submission action: save the feedback and reveal the thank-you message
+     def submit_feedback(feedback):
+         if not feedback:
+             return gr.update(value="Please select an option before submitting.", visible=True)
+         message = capture_feedback(feedback)  # Save feedback to file
+         return gr.update(value=message, visible=True)
+
+     feedback_button.click(fn=submit_feedback, inputs=feedback, outputs=thank_you_message)
+
+     # Run the model on the uploaded image and fill the caption and probability chart
+     submit_button.click(fn=get_label, inputs=image_input, outputs=[output_text, output_plot])
+
+ # Launch the interface
+ if __name__ == "__main__":
+     demo.launch(share=True)
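
A minimal sketch of how the feedback helpers above behave, assuming the file is importable as "app" and the packages from requirements.txt are installed (importing it also pulls in the model utilities). The emoji prefix is dropped because its Unicode category does not start with L, N, P, or Z.

    from app import sanitize_feedback, capture_feedback

    # Emojis and other symbols are stripped; letters, numbers, punctuation and spaces are kept
    print(sanitize_feedback("😄 Very Satisfied"))   # -> " Very Satisfied"

    # Writes the sanitized text to a <timestamp>.txt file and returns the confirmation message
    print(capture_feedback("😐 Neutral"))           # -> "Thank you for your feedback!"
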
models/Custom.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53fe8d9b76b222441f1e3a70318b47264be5833240bf92b828c28fd695e2bf9e
+ size 934034
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ --extra-index-url https://download.pytorch.org/whl/cu118
+
+ torch==2.1.2+cu118
+ torchvision==0.16.2+cu118
+ Pillow==10.1.0
+ transformers==4.40.0
+ sentencepiece==0.1.99
+ requests
+ pandas
+ numpy
+ scikit-learn
+ opencv-python
+ opencv-contrib-python
+ openai-clip
+ gradio==5.8.0
utils/Caption.py ADDED
@@ -0,0 +1,30 @@
+ import torch
+ from transformers import AutoModel, AutoTokenizer
+
+
+ def get_caption(image):
+     print(image)
+     model = AutoModel.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True, torch_dtype=torch.float16)
+     model = model.to(device='cuda')
+
+     tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True)
+     model.eval()
+     question = "Describe the image."
+     msgs = [{'role': 'user', 'content': question}]
+
+     res = model.chat(
+         image=image,
+         msgs=msgs,
+         tokenizer=tokenizer,
+         sampling=True,
+         temperature=0.7,
+         stream=True
+     )
+     generated_text = ""
+     for new_text in res:
+         generated_text += new_text
+
+     model.cpu()
+     del model
+     torch.cuda.empty_cache()
+     return generated_text
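
A usage sketch for get_caption, assuming a CUDA GPU (the function hard-codes 'cuda') and that the MiniCPM-Llama3-V-2_5 weights can be downloaded; "example.jpg" is a hypothetical local file.

    from PIL import Image
    from utils.Caption import get_caption

    # Hypothetical input image; any RGB PIL image works
    image = Image.open("example.jpg").convert("RGB")
    caption = get_caption(image)
    print(caption)
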
utils/CustomDataset.py ADDED
@@ -0,0 +1,41 @@
+ import torch
+ from torch.utils.data import Dataset
+
+
+ class CustomDataset(Dataset):
+     def __init__(self, image, texts, labels, tokenizer, max_len, transforms=None):
+         self.image = image
+         self.texts = texts
+         self.labels = labels
+         self.tokenizer = tokenizer
+         self.max_len = max_len
+         self.transforms = transforms
+
+     def __len__(self):
+         return len(self.texts)
+
+     def __getitem__(self, idx):
+         image = self.image
+         text = str(self.texts[idx])
+         label = self.labels[idx]
+         if self.transforms:
+             image = self.transforms(image)
+
+         inputs = self.tokenizer.encode_plus(
+             text,
+             None,
+             add_special_tokens=True,
+             max_length=self.max_len,
+             padding='max_length',
+             truncation=True
+         )
+
+         input_ids = inputs['input_ids']
+         attention_mask = inputs['attention_mask']
+
+         return {
+             'input_ids': torch.tensor(input_ids, dtype=torch.long),
+             'attention_mask': torch.tensor(attention_mask, dtype=torch.long),
+             'labels': torch.tensor(label, dtype=torch.float),
+             'images': image
+         }
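
A sketch of how Emotions.py builds this dataset for a single inference: one PIL image, one caption, and a dummy 8-way label row. The placeholder image and caption below are illustrative only.

    import numpy as np
    import torchvision
    from PIL import Image
    from transformers import RobertaTokenizer
    from utils.CustomDataset import CustomDataset

    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor(),
    ])
    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    image = Image.new("RGB", (256, 256))  # placeholder image
    dataset = CustomDataset(image, ["a placeholder caption"], np.zeros((1, 8)),
                            tokenizer, max_len=128, transforms=transform)

    sample = dataset[0]
    print(sample['input_ids'].shape)   # torch.Size([128])
    print(sample['images'].shape)      # torch.Size([3, 224, 224])
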
utils/Emotions.py ADDED
@@ -0,0 +1,48 @@
+ import numpy as np
+ import torch
+ import clip
+ import torchvision
+ from utils.Roberta import RoBERTaClassifier
+ from utils.ImageOnly import Decoder4
+ from utils.CustomDataset import CustomDataset
+ from utils.test import test
+ from transformers import RobertaTokenizer
+
+
+ def get_emotions(image, text):
+     tags = ['Excitement', 'Sadness', 'Amusement', 'Disgust', 'Awe', 'Contentment', 'Fear', 'Anger']
+     max_len = 128
+     input_dim = 768
+     output_dim = 8
+     print(image)
+
+     test_transform = torchvision.transforms.Compose([
+         torchvision.transforms.Resize((224, 224)),
+         torchvision.transforms.ToTensor(),
+         torchvision.transforms.Normalize(
+             (0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+     )
+
+     test_emo = np.zeros((1, 8))
+     text = [text]
+
+     tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
+     test_dataset = CustomDataset(image, text, test_emo, tokenizer, max_len, test_transform)
+     test_loader = torch.utils.data.DataLoader(test_dataset,
+                                               batch_size=1,
+                                               shuffle=False, num_workers=2)
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     model, preprocess = clip.load("ViT-L/14", device=device)
+
+     model2 = RoBERTaClassifier(num_labels=output_dim)
+     decoder = Decoder4(input_dim, output_dim).to(device)
+     model2.load_state_dict(torch.load('models/Roberta.pth', map_location=device))
+     decoder.load_state_dict(torch.load('models/Custom.pth', map_location=device))
+     decoder = decoder.to(device)
+
+     y_pred = test(model, model2, decoder, device, test_loader)
+     del model, model2, decoder, test_loader
+     torch.cuda.empty_cache()
+     pred = y_pred.flatten()
+
+     return pred
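
A usage sketch for get_emotions, assuming models/Roberta.pth and models/Custom.pth are present and that the CLIP ViT-L/14 and roberta-base weights can be downloaded; the image path and caption are hypothetical.

    from PIL import Image
    from utils.Emotions import get_emotions

    image = Image.open("example.jpg").convert("RGB")
    pred = get_emotions(image, "a person smiling on a sunny beach")

    # Map the 8 returned scores back to the emotion tags, highest first
    tags = ['Excitement', 'Sadness', 'Amusement', 'Disgust', 'Awe', 'Contentment', 'Fear', 'Anger']
    for tag, p in sorted(zip(tags, pred), key=lambda t: -t[1]):
        print(f"{tag}: {p:.3f}")
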
utils/ImageOnly.py ADDED
@@ -0,0 +1,29 @@
+ import torch.nn as nn
+
+
+ class Decoder4(nn.Module):
+     def __init__(self, input_dim, output_dim):
+         super(Decoder4, self).__init__()
+         self.fc1 = nn.Linear(input_dim, 256)
+         self.batch_norm1 = nn.BatchNorm1d(256)
+         self.relu1 = nn.ReLU()
+         self.dropout1 = nn.Dropout(0.5)
+         self.fc2 = nn.Linear(256, 128)
+         self.batch_norm2 = nn.BatchNorm1d(128)
+         self.relu2 = nn.ReLU()
+         self.dropout2 = nn.Dropout(0.5)
+         self.fc3 = nn.Linear(128, output_dim)
+         self.sigmoid = nn.Sigmoid()
+
+     def forward(self, x):
+         x = self.fc1(x)
+         x = self.batch_norm1(x)
+         x = self.relu1(x)
+         x = self.dropout1(x)
+         x = self.fc2(x)
+         x = self.batch_norm2(x)
+         x = self.relu2(x)
+         x = self.dropout2(x)
+         x = self.fc3(x)
+         x = self.sigmoid(x)
+         return x
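
A forward-pass sketch for Decoder4; 768 matches the CLIP ViT-L/14 image-feature width used in Emotions.py and 8 the number of emotion tags.

    import torch
    from utils.ImageOnly import Decoder4

    decoder = Decoder4(input_dim=768, output_dim=8)
    decoder.eval()  # BatchNorm1d needs eval mode (or a batch size > 1) for a single sample
    with torch.no_grad():
        probs = decoder(torch.randn(1, 768))
    print(probs.shape)  # torch.Size([1, 8]); each value lies in (0, 1) because of the sigmoid
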
utils/Roberta.py ADDED
@@ -0,0 +1,18 @@
+ import torch.nn as nn
+ from transformers import RobertaModel
+
+
+ class RoBERTaClassifier(nn.Module):
+     def __init__(self, num_labels):
+         super(RoBERTaClassifier, self).__init__()
+         self.roberta = RobertaModel.from_pretrained('roberta-base')
+         self.dropout = nn.Dropout(0.2)
+         self.linear = nn.Linear(self.roberta.config.hidden_size, num_labels)
+         self.sigmoid = nn.Sigmoid()
+
+     def forward(self, input_ids, attention_mask):
+         outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
+         pooled_output = outputs.pooler_output
+         pooled_output = self.dropout(pooled_output)
+         logits = self.linear(pooled_output)
+         return self.sigmoid(logits)
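
A sketch of scoring one caption with this classifier; the weights here are freshly initialised (in the demo, models/Roberta.pth is loaded on top of this class first) and the caption is illustrative.

    import torch
    from transformers import RobertaTokenizer
    from utils.Roberta import RoBERTaClassifier

    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    model = RoBERTaClassifier(num_labels=8)
    model.eval()

    enc = tokenizer("a person smiling on a sunny beach", return_tensors='pt',
                    padding='max_length', truncation=True, max_length=128)
    with torch.no_grad():
        probs = model(enc['input_ids'], enc['attention_mask'])
    print(probs.shape)  # torch.Size([1, 8])
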
utils/test.py ADDED
@@ -0,0 +1,37 @@
+ import numpy as np
+ import torch
+
+
+ def test(model, model2, decoder, device, test_loader):
+     model = model.to(device)
+     decoder = decoder.to(device)
+     decoder.eval()
+     model2 = model2.to(device)
+     model2.eval()
+
+     y_pred_val = []
+
+     with torch.no_grad():
+         for batch in test_loader:
+             input_ids = batch['input_ids'].to(device)
+             attention_mask = batch['attention_mask'].to(device)
+             labels = batch['labels'].to(device)
+             images = batch['images'].to(device)
+             outputs1 = model2(input_ids, attention_mask)
+             with torch.no_grad():
+                 image_features = model.encode_image(images)
+                 image_features = image_features.to(torch.float32)
+                 outputs2 = decoder(image_features)
+
+             outputs = (3 * outputs1 + 1 * outputs2) / 4
+
+             preds = outputs
+             y_pred_val.extend(preds.cpu().numpy())
+
+     y_pred = np.array(y_pred_val)
+     y_pred = np.reshape(y_pred, (-1, 8))
+
+     model.cpu()
+     model2.cpu()
+     decoder.cpu()
+     return y_pred
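
The key step above is the late fusion (3 * outputs1 + 1 * outputs2) / 4: the text branch (RoBERTa on the caption) is weighted three times as heavily as the image branch (CLIP features through Decoder4). A tiny numeric sketch with made-up probabilities:

    import torch

    text_probs = torch.tensor([[0.9, 0.1]])   # hypothetical RoBERTa scores for two emotions
    image_probs = torch.tensor([[0.5, 0.7]])  # hypothetical decoder scores for the same emotions
    fused = (3 * text_probs + 1 * image_probs) / 4
    print(fused)  # tensor([[0.8000, 0.2500]]) -- pulled mostly toward the text branch
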
utils/utils.py ADDED
@@ -0,0 +1,29 @@
+ import matplotlib.pyplot as plt
+ import numpy as np
+ from utils.Caption import get_caption
+ from utils.Emotions import get_emotions
+
+
+ def get_label(image):
+     caption = get_caption(image)
+
+     pred = get_emotions(image, caption)
+
+     emotions = ['Excitement', 'Sadness', 'Amusement', 'Disgust', 'Awe', 'Contentment', 'Fear', 'Anger']
+     probabilities = pred
+     print(pred)
+     max_idx = np.argmax(probabilities)
+
+     # Create color list where all bars are one color, and the max bar is another color
+     bar_colors = ['skyblue' if i != max_idx else 'orange' for i in range(len(emotions))]
+
+     # Create bar chart
+     fig, ax = plt.subplots()
+     ax.bar(emotions, probabilities, color=bar_colors, width=0.5)
+     ax.set_ylabel('Probability')
+     ax.set_title('Emotion Prediction Probabilities')
+
+     plt.xticks(rotation=60, ha='right')
+     plt.tight_layout()
+
+     return caption, fig
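
An end-to-end sketch outside Gradio, assuming the checkpoints and dependencies above are in place; "example.jpg" is a hypothetical path.

    from PIL import Image
    from utils.utils import get_label

    image = Image.open("example.jpg").convert("RGB")
    caption, fig = get_label(image)
    print(caption)
    fig.savefig("emotions.png")  # save the bar chart that the demo displays
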