Spaces:

vieunite
/

DemoEmotions

Sleeping

App Files Files Community

karpurna2 committed on Dec 12, 2024

Commit

127e34a

1 Parent(s): d31bcb3

initial upload

Browse files

Files changed (10) hide show

app.py +107 -0
models/Custom.pth +3 -0
requirements.txt +15 -0
utils/Caption.py +30 -0
utils/CustomDataset.py +41 -0
utils/Emotions.py +48 -0
utils/ImageOnly.py +29 -0
utils/Roberta.py +18 -0
utils/test.py +37 -0
utils/utils.py +29 -0

app.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import gradio as gr
+from datetime import datetime
+import unicodedata
+from utils.utils import get_label
+def sanitize_feedback(feedback):
+    """
+    Convert emojis or other non-text characters in feedback to a text representation.
+    This ensures only clean text is saved in the feedback file.
+    """
+    sanitized_feedback = ''.join(
+        c if unicodedata.category(c).startswith(('L', 'N', 'P', 'Z')) else '' for c in feedback
+    )
+    return sanitized_feedback
+# Function to capture feedback and save it to a timestamped file
+def capture_feedback(feedback):
+    # Sanitize feedback to ensure only text is saved
+    sanitized_feedback = sanitize_feedback(feedback)
+    # Get the current timestamp
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    # Create a filename with the timestamp
+    filename = f"{timestamp}.txt"
+    # Write feedback to the file
+    with open(filename, "w", encoding="utf-8") as file:
+        file.write(sanitized_feedback)
+    return "Thank you for your feedback!"   # Return message for the popup
+# Main Gradio interface
+with gr.Blocks() as demo:
+    # State to manage interface visibility
+    feedback_submitted = gr.State(False)
+    # Title and description of the demo
+    gr.Markdown("<h1 style='text-align: center;'>Emotions Classification Demo</h1>")
+    gr.Markdown("""
+    <div style='text-align: center;'>
+        This is a demo page for our classification model. Our model provides a brief description of your image and predicts the most relevant emotions that the image invokes.
+    </div>
+    """)
+    # Organize the input and output sections in a row
+    with gr.Row():
+        # Left side: input section with description
+        with gr.Column():
+            gr.Markdown("<b>Upload your image here to get the Emotion predictions.</b>")
+            image_input = gr.Image(type="pil", label="Input Image")
+            # Submit button for image upload
+            submit_button = gr.Button("Submit")
+        # Right side: output section with description
+        with gr.Column():
+            gr.Markdown("<b>Predicted outputs</b>")
+            # Textbox for emotion label
+            output_text = gr.Textbox(label="Image Description")
+            # Plot for the bar chart
+            output_plot = gr.Plot(label="Emotion Probabilities")
+            # Feedback section directly below the graph
+            gr.Markdown("<b>How do you feel about our emotion results?</b>")
+            feedback_choices = [
+                "😄 Very Satisfied",
+                "😊 Satisfied",
+                "😐 Neutral",
+                "🙁 Dissatisfied",
+                "😡 Very Dissatisfied"
+            ]
+            feedback = gr.Radio(choices=feedback_choices, label="Your Feedback")
+            # Button to submit feedback
+            feedback_button = gr.Button("Submit Feedback")
+    # Feedback submission action
+    def submit_feedback(feedback):
+        capture_feedback(feedback)  # Save feedback to file
+        feedback_submitted.set(True)  # Set state to indicate feedback was submitted
+        return "Thank you for your feedback!"  # Return thank-you message
+    feedback_button.click(fn=submit_feedback, inputs=feedback, outputs=None)
+    # Thank you message section, initially hidden
+    thank_you_message = gr.Markdown("", visible=False)
+    # Display thank you message when feedback is submitted
+    feedback_submitted.change(
+        fn=lambda: ("Thank you for your feedback!", False),  # Show thank you message
+        inputs=feedback_submitted,
+        outputs=[thank_you_message, feedback_submitted]
+    )
+    # Main interface to show
+    with gr.Row(visible=True):
+        gr.Markdown("Thank you for your feedback!", visible=feedback_submitted)
+    # Function to process the input and output
+    submit_button.click(fn=get_label, inputs=image_input, outputs=[output_text, output_plot])
+# Launch the interface
+if __name__ == "__main__":
+    demo.launch(share=True)

models/Custom.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53fe8d9b76b222441f1e3a70318b47264be5833240bf92b828c28fd695e2bf9e
+size 934034

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+--extra-index-url https://download.pytorch.org/whl/cu118
+torch==2.1.2+cu118
+torchvision==0.16.2+cu118
+Pillow==10.1.0
+transformers==4.40.0
+sentencepiece==0.1.99
+requests
+pandas
+numpy
+scikit-learn
+opencv-python
+opencv-contrib-python
+openai-clip
+gradio==5.8.0

utils/Caption.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import torch
+from transformers import AutoModel, AutoTokenizer
+def get_caption(image):
+    print(image)
+    model = AutoModel.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True, torch_dtype=torch.float16)
+    model = model.to(device='cuda')
+    tokenizer = AutoTokenizer.from_pretrained('openbmb/MiniCPM-Llama3-V-2_5', trust_remote_code=True)
+    model.eval()
+    question = "Describe the image."
+    msgs = [{'role': 'user', 'content': question}]
+    res = model.chat(
+        image=image,
+        msgs=msgs,
+        tokenizer=tokenizer,
+        sampling=True,
+        temperature=0.7,
+        stream=True
+    )
+    generated_text = ""
+    for new_text in res:
+        generated_text += new_text
+    model.cpu()
+    del model
+    torch.cuda.empty_cache()
+    return generated_text

utils/CustomDataset.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import torch
+from torch.utils.data import Dataset
+class CustomDataset(Dataset):
+    def __init__(self, image, texts, labels, tokenizer, max_len, transforms=None):
+        self.image = image
+        self.texts = texts
+        self.labels = labels
+        self.tokenizer = tokenizer
+        self.max_len = max_len
+        self.transforms = transforms
+    def __len__(self):
+        return len(self.texts)
+    def __getitem__(self, idx):
+        image = self.image
+        text = str(self.texts[idx])
+        label = self.labels[idx]
+        if self.transforms:
+            image = self.transforms(image)
+        inputs = self.tokenizer.encode_plus(
+            text,
+            None,
+            add_special_tokens=True,
+            max_length=self.max_len,
+            padding='max_length',
+            truncation=True
+        )
+        input_ids = inputs['input_ids']
+        attention_mask = inputs['attention_mask']
+        return {
+            'input_ids': torch.tensor(input_ids, dtype=torch.long),
+            'attention_mask': torch.tensor(attention_mask, dtype=torch.long),
+            'labels': torch.tensor(label, dtype=torch.float),
+            'images': image
+        }

utils/Emotions.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import numpy as np
+import torch
+import clip
+import torchvision
+from utils.Roberta import RoBERTaClassifier
+from utils.ImageOnly import Decoder4
+from utils.CustomDataset import CustomDataset
+from utils.test import test
+from transformers import RobertaTokenizer
+def get_emotions(image, text):
+    tags = ['Excitement', 'Sadness', 'Amusement', 'Disgust', 'Awe', 'Contentment', 'Fear', 'Anger']
+    max_len = 128
+    input_dim = 768
+    output_dim = 8
+    print(image)
+    test_transform = torchvision.transforms.Compose([
+        torchvision.transforms.Resize((224, 224)),
+        torchvision.transforms.ToTensor(),
+        torchvision.transforms.Normalize(
+            (0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+    )
+    test_emo = np.zeros((1, 8))
+    text = [text]
+    tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
+    test_dataset = CustomDataset(image, text, test_emo, tokenizer, max_len, test_transform)
+    test_loader = torch.utils.data.DataLoader(test_dataset,
+                                              batch_size=1,
+                                              shuffle=False, num_workers=2)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model, preprocess = clip.load("ViT-L/14", device=device)
+    model2 = RoBERTaClassifier(num_labels=output_dim)
+    decoder = Decoder4(input_dim, output_dim).to(device)
+    model2.load_state_dict(torch.load('models/Roberta.pth', map_location=device))
+    decoder.load_state_dict(torch.load('models/Custom.pth', map_location=device))
+    decoder = decoder.to(device)
+    y_pred = test(model, model2, decoder, device, test_loader)
+    del model, model2, decoder, test_loader
+    torch.cuda.empty_cache()
+    pred = y_pred.flatten()
+    return pred

utils/ImageOnly.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import torch.nn as nn
+class Decoder4(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super(Decoder4, self).__init__()
+        self.fc1 = nn.Linear(input_dim, 256)
+        self.batch_norm1 = nn.BatchNorm1d(256)
+        self.relu1 = nn.ReLU()
+        self.dropout1 = nn.Dropout(0.5)
+        self.fc2 = nn.Linear(256, 128)
+        self.batch_norm2 = nn.BatchNorm1d(128)
+        self.relu2 = nn.ReLU()
+        self.dropout2 = nn.Dropout(0.5)
+        self.fc3 = nn.Linear(128, output_dim)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, x):
+        x = self.fc1(x)
+        x = self.batch_norm1(x)
+        x = self.relu1(x)
+        x = self.dropout1(x)
+        x = self.fc2(x)
+        x = self.batch_norm2(x)
+        x = self.relu2(x)
+        x = self.dropout2(x)
+        x = self.fc3(x)
+        x = self.sigmoid(x)
+        return x

utils/Roberta.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import torch.nn as nn
+from transformers import RobertaModel
+class RoBERTaClassifier(nn.Module):
+    def __init__(self, num_labels):
+        super(RoBERTaClassifier, self).__init__()
+        self.roberta = RobertaModel.from_pretrained('roberta-base')
+        self.dropout = nn.Dropout(0.2)
+        self.linear = nn.Linear(self.roberta.config.hidden_size, num_labels)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, input_ids, attention_mask):
+        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
+        pooled_output = outputs.pooler_output
+        pooled_output = self.dropout(pooled_output)
+        logits = self.linear(pooled_output)
+        return self.sigmoid(logits)

utils/test.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import numpy as np
+import torch
+def test(model, model2, decoder, device, test_loader):
+    model = model.to(device)
+    decoder = decoder.to(device)
+    decoder.eval()
+    model2 = model2.to(device)
+    model2.eval()
+    y_pred_val = []
+    with torch.no_grad():
+        for batch in test_loader:
+            input_ids = batch['input_ids'].to(device)
+            attention_mask = batch['attention_mask'].to(device)
+            labels = batch['labels'].to(device)
+            images = batch['images'].to(device)
+            outputs1 = model2(input_ids, attention_mask)
+            with torch.no_grad():
+                image_features = model.encode_image(images)
+            image_features = image_features.to(torch.float32)
+            outputs2 = decoder(image_features)
+            outputs = (3 * outputs1 + 1 * outputs2) / 4
+            preds = outputs
+            y_pred_val.extend(preds.cpu().numpy())
+    y_pred = np.array(y_pred_val)
+    y_pred = np.reshape(y_pred, (-1, 8))
+    model.cpu()
+    model2.cpu()
+    decoder.cpu()
+    return y_pred

utils/utils.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import matplotlib.pyplot as plt
+import numpy as np
+from utils.Caption import get_caption
+from utils.Emotions import get_emotions
+def get_label(image):
+    caption = get_caption(image)
+    pred = get_emotions(image, caption)
+    emotions = ['Excitement', 'Sadness', 'Amusement', 'Disgust', 'Awe', 'Contentment', 'Fear', 'Anger']
+    probabilities = pred
+    print(pred)
+    max_idx = np.argmax(probabilities)
+    # Create color list where all bars are one color, and the max bar is another color
+    bar_colors = ['skyblue' if i != max_idx else 'orange' for i in range(len(emotions))]
+    # Create bar chart
+    fig, ax = plt.subplots()
+    ax.bar(emotions, probabilities, color=bar_colors, width=0.5)
+    ax.set_ylabel('Probability')
+    ax.set_title('Emotion Prediction Probabilities')
+    plt.xticks(rotation=60, ha='right')
+    plt.tight_layout()
+    return caption, fig