Spaces:

Hayloo9838
/

uno-recognizer

Sleeping

App Files Community

Hayloo9838 committed on Dec 22, 2024

Commit

b2f34f1

verified ·

1 Parent(s): d6fc62d

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -50

app.py CHANGED Viewed

@@ -6,18 +6,15 @@ from transformers import CLIPProcessor, CLIPVisionModel
 from PIL import Image
 from torch import nn
 import requests
-import matplotlib.pyplot as plt
 from huggingface_hub import hf_hub_download
 MODEL_PATH = "pytorch_model.bin"
 REPO_ID = "Hayloo9838/uno-recognizer"
-MAPANDSTUFF = "mapandstuff.pth"
 class CLIPVisionClassifier(nn.Module):
     def __init__(self, num_labels):
         super().__init__()
-        self.vision_model = CLIPVisionModel.from_pretrained('openai/clip-vit-large-patch14',
-                                                          attn_implementation="eager")
         self.classifier = nn.Linear(self.vision_model.config.hidden_size, num_labels, bias=False)
         self.dropout = nn.Dropout(0.1)
@@ -38,40 +35,26 @@ def get_attention_map(attentions):
     num_patches = int(np.sqrt(attention.shape[0]))
     attention_map = attention.reshape(num_patches, num_patches)
-    attention_map = attention_map.cpu().numpy()
     attention_map = (attention_map - attention_map.min()) / (attention_map.max() - attention_map.min())
-    return attention_map
-def apply_heatmap(image, attention_map, new_size=None):
     heatmap = cv2.applyColorMap(np.uint8(255 * attention_map), cv2.COLORMAP_JET)
     if isinstance(image, Image.Image):
         image = np.array(image)
         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-    if new_size is not None:
-        image_resized = cv2.resize(image, new_size)
-        attention_map_resized = cv2.resize(attention_map, image_resized.shape[:2][::-1] , interpolation=cv2.INTER_LINEAR)
-        attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
-        heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
-        output = cv2.addWeighted(image_resized, 0.7, heatmap_resized, 0.3, 0)
-    else:
-        attention_map_resized = cv2.resize(attention_map, image.shape[:2][::-1] , interpolation=cv2.INTER_LINEAR)
-        attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
-        heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
-        output = cv2.addWeighted(image, 0.7, heatmap_resized, 0.3, 0)
     return output
 def process_image_classification(image):
     model, processor, reverse_mapping, device = load_model()
-    # Convert image to PIL Image
     image = Image.fromarray(image)
     inputs = processor(images=image, return_tensors="pt")
     pixel_values = inputs.pixel_values.to(device)
@@ -80,53 +63,38 @@ def process_image_classification(image):
         probs = torch.nn.functional.softmax(logits, dim=-1)
         prediction = torch.argmax(probs).item()
-    # Generate attention map
     attention_map = get_attention_map(attentions)
     visualization = apply_heatmap(image, attention_map)
     card_name = reverse_mapping[prediction]
     confidence = probs[0][prediction].item()
-    # Convert back to RGB for matplotlib display
-    visualization_rgb = cv2.cvtColor(visualization, cv2.COLOR_BGR2RGB)
-    return visualization_rgb, card_name, confidence
 def load_model():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    # Download model weights and label mapping from Hugging Face Hub
     model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_PATH)
-    #mapandstuff_path = hf_hub_download(repo_id=REPO_ID, filename=MAPANDSTUFF)
     checkpoint = torch.load(model_path, map_location=device)
     label_mapping = checkpoint['label_mapping']
     reverse_mapping = {v: k for k, v in label_mapping.items()}
     model = CLIPVisionClassifier(len(label_mapping))
-    model_state_dict = checkpoint["model_state_dict"]
-    model.load_state_dict(model_state_dict)
-    model = model.to(device)
-    model.eval()
     processor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')
     return model, processor, reverse_mapping, device
 def gradio_interface():
-    gr_interface = gr.Interface(
         fn=process_image_classification,
-        inputs=gr.inputs.Image(type="numpy"),
         outputs=[
-            gr.outputs.Image(label="Heatmap Plot"),
-            gr.outputs.Textbox(label="Predicted Card"),
-            gr.outputs.Textbox(label="Confidence")
         ],
         title="Uno Card Recognizer",
         description="Upload an image or use your webcam to recognize an Uno card."
-    )
-    gr_interface.launch()
 if __name__ == "__main__":
-    gradio_interface()

 from PIL import Image
 from torch import nn
 import requests
 from huggingface_hub import hf_hub_download
 MODEL_PATH = "pytorch_model.bin"
 REPO_ID = "Hayloo9838/uno-recognizer"
 class CLIPVisionClassifier(nn.Module):
     def __init__(self, num_labels):
         super().__init__()
+        self.vision_model = CLIPVisionModel.from_pretrained('openai/clip-vit-large-patch14')
         self.classifier = nn.Linear(self.vision_model.config.hidden_size, num_labels, bias=False)
         self.dropout = nn.Dropout(0.1)
     num_patches = int(np.sqrt(attention.shape[0]))
     attention_map = attention.reshape(num_patches, num_patches)
     attention_map = (attention_map - attention_map.min()) / (attention_map.max() - attention_map.min())
+    return attention_map.cpu().numpy()
+def apply_heatmap(image, attention_map):
     heatmap = cv2.applyColorMap(np.uint8(255 * attention_map), cv2.COLORMAP_JET)
     if isinstance(image, Image.Image):
         image = np.array(image)
         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    attention_map_resized = cv2.resize(attention_map, image.shape[:2][::-1], interpolation=cv2.INTER_LINEAR)
+    attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
+    heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
+    output = cv2.addWeighted(image, 0.7, heatmap_resized, 0.3, 0)
     return output
 def process_image_classification(image):
     model, processor, reverse_mapping, device = load_model()
     image = Image.fromarray(image)
     inputs = processor(images=image, return_tensors="pt")
     pixel_values = inputs.pixel_values.to(device)
         probs = torch.nn.functional.softmax(logits, dim=-1)
         prediction = torch.argmax(probs).item()
     attention_map = get_attention_map(attentions)
     visualization = apply_heatmap(image, attention_map)
     card_name = reverse_mapping[prediction]
     confidence = probs[0][prediction].item()
+    return visualization, card_name, confidence
 def load_model():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_PATH)
     checkpoint = torch.load(model_path, map_location=device)
     label_mapping = checkpoint['label_mapping']
     reverse_mapping = {v: k for k, v in label_mapping.items()}
     model = CLIPVisionClassifier(len(label_mapping))
+    model.load_state_dict(checkpoint["model_state_dict"])
+    model.to(device).eval()
     processor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')
     return model, processor, reverse_mapping, device
 def gradio_interface():
+    gr.Interface(
         fn=process_image_classification,
+        inputs=gr.Image(type="numpy"),
         outputs=[
+            gr.Image(label="Heatmap Plot"),
+            gr.Textbox(label="Predicted Card"),
+            gr.Textbox(label="Confidence")
         ],
         title="Uno Card Recognizer",
         description="Upload an image or use your webcam to recognize an Uno card."
+    ).launch()
 if __name__ == "__main__":
+    gradio_interface()