Hayloo9838 committed
Commit b2f34f1 · verified · 1 Parent(s): d6fc62d

Update app.py

Files changed (1):
  1. app.py +18 -50
app.py CHANGED
@@ -6,18 +6,15 @@ from transformers import CLIPProcessor, CLIPVisionModel
 from PIL import Image
 from torch import nn
 import requests
-import matplotlib.pyplot as plt
 from huggingface_hub import hf_hub_download
 
 MODEL_PATH = "pytorch_model.bin"
 REPO_ID = "Hayloo9838/uno-recognizer"
-MAPANDSTUFF = "mapandstuff.pth"
 
 class CLIPVisionClassifier(nn.Module):
     def __init__(self, num_labels):
         super().__init__()
-        self.vision_model = CLIPVisionModel.from_pretrained('openai/clip-vit-large-patch14',
-                                                            attn_implementation="eager")
+        self.vision_model = CLIPVisionModel.from_pretrained('openai/clip-vit-large-patch14')
         self.classifier = nn.Linear(self.vision_model.config.hidden_size, num_labels, bias=False)
         self.dropout = nn.Dropout(0.1)
 
@@ -38,40 +35,26 @@ def get_attention_map(attentions):
     num_patches = int(np.sqrt(attention.shape[0]))
 
     attention_map = attention.reshape(num_patches, num_patches)
-
-    attention_map = attention_map.cpu().numpy()
-
     attention_map = (attention_map - attention_map.min()) / (attention_map.max() - attention_map.min())
-    return attention_map
+
+    return attention_map.cpu().numpy()
 
-def apply_heatmap(image, attention_map, new_size=None):
+def apply_heatmap(image, attention_map):
     heatmap = cv2.applyColorMap(np.uint8(255 * attention_map), cv2.COLORMAP_JET)
-
     if isinstance(image, Image.Image):
         image = np.array(image)
         image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
 
-    if new_size is not None:
-        image_resized = cv2.resize(image, new_size)
-        attention_map_resized = cv2.resize(attention_map, image_resized.shape[:2][::-1], interpolation=cv2.INTER_LINEAR)
-        attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
-        heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
-        output = cv2.addWeighted(image_resized, 0.7, heatmap_resized, 0.3, 0)
-    else:
-        attention_map_resized = cv2.resize(attention_map, image.shape[:2][::-1], interpolation=cv2.INTER_LINEAR)
-        attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
-        heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
-        output = cv2.addWeighted(image, 0.7, heatmap_resized, 0.3, 0)
-
+    attention_map_resized = cv2.resize(attention_map, image.shape[:2][::-1], interpolation=cv2.INTER_LINEAR)
+    attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
+    heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
+    output = cv2.addWeighted(image, 0.7, heatmap_resized, 0.3, 0)
 
     return output
 
 def process_image_classification(image):
     model, processor, reverse_mapping, device = load_model()
-
-    # Convert image to PIL Image
     image = Image.fromarray(image)
-
     inputs = processor(images=image, return_tensors="pt")
     pixel_values = inputs.pixel_values.to(device)
 
@@ -80,53 +63,38 @@ def process_image_classification(image):
     probs = torch.nn.functional.softmax(logits, dim=-1)
     prediction = torch.argmax(probs).item()
 
-    # Generate attention map
     attention_map = get_attention_map(attentions)
-
     visualization = apply_heatmap(image, attention_map)
 
     card_name = reverse_mapping[prediction]
     confidence = probs[0][prediction].item()
 
-    # Convert back to RGB for matplotlib display
-    visualization_rgb = cv2.cvtColor(visualization, cv2.COLOR_BGR2RGB)
-
-    return visualization_rgb, card_name, confidence
+    return visualization, card_name, confidence
 
 def load_model():
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-    # Download model weights and label mapping from Hugging Face Hub
     model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_PATH)
-    #mapandstuff_path = hf_hub_download(repo_id=REPO_ID, filename=MAPANDSTUFF)
     checkpoint = torch.load(model_path, map_location=device)
     label_mapping = checkpoint['label_mapping']
     reverse_mapping = {v: k for k, v in label_mapping.items()}
     model = CLIPVisionClassifier(len(label_mapping))
-
-    model_state_dict = checkpoint["model_state_dict"]
-    model.load_state_dict(model_state_dict)
-
-    model = model.to(device)
-    model.eval()
-
+    model.load_state_dict(checkpoint["model_state_dict"])
+    model.to(device).eval()
     processor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')
-
     return model, processor, reverse_mapping, device
 
 def gradio_interface():
-    gr_interface = gr.Interface(
+    gr.Interface(
         fn=process_image_classification,
-        inputs=gr.inputs.Image(type="numpy"),
+        inputs=gr.Image(type="numpy"),
         outputs=[
-            gr.outputs.Image(label="Heatmap Plot"),
-            gr.outputs.Textbox(label="Predicted Card"),
-            gr.outputs.Textbox(label="Confidence")
+            gr.Image(label="Heatmap Plot"),
+            gr.Textbox(label="Predicted Card"),
+            gr.Textbox(label="Confidence")
         ],
         title="Uno Card Recognizer",
         description="Upload an image or use your webcam to recognize an Uno card."
-    )
-    gr_interface.launch()
+    ).launch()
 
 if __name__ == "__main__":
-    gradio_interface()
+    gradio_interface()
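
A note on the dropped attn_implementation="eager" argument: the heatmap path still depends on per-layer attention weights from CLIPVisionModel, which are only returned when the forward pass (in the unchanged lines between these hunks) requests them. A minimal sketch of that assumption, not the file's verbatim code:

    # Assumed shape of the hidden forward pass: output_attentions=True is what
    # populates outputs.attentions for get_attention_map(); recent transformers
    # releases fall back to eager attention automatically when attentions are
    # requested, which is presumably why the explicit argument could be dropped.
    with torch.no_grad():
        outputs = model.vision_model(pixel_values, output_attentions=True)
        logits = model.classifier(model.dropout(outputs.pooler_output))
        attentions = outputs.attentions  # tuple, one tensor per layer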
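A second observation: apply_heatmap converts its input to BGR before blending, and this commit removes the old cv2.COLOR_BGR2RGB conversion on the return path, so process_image_classification now hands a BGR array to gr.Image, which expects RGB. A hypothetical one-line fix-up, if the swapped colors matter:

    # Restore RGB channel order before returning the overlay to Gradio.
    visualization = cv2.cvtColor(apply_heatmap(image, attention_map), cv2.COLOR_BGR2RGB)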
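Finally, process_image_classification still calls load_model() on every request, re-reading the checkpoint and re-instantiating CLIP per inference. A small caching wrapper (a suggestion, not part of this commit) would avoid that:

    import functools

    @functools.lru_cache(maxsize=None)
    def load_model_cached():
        # hf_hub_download caches the file on disk, but this also skips
        # rebuilding the model and reloading weights on each Gradio call.
        return load_model()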