import torch
from PIL import Image
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
    CLIPProcessor,
    CLIPModel
)


class DualWasteClassifier:
    def __init__(self):
        # Initialize ResNet-50
        self.resnet_model = AutoModelForImageClassification.from_pretrained("microsoft/resnet-50")
        self.resnet_processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")

        # Initialize CLIP
        self.clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

        # Use GPU if available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.resnet_model = self.resnet_model.to(self.device)
        self.clip_model = self.clip_model.to(self.device)

        # Categories for the CLIP zero-shot model
        self.clip_categories = [
            "recyclable plastic waste like plastic bottles and containers",
            "paper waste like newspapers and cardboard",
            "organic waste like food scraps and plant materials",
            "electronic waste like old phones and computers",
            "glass waste like bottles and jars",
            "metal waste like cans and foil",
            "hazardous waste like batteries and chemicals",
            "general non-recyclable waste"
        ]

    def get_resnet_prediction(self, image):
        # Process image for ResNet
        inputs = self.resnet_processor(image, return_tensors="pt").to(self.device)

        # Get predictions
        with torch.no_grad():
            outputs = self.resnet_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=1)[0]

        # Get highest confidence prediction
        max_prob, max_idx = torch.max(probs, 0)
        category = self.resnet_model.config.id2label[max_idx.item()]
        confidence = max_prob.item() * 100

        return {
            'category': category,
            'confidence': round(confidence, 2)
        }

    def get_clip_prediction(self, image):
        # Process image and text with CLIP
        inputs = self.clip_processor(
            images=image,
            text=self.clip_categories,
            return_tensors="pt",
            padding=True
        )

        # Move inputs to device
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        # Get predictions (no gradients needed at inference time)
        with torch.no_grad():
            outputs = self.clip_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits_per_image, dim=1)[0]

        # Get highest confidence prediction; keep only the label before " like ..."
        max_prob, max_idx = torch.max(probs, 0)
        category = self.clip_categories[max_idx.item()].split(' like ')[0]

        return {
            'category': category,
            'confidence': round(max_prob.item() * 100, 2)
        }

    def classify_image(self, image_path):
        # Load and convert image to RGB
        image = Image.open(image_path).convert('RGB')

        # Get predictions from both models
        resnet_result = self.get_resnet_prediction(image)
        clip_result = self.get_clip_prediction(image)

        # Format the combined result
        result = f"This is {resnet_result['category']} with {resnet_result['confidence']}% confidence "
        result += f"and the waste type is {clip_result['category']}"
        return result


def demo_classification():
    # Initialize classifier
    classifier = DualWasteClassifier()

    # Replace with your image path
    image_path = "waste_image.jpg"
    result = classifier.classify_image(image_path)
    print("\nClassification Result:")
    print(result)


# Example usage
if __name__ == "__main__":
    demo_classification()