Spaces:

mahmoud669
/

face-celebs-scrub

Sleeping

App Files Files Community

mahmoud669 committed 18 days ago

Commit

411d1ef

•

1 Parent(s): 5863a45

Update scrub.py

Browse files

Files changed (1) hide show

scrub.py +63 -11

scrub.py CHANGED Viewed

@@ -35,15 +35,33 @@ from torch import nn
 from itertools import cycle
 import timm
 class CustomDataset(Dataset):
-    def __init__(self, root, transformations = None):
         self.transformations = transformations
         self.im_paths = [im_path for im_path in sorted(glob(f"{root}/*/*"))]
-        self.im_paths = [i for i in self.im_paths if not 'Will Smith' in i]
         self.cls_names, self.cls_counts, count, data_count = {}, {}, 0, 0
         for idx, im_path in enumerate(self.im_paths):
             class_name = self.get_class(im_path)
@@ -67,17 +85,18 @@ class CustomDataset(Dataset):
 class SingleCelebCustomDataset(Dataset):
-    def __init__(self, root, transformations = None):
         self.transformations = transformations
         self.im_paths = [im_path for im_path in sorted(glob(f"{root}/*"))]
         self.cls_names, self.cls_counts, count, data_count = {}, {}, 0, 0
         for idx, im_path in enumerate(self.im_paths):
             class_name = self.get_class(im_path)
             if class_name not in self.cls_names: self.cls_names[class_name] = count; self.cls_counts[class_name] = 1; count += 1
             else: self.cls_counts[class_name] += 1
-    def get_class(self, path): return 16
     def __len__(self): return len(self.im_paths)
@@ -92,11 +111,11 @@ class SingleCelebCustomDataset(Dataset):
         return im, gt
-def get_dls(root, transformations, bs, split = [0.9, 0.05, 0.05], ns = 4, single=False):
     if single:
-        ds = SingleCelebCustomDataset(root = root, transformations = transformations)
     else:
-        ds = CustomDataset(root = root, transformations = transformations)
     total_len = len(ds)
     tr_len = int(total_len * split[0])
@@ -293,10 +312,43 @@ class Args:
     def __init__(self, **entries):
         self.__dict__.update(entries)
-def unlearn():
-    will_tr_dl, will_val_dl, will_ts_dl, classes = get_dls(root = "forget_set/", transformations = tfs, bs = 32, single=True)
     model = timm.create_model("rexnet_150", pretrained = True, num_classes = 17)
     model.load_state_dict(torch.load('faces_best_model.pth'))
     args = Args()
     args.optim = 'sgd'
     args.gamma = 0.99
@@ -355,4 +407,4 @@ def unlearn():
         train_acc, train_loss = train_distill(epoch, celebs_tr_dl, module_list, swa_model, criterion_list, optimizer, args, "minimize")
         if epoch >= args.sstart:
             swa_model.update_parameters(model_s)

 from itertools import cycle
 import timm
+reversed_map = {  # class index -> celebrity name; CustomDataset drops image paths containing reversed_map[forget_class]
+    0: 'Angelina Jolie',
+    1: 'Brad Pitt',
+    2: 'Denzel Washington',
+    3: 'Hugh Jackman',
+    4: 'Jennifer Lawrence',
+    5: 'Johnny Depp',
+    6: 'Kate Winslet',
+    7: 'Leonardo DiCaprio',
+    8: 'Megan Fox',
+    9: 'Natalie Portman',
+    10: 'Nicole Kidman',
+    11: 'Robert Downey Jr',
+    12: 'Sandra Bullock',
+    13: 'Scarlett Johansson',
+    14: 'Tom Cruise',
+    15: 'Tom Hanks',
+    16: 'Will Smith'
+    }
 class CustomDataset(Dataset):
+    def __init__(self, forget_class=16, root, transformations = None):
         self.transformations = transformations
         self.im_paths = [im_path for im_path in sorted(glob(f"{root}/*/*"))]
+        self.im_paths = [i for i in self.im_paths if not reversed_map[forget_class] in i]
         self.cls_names, self.cls_counts, count, data_count = {}, {}, 0, 0
         for idx, im_path in enumerate(self.im_paths):
             class_name = self.get_class(im_path)
 class SingleCelebCustomDataset(Dataset):
+    def __init__(self, root, forget_class=16, transformations = None):  # index every path matching "{root}/*"; all of them receive the label forget_class
         self.transformations = transformations
         self.im_paths = [im_path for im_path in sorted(glob(f"{root}/*"))]
+        self.forget_class = forget_class  # must be set before the loop below: get_class() returns it
         self.cls_names, self.cls_counts, count, data_count = {}, {}, 0, 0  # data_count and idx are not used in the lines visible here
         for idx, im_path in enumerate(self.im_paths):
             class_name = self.get_class(im_path)
             if class_name not in self.cls_names: self.cls_names[class_name] = count; self.cls_counts[class_name] = 1; count += 1  # first sighting: assign the next index, start the count at 1
             else: self.cls_counts[class_name] += 1
+    def get_class(self, path): return self.forget_class  # path is ignored: every sample gets the label passed to __init__
     def __len__(self): return len(self.im_paths)  # number of paths collected by __init__
         return im, gt
+def get_dls(root, forget_class=16, transformations, bs, split = [0.9, 0.05, 0.05], ns = 4, single=False):
     if single:
+        ds = SingleCelebCustomDataset(root = root, forget_class=forget_class, transformations = transformations)
     else:
+        ds = CustomDataset(root = root, forget_class=forget_class, transformations = transformations)
     total_len = len(ds)
     tr_len = int(total_len * split[0])
     def __init__(self, **entries):  # accepts arbitrary keyword arguments
         self.__dict__.update(entries)  # each keyword becomes an instance attribute of the same name
def process_images_in_folder(folder_path, model):
    """Return the class index the model predicts most often for a folder's images.

    Every entry directly inside ``folder_path`` whose name ends in ``.png``,
    ``.jpg`` or ``.jpeg`` (case-insensitive) is opened with PIL, converted to
    RGB, passed through the module-level ``preprocess`` callable and
    classified with ``model``. The per-image predictions are tallied and the
    most frequent class index is returned.

    Args:
        folder_path: Directory containing the images to classify.
        model: Callable applied to a single-image batch; its output is
            reduced with ``torch.argmax(..., dim=1)``.

    Returns:
        The most common predicted class index.

    Raises:
        ValueError: If ``folder_path`` holds no file with a supported image
            extension, so there is nothing to vote on.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    # os.listdir returns entries in arbitrary order; sorting makes the
    # tie-break of the majority vote reproducible across runs.
    image_files = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(image_extensions)
    )
    if not image_files:
        raise ValueError(
            f"No image files (.png/.jpg/.jpeg) found in {folder_path!r}"
        )

    # A single accumulator for the whole folder. Re-creating it per image
    # would leave only the last image's prediction in the vote.
    preds = []
    # NOTE(review): the model runs in whatever train/eval mode the caller
    # left it in -- confirm model.eval() is called before this function.
    with torch.no_grad():
        for filename in image_files:
            file_path = os.path.join(folder_path, filename)
            # convert() decodes the pixels into a new image, so the file
            # handle can be released right away.
            with Image.open(file_path) as raw_image:
                image = raw_image.convert('RGB')
            image_tensor = preprocess(image).unsqueeze(0)  # add batch dimension
            output = model(image_tensor)
            # softmax is monotonic, so the argmax of the raw output equals
            # the argmax of the probabilities.
            pred_class = torch.argmax(output, dim=1)
            preds.append(pred_class.item())

    forget_class, _ = Counter(preds).most_common(1)[0]
    return forget_class
+def unlearn():
     model = timm.create_model("rexnet_150", pretrained = True, num_classes = 17)
     model.load_state_dict(torch.load('faces_best_model.pth'))
+    forget_class = preprocess('forget_set', model)
+    will_tr_dl, will_val_dl, will_ts_dl, classes = get_dls(root = "forget_set", forget_class=forget_class, transformations = tfs, bs = 32, single=True)
+    celebs_tr_dl, celebs_val_dl, celebs_ts_dl, classes = get_dls(root = "celeb-dataset", forget_class=forget_class, transformations = tfs, bs = 32)
     args = Args()
     args.optim = 'sgd'
     args.gamma = 0.99
         train_acc, train_loss = train_distill(epoch, celebs_tr_dl, module_list, swa_model, criterion_list, optimizer, args, "minimize")
         if epoch >= args.sstart:
             swa_model.update_parameters(model_s)
+    torch.save(model_s.state_dict(), 'celeb-model-unlearned.pth')