File size: 1,981 Bytes
8874c09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
"""
Report the file size and pixel dimensions of every image in a directory so that
unusually sized or unusually small images can be spotted.
"""

import os
from PIL import Image
import pandas as pd
import argparse
from tqdm import tqdm
from typing import Union


def main(args: argparse.Namespace) -> None:
    """Scan a directory of images and report their file sizes and dimensions.

    Every ``.jpg`` / ``.png`` file directly inside ``args.dataset_path`` is
    inspected.  For each readable image the on-disk size in bytes, the
    ``(width, height)`` reported by PIL, and a 0/1 flag telling whether the
    pixel area is below 100 * 100 are collected.  Files that cannot be read
    are reported and skipped so that a single bad file does not abort the
    whole scan.

    Args:
        args: Parsed command-line options. ``args.dataset_path`` is the
            directory to scan; ``args.create_dataframe`` is the CSV path to
            write the per-image report to (nothing is written when falsy).
    """
    dataset_path = args.dataset_path
    image_extensions = (".jpg", ".png")
    small_area_threshold = 100 * 100  # pixel area below which an image is flagged

    sizes = []
    dimensions = []
    fpaths = []                  # full path of every image that was read
    size_less_than_100X100 = []  # 1 if width * height < 100 * 100 else 0

    for filename in tqdm(os.listdir(dataset_path)):
        if not filename.endswith(image_extensions):
            continue

        image_path = os.path.join(dataset_path, filename)
        try:
            file_size = os.path.getsize(image_path)
            with Image.open(image_path) as img:
                dim = img.size
        except OSError as err:
            # PIL signals unreadable/unidentifiable files with OSError
            # subclasses; tqdm.write keeps the progress bar intact.
            tqdm.write(f"Skipping unreadable image {image_path}: {err}")
            continue

        # Append only after a successful read so all columns stay aligned.
        fpaths.append(image_path)
        sizes.append(file_size)
        dimensions.append(dim)
        size_less_than_100X100.append(1 if dim[0] * dim[1] < small_area_threshold else 0)

    if args.create_dataframe:
        df = pd.DataFrame({
            "fpath": fpaths,
            "img_size": sizes,
            "dimensions": dimensions,
            "small_size": size_less_than_100X100
        })

        df.to_csv(args.create_dataframe, index=False)
        print(f"Dataframe saved at {args.create_dataframe}.")

    # Summary statistics (count, mean, min, max, quartiles) of the file sizes.
    print(pd.Series(sizes).describe())

if __name__ == "__main__":
    # Command-line entry point: collect the options and run the scan.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--dataset-path",
        type=str,
        required=True,
        help="Path of the dataset of images to be checked.",
    )
    arg_parser.add_argument(
        "--create-dataframe",
        type=str,
        default="report_imgs_size.csv",
        help="Name of the dataframe if you want to create.",
    )
    args = arg_parser.parse_args()
    main(args)