File size: 2,450 Bytes
8874c09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""
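Build a JSON file containing only the records of small images.

The record of every image should be present in a dataframe (CSV file) with an
`fpath` column holding the image path. The dataframe either:
1.  should have a flag column (for example `small_size`, selected with
    `--column`) which contains 1 if the image is smaller than 100*100, else 0.
                    Or
2.  should have a column named `dimensions` containing the size tuple; an image
    counts as small when any of its dimensions is less than 100.

Records of the input JSON are matched to the dataframe rows by the basename of
their `image` field.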

"""

import argparse
import ast
import json
import os

import pandas as pd

def _is_small(dimensions, threshold=100):
    """Return True when any entry of *dimensions* is below *threshold*.

    Args:
        dimensions: A sequence of numbers, or its textual form such as
            ``"(50, 200)"``. A CSV round-trip turns tuples into strings, so
            strings are parsed back with ``ast.literal_eval``.
        threshold: Exclusive lower bound every entry must reach.

    Raises:
        ValueError, SyntaxError: If a string value is not a valid literal.

    Note:
        Every entry is compared, so a trailing channel count (e.g. ``3``)
        would also mark the image as small.
    """
    if isinstance(dimensions, str):
        dimensions = ast.literal_eval(dimensions)
    return any(side < threshold for side in dimensions)


def main(args):
    """Write a JSON file holding only the records of small images.

    Args:
        args: Namespace with the attributes
            ``json_path`` (input JSON, a list of records with an ``image`` key),
            ``dataframe_path`` (CSV with an ``fpath`` column),
            ``column`` (optional name of a 1/0 flag column) and
            ``save_json`` (output path).

    Raises:
        ValueError: If ``args.column`` is not given and the dataframe has no
            ``dimensions`` column.
    """
    # load json
    with open(args.json_path, "r", encoding="utf-8") as fp:
        data_from_json = json.load(fp)

    # load the df
    data_from_df = pd.read_csv(args.dataframe_path)

    # build a boolean mask selecting the rows of small images
    if args.column:
        small_mask = data_from_df[args.column] == 1
    else:
        if "dimensions" not in data_from_df.columns:
            # Explicit raise instead of assert: asserts vanish under `python -O`.
            raise ValueError(
                "Either column should be given or a column named 'dimensions' should be there in the df."
            )
        # astype(bool) keeps the mask boolean even for an empty dataframe.
        small_mask = data_from_df["dimensions"].apply(_is_small).astype(bool)

    # A set gives O(1) membership tests while filtering the json records.
    small_basenames = {
        os.path.basename(filename)
        for filename in data_from_df.loc[small_mask, "fpath"]
    }

    # got the names of the files with small sizes
    # to create a json for them now
    print("Creating json records with the images with small size.")
    data_small_imgs = [
        record
        for record in data_from_json
        if os.path.basename(record["image"]) in small_basenames
    ]

    print(f"Saving the json at {args.save_json}.")
    with open(args.save_json, "w", encoding="utf-8") as fp:
        json.dump(data_small_imgs, fp, indent=4)

if __name__ == "__main__":
    # Command-line entry point: parse the paths and hand them to main().
    parser = argparse.ArgumentParser()
    # The three paths are mandatory; without `required=True` a missing option
    # becomes None and only fails later with an obscure TypeError.
    parser.add_argument("--json-path", type=str, required=True,
                        help="Path of the json containing images data.")
    parser.add_argument("--dataframe-path", type=str, required=True,
                        help="Path of the dataframe of all the images containing info about dimension.")
    # Optional: when omitted, main() falls back to the `dimensions` column.
    parser.add_argument("--column", type=str,
                        help="Name of the column if any, to consider instead of `dimensions` column. This column should contain flag 1 or 0 when the dimension is < 100*100 respectively.")
    parser.add_argument("--save-json", type=str, required=True,
                        help="Path of the json to be saved with all the files with size < 100*100.")
    args = parser.parse_args()
    main(args)