Create coco_format.py
Browse files- coco_format.py +163 -0
coco_format.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from glob import glob
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import shutil
|
| 6 |
+
|
| 7 |
+
def jsons_to_dataframe(json_dir, min_image_id=1700,
                       val_dir="/content/drive/MyDrive/final/circuit/val/"):
    """Build a per-object annotation table from numbered JSON files.

    Every ``<number>.json`` file in ``json_dir`` whose number is at least
    ``min_image_id`` is parsed, and its matching ``<number>.png`` image is
    copied into ``val_dir``. One DataFrame row is produced per entry of the
    file's ``object`` list.

    Args:
        json_dir: Directory holding the annotation JSON files. The image
            directory is derived from it by replacing ``'annots'`` with
            ``'images'`` in the path.
        min_image_id: Files whose numeric name is below this value are
            skipped. Defaults to 1700.
        val_dir: Directory that receives a copy of each selected image; it is
            created if missing. Pass ``None`` to skip copying.

    Returns:
        pandas.DataFrame with columns ``filename``, ``image_id``, ``width``,
        ``height``, ``category_name`` and ``bbox``, where ``bbox`` is a dict
        with keys ``xmin``, ``ymin``, ``width`` and ``height``.

    Raises:
        FileNotFoundError: If a selected annotation has no matching image.
        KeyError: If a JSON file lacks one of the expected fields.
    """
    columns = ['filename', 'image_id', 'width', 'height',
               'category_name', 'bbox']
    rows = []

    # Loop-invariant: the images live in a sibling directory of the annots.
    image_dir = json_dir.replace('annots', 'images')
    val_dir_ready = False

    # os.listdir order is arbitrary; sort so the row order is reproducible.
    for filename in sorted(os.listdir(json_dir)):
        # Check the extension first so unrelated files never reach int().
        if not filename.endswith('.json'):
            continue
        try:
            image_id = int(filename.split('.')[0])
        except ValueError:
            # Not a numbered annotation file (e.g. 'notes.json'); ignore it.
            continue
        if image_id < min_image_id:
            continue

        image_filename = filename[:-len('.json')] + '.png'

        if val_dir is not None:
            if not val_dir_ready:
                os.makedirs(val_dir, exist_ok=True)
                val_dir_ready = True
            shutil.copy2(os.path.join(image_dir, image_filename), val_dir)

        # Load JSON data from file
        with open(os.path.join(json_dir, filename), 'r', encoding='utf-8') as f:
            data = json.load(f)

        width_value = int(data['size']['width'])
        height_value = int(data['size']['height'])

        objects = data['object']
        # NOTE(review): a lone object may be stored as a bare dict instead of
        # a one-element list (typical of XML-derived JSON) -- confirm against
        # the annotation exporter. Wrapping it keeps the loop below uniform.
        if isinstance(objects, dict):
            objects = [objects]

        for obj in objects:
            box = obj['bndbox']
            # Coordinates may arrive as float-like strings; truncate to int.
            xmin = int(float(box['xmin']))
            ymin = int(float(box['ymin']))
            xmax = int(float(box['xmax']))
            ymax = int(float(box['ymax']))

            rows.append({
                'filename': image_filename,
                'image_id': image_id,
                'width': width_value,
                'height': height_value,
                'category_name': obj['name'],
                'bbox': {
                    "xmin": xmin,
                    "ymin": ymin,
                    "width": xmax - xmin,
                    "height": ymax - ymin,
                },
            })

    # Passing the columns explicitly keeps the schema even when rows is empty.
    return pd.DataFrame(rows, columns=columns)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# COCO category table: each component class name is paired with a 1-based ID
# that follows the order of the names listed below.
categories = [
    {'id': category_id, 'name': category_name}
    for category_id, category_name in enumerate(
        (
            'Active_IC',
            'capacitor',
            'connector',
            'crystal',
            'diode',
            'gnd',
            'inductor',
            'led',
            'misc',
            'nmos',
            'npn',
            'pmos',
            'pnp',
            'pwr',
            'pwr_connector',
            'resistor',
            'switch',
        ),
        start=1,
    )
]
|
| 94 |
+
|
| 95 |
+
def dataframe_to_coco_format(df, category_list=None):
    """Convert a per-object annotation DataFrame into a COCO-style dict.

    Args:
        df: DataFrame with columns ``image_id``, ``filename``, ``width``,
            ``height``, ``category_name`` and ``bbox``; each ``bbox`` cell is
            a dict with keys ``xmin``, ``ymin``, ``width`` and ``height``.
        category_list: Optional list of ``{'id': ..., 'name': ...}`` dicts to
            use as the category table. Defaults to the module-level
            ``categories`` list.

    Returns:
        dict with keys ``info``, ``licenses``, ``categories``, ``images`` and
        ``annotations``. Images are renumbered 1..N in order of first
        appearance, and annotations are numbered 1..M in row order.

    Raises:
        IndexError: If a row's ``category_name`` is not in the category
            table. The type matches what the previous list-indexing lookup
            raised, so existing handlers keep working.
    """
    if category_list is None:
        category_list = categories

    # One name -> id table instead of scanning the list for every row.
    # setdefault keeps the first entry if a name is listed more than once.
    category_ids = {}
    for cat in category_list:
        category_ids.setdefault(cat['name'], cat['id'])

    coco_format = {
        "info": {
            "description": "COCO format dataset",
            "version": "1.0",
            "year": 2024,
            "contributor": "Anonymous",
            "date_created": "2024/06/30"
        },
        "licenses": [],
        # Copy the entries so editing the result cannot alter the shared
        # category table.
        "categories": [dict(cat) for cat in category_list],
        "images": [],
        "annotations": []
    }
    images = coco_format['images']
    annotations = coco_format['annotations']

    # Maps each source image_id to its sequential COCO image ID.
    image_id_map = {}

    for row in df.to_dict('records'):
        source_id = row['image_id']

        # Register the image the first time it is seen.
        if source_id not in image_id_map:
            image_id_map[source_id] = len(images) + 1  # COCO image ID starts from 1
            images.append({
                'id': image_id_map[source_id],
                'file_name': row['filename'],
                'width': row['width'],
                'height': row['height']
            })

        category_name = row['category_name']
        try:
            category_id = category_ids[category_name]
        except KeyError:
            raise IndexError(
                f"unknown category name {category_name!r} "
                f"for image {row['filename']!r}"
            ) from None

        bbox = row['bbox']
        annotations.append({
            'id': len(annotations) + 1,  # COCO annotation ID starts from 1
            'image_id': image_id_map[source_id],
            'category_id': category_id,
            'bbox': [bbox['xmin'], bbox['ymin'], bbox['width'], bbox['height']],
            'area': bbox['width'] * bbox['height'],
            'iscrowd': 0  # Assuming no crowds in the dataset
        })

    return coco_format
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# Example usage:
|
| 151 |
+
# json_directory = '/content/drive/MyDrive/final/full_bboxcnn_data/annots' # Replace with the directory containing your JSON files
|
| 152 |
+
# df = jsons_to_dataframe(json_directory)
|
| 153 |
+
# # Example usage:
|
| 154 |
+
# # Here `df` is the pandas DataFrame returned by `jsons_to_dataframe`.
|
| 155 |
+
|
| 156 |
+
# # Convert DataFrame to COCO format
|
| 157 |
+
# coco_data = dataframe_to_coco_format(df)
|
| 158 |
+
|
| 159 |
+
# # Save COCO format JSON to a file
|
| 160 |
+
# output_json_file = '/content/drive/MyDrive/final/circuit/val/val_coco_format.json'
|
| 161 |
+
# with open(output_json_file, 'w') as f:
|
| 162 |
+
# json.dump(coco_data, f)
|
| 163 |
+
|