# WARPxMetafusion / data_utils / schema_utils.py
# Author: ByteMaster01
# Commit: initial commit (0a65f9d)
from typing import Dict, Any
def schema_to_line_based(schema: dict) -> str:
    """
    Render the first collection of a schema as one line per field.

    Each line has the form ``field // description (type, format)``; the
    comment part is dropped when a field declares none of the three. Nested
    object properties, and the properties of object-typed array items, are
    listed right after their parent using the bare field name without a
    parent prefix (e.g. 'age' instead of 'involved_persons.age').

    A type may be declared under "bsonType" (preferred) or "type", either as
    a single name or as a list of names (a union such as
    ``["string", "null"]``), which is rendered as ``string | null``.

    Args:
        schema: Dictionary with a "collections" list. Only the first
            collection is rendered; its fields are read from
            ``collection["document"]["properties"]`` when present, otherwise
            from ``collection["properties"]``. The schema is never modified.

    Returns:
        The rendered lines joined with newlines, or "" when there is no
        collection or the collection has no properties.
    """

    def get_types(info) -> list:
        """Return the type names declared by a field spec as a list of strings."""
        if not isinstance(info, dict):
            return []
        declared = info.get("bsonType") or info.get("type") or ""
        if isinstance(declared, (list, tuple)):
            # Union type: build a fresh list so the caller's schema is never
            # aliased (and therefore never mutated) by later formatting.
            return [str(name) for name in declared]
        return [str(declared)] if declared else []

    def process_properties(properties) -> list:
        """Return the rendered lines for a "properties" mapping, depth-first."""
        if not isinstance(properties, dict):
            return []
        lines = []
        for field, info in properties.items():
            if not isinstance(info, dict):
                # No usable metadata: keep the field visible by name only.
                lines.append(str(field))
                continue

            types = get_types(info)
            # "description": None is tolerated and treated as absent.
            description = str(info.get("description") or "").strip()
            fmt = info.get("format") or ""

            # Compose "type, format", skipping whichever part is missing so a
            # format without a type does not produce a stray leading comma.
            type_fmt = ", ".join(
                str(part) for part in (" | ".join(types), fmt) if part
            )

            comment = description
            if type_fmt:
                comment = f"{description} ({type_fmt})" if description else f"({type_fmt})"
            lines.append(f"{field} // {comment}" if comment else str(field))

            # Recurse into nested objects and arrays of objects; children are
            # emitted with their own names only (no parent prefix).
            if "object" in types and "properties" in info:
                lines.extend(process_properties(info["properties"]))
            elif "array" in types and "items" in info:
                items = info["items"]
                # "items" is usually one sub-schema but may be a list of them.
                candidates = items if isinstance(items, (list, tuple)) else [items]
                for item in candidates:
                    if "object" in get_types(item) and "properties" in item:
                        lines.extend(process_properties(item["properties"]))
        return lines

    collections = schema.get("collections", [])
    if not collections:
        return ""
    collection = collections[0]
    if not isinstance(collection, dict):
        return ""

    # Support both "document" -> "properties" and direct "properties".
    document = collection.get("document")
    if isinstance(document, dict) and "properties" in document:
        properties = document["properties"]
    else:
        properties = collection.get("properties", {})
    return "\n".join(process_properties(properties))
if __name__ == "__main__":
    # Demo: build a sample "events" collection schema piece by piece, then
    # print its line-based rendering.

    # Fields under "identifier".
    identifier_fields = {
        "camgroup_id": {
            "bsonType": "string",
            "description": "Use this to filter events by group",
        },
        "task_id": {
            "bsonType": "string",
            "description": "Use this to filter events by tasks",
        },
        "camera_id": {
            "bsonType": "string",
            "description": "Use this to filter events by camera",
        },
    }

    # Fields of each element in "response.event.blobs".
    blob_fields = {
        "url": {"bsonType": "string"},
        "attribs": {
            "bsonType": "object",
            "description": "Use this for attributes like Gender (Only Male, Female), Upper Clothing, Lower Clothing, Age (Ranges like 20-30, 30-40 and so on) for people and Make (like maruti suzuki, toyota, tata), Color, Type (like Hatchback, sedan, xuv), label (like car, truck, van, three wheeler, motorcycle) for Vehicles",
        },
        "label": {
            "bsonType": "string",
            "description": "Use this label for number plate",
        },
        "score": {
            "bsonType": "number",
            "description": "Use this for confidence for the blob",
        },
        "match_id": {
            "bsonType": "string",
            "description": "Use this match_id for name of the person",
        },
        "severity": {"bsonType": "string"},
        "subclass": {
            "bsonType": "string",
            "description": "Use this for subclass for the blob",
        },
    }

    # Fields under "response.event".
    event_fields = {
        "severity": {
            "bsonType": "string",
            "description": "Can be Low, Medium, Critical",
        },
        "type": {
            "bsonType": "string",
            "description": "Type of the event. Use this to filter events of person and vehicle",
        },
        "blobs": {
            "bsonType": "array",
            "items": {"bsonType": "object", "properties": blob_fields},
        },
        "c_timestamp": {
            "bsonType": "date",
            "description": "Use this for timestamp",
        },
        "label": {
            "bsonType": "string",
            "description": "Use this label for number plate",
        },
    }

    demo_schema = {
        "collections": [
            {
                "name": "events",
                "document": {
                    "bsonType": "object",
                    "properties": {
                        "identifier": {
                            "bsonType": "object",
                            "properties": identifier_fields,
                        },
                        "response": {
                            "bsonType": "object",
                            "properties": {
                                "event": {
                                    "bsonType": "object",
                                    "properties": event_fields,
                                },
                            },
                        },
                    },
                },
            },
        ],
        "version": 1,
    }

    print(schema_to_line_based(demo_schema))