|
|
"""Deep analysis of model_33 (LineLayout) to understand its OneOCRFeatureExtract usage.""" |
|
|
import onnx |
|
|
from onnx import numpy_helper |
|
|
import numpy as np |
|
|
from pathlib import Path |
|
|
|
|
|
# Locate and load the extracted model_33 ONNX file.
models_dir = Path("oneocr_extracted/onnx_models")

# Sort so the pick is deterministic when several files match, and fail with a
# descriptive error instead of the bare IndexError that indexing an empty glob
# result would raise.
candidates = sorted(models_dir.glob("model_33_*"))
if not candidates:
    raise FileNotFoundError(f"No file matching 'model_33_*' found in {models_dir}")
model_path = candidates[0]

model = onnx.load(str(model_path))
|
|
|
|
|
# Report the file name, the IR version and every (domain, version) opset pair
# declared by the loaded model.
opset_pairs = [(opset.domain, opset.version) for opset in model.opset_import]
print(f"Model: {model_path.name}")
print(f"IR version: {model.ir_version}")
print(f"Opsets: {opset_pairs}")
|
|
|
|
|
|
|
|
# List each graph input with its shape. Every dimension prints dim_value,
# falling back to dim_param when dim_value is falsy.
print("\nGraph inputs:")
for graph_input in model.graph.input:
    dims = graph_input.type.tensor_type.shape.dim
    shape = [dim.dim_value if dim.dim_value else dim.dim_param for dim in dims]
    print(f" {graph_input.name}: {shape}")
|
|
|
|
|
# List each graph output with its shape. Every dimension prints dim_value,
# falling back to dim_param when dim_value is falsy.
print("\nGraph outputs:")
for graph_output in model.graph.output:
    dims = graph_output.type.tensor_type.shape.dim
    shape = [dim.dim_value if dim.dim_value else dim.dim_param for dim in dims]
    print(f" {graph_output.name}: {shape}")
|
|
|
|
|
|
|
|
# Summarise every initializer: byte length for string tensors, and
# shape / dtype / value range for everything else.
print(f"\nInitializers ({len(model.graph.initializer)}):")
for init in model.graph.initializer:
    if init.data_type == onnx.TensorProto.STRING:
        # String tensors hold their payload in string_data; fall back to
        # raw_data when that field is empty.
        raw = bytes(init.string_data[0] if init.string_data else init.raw_data)
        print(f" {init.name}: STRING, {len(raw)} bytes")
        continue

    data = numpy_helper.to_array(init)
    if data.size == 0:
        # min()/max() raise ValueError on zero-size arrays, so report the
        # tensor as empty instead of aborting the whole listing.
        print(f" {init.name}: {data.shape} {data.dtype} [empty]")
        continue

    print(f" {init.name}: {data.shape} {data.dtype} [{data.min():.4f}, {data.max():.4f}]")
|
|
|
|
|
|
|
|
# Print a condensed node listing: the first five nodes, the last five nodes,
# and every node from the first OneOCRFeatureExtract op onwards. The attributes
# of each OneOCRFeatureExtract node (int, float and int-list kinds) are dumped
# directly below it.
node_count = len(model.graph.node)
print(f"\nNodes ({node_count}):")
fe_found = False
for i, node in enumerate(model.graph.node):
    is_fe = node.op_type == "OneOCRFeatureExtract"
    # `>=` keeps the tail window at five nodes, matching the five-node head
    # window (the previous `i > node_count - 5` only showed four).
    in_edge_window = i < 5 or i >= node_count - 5
    if not (is_fe or fe_found or in_edge_window):
        continue

    domain_str = f" [{node.domain}]" if node.domain else ""
    print(f" [{i}] {node.op_type}{domain_str}: {list(node.input)} → {list(node.output)}")

    if is_fe:
        fe_found = True
        for attr in node.attribute:
            if attr.type == onnx.AttributeProto.INT:
                print(f" {attr.name} = {attr.i}")
            elif attr.type == onnx.AttributeProto.FLOAT:
                print(f" {attr.name} = {attr.f}")
            elif attr.type == onnx.AttributeProto.INTS:
                print(f" {attr.name} = {list(attr.ints)}")
|
|
|
|
|
|
|
|
# Show the nodes that feed the first OneOCRFeatureExtract op: up to five
# predecessors followed by the op itself.
print("\nConv backbone structure (last 5 nodes before FE):")
fe_idx = next(
    (idx for idx, candidate in enumerate(model.graph.node)
     if candidate.op_type == "OneOCRFeatureExtract"),
    None,
)

# Compare against None explicitly: index 0 is a valid position but is falsy,
# so a plain truthiness test would skip the listing when the op comes first.
if fe_idx is not None:
    for i in range(max(0, fe_idx - 5), fe_idx + 1):
        node = model.graph.node[i]
        domain_str = f" [{node.domain}]" if node.domain else ""
        print(f" [{i}] {node.op_type}{domain_str}: {list(node.input)} → {list(node.output)}")
|
|
|
|
|
|
|
|
# Inspect the "feature/config" initializer by reinterpreting its bytes as
# big-endian float32 values and printing basic statistics plus any values
# that look like layer dimensions.
for init in model.graph.initializer:
    if init.name != "feature/config":
        continue

    raw = bytes(init.string_data[0] if init.string_data else init.raw_data)
    print(f"\nfeature/config blob: {len(raw)} bytes")

    # np.frombuffer rejects buffers whose size is not a multiple of the item
    # size, so drop any trailing bytes that do not form a whole float32.
    usable = len(raw) - len(raw) % 4
    be = np.frombuffer(raw[:usable], dtype='>f4').copy()
    finite_mask = np.isfinite(be)
    print(f" Big-endian float32: {len(be)} values")
    print(f" Finite: {finite_mask.sum()}")
    in_range = np.sum(np.abs(be[finite_mask]) < 10)
    # Guard the percentage against an empty blob.
    pct = 100 * in_range / len(be) if len(be) else 0.0
    print(f" In [-10,10]: {in_range} ({pct:.1f}%)")
    print(f" First 20: {be[:20]}")
    print(f" Last 20: {be[-20:]}")

    # Whole numbers strictly between 10 and 10000 are reported as candidate
    # dimensions (this range already includes 128, 256 and 512). The test is
    # vectorised so inf/NaN entries simply compare False; the previous
    # per-value int(v) raised OverflowError on +inf.
    dim_mask = (be > 10) & (be < 10000) & (be == np.floor(be))
    for i in np.flatnonzero(dim_mask):
        print(f" Potential dim at [{i}]: {be[i]}")
|
|
|