This repository provides https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet with ONNX weights, so that it is compatible with Transformers.js.

Usage (Transformers.js)

If you haven't already, you can install the Transformers.js JavaScript library from NPM using:

npm i @xenova/transformers

Example: Perform object-detection with Oblix/yolov8x-doclaynet_ONNX.

import { AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';

const modelId = "Oblix/yolov8x-doclaynet_ONNX";

// Load the model with quantization disabled, plus its matching pre-processor.
const model = await AutoModel.from_pretrained(modelId, { quantized: false });
const processor = await AutoProcessor.from_pretrained(modelId);

// Download the sample page, pre-process it and run the model on it.
const rawImage = await RawImage.fromURL(
    'https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/sample1.png',
);
const { pixel_values } = await processor(rawImage);
const { output0 } = await model({ images: pixel_values });

// Post-process:
// After the transpose, the tensor has shape [ 8400, 15 ]:
// - 8400 potential bounding boxes
// - 15 parameters for each box:
//   - the first 4 describe the box (x-center, y-center, width, height)
//   - the remaining 11 are the probabilities for each class
const candidates = output0[0].transpose(1, 0).tolist();

// Example code to format it nicely:
const results = [];
const scoreThreshold = 0.5; // Adjust the threshold as needed

// The last two dimensions of the model input are its height and width.
const [inputHeight, inputWidth] = pixel_values.dims.slice(-2);

for (const candidate of candidates) {
    const [xc, yc, w, h, ...scores] = candidate;

    // Get best class (the first one wins when several share the top score)
    let best = 0;
    for (let i = 1; i < scores.length; i++) {
        if (scores[i] > scores[best]) {
            best = i;
        }
    }
    const score = scores[best];
    if (score < scoreThreshold) continue; // Not confident enough

    // Convert centre/size into corner coordinates, rescaled from the model
    // input size back to the original image size
    const halfW = w / 2;
    const halfH = h / 2;
    const x1 = (xc - halfW) / inputWidth * rawImage.width;
    const y1 = (yc - halfH) / inputHeight * rawImage.height;
    const x2 = (xc + halfW) / inputWidth * rawImage.width;
    const y2 = (yc + halfH) / inputHeight * rawImage.height;

    results.push({
        x1,
        x2,
        y1,
        y2,
        score,
        label: model.config.id2label[best],
        index: best,
    });
}

// Drop boxes that overlap too much with another box.
const overlapThreshold = 0.5; // Adjust the threshold as needed
const filteredResults = removeDuplicates(results, overlapThreshold);
console.log(filteredResults);

/**
 * Removes duplicate detections with greedy non-maximum suppression.
 *
 * Detections are visited from the highest score to the lowest, and one is kept
 * only if its IoU with every detection kept so far is at most `iouThreshold`.
 * As a result, no two returned detections overlap by more than the threshold,
 * and the surviving set does not depend on the order of the input (detections
 * with equal scores are visited in input order, so the earlier one wins).
 *
 * Labels are not taken into account: overlapping boxes of different classes
 * suppress each other as well.
 *
 * @param {Array<{x1: number, y1: number, x2: number, y2: number, score: number}>} detections
 *   Candidate boxes. The array is not modified.
 * @param {number} iouThreshold
 *   Two boxes count as duplicates when their IoU is strictly greater than this.
 * @returns {Array<object>} The kept detections, in the same relative order as in the input.
 */
function removeDuplicates(detections, iouThreshold) {
    // Work on indices so the input stays untouched and the original order can
    // be restored at the end. Array.prototype.sort is stable, so equal scores
    // keep their input order.
    const byScoreDesc = detections
        .map((_, index) => index)
        .sort((a, b) => detections[b].score - detections[a].score);

    const keptIndices = [];
    for (const index of byScoreDesc) {
        const candidate = detections[index];
        const overlapsKept = keptIndices.some(
            (keptIndex) => calculateIoU(candidate, detections[keptIndex]) > iouThreshold,
        );
        if (!overlapsKept) {
            keptIndices.push(index);
        }
    }

    // Report survivors in input order rather than in score order.
    return keptIndices
        .sort((a, b) => a - b)
        .map((index) => detections[index]);
}

/**
 * Computes the intersection-over-union (IoU) of two axis-aligned boxes.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection1
 *   First box, given by its corners (x1, y1) and (x2, y2).
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection2
 *   Second box, in the same format.
 * @returns {number} Overlap area divided by union area, or 0 when the union
 *   area is not positive (e.g. both boxes have zero area).
 */
function calculateIoU(detection1, detection2) {
    // Clamp at 0 so that disjoint boxes yield no overlap instead of a negative one.
    const xOverlap = Math.max(0, Math.min(detection1.x2, detection2.x2) - Math.max(detection1.x1, detection2.x1));
    const yOverlap = Math.max(0, Math.min(detection1.y2, detection2.y2) - Math.max(detection1.y1, detection2.y1));
    const overlapArea = xOverlap * yOverlap;

    const area1 = (detection1.x2 - detection1.x1) * (detection1.y2 - detection1.y1);
    const area2 = (detection2.x2 - detection2.x1) * (detection2.y2 - detection2.y1);
    const unionArea = area1 + area2 - overlapArea;

    // Degenerate boxes would otherwise produce 0 / 0 = NaN.
    if (!(unionArea > 0)) {
        return 0;
    }
    return overlapArea / unionArea;
}

Result

[
    {
        "x1": 54.53195288479328,
        "y1": 170.06781649589539,
        "x2": 95.52642979323865,
        "y2": 186.62115139961244,
        "score": 0.8901662826538086,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 53.96503926515579,
        "y1": 195.67131299972536,
        "x2": 221.8717828631401,
        "y2": 212.6188931465149,
        "score": 0.8967247605323792,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 54.53195288479328,
        "y1": 221.1506155014038,
        "x2": 98.4759178608656,
        "y2": 238.44384784698488,
        "score": 0.8795284032821655,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 55.731045877933504,
        "y1": 338.1506155014038,
        "x2": 103.58089088201523,
        "y2": 355.22782917022704,
        "score": 0.9104153513908386,
        "label": "Section-header",
        "index": 7
    },
    {
        "x1": 54.501348263025285,
        "y1": 452.59601612091063,
        "x2": 144.76493505835532,
        "y2": 469.1547849655152,
        "score": 0.9181555509567261,
        "label": "Section-header",
        "index": 7
    },
    {
        "x1": 54.37510642111301,
        "y1": 568.1918724060059,
        "x2": 73.67877252995967,
        "y2": 584.1619010925293,
        "score": 0.899300754070282,
        "label": "Section-header",
        "index": 7
    },
    {
        "x1": 54.27563991844654,
        "y1": 840.2569072723389,
        "x2": 70.35437833964825,
        "y2": 859.4512378692626,
        "score": 0.6805046796798706,
        "label": "Section-header",
        "index": 7
    },
    {
        "x1": 309.2861147403717,
        "y1": 908.7717830657958,
        "x2": 373.8879840373993,
        "y2": 922.6841892242431,
        "score": 0.8969672918319702,
        "label": "Page-footer",
        "index": 4
    },
    {
        "x1": 311.53335428237915,
        "y1": 10.31740515232086,
        "x2": 607.2475433349609,
        "y2": 33.85392036437988,
        "score": 0.9498511552810669,
        "label": "Page-header",
        "index": 5
    },
    {
        "x1": 56.66784882545471,
        "y1": 289.38916368484496,
        "x2": 416.7734823703766,
        "y2": 306.94164075851444,
        "score": 0.856067419052124,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 56.03344459533691,
        "y1": 309.5055012702942,
        "x2": 317.7232768535614,
        "y2": 325.49175367355343,
        "score": 0.8314194083213806,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 53.00637502670288,
        "y1": 429.9619674682617,
        "x2": 414.61163306236267,
        "y2": 445.95904312133786,
        "score": 0.8927980661392212,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 55.619012689590456,
        "y1": 638.6609138488769,
        "x2": 384.32462439537045,
        "y2": 656.8182655334473,
        "score": 0.9029342532157898,
        "label": "List-item",
        "index": 3
    },
    {
        "x1": 58.06927928924561,
        "y1": 794.932172012329,
        "x2": 520.523375415802,
        "y2": 811.1884700775146,
        "score": 0.9037705063819885,
        "label": "List-item",
        "index": 3
    },
    {
        "x1": 54.25830144882202,
        "y1": 76.01902542114259,
        "x2": 552.8331304550171,
        "y2": 158.67227897644042,
        "score": 0.9725438356399536,
        "label": "Title",
        "index": 10
    },
    {
        "x1": 53.636448097229,
        "y1": 244.93504171371458,
        "x2": 610.1452471733094,
        "y2": 274.8768593788147,
        "score": 0.8954038619995117,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 54.76330833435059,
        "y1": 364.74734601974484,
        "x2": 625.0439935684204,
        "y2": 405.74994478225705,
        "score": 0.7930819988250732,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 55.78299608230591,
        "y1": 480.10940895080563,
        "x2": 623.4623931884765,
        "y2": 556.692225265503,
        "score": 0.9482676982879639,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 52.160629177093504,
        "y1": 593.5841983795166,
        "x2": 609.7405840873719,
        "y2": 635.7749668121338,
        "score": 0.9440742135047913,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 53.12467575073242,
        "y1": 654.1885282516479,
        "x2": 615.2034725189209,
        "y2": 697.286619758606,
        "score": 0.9134702086448669,
        "label": "List-item",
        "index": 3
    },
    {
        "x1": 52.52786092758179,
        "y1": 712.9350305557251,
        "x2": 622.7321027755737,
        "y2": 754.2832815170287,
        "score": 0.9259238243103027,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 56.837522792816166,
        "y1": 758.6981185913086,
        "x2": 607.179635810852,
        "y2": 787.9486541748047,
        "score": 0.9015638828277588,
        "label": "List-item",
        "index": 3
    },
    {
        "x1": 56.57186779975891,
        "y1": 810.8556049346925,
        "x2": 446.48612236976624,
        "y2": 828.0084697723388,
        "score": 0.8806689977645874,
        "label": "List-item",
        "index": 3
    }
]

image/png

Labels

  • Caption
  • Footnote
  • Formula
  • List-item
  • Page-footer
  • Page-header
  • Picture
  • Section-header
  • Table
  • Text
  • Title
Downloads last month
14
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The HF Inference API does not support object-detection models for the transformers.js library.

Model tree for Oblix/yolov8x-doclaynet_ONNX

Quantized
(1)
this model