Spaces:

Xenova
/

segment-anything-web

Running

App Files Files Community

Xenova HF Staff commited on Jan 10, 2024

Commit

b21b1c5

verified ·

1 Parent(s): f31a6ca

Upload 4 files

Browse files

Files changed (4) hide show

index.css +116 -0
index.html +39 -18
index.js +295 -0
worker.js +109 -0

index.css ADDED Viewed

	@@ -0,0 +1,116 @@

+* {
+    box-sizing: border-box;
+    padding: 0;
+    margin: 0;
+    font-family: sans-serif;
+}
+html,
+body {
+    height: 100%;
+}
+body {
+    padding: 16px 32px;
+}
+body,
+#container,
+#upload-button {
+    display: flex;
+    flex-direction: column;
+    justify-content: center;
+    align-items: center;
+}
+h1 {
+    text-align: center;
+}
+#container {
+    position: relative;
+    width: 640px;
+    height: 420px;
+    max-width: 100%;
+    max-height: 100%;
+    border: 2px dashed #D1D5DB;
+    border-radius: 0.75rem;
+    overflow: hidden;
+    cursor: pointer;
+    margin-top: 1rem;
+    background-size: 100% 100%;
+    background-position: center;
+    background-repeat: no-repeat;
+}
+#mask-output {
+    position: absolute;
+    width: 100%;
+    height: 100%;
+    pointer-events: none;
+}
+#upload-button {
+    gap: 0.4rem;
+    font-size: 18px;
+    cursor: pointer;
+}
+#upload {
+    display: none;
+}
+svg {
+    pointer-events: none;
+}
+#example {
+    font-size: 14px;
+    text-decoration: underline;
+    cursor: pointer;
+}
+#example:hover {
+    color: #2563EB;
+}
+canvas {
+    position: absolute;
+    width: 100%;
+    height: 100%;
+    opacity: 0.6;
+}
+#status {
+    min-height: 16px;
+    margin: 8px 0;
+}
+.icon {
+    height: 16px;
+    width: 16px;
+    position: absolute;
+    transform: translate(-50%, -50%);
+}
+#controls>button {
+    padding: 6px 12px;
+    background-color: #3498db;
+    color: white;
+    border: 1px solid #2980b9;
+    border-radius: 5px;
+    cursor: pointer;
+    font-size: 16px;
+}
+#controls>button:disabled {
+    background-color: #d1d5db;
+    color: #6b7280;
+    border: 1px solid #9ca3af;
+    cursor: not-allowed;
+}
+#information {
+    margin-top: 0.25rem;
+    font-size: 15px;
+}

index.html CHANGED Viewed

@@ -1,19 +1,40 @@
 <!DOCTYPE html>
-<html>
-	<head>
-		<meta charset="utf-8" />
-		<meta name="viewport" content="width=device-width" />
-		<title>My static Space</title>
-		<link rel="stylesheet" href="style.css" />
-	</head>
-	<body>
-		<div class="card">
-			<h1>Welcome to your static Space!</h1>
-			<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-			<p>
-				Also don't forget to check the
-				<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-			</p>
-		</div>
-	</body>
-</html>

 <!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8" />
+    <link rel="stylesheet" href="index.css" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Transformers.js - Segment Anything</title>
+</head>
+<body>
+    <h1>Segment Anything w/ 🤗 Transformers.js</h1>
+    <div id="container">
+        <label id="upload-button" for="upload">
+            <svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
+                <path fill="#000"
+                    d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z">
+                </path>
+            </svg>
+            Click to upload image
+            <label id="example">(or try example)</label>
+        </label>
+        <canvas id="mask-output"></canvas>
+    </div>
+    <label id="status"></label>
+    <div id="controls">
+        <button id="reset-image">Reset image</button>
+        <button id="clear-points">Clear points</button>
+        <button id="cut-mask" disabled>Cut mask</button>
+    </div>
+    <p id="information">
+        Left click = positive points, right click = negative points.
+    </p>
+    <input id="upload" type="file" accept="image/*" />
+    <script src="index.js" type="module"></script>
+</body>
+</html>

index.js ADDED Viewed

	@@ -0,0 +1,295 @@

+// Reference the elements we will use
+const statusLabel = document.getElementById('status');
+const fileUpload = document.getElementById('upload');
+const imageContainer = document.getElementById('container');
+const example = document.getElementById('example');
+const maskCanvas = document.getElementById('mask-output');
+const uploadButton = document.getElementById('upload-button');
+const resetButton = document.getElementById('reset-image');
+const clearButton = document.getElementById('clear-points');
+const cutButton = document.getElementById('cut-mask');
+// State variables
+let lastPoints = null;
+let isEncoded = false;
+let isDecoding = false;
+let isMultiMaskMode = false;
+let modelReady = false;
+let imageDataURI = null;
+// Constants
+const BASE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/';
+const EXAMPLE_URL = BASE_URL + 'corgi.jpg';
+// Create a web worker so that the main (UI) thread is not blocked during inference.
+const worker = new Worker('worker.js', {
+    type: 'module',
+});
+// Preload star and cross images to avoid lag on first click
+const star = new Image();
+star.src = BASE_URL + 'star-icon.png';
+star.className = 'icon';
+const cross = new Image();
+cross.src = BASE_URL + 'cross-icon.png';
+cross.className = 'icon';
+// Set up message handler
+worker.addEventListener('message', (e) => {
+    const { type, data } = e.data;
+    if (type === 'ready') {
+        modelReady = true;
+        statusLabel.textContent = 'Ready';
+    } else if (type === 'decode_result') {
+        isDecoding = false;
+        if (!isEncoded) {
+            return; // We are not ready to decode yet
+        }
+        if (!isMultiMaskMode && lastPoints) {
+            // Perform decoding with the last point
+            decode();
+            lastPoints = null;
+        }
+        const { mask, scores } = data;
+        // Update canvas dimensions (if different)
+        if (maskCanvas.width !== mask.width || maskCanvas.height !== mask.height) {
+            maskCanvas.width = mask.width;
+            maskCanvas.height = mask.height;
+        }
+        // Create context and allocate buffer for pixel data
+        const context = maskCanvas.getContext('2d');
+        const imageData = context.createImageData(maskCanvas.width, maskCanvas.height);
+        // Select best mask
+        const numMasks = scores.length; // 3
+        let bestIndex = 0;
+        for (let i = 1; i < numMasks; ++i) {
+            if (scores[i] > scores[bestIndex]) {
+                bestIndex = i;
+            }
+        }
+        statusLabel.textContent = `Segment score: ${scores[bestIndex].toFixed(2)}`;
+        // Fill mask with colour
+        const pixelData = imageData.data;
+        for (let i = 0; i < pixelData.length; ++i) {
+            if (mask.data[numMasks * i + bestIndex] === 1) {
+                const offset = 4 * i;
+                pixelData[offset] = 0;       // red
+                pixelData[offset + 1] = 114; // green
+                pixelData[offset + 2] = 189; // blue
+                pixelData[offset + 3] = 255; // alpha
+            }
+        }
+        // Draw image data to context
+        context.putImageData(imageData, 0, 0);
+    } else if (type === 'segment_result') {
+        if (data === 'start') {
+            statusLabel.textContent = 'Extracting image embedding...';
+        } else {
+            statusLabel.textContent = 'Embedding extracted!';
+            isEncoded = true;
+        }
+    }
+});
+function decode() {
+    isDecoding = true;
+    worker.postMessage({ type: 'decode', data: lastPoints });
+}
+function clearPointsAndMask() {
+    // Reset state
+    isMultiMaskMode = false;
+    lastPoints = null;
+    // Remove points from previous mask (if any)
+    document.querySelectorAll('.icon').forEach(e => e.remove());
+    // Disable cut button
+    cutButton.disabled = true;
+    // Reset mask canvas
+    maskCanvas.getContext('2d').clearRect(0, 0, maskCanvas.width, maskCanvas.height);
+}
+clearButton.addEventListener('click', clearPointsAndMask);
+resetButton.addEventListener('click', () => {
+    // Update state
+    isEncoded = false;
+    imageDataURI = null;
+    // Indicate to worker that we have reset the state
+    worker.postMessage({ type: 'reset' });
+    // Clear points and mask (if present)
+    clearPointsAndMask();
+    // Update UI
+    cutButton.disabled = true;
+    imageContainer.style.backgroundImage = 'none';
+    uploadButton.style.display = 'flex';
+    statusLabel.textContent = 'Ready';
+});
+function segment(data) {
+    // Update state
+    isEncoded = false;
+    if (!modelReady) {
+        statusLabel.textContent = 'Loading model...';
+    }
+    imageDataURI = data;
+    // Update UI
+    imageContainer.style.backgroundImage = `url(${data})`;
+    uploadButton.style.display = 'none';
+    cutButton.disabled = true;
+    // Instruct worker to segment the image
+    worker.postMessage({ type: 'segment', data });
+}
+// Handle file selection
+fileUpload.addEventListener('change', function (e) {
+    const file = e.target.files[0];
+    if (!file) {
+        return;
+    }
+    const reader = new FileReader();
+    // Set up a callback when the file is loaded
+    reader.onload = e2 => segment(e2.target.result);
+    reader.readAsDataURL(file);
+});
+example.addEventListener('click', (e) => {
+    e.preventDefault();
+    segment(EXAMPLE_URL);
+});
+function addIcon({ point, label }) {
+    const icon = (label === 1 ? star : cross).cloneNode();
+    icon.style.left = `${point[0] * 100}%`;
+    icon.style.top = `${point[1] * 100}%`;
+    imageContainer.appendChild(icon);
+}
+// Attach hover event to image container
+imageContainer.addEventListener('mousedown', e => {
+    if (e.button !== 0 && e.button !== 2) {
+        return; // Ignore other buttons
+    }
+    if (!isEncoded) {
+        return; // Ignore if not encoded yet
+    }
+    if (!isMultiMaskMode) {
+        lastPoints = [];
+        isMultiMaskMode = true;
+        cutButton.disabled = false;
+    }
+    const point = getPoint(e);
+    lastPoints.push(point);
+    // add icon
+    addIcon(point);
+    decode();
+});
+// Clamp a value inside a range [min, max]
+function clamp(x, min = 0, max = 1) {
+    return Math.max(Math.min(x, max), min)
+}
+function getPoint(e) {
+    // Get bounding box
+    const bb = imageContainer.getBoundingClientRect();
+    // Get the mouse coordinates relative to the container
+    const mouseX = clamp((e.clientX - bb.left) / bb.width);
+    const mouseY = clamp((e.clientY - bb.top) / bb.height);
+    return {
+        point: [mouseX, mouseY],
+        label: e.button === 2 // right click
+            ? 0  // negative prompt
+            : 1, // positive prompt
+    }
+}
+// Do not show context menu on right click
+imageContainer.addEventListener('contextmenu', e => {
+    e.preventDefault();
+});
+// Attach hover event to image container
+imageContainer.addEventListener('mousemove', e => {
+    if (!isEncoded || isMultiMaskMode) {
+        // Ignore mousemove events if the image is not encoded yet,
+        // or we are in multi-mask mode
+        return;
+    }
+    lastPoints = [getPoint(e)];
+    if (!isDecoding) {
+        decode(); // Only decode if we are not already decoding
+    }
+});
+// Handle cut button click
+cutButton.addEventListener('click', () => {
+    const [w, h] = [maskCanvas.width, maskCanvas.height];
+    // Get the mask pixel data
+    const maskContext = maskCanvas.getContext('2d');
+    const maskPixelData = maskContext.getImageData(0, 0, w, h);
+    // Load the image
+    const image = new Image();
+    image.crossOrigin = 'anonymous';
+    image.onload = async () => {
+        // Create a new canvas to hold the image
+        const imageCanvas = new OffscreenCanvas(w, h);
+        const imageContext = imageCanvas.getContext('2d');
+        imageContext.drawImage(image, 0, 0, w, h);
+        const imagePixelData = imageContext.getImageData(0, 0, w, h);
+        // Create a new canvas to hold the cut-out
+        const cutCanvas = new OffscreenCanvas(w, h);
+        const cutContext = cutCanvas.getContext('2d');
+        const cutPixelData = cutContext.getImageData(0, 0, w, h);
+        // Copy the image pixel data to the cut canvas
+        for (let i = 3; i < maskPixelData.data.length; i += 4) {
+            if (maskPixelData.data[i] > 0) {
+                for (let j = 0; j < 4; ++j) {
+                    const offset = i - j;
+                    cutPixelData.data[offset] = imagePixelData.data[offset];
+                }
+            }
+        }
+        cutContext.putImageData(cutPixelData, 0, 0);
+        // Download image
+        const link = document.createElement('a');
+        link.download = 'image.png';
+        link.href = URL.createObjectURL(await cutCanvas.convertToBlob());
+        link.click();
+        link.remove();
+    }
+    image.src = imageDataURI;
+});

worker.js ADDED Viewed

	@@ -0,0 +1,109 @@

+import { env, SamModel, AutoProcessor, RawImage, Tensor } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';
+// Since we will download the model from the Hugging Face Hub, we can skip the local model check
+env.allowLocalModels = false;
+// We adopt the singleton pattern to enable lazy-loading of the model and processor.
+export class SegmentAnythingSingleton {
+    static model_id = 'Xenova/slimsam-77-uniform';
+    static model;
+    static processor;
+    static quantized = true;
+    static getInstance() {
+        if (!this.model) {
+            this.model = SamModel.from_pretrained(this.model_id, {
+                quantized: this.quantized,
+            });
+        }
+        if (!this.processor) {
+            this.processor = AutoProcessor.from_pretrained(this.model_id);
+        }
+        return Promise.all([this.model, this.processor]);
+    }
+}
+// State variables
+let image_embeddings = null;
+let image_inputs = null;
+let ready = false;
+self.onmessage = async (e) => {
+    const [model, processor] = await SegmentAnythingSingleton.getInstance();
+    if (!ready) {
+        // Indicate that we are ready to accept requests
+        ready = true;
+        self.postMessage({
+            type: 'ready',
+        });
+    }
+    const { type, data } = e.data;
+    if (type === 'reset') {
+        image_inputs = null;
+        image_embeddings = null;
+    } else if (type === 'segment') {
+        // Indicate that we are starting to segment the image
+        self.postMessage({
+            type: 'segment_result',
+            data: 'start',
+        });
+        // Read the image and recompute image embeddings
+        const image = await RawImage.read(e.data.data);
+        image_inputs = await processor(image);
+        image_embeddings = await model.get_image_embeddings(image_inputs)
+        // Indicate that we have computed the image embeddings, and we are ready to accept decoding requests
+        self.postMessage({
+            type: 'segment_result',
+            data: 'done',
+        });
+    } else if (type === 'decode') {
+        // Prepare inputs for decoding
+        const reshaped = image_inputs.reshaped_input_sizes[0];
+        const points = data.map(x => [x.point[0] * reshaped[1], x.point[1] * reshaped[0]])
+        const labels = data.map(x => BigInt(x.label));
+        const input_points = new Tensor(
+            'float32',
+            points.flat(Infinity),
+            [1, 1, points.length, 2],
+        )
+        const input_labels = new Tensor(
+            'int64',
+            labels.flat(Infinity),
+            [1, 1, labels.length],
+        )
+        // Generate the mask
+        const outputs = await model({
+            ...image_embeddings,
+            input_points,
+            input_labels,
+        })
+        // Post-process the mask
+        const masks = await processor.post_process_masks(
+            outputs.pred_masks,
+            image_inputs.original_sizes,
+            image_inputs.reshaped_input_sizes,
+        );
+        // Send the result back to the main thread
+        self.postMessage({
+            type: 'decode_result',
+            data: {
+                mask: RawImage.fromTensor(masks[0][0]),
+                scores: outputs.iou_scores.data,
+            },
+        });
+    } else {
+        throw new Error(`Unknown message type: ${type}`);
+    }
+}