/*
 * Hugging Face Space: bestofaiml/mask-generation (status: Running)
 * Duplicated from Xenova/segment-anything-web
 * Space tabs: App, Files, Community
 *
 * File: mask-generation/worker.js (3.33 kB)
 * Last commit: "Upload 4 files" by Xenova (HF staff), b21b1c5, verified, 10 months ago
 */

	import { env, SamModel, AutoProcessor, RawImage, Tensor } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';

	// Models are downloaded from the Hugging Face Hub, so disable the check for
	// locally hosted model files.
	env.allowLocalModels = false;

	/**
	 * Singleton that lazily loads the segmentation model and its processor.
	 *
	 * The static `model` and `processor` fields hold the promises returned by
	 * `from_pretrained`, so repeated calls to `getInstance()` share a single load
	 * per resource instead of starting a new one each time.
	 */
	export class SegmentAnythingSingleton {
		static model_id = 'Xenova/slimsam-77-uniform';
		static model;
		static processor;
		static quantized = true;

		/**
		 * Start loading the model and processor if that has not happened yet, and
		 * return a promise for both.
		 *
		 * @returns {Promise<Array>} Resolves to `[model, processor]`; rejects if
		 *     either load fails. A failed load is not cached, so the next call
		 *     retries it.
		 */
		static getInstance() {
			// A rejected promise must not stay cached: otherwise one transient
			// failure would make every later call fail without ever retrying.
			const evictOnFailure = (slot, pending) => {
				Promise.resolve(pending).catch(() => {
					// Only clear the slot if it still holds the promise that failed.
					if (this[slot] === pending) {
						this[slot] = undefined;
					}
				});
			};

			if (!this.model) {
				this.model = SamModel.from_pretrained(this.model_id, {
					quantized: this.quantized,
				});
				evictOnFailure('model', this.model);
			}
			if (!this.processor) {
				this.processor = AutoProcessor.from_pretrained(this.model_id);
				evictOnFailure('processor', this.processor);
			}

			return Promise.all([this.model, this.processor]);
		}
	}


	// Worker-level state shared across incoming messages.
	// Becomes true once the 'ready' notification has been posted.
	let ready = false;
	// Processor output for the current image; null while no image is loaded.
	let image_inputs = null;
	// Embeddings computed for the current image; null while no image is loaded.
	let image_embeddings = null;

	/**
	 * Worker message handler.
	 *
	 * Every message first awaits the model and processor; a `{ type: 'ready' }`
	 * message is posted once, after they have loaded for the first time.
	 *
	 * Incoming messages have the shape `{ type, data }`:
	 *  - 'reset':   clears the stored image inputs and embeddings.
	 *  - 'segment': reads the image from `data`, computes its embeddings, and
	 *               posts 'segment_result' with data 'start' and then 'done'.
	 *  - 'decode':  `data` is an array of `{ point: [x, y], label }` prompts;
	 *               posts 'decode_result' with `{ mask, scores }`.
	 *
	 * @param {MessageEvent} e Message from the main thread.
	 * @throws {Error} If `type` is unknown, or if 'decode' arrives while no image
	 *     embeddings are available.
	 */
	self.onmessage = async (e) => {
		const [model, processor] = await SegmentAnythingSingleton.getInstance();
		if (!ready) {
			// Indicate that we are ready to accept requests
			ready = true;
			self.postMessage({
				type: 'ready',
			});
		}

		const { type, data } = e.data;
		if (type === 'reset') {
			image_inputs = null;
			image_embeddings = null;

		} else if (type === 'segment') {
			// Indicate that we are starting to segment the image
			self.postMessage({
				type: 'segment_result',
				data: 'start',
			});

			// Read the image and recompute image embeddings. The results are kept
			// in locals and published together, so a failure (or a decode request
			// handled while we are awaiting) never sees the new image's inputs
			// paired with the previous image's embeddings.
			const image = await RawImage.read(data);
			const inputs = await processor(image);
			const embeddings = await model.get_image_embeddings(inputs);
			image_inputs = inputs;
			image_embeddings = embeddings;

			// Indicate that we have computed the image embeddings, and we are ready to accept decoding requests
			self.postMessage({
				type: 'segment_result',
				data: 'done',
			});

		} else if (type === 'decode') {
			// Decoding needs the state produced by a completed 'segment' request.
			// Fail with a clear message instead of a TypeError on a null dereference.
			if (!image_inputs || !image_embeddings) {
				throw new Error("Cannot decode: no image embeddings available. Send a 'segment' message first.");
			}

			// Prepare inputs for decoding: scale each point by the reshaped input
			// size (presumably points are normalized to [0, 1] and `reshaped` is
			// [height, width] -- confirm against the caller).
			const reshaped = image_inputs.reshaped_input_sizes[0];
			const points = data.map(x => [x.point[0] * reshaped[1], x.point[1] * reshaped[0]]);
			// The labels tensor is int64, so the values are converted to BigInt.
			const labels = data.map(x => BigInt(x.label));

			const input_points = new Tensor(
				'float32',
				points.flat(Infinity),
				[1, 1, points.length, 2],
			);
			const input_labels = new Tensor(
				'int64',
				labels.flat(Infinity),
				[1, 1, labels.length],
			);

			// Generate the mask
			const outputs = await model({
				...image_embeddings,
				input_points,
				input_labels,
			});

			// Post-process the mask
			const masks = await processor.post_process_masks(
				outputs.pred_masks,
				image_inputs.original_sizes,
				image_inputs.reshaped_input_sizes,
			);

			// Send the result back to the main thread
			self.postMessage({
				type: 'decode_result',
				data: {
					mask: RawImage.fromTensor(masks[0][0]),
					scores: outputs.iou_scores.data,
				},
			});

		} else {
			throw new Error(`Unknown message type: ${type}`);
		}
	};