import { pipeline, env } from '@xenova/transformers';
// Specify a custom location for models (defaults to '/models/').
env.localModelPath = '/models/';
// Disable the loading of remote models from the Hugging Face Hub:
env.allowRemoteModels = false;
// cache the model in the .cache directory in the current working directory
env.cacheDir = './.cache';
// Use the Singleton pattern to enable lazy construction of the pipeline.
class SemanticSimilarityPipeline {
    static task = 'feature-extraction'; // follow the model's task
    static model = 'Xenova/multi-qa-MiniLM-L6-cos-v1'; // https://huggingface.co/Xenova/multi-qa-MiniLM-L6-cos-v1
    static instance = null;

    static async getInstance(progress_callback = null) {
        if (this.instance === null) {
            this.instance = pipeline(this.task, this.model, { progress_callback });
        }
        return this.instance;
    }
}
// Dot product of two equal-length vectors. The embeddings below are extracted with
// `normalize: true`, so this dot product is their cosine similarity.
// Note: toFixed() returns the score as a string rounded to 3 decimal places.
function dotProduct(a, b) {
    if (a.length !== b.length) {
        throw new Error('Both arguments must have the same length');
    }
    let result = 0;
    for (let i = 0; i < a.length; i++) {
        result += a[i] * b[i];
    }
    return result.toFixed(3);
}
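// Worked example (hypothetical values, not model output): for the unit vectors
// [0.6, 0.8] and [0.8, 0.6], dotProduct returns '0.960', i.e. a cosine similarity
// of 0.96 (0.6 * 0.8 + 0.8 * 0.6 = 0.96, formatted by toFixed(3)).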
// Listen for messages from the main thread
self.addEventListener('message', async (event) => {
    // Retrieve the similarity pipeline. When called for the first time,
    // this will load the pipeline and save it for future use.
    let extractor = await SemanticSimilarityPipeline.getInstance(x => {
        // We also add a progress callback to the pipeline so that we can track model loading.
        self.postMessage(x);
    });

    /* let extractor = await pipeline('feature-extraction', 'Xenova/multi-qa-MiniLM-L6-cos-v1', {
        progress_callback: x => {
            self.postMessage(x);
        }
    }); */
    // Actually compute the similarity
    let ref_embeddings = await extractor(event.data.refsent, {
        pooling: 'mean',
        normalize: true
    });
    console.log(ref_embeddings);

    let cand1_embeddings = await extractor(event.data.cand1, {
        pooling: 'mean',
        normalize: true
    });
    let cand2_embeddings = await extractor(event.data.cand2, {
        pooling: 'mean',
        normalize: true
    });
    let cand3_embeddings = await extractor(event.data.cand3, {
        pooling: 'mean',
        normalize: true
    });

    let output_1 = dotProduct(ref_embeddings.data, cand1_embeddings.data);
    let output_2 = dotProduct(ref_embeddings.data, cand2_embeddings.data);
    let output_3 = dotProduct(ref_embeddings.data, cand3_embeddings.data);
    // Send the output back to the main thread
    self.postMessage({
        status: 'update_1',
        output: output_1
    });
    self.postMessage({
        status: 'update_2',
        output: output_2
    });
    self.postMessage({
        status: 'update_3',
        output: output_3
    });
    self.postMessage({
        status: 'complete',
    });
});
/**
 * This JavaScript code uses a machine learning model to perform a semantic similarity task on
 * input data. It relies on the "@xenova/transformers" library, a JavaScript port of the Hugging
 * Face Transformers library, and uses the 'Xenova/multi-qa-MiniLM-L6-cos-v1' model from the
 * Hugging Face Hub.
 *
 * Here is a breakdown of the script:
 *
 * 1. **SemanticSimilarityPipeline class**: This is a singleton class, meaning it restricts
 *    instantiation to a single instance. The pipeline for the specified task and model is created
 *    only if an instance does not already exist, via the `getInstance` method. This method takes
 *    an optional progress_callback function, which is called to report the progress of model
 *    loading.
 *
 * 2. **The `message` event listener**: This listens for messages sent from the main thread. The
 *    event object it receives contains the sentences to compare. When a message arrives, the
 *    listener retrieves the singleton instance of the SemanticSimilarityPipeline, performs the
 *    computation, and sends the results back to the main thread using `postMessage` (a sketch of
 *    that main-thread side follows this comment).
 *
 * 3. **The `extractor` calls**: The pipeline instance is called once for the reference sentence
 *    (`event.data.refsent`) and once for each candidate sentence (`cand1`, `cand2`, and `cand3`),
 *    with mean pooling and normalization enabled. Because the embeddings are normalized, the
 *    `dotProduct` of the reference embedding with each candidate embedding is the cosine
 *    similarity score for that pair.
 *
 * 4. **Sending results back to the main thread**: Each score is posted back as it becomes
 *    available ('update_1', 'update_2', 'update_3'), followed by a final 'complete' message.
 */
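/*
 * For illustration only: a minimal sketch of the main-thread code that could drive this worker.
 * The file name ('worker.js') and the example sentences are assumptions, not part of this
 * repository; only the message keys and status values match what the worker above actually uses.
 *
 * // The worker uses ES module imports, so it must be created as a module worker
 * // (or be processed by a bundler).
 * const worker = new Worker('worker.js', { type: 'module' });
 *
 * worker.addEventListener('message', (event) => {
 *     const msg = event.data;
 *     switch (msg.status) {
 *         case 'update_1':
 *         case 'update_2':
 *         case 'update_3':
 *             // Each similarity score arrives as a string formatted to 3 decimal places.
 *             console.log(`${msg.status}: ${msg.output}`);
 *             break;
 *         case 'complete':
 *             console.log('All similarity scores computed.');
 *             break;
 *         default:
 *             // Anything else is a model-loading progress event forwarded from the
 *             // pipeline's progress_callback.
 *             console.log(msg);
 *     }
 * });
 *
 * // Keys must match what the worker reads from event.data above.
 * worker.postMessage({
 *     refsent: 'How many people live in London?',
 *     cand1: 'Around 9 million people live in London.',
 *     cand2: 'London is known for its financial district.',
 *     cand3: 'Paris is the capital of France.',
 * });
 */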