import { pipeline, env } from '@xenova/transformers';
// Specify a custom location for models (defaults to '/models/').
env.localModelPath = '/models/';
// Disable the loading of remote models from the Hugging Face Hub:
env.allowRemoteModels = false;
// cache the model in the .cache directory in the current working directory
env.cacheDir = './.cache';
// Use the Singleton pattern to enable lazy construction of the pipeline.
class SemanticSimilarityPipeline {
    static task = 'feature-extraction'; // follow the model's task
    static model = 'Xenova/multi-qa-MiniLM-L6-cos-v1'; // https://huggingface.co/Xenova/multi-qa-MiniLM-L6-cos-v1
    static instance = null;

    static async getInstance(progress_callback = null) {
        if (this.instance === null) {
            this.instance = pipeline(this.task, this.model, { progress_callback });
        }
        return this.instance;
    }
}
// Dot product of two equal-length vectors. The embeddings below are extracted with
// `normalize: true`, so this dot product is their cosine similarity.
// Note: toFixed() returns the score as a string rounded to 3 decimal places.
function dotProduct(a, b) {
    if (a.length !== b.length) {
        throw new Error('Both arguments must have the same length');
    }
    let result = 0;
    for (let i = 0; i < a.length; i++) {
        result += a[i] * b[i];
    }
    return result.toFixed(3);
}
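// Worked example (hypothetical values, not model output): for the unit vectors
// [0.6, 0.8] and [0.8, 0.6], dotProduct returns '0.960', i.e. a cosine similarity
// of 0.96 (0.6 * 0.8 + 0.8 * 0.6 = 0.96, formatted by toFixed(3)).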
// Listen for messages from the main thread
self.addEventListener('message', async (event) => {
    // Retrieve the similarity pipeline. When called for the first time,
    // this will load the pipeline and save it for future use.
    let extractor = await SemanticSimilarityPipeline.getInstance(x => {
        // We also add a progress callback to the pipeline so that we can track model loading.
        self.postMessage(x);
    });

    /* let extractor = await pipeline('feature-extraction', 'Xenova/multi-qa-MiniLM-L6-cos-v1', {
        progress_callback: x => {
            self.postMessage(x);
        }
    }); */
    // Actually compute the similarity
    let ref_embeddings = await extractor(event.data.refsent, {
        pooling: 'mean',
        normalize: true
    });
    console.log(ref_embeddings);

    let cand1_embeddings = await extractor(event.data.cand1, {
        pooling: 'mean',
        normalize: true
    });
    let cand2_embeddings = await extractor(event.data.cand2, {
        pooling: 'mean',
        normalize: true
    });
    let cand3_embeddings = await extractor(event.data.cand3, {
        pooling: 'mean',
        normalize: true
    });

    let output_1 = dotProduct(ref_embeddings.data, cand1_embeddings.data);
    let output_2 = dotProduct(ref_embeddings.data, cand2_embeddings.data);
    let output_3 = dotProduct(ref_embeddings.data, cand3_embeddings.data);
    // Send the output back to the main thread
    self.postMessage({
        status: 'update_1',
        output: output_1
    });
    self.postMessage({
        status: 'update_2',
        output: output_2
    });
    self.postMessage({
        status: 'update_3',
        output: output_3
    });
    self.postMessage({
        status: 'complete',
    });
});
/**
 * This JavaScript code uses a machine learning model to perform a semantic similarity task on
 * input data. It relies on the "@xenova/transformers" library, a JavaScript port of the Hugging
 * Face Transformers library, and uses the 'Xenova/multi-qa-MiniLM-L6-cos-v1' model from the
 * Hugging Face Hub.
 *
 * Here is a breakdown of the script:
 *
 * 1. **SemanticSimilarityPipeline class**: This is a singleton class, meaning it restricts
 *    instantiation to a single instance. The pipeline for the specified task and model is created
 *    only if an instance does not already exist, via the `getInstance` method. This method takes
 *    an optional progress_callback function, which is called to report the progress of model
 *    loading.
 *
 * 2. **The `message` event listener**: This listens for messages sent from the main thread. The
 *    event object it receives contains the sentences to compare. When a message arrives, the
 *    listener retrieves the singleton instance of the SemanticSimilarityPipeline, performs the
 *    computation, and sends the results back to the main thread using `postMessage` (a sketch of
 *    that main-thread side follows this comment).
 *
 * 3. **The `extractor` calls**: The pipeline instance is called once for the reference sentence
 *    (`event.data.refsent`) and once for each candidate sentence (`cand1`, `cand2`, and `cand3`),
 *    with mean pooling and normalization enabled. Because the embeddings are normalized, the
 *    `dotProduct` of the reference embedding with each candidate embedding is the cosine
 *    similarity score for that pair.
 *
 * 4. **Sending results back to the main thread**: Each score is posted back as it becomes
 *    available ('update_1', 'update_2', 'update_3'), followed by a final 'complete' message.
 */
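/*
 * For illustration only: a minimal sketch of the main-thread code that could drive this worker.
 * The file name ('worker.js') and the example sentences are assumptions, not part of this
 * repository; only the message keys and status values match what the worker above actually uses.
 *
 * // The worker uses ES module imports, so it must be created as a module worker
 * // (or be processed by a bundler).
 * const worker = new Worker('worker.js', { type: 'module' });
 *
 * worker.addEventListener('message', (event) => {
 *     const msg = event.data;
 *     switch (msg.status) {
 *         case 'update_1':
 *         case 'update_2':
 *         case 'update_3':
 *             // Each similarity score arrives as a string formatted to 3 decimal places.
 *             console.log(`${msg.status}: ${msg.output}`);
 *             break;
 *         case 'complete':
 *             console.log('All similarity scores computed.');
 *             break;
 *         default:
 *             // Anything else is a model-loading progress event forwarded from the
 *             // pipeline's progress_callback.
 *             console.log(msg);
 *     }
 * });
 *
 * // Keys must match what the worker reads from event.data above.
 * worker.postMessage({
 *     refsent: 'How many people live in London?',
 *     cand1: 'Around 9 million people live in London.',
 *     cand2: 'London is known for its financial district.',
 *     cand3: 'Paris is the capital of France.',
 * });
 */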