import { pipeline, env } from '@xenova/transformers';

// Specify a custom location for models (defaults to '/models/').
env.localModelPath = '/models/';

// Disable the loading of remote models from the Hugging Face Hub.
env.allowRemoteModels = false;

// Cache the model in the .cache directory in the current working directory.
env.cacheDir = './.cache';

// Use the Singleton pattern to enable lazy construction of the pipeline.
class SemanticSimilarityPipeline {
    static task = 'feature-extraction'; // follows the model's task
    static model = 'Xenova/multi-qa-MiniLM-L6-cos-v1'; // https://huggingface.co/Xenova/multi-qa-MiniLM-L6-cos-v1
    static instance = null;

    static async getInstance(progress_callback = null) {
        if (this.instance === null) {
            this.instance = pipeline(this.task, this.model, { progress_callback });
        }
        return this.instance;
    }
}
// Dot product of two equal-length vectors. Because the embeddings below are
// L2-normalized (normalize: true), this equals their cosine similarity.
function dotProduct(a, b) {
    if (a.length !== b.length) {
        throw new Error('Both arguments must have the same length');
    }
    let result = 0;
    for (let i = 0; i < a.length; i++) {
        result += a[i] * b[i];
    }
    // Format to three decimal places for display (note: toFixed returns a string).
    return result.toFixed(3);
}
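
// Illustrative example (assumed inputs, not part of the original script):
//   dotProduct([1, 0], [0.6, 0.8]) returns '0.600', the cosine similarity of these two
//   unit vectors, formatted to three decimal places as a string.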
// Listen for messages from the main thread.
self.addEventListener('message', async (event) => {
    // Retrieve the similarity pipeline. When called for the first time,
    // this will load the pipeline and save it for future use.
    let extractor = await SemanticSimilarityPipeline.getInstance(x => {
        // Progress callback so the main thread can track model loading.
        self.postMessage(x);
    });

    /* Alternative: construct the pipeline directly, without the singleton wrapper.
    let extractor = await pipeline('feature-extraction', 'Xenova/multi-qa-MiniLM-L6-cos-v1', {
        progress_callback: x => {
            self.postMessage(x);
        }
    }); */
    // Compute mean-pooled, normalized embeddings for the reference and candidate sentences.
    let ref_embeddings = await extractor(event.data.refsent, {
        pooling: 'mean',
        normalize: true
    });
    console.log(ref_embeddings);

    let cand1_embeddings = await extractor(event.data.cand1, {
        pooling: 'mean',
        normalize: true
    });
    let cand2_embeddings = await extractor(event.data.cand2, {
        pooling: 'mean',
        normalize: true
    });
    let cand3_embeddings = await extractor(event.data.cand3, {
        pooling: 'mean',
        normalize: true
    });
    let output_1 = dotProduct(ref_embeddings.data, cand1_embeddings.data);
    let output_2 = dotProduct(ref_embeddings.data, cand2_embeddings.data);
    let output_3 = dotProduct(ref_embeddings.data, cand3_embeddings.data);

    // Send the output back to the main thread.
    self.postMessage({
        status: 'update_1',
        output: output_1
    });
    self.postMessage({
        status: 'update_2',
        output: output_2
    });
    self.postMessage({
        status: 'update_3',
        output: output_3
    });
    self.postMessage({
        status: 'complete',
    });
});
/**
 * This JavaScript code uses a machine learning model to perform a semantic similarity task on some
 * input data. It uses the "@xenova/transformers" library, a JavaScript implementation of the Hugging
 * Face Transformers API, with a specific model from Hugging Face ('Xenova/multi-qa-MiniLM-L6-cos-v1').
 *
 * Here is a breakdown of the script:
 *
 * 1. **SemanticSimilarityPipeline class**: A singleton class, meaning it restricts instantiation to a
 *    single instance. It creates the pipeline for the specified task and model only if an instance
 *    does not already exist, via the `getInstance` method. This method takes an optional
 *    progress_callback function, which is called to report the progress of model loading.
 *
 * 2. **The `message` event listener**: Listens for messages sent from the main thread. The event it
 *    receives carries the sentences to compare (`refsent`, `cand1`, `cand2`, `cand3`). When a message
 *    arrives, the listener retrieves the singleton instance of the SemanticSimilarityPipeline,
 *    performs the computation, and sends the results back to the main thread using `postMessage`.
 *    (A sketch of the corresponding main-thread code follows this comment block.)
 *
 * 3. **The `extractor` calls**: The pipeline instance is called once per sentence, taking the sentence
 *    as its first argument and an options object (`pooling: 'mean'`, `normalize: true`) as its second.
 *    Each call returns a mean-pooled, L2-normalized embedding; the dot product of the reference
 *    embedding with each candidate embedding then gives their cosine similarity.
 *
 * 4. **Sending results back to the main thread**: Each similarity score is posted with a status of
 *    'update_1', 'update_2', or 'update_3', followed by a final 'complete' message once all three
 *    scores have been sent.
 */
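
/*
 * A minimal sketch of the main-thread side this worker expects (an assumption for illustration:
 * the worker file name 'worker.js' and the example sentences are placeholders). The message shape
 * ({ refsent, cand1, cand2, cand3 }) and the status values ('update_1', 'update_2', 'update_3',
 * 'complete') follow the worker code above; any other message is a model-loading progress event
 * forwarded by the progress callback.
 *
 *   const worker = new Worker(new URL('./worker.js', import.meta.url), { type: 'module' });
 *
 *   worker.addEventListener('message', (event) => {
 *     const msg = event.data;
 *     switch (msg.status) {
 *       case 'update_1':
 *       case 'update_2':
 *       case 'update_3':
 *         console.log(`${msg.status}: similarity = ${msg.output}`);
 *         break;
 *       case 'complete':
 *         console.log('All similarities computed.');
 *         break;
 *       default:
 *         // Model-loading progress events from the pipeline's progress callback.
 *         console.log(msg);
 *     }
 *   });
 *
 *   worker.postMessage({
 *     refsent: 'How do I bake bread at home?',
 *     cand1: 'What is the recipe for homemade bread?',
 *     cand2: 'How do I change a car tyre?',
 *     cand3: 'Baking bread in a home oven'
 *   });
 */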