// Source listing metadata: file size 3,122 bytes, revision c55df02
// Quick script to generate embeddings for existing documents
import fs from 'fs';
/**
 * Request an embedding for each document from the embeddings API and log
 * the outcome of every request.
 *
 * Documents are processed sequentially with a pause between requests so the
 * API is not flooded. A failure on one document is logged and counted, and
 * processing continues with the next one. Nothing is persisted: the
 * embeddings are only measured and reported.
 *
 * Called with no arguments, it processes the built-in sample documents
 * against the local development endpoint.
 *
 * @param {object} [options]
 * @param {Array<{id: number, title: string, content: string}>} [options.documents]
 *   Documents to embed; defaults to the four built-in sample abstracts.
 * @param {string} [options.endpoint='http://localhost:5000/api/embeddings']
 *   URL that receives `POST { input: <content> }` as JSON.
 * @param {number} [options.delayMs=1000]
 *   Pause between consecutive requests, in milliseconds (0 disables it).
 * @param {Function} [options.fetchImpl=globalThis.fetch]
 *   fetch-compatible function; injectable so the script can run offline.
 * @param {Function} [options.log=console.log]
 *   Sink for progress messages.
 * @returns {Promise<{succeeded: number, failed: number}>}
 *   How many documents produced an embedding and how many did not.
 */
async function generateEmbeddings({
  documents,
  endpoint = 'http://localhost:5000/api/embeddings',
  delayMs = 1000,
  fetchImpl = globalThis.fetch,
  log = console.log,
} = {}) {
  // Document contents to generate embeddings for
  const docs = documents ?? [
    {
      id: 1,
      title: "Attention Is All You Need",
      content: "The Transformer, a model architecture eschewing recurrence and instead relying entirely on an attention mechanism to draw global dependencies between input and output. The Transformer allows for significantly more parallelization and can reach a new state of the art in translation quality."
    },
    {
      id: 2,
      title: "GPT-4 Technical Report",
      content: "We report the development of GPT-4, a large-scale, multimodal model which can accept image and text inputs and produce text outputs. While less capable than humans in many real-world scenarios, GPT-4 exhibits human-level performance on various professional and academic benchmarks."
    },
    {
      id: 3,
      title: "Constitutional AI",
      content: "As AI systems become more capable, we would like to enlist their help to supervise other AI systems. We experiment with methods for training a harmless AI assistant through self-improvement, without any human labels identifying harmful outputs."
    },
    {
      id: 4,
      title: "Retrieval-Augmented Generation",
      content: "Large pre-trained language models have been shown to store factual knowledge in their parameters, and achieve state-of-the-art results when fine-tuned on downstream NLP tasks. However, their ability to access and precisely manipulate knowledge is still limited."
    }
  ];

  let succeeded = 0;
  let failed = 0;

  log('Generating embeddings for documents...');

  for (const [index, doc] of docs.entries()) {
    try {
      log(`Processing document ${doc.id}: ${doc.title}`);

      // Generate embedding
      const response = await fetchImpl(endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ input: doc.content })
      });

      if (response.ok) {
        const result = await response.json();

        // The vector is read from data[0].embedding; check the shape up front
        // so a malformed payload yields a readable message, not a TypeError.
        const embedding = result?.data?.[0]?.embedding;
        if (!Array.isArray(embedding)) {
          throw new Error('response did not contain data[0].embedding');
        }

        log(`✅ Generated embedding for ${doc.title} (${embedding.length} dimensions)`);
        succeeded += 1;
        // Note: In a real implementation, you would update the database here
        // For now, just log success
      } else {
        log(`❌ Failed to generate embedding for ${doc.title} (HTTP ${response.status})`);
        failed += 1;
      }
    } catch (error) {
      log(`❌ Error processing ${doc.title}: ${error?.message ?? String(error)}`);
      failed += 1;
    }

    // Small delay to avoid overwhelming the API. It sits outside the try so
    // it also applies after an error, and is skipped after the last document.
    const isLast = index === docs.length - 1;
    if (delayMs > 0 && !isLast) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }

  if (failed === 0) {
    log('✅ Embedding generation completed!');
  } else {
    log(`❌ Embedding generation finished with ${failed} of ${docs.length} documents failing`);
  }

  log('\n🔍 Now you can test vector search with these queries:');
  log('- "attention mechanism transformer architecture"');
  log('- "multimodal language model GPT"');
  log('- "constitutional AI safety alignment"');
  log('- "retrieval augmented generation knowledge"');

  return { succeeded, failed };
}
// Run the function; print any rejection that escapes it rather than leaving
// the promise unhandled.
generateEmbeddings().catch(console.error);