File size: 3,122 Bytes
c55df02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Quick script to generate embeddings for existing documents
import fs from 'fs';

/**
 * Requests an embedding for each document from an embeddings HTTP endpoint
 * and logs the outcome of every request.
 *
 * Documents are processed one at a time, with a pause between consecutive
 * requests. A failure for one document (network error, non-2xx response, or a
 * response without `data[0].embedding`) is logged and counted; it does not stop
 * the remaining documents from being processed. Embeddings are only measured
 * and logged here — nothing is persisted.
 *
 * @param {object} [options]
 * @param {Array<{id: number, title: string, content: string}>} [options.documents]
 *   Documents to embed. Defaults to the built-in sample paper abstracts.
 * @param {string} [options.endpoint='http://localhost:5000/api/embeddings']
 *   URL that accepts `POST { input: string }` and answers with JSON shaped
 *   like `{ data: [{ embedding: [...] }] }`.
 * @param {number} [options.delayMs=1000]
 *   Pause between consecutive requests, in milliseconds. Values <= 0 disable it.
 * @returns {Promise<{succeeded: number, failed: number}>} Per-run counts.
 */
async function generateEmbeddings({
  documents,
  endpoint = 'http://localhost:5000/api/embeddings',
  delayMs = 1000,
} = {}) {
  // Sample documents used when the caller does not supply its own list.
  const docs = documents ?? [
    {
      id: 1,
      title: "Attention Is All You Need",
      content: "The Transformer, a model architecture eschewing recurrence and instead relying entirely on an attention mechanism to draw global dependencies between input and output. The Transformer allows for significantly more parallelization and can reach a new state of the art in translation quality."
    },
    {
      id: 2,
      title: "GPT-4 Technical Report",
      content: "We report the development of GPT-4, a large-scale, multimodal model which can accept image and text inputs and produce text outputs. While less capable than humans in many real-world scenarios, GPT-4 exhibits human-level performance on various professional and academic benchmarks."
    },
    {
      id: 3,
      title: "Constitutional AI",
      content: "As AI systems become more capable, we would like to enlist their help to supervise other AI systems. We experiment with methods for training a harmless AI assistant through self-improvement, without any human labels identifying harmful outputs."
    },
    {
      id: 4,
      title: "Retrieval-Augmented Generation",
      content: "Large pre-trained language models have been shown to store factual knowledge in their parameters, and achieve state-of-the-art results when fine-tuned on downstream NLP tasks. However, their ability to access and precisely manipulate knowledge is still limited."
    }
  ];

  console.log('Generating embeddings for documents...');

  let succeeded = 0;
  let failed = 0;

  for (const [index, doc] of docs.entries()) {
    try {
      console.log(`Processing document ${doc.id}: ${doc.title}`);

      // Generate embedding
      const response = await fetch(endpoint, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ input: doc.content }),
      });

      if (response.ok) {
        const result = await response.json();
        const embedding = result?.data?.[0]?.embedding;
        if (!Array.isArray(embedding)) {
          // Surface a readable message instead of an opaque TypeError.
          throw new Error('response did not contain data[0].embedding');
        }
        console.log(`✅ Generated embedding for ${doc.title} (${embedding.length} dimensions)`);
        succeeded += 1;

        // Note: In a real implementation, you would update the database here
        // For now, just log success
      } else {
        failed += 1;
        console.error(`❌ Failed to generate embedding for ${doc.title} (HTTP ${response.status})`);
      }
    } catch (error) {
      failed += 1;
      console.error(`❌ Error processing ${doc.title}: ${error.message}`);
    }

    // Throttle between requests to avoid overwhelming the API. This runs
    // after failures too, and is skipped once the last document is done.
    const isLast = index === docs.length - 1;
    if (!isLast && delayMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }

  // Only claim success when every document actually produced an embedding.
  if (failed === 0) {
    console.log('✅ Embedding generation completed!');
  } else {
    console.error(`❌ Embedding generation finished with ${failed} failure(s) (${succeeded} succeeded)`);
  }
  console.log('\n🔍 Now you can test vector search with these queries:');
  console.log('- "attention mechanism transformer architecture"');
  console.log('- "multimodal language model GPT"');
  console.log('- "constitutional AI safety alignment"');
  console.log('- "retrieval augmented generation knowledge"');

  return { succeeded, failed };
}

// Run the function. If it rejects unexpectedly, report the error and mark the
// process as failed so shells and CI do not see a zero exit status.
generateEmbeddings().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});