File size: 2,606 Bytes
fbab9a5
 
 
 
 
70288fd
 
 
 
 
fbab9a5
 
70288fd
fbab9a5
 
 
 
70288fd
fbab9a5
 
 
 
 
 
 
 
 
 
70288fd
 
fbab9a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import pinecone
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os
import uuid
from pymongo import MongoClient
from dotenv import load_dotenv
# Module-level setup: load configuration from the environment and build the
# embedding, Pinecone and MongoDB clients used by create_embedding().
# load_dotenv() runs first so the os.getenv() calls below can see values
# supplied through a .env file.
load_dotenv()
# os.getenv returns None for any variable that is not set; nothing here
# validates the values before they are handed to the clients.
FLASH_API = os.getenv("FLASH_API")
PINECONE_API=os.getenv("PINECONE_API")
PINECONE_INDEX=os.getenv("PINECONE_INDEX")
google_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",  # Embedding model identifier passed to the client
    google_api_key=FLASH_API  # Google API key read from FLASH_API
)

# Pinecone client, authenticated with the key read from PINECONE_API.
pc = pinecone.Pinecone(
    api_key=PINECONE_API  # Pinecone API key from the environment
)

# MongoDB connection settings, also taken from the environment.
MONGO_URI = os.getenv("MONGO_URI")
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")

# Collection handle that create_embedding() reads documents from and writes
# status fields back to.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]
# Handle to the Pinecone index named by PINECONE_INDEX (make sure it exists in
# your Pinecone dashboard).

index = pc.Index(PINECONE_INDEX)


def create_embedding(object_url, tags, categories):
    """Embed a MongoDB document's description and upsert it into Pinecone.

    Looks up the document whose ``object_url`` field matches, embeds its
    ``description`` with ``google_embeddings``, upserts the vector (plus
    metadata) into ``index`` under a fresh UUID, and records the outcome on
    the MongoDB document (``pinecone_id`` and
    ``successfully_embedding_created``).

    Args:
        object_url: Value of the ``object_url`` field identifying the document.
        tags: Iterable of tag strings; stored comma-joined in the vector
            metadata. ``None`` is treated as "no tags".
        categories: Iterable of category strings; stored comma-joined in the
            vector metadata. ``None`` is treated as "no categories".

    Returns:
        True if the vector was upserted and the document updated; False if
        the document does not exist or any step failed. Errors are printed,
        never raised to the caller.
    """
    # Bound before the try so the error handler can tell whether the lookup
    # produced a document it is able to mark as failed.
    document = None
    try:
        document = collection.find_one({'object_url': object_url})
        if document is None:
            # find_one returns None when nothing matches; there is no
            # document to embed or to flag, so report and stop here.
            print(f"Error occurred: no document found for object_url {object_url}")
            return False

        content = document.get("description")
        if not content:
            # Fail with a clear message instead of sending an empty/None
            # payload to the embedding service.
            raise ValueError(f"document for {object_url} has no description")
        file_type = document.get("type")
        mongo_id = str(document.get('_id'))  # Convert ObjectId to string for storage in metadata

        # Generate the embedding
        embedding = google_embeddings.embed_query(content)

        # Generate a unique ID for Pinecone
        pinecone_id = str(uuid.uuid4())

        # Prepare the vector with metadata
        vectors = [{
            'id': pinecone_id,
            'values': embedding,
            'metadata': {
                'description': content,
                "url": object_url,
                "tag": file_type,
                "mongo_id": mongo_id,  # Store MongoDB ID in metadata
                "tags": ','.join(tags or []),
                "categories": ','.join(categories or []),
            }
        }]

        # Upsert the vector to Pinecone
        index.upsert(vectors)
        print(f"Inserted: {object_url} in Pinecone with MongoDB ID in metadata")

        # Update MongoDB document with Pinecone ID and success status
        collection.update_one(
            {"_id": document["_id"]},
            {"$set": {
                "pinecone_id": pinecone_id,
                "successfully_embedding_created": True
            }}
        )

        return True
    except Exception as e:
        print(f"Error occurred: {e}")

        if document is None:
            # The lookup itself failed, so there is nothing to update.
            return False

        # Best-effort: record the failure on the document. A second error
        # here must not escape and break the boolean return contract.
        try:
            collection.update_one(
                {"_id": document["_id"]},
                {"$set": {
                    "successfully_embedding_created": False
                }}
            )
        except Exception as update_error:
            print(f"Error occurred while recording failure for {object_url}: {update_error}")

        return False