Spaces:
Sleeping
Sleeping
import pinecone | |
from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
import os | |
import uuid | |
from pymongo import MongoClient | |
from dotenv import load_dotenv | |
# Load variables through python-dotenv before any setting is read below.
load_dotenv()

# Settings taken from the environment; each is None when its variable is unset.
FLASH_API = os.environ.get("FLASH_API")
PINECONE_API = os.environ.get("PINECONE_API")
PINECONE_INDEX = os.environ.get("PINECONE_INDEX")
MONGO_URI = os.environ.get("MONGO_URI")
DB_NAME = os.environ.get("DB_NAME")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")

# Embedding client, authenticated with the FLASH_API key.
google_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=FLASH_API,
)

# Pinecone client handle.
pc = pinecone.Pinecone(api_key=PINECONE_API)

# MongoDB client, database, and the collection used for document lookups.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]

# Handle to the Pinecone index named by PINECONE_INDEX.
# NOTE: the index is expected to already exist in the Pinecone dashboard.
index = pc.Index(PINECONE_INDEX)
def create_embedding(object_url, tags, categories):
    """Embed a stored document's description and index it in Pinecone.

    Looks up the MongoDB document whose ``object_url`` field matches, embeds
    its ``description`` field, upserts the vector to Pinecone under a freshly
    generated UUID, and records the outcome back on the MongoDB document.

    Args:
        object_url: Value matched against the ``object_url`` field in MongoDB;
            also stored as ``url`` in the vector metadata.
        tags: Iterable of strings, stored comma-joined as ``tags`` metadata.
        categories: Iterable of strings, stored comma-joined as
            ``categories`` metadata.

    Returns:
        True if the vector was upserted and the document was updated with its
        ``pinecone_id``; False if no matching document exists or any step
        failed.
    """
    # Bound before the try block so the failure handler can tell whether a
    # document was ever fetched (find_one itself may raise).
    document = None
    try:
        document = collection.find_one({'object_url': object_url})
        if document is None:
            # Nothing to embed, and no record on which to store a status.
            print(f"Error occurred: no document found for object_url {object_url}")
            return False

        content = document.get("description")
        file_type = document.get("type")
        # ObjectId is converted to a string so it can be stored as metadata.
        mongo_id = str(document.get('_id'))

        # Generate the embedding
        embedding = google_embeddings.embed_query(content)

        # Generate a unique ID for Pinecone
        pinecone_id = str(uuid.uuid4())

        # Prepare the vector with metadata
        vectors = [{
            'id': pinecone_id,
            'values': embedding,
            'metadata': {
                'description': content,
                "url": object_url,
                "tag": file_type,
                "mongo_id": mongo_id,  # links the vector back to MongoDB
                "tags": ','.join(tags),
                "categories": ','.join(categories),
            },
        }]

        # Upsert the vector to Pinecone
        index.upsert(vectors)
        print(f"Inserted: {object_url} in Pinecone with MongoDB ID in metadata")

        # Update MongoDB document with Pinecone ID and success status
        collection.update_one(
            {"_id": document["_id"]},
            {"$set": {
                "pinecone_id": pinecone_id,
                "successfully_embedding_created": True,
            }},
        )
        return True
    except Exception as e:
        print(f"Error occurred: {e}")
        # Record the failure only when a document was actually fetched;
        # otherwise there is no _id to update.
        if document is not None:
            try:
                collection.update_one(
                    {"_id": document["_id"]},
                    {"$set": {
                        "successfully_embedding_created": False,
                    }},
                )
            except Exception as update_error:
                # Best effort: a failed status write must not mask the
                # original error or break the boolean return contract.
                print(f"Error occurred: could not record failure status: {update_error}")
        return False