Spaces:
Sleeping
Sleeping
| import pinecone | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
| import os | |
| import uuid | |
| from pymongo import MongoClient | |
| from dotenv import load_dotenv | |
# Load variables from a local .env file into the process environment before
# any of the lookups below run.
load_dotenv()

# ---------------------------------------------------------------------------
# Configuration read from the environment. Each value is None when the
# corresponding variable is not set.
# ---------------------------------------------------------------------------
FLASH_API = os.environ.get("FLASH_API")
PINECONE_API = os.environ.get("PINECONE_API")
PINECONE_INDEX = os.environ.get("PINECONE_INDEX")
MONGO_URI = os.environ.get("MONGO_URI")
DB_NAME = os.environ.get("DB_NAME")
COLLECTION_NAME = os.environ.get("COLLECTION_NAME")

# Embedding client; FLASH_API is supplied as the Google API key.
google_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=FLASH_API,
)

# Pinecone client authenticated with PINECONE_API.
pc = pinecone.Pinecone(api_key=PINECONE_API)

# MongoDB handles: client -> database -> collection.
mongo_client = MongoClient(MONGO_URI)
db = mongo_client[DB_NAME]
collection = db[COLLECTION_NAME]

# Handle to the Pinecone index named by PINECONE_INDEX. The index is expected
# to already exist in the Pinecone project; nothing here creates it.
index = pc.Index(PINECONE_INDEX)
def create_embedding(object_url, tags, categories):
    """Embed a stored document's description and upsert it into Pinecone.

    Finds the MongoDB document whose ``object_url`` field equals
    *object_url*, embeds its ``description`` with ``google_embeddings``,
    upserts a single vector (plus metadata) into ``index`` under a fresh
    UUID, and records the outcome on the MongoDB document via the
    ``pinecone_id`` and ``successfully_embedding_created`` fields.

    Args:
        object_url: Value of the ``object_url`` field identifying the
            document to embed.
        tags: Iterable of tag strings, stored comma-joined in the vector
            metadata. ``None`` is treated as "no tags".
        categories: Iterable of category strings, stored comma-joined in the
            vector metadata. ``None`` is treated as "no categories".

    Returns:
        ``True`` when the vector was upserted and the document was updated;
        ``False`` on any failure, including when no matching document
        exists or the document has no description.

    Raises:
        Exception: Only if writing the failure status back to MongoDB itself
            fails; that error is not swallowed.
    """
    # Bound before the try so the handler can tell "lookup failed or found
    # nothing" apart from "a later step failed on a real document".
    document = None
    try:
        document = collection.find_one({'object_url': object_url})
        if document is None:
            raise LookupError(f"no document found for object_url {object_url!r}")

        content = document.get("description")
        if not content:
            # Fail before calling the embedding API with nothing to embed.
            raise ValueError(f"document {document.get('_id')} has no description")

        file_type = document.get("type")
        # ObjectId is converted to a string so it can be stored as metadata.
        mongo_id = str(document.get('_id'))

        # Join before the embedding call so bad tag/category input fails
        # without spending an API request.
        joined_tags = ','.join(tags or [])
        joined_categories = ','.join(categories or [])

        embedding = google_embeddings.embed_query(content)

        # Each call stores the vector under a brand-new Pinecone ID.
        pinecone_id = str(uuid.uuid4())

        metadata = {
            'description': content,
            "url": object_url,
            "tag": file_type,
            "mongo_id": mongo_id,
            "tags": joined_tags,
            "categories": joined_categories,
        }
        # Pinecone does not accept null metadata values; omit such keys
        # rather than letting one missing field fail the whole upsert.
        metadata = {key: value for key, value in metadata.items() if value is not None}

        vectors = [{
            'id': pinecone_id,
            'values': embedding,
            'metadata': metadata,
        }]

        index.upsert(vectors)
        print(f"Inserted: {object_url} in Pinecone with MongoDB ID in metadata")

        # Record the Pinecone ID and success flag on the source document.
        collection.update_one(
            {"_id": document["_id"]},
            {"$set": {
                "pinecone_id": pinecone_id,
                "successfully_embedding_created": True,
            }},
        )
        return True
    except Exception as e:
        print(f"Error occurred: {e}")
        # Only flag the failure when there is a document to flag; when the
        # lookup failed or matched nothing there is no _id to update.
        if document is not None:
            collection.update_one(
                {"_id": document["_id"]},
                {"$set": {
                    "successfully_embedding_created": False,
                }},
            )
        return False