import os
from typing import Sequence, Union

import numpy as np
from google.cloud import aiplatform
# TextEmbeddingModel lives in the `vertexai` namespace of the Vertex AI SDK
# (installed by the same google-cloud-aiplatform distribution); it is not an
# attribute of `google.cloud.aiplatform`.
from vertexai.language_models import TextEmbeddingModel

# --- Configuration ---
PROJECT_ID = "your-gcp-project-id"  # Replace with your Google Cloud project ID
REGION = "your-gcp-region"  # Replace with your Google Cloud region (e.g., "us-central1")
# NOTE(review): the class name suggests the "text-embedding-004" model; confirm
# that this identifier is one the Vertex AI service actually publishes.
EMBEDDING_MODEL_NAME = "textembedding-gecko@004"  # Or the latest version

# --- Initialize Vertex AI ---
# Runs at import time so that every model loaded below targets this
# project and region.
aiplatform.init(project=PROJECT_ID, location=REGION)


class GoogleTextEmbedding004:
    """Thin wrapper that turns texts into embedding vectors via Vertex AI."""

    def __init__(self, model_name=EMBEDDING_MODEL_NAME):
        """
        Load the pretrained text embedding model.

        Args:
            model_name (str): Identifier passed to
                ``TextEmbeddingModel.from_pretrained``. Defaults to
                ``EMBEDDING_MODEL_NAME``.
        """
        self.model_name = model_name
        self.embedding_model = TextEmbeddingModel.from_pretrained(self.model_name)
        print(f"[INFO] Loaded Google Text Embedding Model: {self.model_name}")

    def encode(self, texts: Union[str, Sequence[str]], batch_size: int = 250) -> np.ndarray:
        """
        Generates embeddings for the given texts using the Google Text Embedding API.

        Args:
            texts (str or sequence of str): The strings to embed. A single
                string is treated as a one-element list instead of being
                iterated character by character.
            batch_size (int): Maximum number of texts sent per API request.
                The default of 250 is assumed to match the service's
                per-request input cap -- confirm for your model and region.

        Returns:
            numpy.ndarray: A 2D numpy array where each row represents the
            embedding for the corresponding input text. For empty input an
            array of shape ``(0, 0)`` is returned and no request is made.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        # A bare string is itself iterable; wrap it so it is embedded as one text.
        items = [texts] if isinstance(texts, str) else list(texts)

        # Nothing to embed: skip the remote call and keep the result 2D.
        if not items:
            return np.empty((0, 0))

        vectors = []
        # Each text is embedded independently, so the per-batch results can
        # simply be concatenated in input order.
        for start in range(0, len(items), batch_size):
            response = self.embedding_model.get_embeddings(items[start:start + batch_size])
            vectors.extend(embedding.values for embedding in response)

        return np.array(vectors)


if __name__ == "__main__":
    # Example Usage
    google_embedder = GoogleTextEmbedding004()
    sentences = [
        "This is a sample sentence for embedding.",
        "Another example text to generate a vector for.",
        "Google's powerful language models.",
    ]
    embeddings = google_embedder.encode(sentences)
    print("Embeddings shape:", embeddings.shape)
    print("Embeddings (first 2 vectors):\n", embeddings[:2])