Bedrock Titan Text Embeddings v2

This repository contains the MTEB scores and usage examples for Bedrock Titan Text Embeddings v2. You can use the embedding model either via the Bedrock InvokeModel API or via Bedrock's batch jobs. For RAG use cases, we recommend the former for embedding queries during search (latency-optimized) and the latter for indexing the corpus (throughput-optimized).

Using Bedrock's InvokeModel API

import json
import boto3
class TitanEmbeddings:
    """Callable wrapper that requests text embeddings through Bedrock's InvokeModel API.

    An instance holds a ``bedrock-runtime`` client and a model id; calling the
    instance sends one text to that model and returns its embedding.
    """

    # Passed as the ``accept`` and ``contentType`` arguments of ``invoke_model``.
    accept = "application/json"
    content_type = "application/json"

    def __init__(self, model_id="amazon.titan-embed-text-v2:0"):
        """Remember the model id and create the Bedrock runtime client.

        Args:
            model_id (str): Identifier of the embedding model to invoke.
        """
        self.model_id = model_id
        self.bedrock = boto3.client(service_name="bedrock-runtime")

    def __call__(self, text, dimensions, normalize=True):
        """Embed a single text with the configured model.

        Args:
            text (str): Text to embed.
            dimensions (int): Number of output dimensions.
            normalize (bool): Whether to return the normalized embedding or not.

        Returns:
            List[float]: The value stored under ``"embedding"`` in the
            JSON response body.
        """
        # Serialize the request first; key order is kept stable on purpose.
        request_json = json.dumps(
            {"inputText": text, "dimensions": dimensions, "normalize": normalize}
        )
        result = self.bedrock.invoke_model(
            modelId=self.model_id,
            contentType=self.content_type,
            accept=self.accept,
            body=request_json,
        )
        # The response body is a stream-like object; read it fully, then decode.
        raw_body = result.get("body").read()
        decoded = json.loads(raw_body)
        return decoded["embedding"]

if __name__ == "__main__":
    # Entrypoint for Amazon Titan Embeddings V2 - Text example:
    # embed one sample query and print the text plus the first ten values.
    input_text = "What are the different services that you offer?"

    embedder = TitanEmbeddings(model_id="amazon.titan-embed-text-v2:0")
    embedding = embedder(input_text, dimensions=1024, normalize=True)

    # The `=` specifier prints the expression text followed by its repr.
    print(f"{input_text=}")
    print(f"{embedding[:10]=}")

Using Bedrock's batch jobs

import requests
from aws_requests_auth.boto_utils import BotoAWSRequestsAuth

# Region whose Bedrock endpoint receives the batch-job request.
region = "us-east-1"
base_uri = f"bedrock.{region}.amazonaws.com"
batch_job_uri = f"https://{base_uri}/model-invocation-job/"

# For details on how to set up an IAM role for batch inference, see
# https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference-permissions.html
role_arn = "arn:aws:iam::111122223333:role/my-batch-inference-role"

# Job definition: JSONL input location in S3, the embedding model to run,
# the S3 location for results, and the role the job runs under.
payload = {
    "inputDataConfig": {
        "s3InputDataConfig": {
            "s3Uri": "s3://my-input-bucket/batch-input/",
            "s3InputFormat": "JSONL",
        }
    },
    "jobName": "embeddings-v2-batch-job",
    "modelId": "amazon.titan-embed-text-v2:0",
    "outputDataConfig": {
        "s3OutputDataConfig": {
            "s3Uri": "s3://my-output-bucket/batch-output/",
        }
    },
    "roleArn": role_arn,
}

# Auth handler for `requests`, configured for the Bedrock host, region and service.
request_auth = BotoAWSRequestsAuth(
    aws_host=base_uri,
    aws_region=region,
    aws_service="bedrock",
)

# Without a timeout `requests` may wait forever on an unresponsive endpoint;
# bound the wait so the script fails fast instead of hanging.
request_timeout_seconds = 30

response = requests.request(
    "POST",
    batch_job_uri,
    json=payload,
    auth=request_auth,
    timeout=request_timeout_seconds,
)
# Print the decoded JSON reply (job details on success, error payload otherwise).
print(response.json())
Downloads last month
3,132
Inference Examples
Inference API (serverless) has been turned off for this model.

Evaluation results