Spaces:
Sleeping
Sleeping
""" | |
utils.py | |
""" | |
# Standard imports | |
import os | |
from typing import List | |
# Third party imports | |
import numpy as np | |
from openai import OpenAI | |
# Module-level OpenAI client shared by the helpers in this file. The API key
# is read from the OPENAI_API_KEY environment variable (None if it is unset).
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Maximum tokens for text-embedding-3-large.
# We don't have access to the tokenizer for text-embedding-3-large, so
# truncation simply assumes 1 character = 1 token.
MAX_TOKENS = 8191
def get_embeddings(
    texts: List[str], model: str = "text-embedding-3-large"
) -> np.ndarray:
    """
    Generate embeddings for a list of texts using the OpenAI API synchronously.

    Each text is cut to its first ``MAX_TOKENS`` characters before being sent;
    this is a character-based stand-in for token-based truncation.

    Args:
        texts: List of strings to embed.
        model: OpenAI embedding model to use (default: text-embedding-3-large).

    Returns:
        A NumPy array built from the embeddings in the API response, one row
        per response entry. When ``texts`` is empty, an empty array is
        returned and no API call is made.

    Raises:
        Exception: If the OpenAI API call fails (the client's error
            propagates unchanged).
    """
    # Nothing to embed: skip the network round-trip entirely. The value
    # matches what the conversion below yields for an empty response.
    if not texts:
        return np.array([])

    # Truncate texts to the max token limit (approximated in characters).
    truncated_texts = [text[:MAX_TOKENS] for text in texts]

    # Make the API call; all texts are sent in a single request.
    response = client.embeddings.create(input=truncated_texts, model=model)

    # Extract the embedding vectors from the response.
    return np.array([data.embedding for data in response.data])