ctp-slack-bot / src /ctp_slack_bot /services /vectorization_service.py
LiKenun's picture
Refactor #5
a1a6d79
from loguru import logger
from pydantic import ConfigDict
from typing import Self, Sequence
from ctp_slack_bot.core import ApplicationComponentBase, Settings
from ctp_slack_bot.models import Chunk, VectorizedChunk
from .embeddings_model_service import EmbeddingsModelService
class VectorizationService(ApplicationComponentBase):
    """
    Service for vectorizing chunks of text data.

    Sends the text of each chunk to the embeddings model service and pairs
    every chunk with the embedding returned for it.
    """

    # NOTE(review): frozen=True asks pydantic to reject attribute assignment
    # after construction; this presumes ApplicationComponentBase is a pydantic
    # model — confirm against its definition.
    model_config = ConfigDict(frozen=True)

    settings: Settings
    embeddings_model_service: EmbeddingsModelService

    async def vectorize(self: Self, chunks: Sequence[Chunk]) -> Sequence[VectorizedChunk]:
        """
        Build a vectorized counterpart for each of the given chunks.

        Args:
            chunks: The chunks whose text should be embedded.

        Returns:
            A tuple of VectorizedChunk objects, one per input chunk and in the
            same order, each carrying the chunk's text, parent_id, chunk_id and
            metadata together with its embedding. An empty input yields an
            empty tuple without contacting the embeddings model service.

        Raises:
            ValueError: If the number of embeddings returned does not match
                the number of chunks supplied.
        """
        # Nothing to embed: skip the round trip to the embeddings service.
        if not chunks:
            return ()

        texts = [chunk.text for chunk in chunks]
        embeddings = await self.embeddings_model_service.get_embeddings(texts)

        # Embeddings are matched to chunks by position (assumes the service
        # preserves input order — TODO confirm). strict=True turns a length
        # mismatch into an error instead of silently dropping chunks.
        return tuple(
            VectorizedChunk(
                text=chunk.text,
                parent_id=chunk.parent_id,
                chunk_id=chunk.chunk_id,
                metadata=chunk.metadata,
                embedding=embedding,
            )
            for chunk, embedding in zip(chunks, embeddings, strict=True)
        )

    @property
    def name(self: Self) -> str:
        """Return the fixed identifier string of this component."""
        return "vectorization_service"