File size: 1,194 Bytes
c6a2a56
bb7c9a3
9fd6e20
fddb754
bb7c9a3
9fd6e20
a1a6d79
fddb754
bb7c9a3
 
fddb754
 
 
 
bb7c9a3
 
c6a2a56
9fd6e20
c6a2a56
f7e11c1
 
9fd6e20
 
 
 
 
 
 
 
 
bb7c9a3
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from loguru import logger
from pydantic import ConfigDict
from typing import Self, Sequence

from ctp_slack_bot.core import ApplicationComponentBase, Settings
from ctp_slack_bot.models import Chunk, VectorizedChunk
from .embeddings_model_service import EmbeddingsModelService


class VectorizationService(ApplicationComponentBase):
    """
    Service for vectorizing chunks of text data.

    Delegates the embedding computation to the injected embeddings model
    service and pairs every returned embedding with the chunk it belongs to.
    """

    # Pydantic configuration: instances are frozen (fields cannot be reassigned).
    model_config = ConfigDict(frozen=True)

    settings: Settings
    embeddings_model_service: EmbeddingsModelService

    async def vectorize(self: Self, chunks: Sequence[Chunk]) -> Sequence[VectorizedChunk]:
        """
        Compute an embedding for each chunk and return the vectorized chunks.

        Args:
            chunks: The chunks whose ``text`` should be embedded.

        Returns:
            A tuple of ``VectorizedChunk`` objects, one per input chunk, in
            input order. Each carries the chunk's ``text``, ``parent_id``,
            ``chunk_id`` and ``metadata`` together with its embedding.

        Raises:
            ValueError: If the embeddings service returns a different number
                of embeddings than there are chunks.
        """
        # Nothing to embed: skip the call to the embeddings service entirely.
        if not chunks:
            return ()

        embeddings = await self.embeddings_model_service.get_embeddings([chunk.text for chunk in chunks])

        # strict=True turns a chunk/embedding count mismatch into an error
        # instead of silently dropping the unmatched tail.
        return tuple(
            VectorizedChunk(
                text=chunk.text,
                parent_id=chunk.parent_id,
                chunk_id=chunk.chunk_id,
                metadata=chunk.metadata,
                embedding=embedding,
            )
            for chunk, embedding in zip(chunks, embeddings, strict=True)
        )

    @property
    def name(self: Self) -> str:
        """Return the identifier of this component."""
        return "vectorization_service"