Spaces:
Sleeping
Sleeping
| // _ _ | |
| // __ _____ __ ___ ___ __ _| |_ ___ | |
| // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ | |
| // \ V V / __/ (_| |\ V /| | (_| | || __/ | |
| // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| | |
| // | |
| // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. | |
| // | |
| // CONTACT: hello@weaviate.io | |
| // | |
| package vectorizer | |
| import ( | |
| "context" | |
| "github.com/pkg/errors" | |
| "github.com/go-openapi/strfmt" | |
| "github.com/weaviate/weaviate/entities/models" | |
| "github.com/weaviate/weaviate/entities/moduletools" | |
| "github.com/weaviate/weaviate/modules/multi2vec-clip/ent" | |
| libvectorizer "github.com/weaviate/weaviate/usecases/vectorizer" | |
| ) | |
| type Vectorizer struct { | |
| client Client | |
| } | |
| func New(client Client) *Vectorizer { | |
| return &Vectorizer{ | |
| client: client, | |
| } | |
| } | |
| type Client interface { | |
| Vectorize(ctx context.Context, | |
| texts, images []string) (*ent.VectorizationResult, error) | |
| } | |
// ClassSettings exposes the per-class module configuration the vectorizer
// needs: which properties take part in vectorization and the weights used
// when the resulting vectors are combined.
type ClassSettings interface {
	// TextField reports whether property is configured as a text field.
	TextField(property string) bool
	// TextFieldsWeights returns the weights for the text fields.
	TextFieldsWeights() ([]float32, error)

	// ImageField reports whether property is configured as an image field.
	ImageField(property string) bool
	// ImageFieldsWeights returns the weights for the image fields.
	ImageFieldsWeights() ([]float32, error)
}
| func (v *Vectorizer) Object(ctx context.Context, object *models.Object, | |
| objDiff *moduletools.ObjectDiff, settings ClassSettings, | |
| ) error { | |
| vec, err := v.object(ctx, object.ID, object.Properties, objDiff, settings) | |
| if err != nil { | |
| return err | |
| } | |
| object.Vector = vec | |
| return nil | |
| } | |
| func (v *Vectorizer) VectorizeImage(ctx context.Context, image string) ([]float32, error) { | |
| res, err := v.client.Vectorize(ctx, []string{}, []string{image}) | |
| if err != nil { | |
| return nil, err | |
| } | |
| if len(res.ImageVectors) != 1 { | |
| return nil, errors.New("empty vector") | |
| } | |
| return res.ImageVectors[0], nil | |
| } | |
| func (v *Vectorizer) object(ctx context.Context, id strfmt.UUID, | |
| schema interface{}, objDiff *moduletools.ObjectDiff, ichek ClassSettings, | |
| ) ([]float32, error) { | |
| vectorize := objDiff == nil || objDiff.GetVec() == nil | |
| // vectorize image and text | |
| texts := []string{} | |
| images := []string{} | |
| if schema != nil { | |
| for prop, value := range schema.(map[string]interface{}) { | |
| if ichek.ImageField(prop) { | |
| valueString, ok := value.(string) | |
| if ok { | |
| images = append(images, valueString) | |
| vectorize = vectorize || (objDiff != nil && objDiff.IsChangedProp(prop)) | |
| } | |
| } | |
| if ichek.TextField(prop) { | |
| valueString, ok := value.(string) | |
| if ok { | |
| texts = append(texts, valueString) | |
| vectorize = vectorize || (objDiff != nil && objDiff.IsChangedProp(prop)) | |
| } | |
| } | |
| valueArr, ok := value.([]interface{}) | |
| if ok { | |
| for _, value := range valueArr { | |
| valueString, ok := value.(string) | |
| if ok { | |
| texts = append(texts, valueString) | |
| vectorize = vectorize || (objDiff != nil && objDiff.IsChangedProp(prop)) | |
| } | |
| } | |
| } | |
| } | |
| } | |
| // no property was changed, old vector can be used | |
| if !vectorize { | |
| return objDiff.GetVec(), nil | |
| } | |
| vectors := [][]float32{} | |
| if len(texts) > 0 || len(images) > 0 { | |
| res, err := v.client.Vectorize(ctx, texts, images) | |
| if err != nil { | |
| return nil, err | |
| } | |
| vectors = append(vectors, res.TextVectors...) | |
| vectors = append(vectors, res.ImageVectors...) | |
| } | |
| weights, err := v.getWeights(ichek) | |
| if err != nil { | |
| return nil, err | |
| } | |
| return libvectorizer.CombineVectorsWithWeights(vectors, weights), nil | |
| } | |
| func (v *Vectorizer) getWeights(ichek ClassSettings) ([]float32, error) { | |
| weights := []float32{} | |
| textFieldsWeights, err := ichek.TextFieldsWeights() | |
| if err != nil { | |
| return nil, err | |
| } | |
| imageFieldsWeights, err := ichek.ImageFieldsWeights() | |
| if err != nil { | |
| return nil, err | |
| } | |
| weights = append(weights, textFieldsWeights...) | |
| weights = append(weights, imageFieldsWeights...) | |
| normalizedWeights := v.normalizeWeights(weights) | |
| return normalizedWeights, nil | |
| } | |
| func (v *Vectorizer) normalizeWeights(weights []float32) []float32 { | |
| if len(weights) > 0 { | |
| var denominator float32 | |
| for i := range weights { | |
| denominator += weights[i] | |
| } | |
| normalizer := 1 / denominator | |
| normalized := make([]float32, len(weights)) | |
| for i := range weights { | |
| normalized[i] = weights[i] * normalizer | |
| } | |
| return normalized | |
| } | |
| return nil | |
| } | |