File size: 3,630 Bytes
b110593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package crossref

import (
	"fmt"

	"github.com/go-openapi/strfmt"
	"github.com/weaviate/weaviate/usecases/byteops"
)

// BulkBuilder is a low-alloc tool to build many beacon strings (as []byte). It
// is optimized to allocate just once as opposed to once per ID. This makes it
// considerably faster when generating hundreds of thousands of beacon strings.
// The main intended use case for this is building propValuePairs in
// ref-filters.
//
// The BulkBuilder makes some estimations for how much memory will be necessary
// based on expected input params. If those requirements get exceeded, it will
// still be safe to use, but will fall back to allocating dynamically.
//
// Beacons that fit into the pre-allocated buffer are returned as sub-slices of
// that buffer, not as copies.
type BulkBuilder struct {
	// ReadWriter is embedded; its Buffer holds the pre-allocated bytes and its
	// Position marks where the next beacon will be written.
	byteops.ReadWriter
	// prefix is the leading part that every generated beacon starts with.
	prefix []byte
}

// NewBulkBuilderWithEstimates creates a BulkBuilder whose buffer is allocated
// once, up front, based on the caller's estimates.
//
// expectedCount is the number of beacons the caller expects to build.
// exampleClassName is a representative class name; its length multiplied by
// overheadRatio is used as the per-beacon class-name budget. Each beacon is
// additionally budgeted for the prefix, one separating slash and a UUID.
//
// Negative estimates are treated as zero instead of panicking in make. With an
// empty buffer every beacon is simply built through the dynamically
// allocating fallback.
func NewBulkBuilderWithEstimates(expectedCount int, exampleClassName string,
	overheadRatio float64,
) *BulkBuilder {
	// uuidLen is the length of a UUID in its canonical string form.
	const uuidLen = 36

	prefix := []byte("weaviate://localhost/")

	if expectedCount < 0 {
		expectedCount = 0
	}

	lenOfTypicalClassName := int(float64(len(exampleClassName)) * overheadRatio)
	if lenOfTypicalClassName < 0 {
		lenOfTypicalClassName = 0
	}

	// The +1 accounts for the slash that separates class name and ID.
	predictedSize := expectedCount * (len(prefix) + 1 + lenOfTypicalClassName + uuidLen)

	bb := &BulkBuilder{
		prefix:     prefix,
		ReadWriter: byteops.NewReadWriter(make([]byte, predictedSize)),
	}

	return bb
}

// ClassAndID builds a beacon of the form "<prefix><className>/<id>".
//
// If the beacon fits into the remaining pre-allocated buffer, it is written
// there and the returned slice is a view into that buffer. Otherwise the
// beacon is built in a freshly allocated slice via fallbackWithClassName.
func (bb *BulkBuilder) ClassAndID(className string,
	id strfmt.UUID,
) []byte {
	// Everything written below must be reserved here: the prefix, the class
	// name, the separating slash and the ID. Leaving out the class name and
	// the slash would let a beacon near the end of the buffer pass this check
	// and then write past the space that was actually verified.
	requiredSpace := len(bb.prefix) + len(className) + 1 + len(id)
	if int(bb.Position)+requiredSpace >= len(bb.Buffer) {
		return bb.fallbackWithClassName(className, id)
	}

	// copy the start pos, we will need this at the end to know what to return to
	// the caller
	start := bb.Position
	bb.CopyBytesToBuffer(bb.prefix)

	// This is a safe way, in case a class-name ever contains non-ASCII
	// characters. If we could be 100% sure that a class is ASCII-only, we could
	// remove this allocation and instead use the same copy-by-rune approach that
	// we use later on for the ID.
	bb.CopyBytesToBuffer([]byte(className))
	bb.WriteByte('/') // The separating slash between class and ID
	for _, runeValue := range id {
		// We know that the UUID-string never contains non-ASCII characters. This
		// means it is safe to convert the uint32-rune into a uint8. This allows
		// us to copy char by char without any additional allocs
		bb.WriteByte(uint8(runeValue))
	}

	return bb.Buffer[start:bb.Position]
}

// LegacyIDOnly builds a beacon without a class name, i.e. "<prefix><id>".
//
// If the beacon fits into the remaining pre-allocated buffer, it is written
// there and the returned slice is a view into that buffer. Otherwise the
// beacon is built in a freshly allocated slice via fallbackWithoutClassName.
func (bb *BulkBuilder) LegacyIDOnly(id strfmt.UUID) []byte {
	needed := len(bb.prefix) + len(id)
	if len(bb.Buffer) <= int(bb.Position)+needed {
		// not enough room left in the shared buffer
		return bb.fallbackWithoutClassName(id)
	}

	// remember where this beacon begins, so that only its own bytes are
	// handed back to the caller
	from := bb.Position

	bb.CopyBytesToBuffer(bb.prefix)

	// A UUID string is expected to be ASCII-only, so every rune can be narrowed
	// to a single byte and written without any intermediate allocation.
	for _, r := range id {
		bb.WriteByte(byte(r))
	}

	beacon := bb.Buffer[from:bb.Position]
	return beacon
}

// fallbackWithClassName builds a single beacon of the form
// "<prefix><className>/<id>" in newly allocated memory. ClassAndID uses it
// when the pre-allocated buffer has no room left for the beacon.
func (bb *BulkBuilder) fallbackWithClassName(
	className string, id strfmt.UUID,
) []byte {
	beacon := fmt.Sprintf("%s%s/%s", bb.prefix, className, id)
	return []byte(beacon)
}

// fallbackWithoutClassName builds a single beacon of the form "<prefix><id>"
// in newly allocated memory. LegacyIDOnly uses it when the pre-allocated
// buffer has no room left for the beacon.
func (bb *BulkBuilder) fallbackWithoutClassName(id strfmt.UUID) []byte {
	beacon := fmt.Sprintf("%s%s", bb.prefix, id)
	return []byte(beacon)
}