File size: 4,753 Bytes
b396e7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/**
 * Returns a random integer between min (inclusive) and max (inclusive).
 * The value is no lower than min (or the next integer greater than min
 * if min isn't an integer) and no greater than max (or the next integer
 * lower than max if max isn't an integer).
 * Using Math.round() will give you a non-uniform distribution!
 * https://stackoverflow.com/questions/1527803/generating-random-whole-numbers-in-javascript-in-a-specific-range
 */
 const getRandomInt = (min, max)  => {
    min = Math.ceil(min)
    max = Math.floor(max)
    return Math.floor(Math.random() * (max - min + 1)) + min
}

/**
 * Reply Buffer.
 */
class ReplyBuffer {
    /**
     * Constructor.
     * 
     * @param {*} limit maximum number of transitions
     * @param {*} onDiscard callback triggered on discard a transition
     */
    constructor(limit = 500, onDiscard = () => {}) {
        this._limit = limit
        this._onDiscard = onDiscard

        this._buffer = new Array(limit).fill()
        this._pool = []

        this.size = 0
    }

    /**
     * Add a new transition to the reply buffer. 
     * Transition doesn't contain the next state. The next state is derived when sampling.
     * 
     * @param {{id: number, priority: number, state: array, action, reward: number}} transition transition
     */
    add(transition) {
        let { id, priority = 1 } = transition
        if (id === undefined || id < 0 || priority < 1) 
            throw new Error('Invalid arguments')

        id = id % this._limit

        if (this._buffer[id]) {
            const start = this._pool.indexOf(id)
            let deleteCount = 0
            while (this._pool[start + deleteCount] == id)
                deleteCount++

            this._pool.splice(start, deleteCount)
            
            this._onDiscard(this._buffer[id])
        } else
            this.size++

        while (priority--) 
            this._pool.push(id)

        this._buffer[id] = transition
    }

    /**
     * Return `n` random samples from the buffer. 
     * Returns an empty array if impossible provide required number of samples.
     * 
     * @param {number} [n = 1] - number of samples 
     * @returns array of exactly `n` samples
     */
    sample(n = 1) {
        if (this.size < n) 
            return []

        const 
            sampleIndices = new Set(),
            samples = []

        let counter = n
        while (counter--)
            while (sampleIndices.size < this.size) {
                const randomIndex = this._pool[getRandomInt(0, this._pool.length - 1)]
                if (sampleIndices.has(randomIndex))
                    continue

                sampleIndices.add(randomIndex)

                const { id, state, action, reward } = this._buffer[randomIndex]
                const nextId = id + 1
                const next = this._buffer[nextId % this._limit]

                if (next && next.id === nextId) { // the case when sampled the last element that still waiting for next state
                    samples.push({ state, action, reward, nextState: next.state})
                    break
                }
            }

        return samples.length == n ? samples : []
    }
}

/** TESTS */
(() => {
    return

    const rb = new ReplyBuffer(5)
    rb.add({id: 0, state: 0})
    rb.add({id: 1, state: 1})
    rb.add({id: 2, state: 2, priority: 3})
    
    console.assert(rb.size === 3)
    console.assert(rb._pool.length === 5)
    console.assert(rb._buffer[0].id === 0)
    
    rb.add({id: 2, state: 2})
    rb.add({id: 4, state: 4, priority: 2})
    
    console.assert(rb.size === 4)
    console.assert(rb._pool.length === 5)
    console.assert(JSON.stringify(rb._pool) === '[0,1,2,4,4]')
    
    rb.add({id: 5, state: 0, priority: 2}) // 5%5 = 0 => state = 0
    
    console.assert(rb.size === 4)
    console.assert(rb._pool.length === 6)
    console.assert(rb._buffer.length === 5)
    console.assert(rb._buffer[0].id === 5)
    console.assert(JSON.stringify(rb._pool) === '[1,2,4,4,0,0]')

    console.assert(rb.sample(999).length === 0, 'Too many samples')
    
    let samples1 = rb.sample(2)
    console.assert(samples1.length === 2, 'Only two samples possible')
    console.assert(samples1[0].nextState === (samples1[0].state + 1) % 5, 'Next state should be valid', samples1)

    rb.add({id: 506, state: 506, priority: 3})

    let samples2 = rb.sample(1)
    console.assert(samples2.length === 1, 'Only one suitable sample with valid next state')
    console.assert(samples2[0].state === 4, 'Sample with id:4')
    console.assert(rb._buffer[1].id === 506, '506 % 5 = 1')

    console.assert(rb.sample(2).length === 0, 
        'Can not sample 2 transitions since next state is available only for one state')
})()