File size: 2,529 Bytes
66d1c99
 
 
 
 
 
 
a3cf1a2
30a53de
66d1c99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baa0ba8
66d1c99
 
 
 
325fdb5
d8f4cef
f9ef5de
325fdb5
fb173e2
d8f4cef
fb173e2
 
 
 
 
 
325fdb5
 
 
 
 
 
 
66d1c99
325fdb5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from rwkvstic.agnostic.backends import TORCH, TORCH_QUANT
import torch

# Shared settings for configs that quantize a model at load time; merge them
# into a config dict with `**quantized`.
quantized = dict(
    mode=TORCH_QUANT,
    runtimedtype=torch.bfloat16,
    useGPU=torch.cuda.is_available(),
    # Larger chunks are more accurate, but use more memory (and are slower).
    chunksize=32,
    # Your GPU's max size; VRAM needed beyond this is offloaded to the CPU.
    target=15,
)

# Uncomment one config/title pair below to select a model (and comment out the others).
# This is not the full list of options; see https://pypi.org/project/rwkvstic/ and https://huggingface.co/BlinkDL/ for more models/modes.

# RWKV 1B5 instruct test 2 model
# Approximate
# [Vram usage: 6.0GB]
# [File size: 3.0GB]


# config = {
#     "path": "https://huggingface.co/BlinkDL/rwkv-4-pile-1b5/resolve/main/RWKV-4-Pile-1B5-Instruct-test2-20230209.pth",
#     "mode": TORCH,
#     "runtimedtype": torch.float32,
#     "useGPU": torch.cuda.is_available(),
#     "dtype": torch.float32
# }

# title = "RWKV-4 (1.5b Instruct Test 2)"

# RWKV 1B5 instruct model quantized
# Approximate
# [Vram usage: 1.3GB]
# [File size: 3.0GB]

# config = {
#     "path": "https://huggingface.co/BlinkDL/rwkv-4-pile-1b5/resolve/main/RWKV-4-Pile-1B5-Instruct-test1-20230124.pth",
#     **quantized
# }

# title = "RWKV-4 (1.5b Instruct Quantized)"

# RWKV 7B instruct pre-quantized (settings baked into model)
# Approximate
# [Vram usage: 7.0GB]
# [File size: 8.0GB]

# config = {
#     "path": "https://huggingface.co/Hazzzardous/RWKV-8Bit/resolve/main/RWKV-4-Pile-7B-Instruct.pqth"
# }

# title = "RWKV-4 (7b Instruct Quantized)"

# RWKV 14B quantized (latest as of feb 9)
# Approximate
# [Vram usage: 15.0GB]
# [File size: 28.0GB]

# config = {
#     "path": "https://huggingface.co/BlinkDL/rwkv-4-pile-14b/resolve/main/RWKV-4-Pile-14B-20230204-7324.pth",
#     **quantized
# }

# title = "RWKV-4 (14b Quantized)"


# RWKV 14B quantized (latest as of feb 13)
# Approximate
# [Vram usage: 15.0GB]
# [File size: 14.4GB]

# Active selection: quantized RWKV-4 Pile 14B checkpoint (20230213 snapshot).
# Earlier quantized snapshot, kept for reference:
#   https://huggingface.co/Hazzzardous/RWKV-8Bit/resolve/main/RWKV-4-Pile-14B-20230204-7324.pqth
config = dict(
    path="https://huggingface.co/yahma/RWKV-14b_quant/resolve/main/RWKV-4-Pile-14B-20230213-8019.pqth",
)

# Title string that goes with the active config above.
title = "RWKV-4 (14b Quantized - Feb 13)"

# RWKV 14B (latest as of feb 9)
# Approximate
# [Vram usage: 27.0GB]
# [File size: 28.4GB]

# config = {
#     "path": "https://huggingface.co/BlinkDL/rwkv-4-pile-14b/resolve/main/RWKV-4-Pile-14B-20230204-7324.pth",
#     "mode": TORCH,
#     "runtimedtype": torch.bfloat16,
#     "useGPU": torch.cuda.is_available(),
#     "dtype": torch.bfloat16
# }

# title = "RWKV-4 (14b Feb 4 Snapshot)"