File size: 3,065 Bytes
b1b6ed6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
from transformers import AutoConfig
# Maps a Hugging Face ``config.model_type`` string to the display label used
# for that architecture. Several model types intentionally share one label
# (e.g. the three Falcon spellings).
LLM_MODEL_ARCHS = {
    "stablelm_epoch": "🔴 StableLM-Epoch",
    "stablelm_alpha": "🔴 StableLM-Alpha",
    "mixformer-sequential": "🧑‍💻 Phi φ",
    "RefinedWebModel": "🦅 Falcon",
    "gpt_bigcode": "⭐ StarCoder",
    "RefinedWeb": "🦅 Falcon",
    "baichuan": "🌊 Baichuan 百川",  # river
    "internlm": "🧑‍🎓 InternLM 书生",  # scholar
    "mistral": "Ⓜ️ Mistral",
    "mixtral": "Ⓜ️ Mixtral",
    "codegen": "♾️ CodeGen",
    "chatglm": "💬 ChatGLM",
    "falcon": "🦅 Falcon",
    "bloom": "🌸 Bloom",
    "llama": "🦙 LLaMA",
    "rwkv": "🐦‍⬛ RWKV",
    "deci": "🔵 deci",
    "Yi": "🫂 Yi 人",  # people
    "mpt": "🧱 MPT",
    # No emoji chosen yet for the entries below -- suggest something.
    "gpt_neox": "GPT-NeoX",
    "gpt_neo": "GPT-Neo",
    "gpt2": "GPT-2",
    "gptj": "GPT-J",
    "bart": "BART",
}
def model_hyperlink(link, model_name):
    """Return an HTML anchor labelled ``model_name`` that opens ``link`` in a new tab.

    Both arguments are interpolated into the markup as-is; no escaping is
    applied here.
    """
    anchor_style = "color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"
    opening_tag = f'<a target="_blank" href="{link}" style="{anchor_style}">'
    return f"{opening_tag}{model_name}</a>"
def process_architectures(model):
    """Return the display label for the architecture of ``model``.

    The model's config is loaded through ``AutoConfig.from_pretrained`` with
    ``trust_remote_code=True`` and its ``model_type`` is looked up in
    ``LLM_MODEL_ARCHS``. Any failure along the way (loading the config,
    reading ``model_type``) as well as an unmapped model type yields
    ``"Unknown"`` -- this is a best-effort lookup that never raises.
    """
    fallback = "Unknown"
    try:
        model_type = AutoConfig.from_pretrained(model, trust_remote_code=True).model_type
        return LLM_MODEL_ARCHS.get(model_type, fallback)
    except Exception:
        # Deliberately broad: an unresolvable model must not break the caller.
        return fallback
def process_score(score, quantization):
    """Format ``score`` with two decimals plus a one-character marker.

    The marker is ``*`` when ``quantization`` is anything other than
    ``"Unquantized"``, and a single space otherwise, so both forms have the
    same width.
    """
    marker = "*" if quantization != "Unquantized" else " "
    return f"{score:.2f}{marker}"
def process_quantizations(x):
    """Return the quantization label for one benchmark record ``x``.

    ``x`` is indexed by flattened config keys. Only the keys relevant to the
    record's quantization scheme are read:

    * ``bnb``  -> ``"BnB.4bit"`` / ``"BnB.8bit"`` when the matching
      ``load_in_*bit`` flag is exactly ``True`` (4-bit is checked first)
    * ``gptq`` -> ``"GPTQ.4bit"`` when ``bits == 4``
    * ``awq``  -> ``"AWQ.4bit"`` when ``bits == 4``

    Every other combination is reported as ``"Unquantized"``.
    """
    config_prefix = "config.backend.quantization_config."
    scheme = x["config.backend.quantization_scheme"]

    if scheme == "bnb":
        if x[config_prefix + "load_in_4bit"] is True:
            return "BnB.4bit"
        if x[config_prefix + "load_in_8bit"] is True:
            return "BnB.8bit"
        return "Unquantized"

    for scheme_name, label in (("gptq", "GPTQ.4bit"), ("awq", "AWQ.4bit")):
        # The bit width is only looked up once the scheme matches.
        if scheme == scheme_name and x[config_prefix + "bits"] == 4:
            return label

    return "Unquantized"
def process_kernels(x):
    """Return the kernel label for one benchmark record ``x``.

    ``x`` is indexed by flattened config keys. The ``version`` entry is only
    read for the ``gptq`` and ``awq`` schemes:

    * ``gptq`` with version ``1`` / ``2``           -> ``"GPTQ.ExllamaV1"`` / ``"GPTQ.ExllamaV2"``
    * ``awq``  with version ``"gemm"`` / ``"gemv"`` -> ``"AWQ.GEMM"`` / ``"AWQ.GEMV"``

    Anything else is reported as ``"No Kernel"``.
    """
    scheme = x["config.backend.quantization_scheme"]

    if scheme == "gptq":
        candidates = ((1, "GPTQ.ExllamaV1"), (2, "GPTQ.ExllamaV2"))
    elif scheme == "awq":
        candidates = (("gemm", "AWQ.GEMM"), ("gemv", "AWQ.GEMV"))
    else:
        return "No Kernel"

    version = x["config.backend.quantization_config.version"]
    for expected, label in candidates:
        if version == expected:
            return label
    return "No Kernel"
def test():
    """Manual debugging aid: load one model config, then drop into ``pdb``.

    Nothing is asserted or returned; ``model`` and ``config`` are left as
    locals so they can be inspected from the debugger prompt.
    """
    import pdb

    model = "Qwen/Qwen1.5-32B"
    config = AutoConfig.from_pretrained(model, trust_remote_code=True)

    pdb.set_trace()


# Running the module as a script starts the interactive debugging session.
if __name__ == "__main__":
    test()
|