# Sharathhebbar24's picture
# Update models.py
# 2014213
def return_task_name() -> tuple:
    """Return the names of the supported generation tasks.

    Returns:
        A 2-tuple of task-name strings, in the order
        ``("text2text-generation", "text-generation")``.
    """
    return ("text2text-generation", "text-generation")
def return_models(task_name: str) -> tuple:
    """Return the model-parent keys available for *task_name*.

    Args:
        task_name: Task identifier. ``"text2text-generation"`` selects the
            text2text parents; every other value (including
            ``"text-generation"``) selects the text-generation parents.

    Returns:
        A tuple of lower-case parent keys. These are the same keys used by
        the per-task model catalogues in this file.
    """
    if task_name == "text2text-generation":
        return (
            "google",
            "facebook",
            "microsoft",
            "salesforce",
            "langboat",
            "bloom",
            "allenai",
            "mbzuai",
            "lmsys",
            "starmpcc",
            "haining",
            "kaludi",
        )

    # Fallback for every other task name, not only "text-generation".
    return (
        "google",
        "facebook",
        "microsoft",
        "salesforce",
        "langboat",
        "databricks",
        "aisquared",
        "bloom",
        "allenai",
        "tiiuae",
        "openlm",
        "stabilityai",
        "eleutherai",
        "mbzuai",
        "cerebras",
        "open_assistant",
        "nomic_ai",
        "blinkdl",
        "lmsys",
        "together_computer",
        "mosaic_ml",
        # NOTE: this key is spelled with a zero ("h20ai"); the text-generation
        # catalogue uses the same spelling, so it must stay in sync.
        "h20ai",
        "deciai",
    )
def return_text2text_generation_models(model_parent, count=False):
    """Return the text2text-generation models of one parent, or the total count.

    Args:
        model_parent: Lower-case parent key (e.g. ``"google"``, ``"bloom"``).
            Ignored when ``count`` is ``True``.
        count: When exactly ``True``, return the total number of catalogued
            models across all parents instead of a single parent's models.

    Returns:
        ``int`` total when ``count is True``; otherwise the tuple of
        ``"organisation/name"`` model ids registered under ``model_parent``
        (presumably Hugging Face Hub ids -- verify against the caller).

    Raises:
        KeyError: If ``count`` is not ``True`` and ``model_parent`` is not a
            known parent key.
    """
    model_dict = {
        "google": (
            "google/flan-t5-small",
            "google/flan-t5-base",
            "google/flan-t5-large",
            "google/flan-t5-xl",
            "google/flan-t5-xxl",
            "google/byt5-xxl",
            "google/byt5-xl",
            "google/byt5-large",
            "google/byt5-small",
            "google/byt5-base",
            "google/mt5-xxl",
            "google/mt5-xl",
            "google/mt5-large",
            "google/mt5-small",
            "google/long-t5-local-base",
            "google/long-t5-local-large",
            "google/long-t5-tglobal-base",
            "google/long-t5-tglobal-large",
            "google/pegasus-big_patent",
            "google/pegasus-x-base",
            "google/pegasus-x-large",
            "google/pegasus-x-base-arxiv",
            "google/roberta2roberta_L-24_wikisplit",
            "google/roberta2roberta_L-24_discofuse",
            "google/switch-base-8",
            "google/switch-base-16",
            "google/switch-base-32",
            "google/switch-base-64",
            "google/switch-base-128",
            "google/switch-base-256",
            "google/switch-large-128",
            "google/switch-xxl-128",
            "google/switch-c-2048",
            "google/t5-11b-ssm",
            "google/t5-11b-ssm-nq",
            "google/t5-11b-ssm-nqo",
            "google/t5-11b-ssm-tqa",
            "google/t5-11b-ssm-tqao",
            "google/t5-11b-ssm-wq",
            "google/t5-11b-ssm-wqo",
            "google/t5-3b-ssm",
            "google/t5-3b-ssm-nq",
            "google/t5-3b-ssm-nqo",
            "google/t5-base-lm-adapt",
            "google/t5-efficient-base",
            "google/t5-efficient-base-dl2",
            "google/t5-efficient-base-dl4",
            "google/t5-efficient-base-dl6",
            "google/t5-efficient-base-dl8",
            "google/t5-efficient-base-dm256",
            "google/t5-efficient-base-dm512",
            "google/t5-efficient-base-dm1000",
            "google/t5-efficient-base-dm2000",
            "google/t5-efficient-base-el2",
            "google/t5-efficient-base-el4",
            "google/t5-efficient-base-el6",
            "google/t5-efficient-base-el8",
            "google/t5-efficient-base-el16",
            "google/t5-efficient-base-nl40",
            "google/t5-efficient-base-nl48",
            "google/t5-efficient-base-nl8",
            "google/t5-efficient-large",
            "google/t5-efficient-large-dl12",
            "google/t5-efficient-large-dl16",
            "google/t5-efficient-large-dl2",
            "google/t5-efficient-large-dl32",
            "google/t5-efficient-large-dl4",
            "google/t5-efficient-large-dl6",
            "google/t5-efficient-large-dl8",
            "google/t5-efficient-large-dm128",
            "google/t5-efficient-large-dm2000",
            "google/t5-efficient-large-dm256",
            "google/t5-efficient-large-dm512",
            "google/t5-efficient-large-dm768",
            "google/t5-efficient-large-el12",
            "google/t5-efficient-large-el2",
            "google/t5-efficient-large-el4",
            "google/t5-efficient-large-el6",
            "google/t5-efficient-large-el8",
            "google/t5-efficient-large-kv128",
            "google/t5-efficient-large-kv16",
            "google/t5-efficient-large-kv256",
            "google/t5-efficient-large-kv32",
            "google/t5-efficient-large-nh12",
            "google/t5-efficient-large-nh2",
            "google/t5-efficient-large-nh24",
            "google/t5-efficient-large-nh32",
            "google/t5-efficient-large-nh4",
            "google/t5-efficient-large-nh8",
            "google/t5-efficient-large-nh8-nl32",
            "google/t5-efficient-large-nl10",
            "google/t5-efficient-large-nl12",
            "google/t5-efficient-large-nl16",
            "google/t5-efficient-large-nl2",
            "google/t5-efficient-large-nl20",
            "google/t5-efficient-large-nl32",
            "google/t5-efficient-large-nl36",
            "google/t5-efficient-large-nl4",
            "google/t5-efficient-large-nl8",
            "google/t5-efficient-mini",
            "google/t5-efficient-mini-nl12",
            "google/t5-efficient-mini-nl24",
            "google/t5-efficient-mini-nl6",
            "google/t5-efficient-mini-nl8",
            "google/t5-efficient-small",
            "google/t5-efficient-small-dl12",
            "google/t5-efficient-small-dl16",
            "google/t5-efficient-small-dl2",
            "google/t5-efficient-small-dl4",
            "google/t5-efficient-small-dl8",
            "google/t5-efficient-small-dm1000",
            "google/t5-efficient-small-dm128",
            "google/t5-efficient-small-dm2000",
            "google/t5-efficient-small-dm256",
            "google/t5-efficient-small-dm768",
            "google/t5-efficient-small-el12",
            "google/t5-efficient-small-el16",
            "google/t5-efficient-small-el16-dl1",
            "google/t5-efficient-small-el16-dl2",
            "google/t5-efficient-small-el16-dl4",
            "google/t5-efficient-small-el16-dl8",
            "google/t5-efficient-small-el2",
            "google/t5-efficient-small-el32",
            "google/t5-efficient-small-el4",
            "google/t5-efficient-small-el48",
            "google/t5-efficient-small-el64",
            "google/t5-efficient-small-el8",
            "google/t5-efficient-small-el8-dl1",
            "google/t5-efficient-small-el8-dl2",
            "google/t5-efficient-small-el8-dl4",
            "google/t5-efficient-small-ff1000",
            "google/t5-efficient-small-ff12000",
            "google/t5-efficient-small-ff3000",
            "google/t5-efficient-small-ff6000",
            "google/t5-efficient-small-ff9000",
            "google/t5-efficient-small-kv128",
            "google/t5-efficient-small-kv16",
            "google/t5-efficient-small-kv256",
            "google/t5-efficient-small-kv32",
            "google/t5-efficient-small-nl16",
            "google/t5-efficient-small-nl2",
            "google/t5-efficient-small-nl20",
            "google/t5-efficient-small-nl22",
            "google/t5-efficient-small-nl24",
            "google/t5-efficient-small-nl32",
            "google/t5-efficient-small-nl36",
            "google/t5-efficient-small-nl4",
            "google/t5-efficient-small-nl40",
            "google/t5-efficient-small-nl48",
            "google/t5-efficient-small-nl8",
            "google/t5-efficient-tiny",
            "google/t5-efficient-tiny-dl2",
            "google/t5-efficient-tiny-dl6",
            "google/t5-efficient-tiny-dl8",
            "google/t5-efficient-tiny-el12",
            "google/t5-efficient-tiny-el2",
            "google/t5-efficient-tiny-el6",
            "google/t5-efficient-tiny-el8",
            "google/t5-efficient-tiny-ff12000",
            "google/t5-efficient-tiny-ff2000",
            "google/t5-efficient-tiny-ff3000",
            "google/t5-efficient-tiny-ff6000",
            "google/t5-efficient-tiny-ff9000",
            "google/t5-efficient-tiny-nh1",
            "google/t5-efficient-tiny-nh16",
            "google/t5-efficient-tiny-nh32",
            "google/t5-efficient-tiny-nh8",
            "google/t5-efficient-tiny-nl12",
            "google/t5-efficient-tiny-nl16",
            "google/t5-efficient-tiny-nl2",
            "google/t5-efficient-tiny-nl24",
            "google/t5-efficient-tiny-nl32",
            "google/t5-efficient-tiny-nl6",
            "google/t5-efficient-tiny-nl8",
            "google/t5-efficient-xl",
            "google/t5-efficient-xl-nl12",
            "google/t5-efficient-xl-nl16",
            "google/t5-efficient-xl-nl2",
            "google/t5-efficient-xl-nl28",
            "google/t5-efficient-xl-nl4",
            "google/t5-efficient-xl-nl6",
            "google/t5-efficient-xl-nl8",
            "google/t5-efficient-xxl",
            "google/t5-efficient-xxl-nl4",
            "google/t5-large-lm-adapt",
            "google/t5-large-ssm",
            "google/t5-large-ssm-nq",
            "google/t5-large-ssm-nqo",
            "google/t5-small-lm-adapt",
            "google/t5-small-ssm",
            "google/t5-small-ssm-nq",
            "google/t5-v1_1-base",
            "google/t5-v1_1-large",
            "google/t5-v1_1-small",
            "google/t5-v1_1-xl",
            "google/t5-v1_1-xxl",
            "google/t5-xl-lm-adapt",
            "google/t5-xl-ssm-nq",
            "google/t5-xxl-lm-adapt",
            "google/t5-xxl-ssm",
            "google/t5-xxl-ssm-nq",
            "google/t5-xxl-ssm-nqo",
            "google/t5-xxl-ssm-tqa",
            "google/t5-xxl-ssm-tqao",
            "google/t5-xxl-ssm-wq",
            "google/t5-xxl-ssm-wqo",
            "google/t5_11b_trueteacher_and_anli",
            "google/ul2",
            "google/umt5-base",
            "google/umt5-small",
            "google/umt5-xl",
            "google/umt5-xxl",
        ),
        "mbzuai": (
            "MBZUAI/LaMini-Flan-T5-783M",
            "MBZUAI/LaMini-Flan-T5-248M",
            "MBZUAI/LaMini-Flan-T5-77M",
            "MBZUAI/LaMini-T5-738M",
            "MBZUAI/LaMini-T5-223M",
            "MBZUAI/LaMini-T5-61M",
        ),
        "bloom": (
            "bigscience/T0_3B",
            "bigscience/T0_original_task_only",
            "bigscience/T0_single_prompt",
            "bigscience/T0p",
            "bigscience/T0",
            "bigscience/T0pp",
            "bigscience/mt0-xxl-p3",
            "bigscience/mt0-xxl",
            "bigscience/mt0-large",
            "bigscience/mt0-base",
            "bigscience/mt0-small",
            "bigscience/mt0-xxl-mt",
        ),
        # Trailing comma keeps this a one-element tuple rather than a str.
        "lmsys": (
            "lmsys/fastchat-t5-3b-v1.0",
        ),
        "facebook": (
            "facebook/mbart-large-50-many-to-many-mmt",
            "facebook/musicgen-small",
            "facebook/musicgen-medium",
            "facebook/musicgen-large",
            "facebook/m2m100_418M",
            "facebook/mbart-large-50-one-to-many-mmt",
            "facebook/mbart-large-50-many-to-one-mmt",
            "facebook/mbart-large-50",
            "facebook/mgenre-wiki",
            "facebook/genre-linking-aidayago2",
            "facebook/genre-linking-blink",
            "facebook/genre-kilt",
            "facebook/m2m100-12B-avg-10-ckpt",
            "facebook/m2m100-12B-avg-5-ckpt",
            "facebook/m2m100-12B-last-ckpt",
            "facebook/m2m100_1.2B",
        ),
        "microsoft": (
            "microsoft/prophetnet-large-uncased",
            "microsoft/codereviewer",
            "microsoft/xprophetnet-large-wiki100-cased",
            "microsoft/xprophetnet-large-wiki100-cased-xglue-qg",
            "microsoft/xprophetnet-large-wiki100-cased-xglue-ntg",
            "microsoft/prophetnet-large-uncased-squad-qg",
            "microsoft/prophetnet-large-uncased-cnndm",
        ),
        "starmpcc": (
            "starmpcc/Asclepius-13B",
            "starmpcc/Asclepius-7B",
        ),
        "langboat": (
            "Langboat/mengzi-t5-base",
            "Langboat/mengzi-t5-base-mt",
        ),
        "haining": (
            "haining/scientific_abstract_simplification",
            "haining/poem_interpretation_allpoetry169k_baseline",
            "haining/poem_interpretation_allpoetry169k_full",
            "haining/lyrics_interpretation_nonnegative",
            "haining/poem_interpretation_allpoetry169k",
            "haining/sas_baseline",
        ),
        "kaludi": (
            "Kaludi/chatgpt-gpt4-prompts-bart-large-cnn-samsum",
            "Kaludi/Customer-Support-Assistant",
            "Kaludi/Customer-Support-Assistant-V2",
            "Kaludi/historical-events-reimagined",
        ),
        "allenai": (
            "allenai/macaw-large",
            "allenai/entailer-11b",
            "allenai/entailer-large",
            "allenai/System4_classify_FigLang2022",
            "allenai/System4_explain_FigLang2022",
            "allenai/System3_DREAM_FLUTE_all_dimensions_FigLang2022",
            "allenai/System3_DREAM_FLUTE_social_norm_FigLang2022",
            "allenai/System3_DREAM_FLUTE_consequence_FigLang2022",
            "allenai/System3_DREAM_FLUTE_motivation_FigLang2022",
            "allenai/System3_DREAM_FLUTE_emotion_FigLang2022",
            "allenai/System2_FigLang2022",
            "allenai/System1_FigLang2022",
            "allenai/DREAM",
            "allenai/led-base-16384-cochrane",
            "allenai/bart-large-multi_lexsum-long-multitask",
            "allenai/bart-large-multi_lexsum-source-multitask",
            "allenai/pegasus-multi_lexsum-short-tiny",
            "allenai/pegasus-multi_lexsum-long-tiny",
            "allenai/pegasus-multi_lexsum-long-short",
            "allenai/bart-large-multi_lexsum-short-tiny",
            "allenai/bart-large-multi_lexsum-long-tiny",
            "allenai/bart-large-multi_lexsum-long-short",
            "allenai/primera-multi_lexsum-source-tiny",
            "allenai/primera-multi_lexsum-source-short",
            "allenai/primera-multi_lexsum-source-long",
            "allenai/led-base-16384-multi_lexsum-source-tiny",
            "allenai/led-base-16384-multi_lexsum-source-short",
            "allenai/led-base-16384-multi_lexsum-source-long",
            "allenai/led-base-16384-ms2",
            "allenai/mtk-instruct-11b-def-pos",
            "allenai/tk-instruct-small-def-pos",
            "allenai/tk-instruct-base-def-pos",
            "allenai/tk-instruct-large-def-pos",
            "allenai/mtk-instruct-3b-def-pos",
            "allenai/tk-instruct-3b-def-pos-neg-expl",
            "allenai/tk-instruct-3b-def-pos-neg",
            "allenai/tk-instruct-3b-pos",
            "allenai/tk-instruct-3b-def-pos",
            "allenai/tk-instruct-3b-def",
            "allenai/tk-instruct-11b-def-pos-neg-expl",
            "allenai/tk-instruct-11b-def-pos",
            "allenai/tk-instruct-11b-def",
            "allenai/PRIMERA-arxiv",
            "allenai/PRIMERA-wcep",
            "allenai/PRIMERA-multixscience",
            "allenai/PRIMERA-multinews",
            "allenai/PRIMERA",
            "allenai/unifiedqa-v2-t5-small-1363200",
            "allenai/unifiedqa-v2-t5-small-1251000",
            "allenai/unifiedqa-v2-t5-large-1363200",
            "allenai/unifiedqa-v2-t5-large-1251000",
            "allenai/unifiedqa-v2-t5-base-1363200",
            "allenai/unifiedqa-v2-t5-base-1251000",
            "allenai/unifiedqa-v2-t5-3b-1363200",
            "allenai/unifiedqa-v2-t5-3b-1251000",
            "allenai/unifiedqa-v2-t5-11b-1363200",
            "allenai/unifiedqa-v2-t5-11b-1251000",
            "allenai/unifiedqa-t5-small",
            "allenai/unifiedqa-t5-large",
            "allenai/unifiedqa-t5-base",
            "allenai/unifiedqa-t5-3b",
            "allenai/unifiedqa-t5-11b",
            "allenai/tailor",
            "allenai/t5-small-squad2-question-generation",
            "allenai/t5-small-squad2-next-word-generator-squad",
            "allenai/t5-small-squad11",
            "allenai/t5-small-next-word-generator-qoogle",
            "allenai/macaw-answer-11b",
            "allenai/macaw-3b",
            "allenai/macaw-11b",
            "allenai/led-large-16384",
            "allenai/led-large-16384-arxiv",
            "allenai/led-base-16384",
        ),
        "salesforce": (
            "Salesforce/socratic-books-30M",
            "Salesforce/instructcodet5p-16b",
            "Salesforce/codet5p-2b",
            "Salesforce/codet5p-6b",
            "Salesforce/codet5p-16b",
            "Salesforce/dialogstudio-t5-3b-v1.0",
            "Salesforce/dialogstudio-t5-base-v1.0",
            "Salesforce/dialogstudio-t5-large-v1.0",
            "Salesforce/bart-large-xsum-samsum",
            "Salesforce/cods-bart-large-xsum-samsum",
            "Salesforce/qaconv-unifiedqa-t5-3b",
            "Salesforce/qaconv-unifiedqa-t5-base",
            "Salesforce/qaconv-unifiedqa-t5-large",
            "Salesforce/mixqg-base",
            "Salesforce/mixqg-3b",
            "Salesforce/mixqg-large",
            "Salesforce/codet5-small",
            "Salesforce/codet5-base",
            "Salesforce/codet5-large",
            "Salesforce/codet5-large-ntp-py",
            "Salesforce/codet5-base-multi-sum",
            "Salesforce/discord_qg",
            "Salesforce/codet5-base-codexglue-sum-python",
            "Salesforce/codet5-base-codexglue-sum-go",
            "Salesforce/codet5-base-codexglue-sum-php",
            "Salesforce/codet5-base-codexglue-sum-javascript",
            "Salesforce/codet5-base-codexglue-sum-java",
            "Salesforce/codet5-base-codexglue-sum-ruby",
            "Salesforce/codet5-base-codexglue-clone",
            "Salesforce/codet5-base-codexglue-concode",
            "Salesforce/codet5-base-codexglue-defect",
            "Salesforce/codet5-base-codexglue-refine-medium",
            "Salesforce/codet5-base-codexglue-refine-small",
            "Salesforce/codet5-base-codexglue-translate-cs-java",
            "Salesforce/codet5-base-codexglue-translate-java-cs",
            "Salesforce/safety-flan-t5-base",
            "Salesforce/safety-flan-t5-small",
            "Salesforce/codet5p-770m-py",
            "Salesforce/codet5p-770m",
            "Salesforce/codet5p-220m",
            "Salesforce/codet5p-220m-py",
            "Salesforce/bart-large-swipe",
            "Salesforce/bart-large-swipe-clean",
        ),
    }

    # Only the literal True switches to counting mode; model_parent is not
    # consulted in that case.
    if count is True:
        return sum(len(models) for models in model_dict.values())

    return model_dict[model_parent]
# Text Generation Models
def return_text_generation_models(model_parent, count=False):
    """Return the text-generation models of one parent, or the total count.

    Args:
        model_parent: Lower-case parent key (e.g. ``"eleutherai"``,
            ``"together_computer"``). Ignored when ``count`` is ``True``.
        count: When exactly ``True``, return the total number of catalogued
            models across all parents instead of a single parent's models.

    Returns:
        ``int`` total when ``count is True``; otherwise the tuple of
        ``"organisation/name"`` model ids registered under ``model_parent``
        (presumably Hugging Face Hub ids -- verify against the caller).

    Raises:
        KeyError: If ``count`` is not ``True`` and ``model_parent`` is not a
            known parent key.
    """
    model_dict = {
        "google": (
            "google/reformer-enwik8",
            "google/reformer-crime-and-punishment",
        ),
        "mbzuai": (
            "MBZUAI/LaMini-Cerebras-111M",
            "MBZUAI/LaMini-Cerebras-256M",
            "MBZUAI/LaMini-Cerebras-590M",
            "MBZUAI/LaMini-Cerebras-1.3B",
            "MBZUAI/LaMini-GPT-774M",
            "MBZUAI/LaMini-GPT-124M",
            "MBZUAI/LaMini-GPT-1.5B",
            "MBZUAI/LaMini-Neo-125M",
            "MBZUAI/LaMini-Neo-1.3B",
        ),
        "eleutherai": (
            "EleutherAI/pythia-14m",
            "EleutherAI/pythia-31m",
            "EleutherAI/pythia-1b-deduped",
            "EleutherAI/pythia-2.8b-v0",
            "EleutherAI/pythia-1b-v0",
            "EleutherAI/pythia-410m-v0",
            "EleutherAI/pythia-70m-deduped-v0",
            "EleutherAI/pythia-2.8b-deduped-v0",
            "EleutherAI/pythia-1b-deduped-v0",
            "EleutherAI/pythia-410m-deduped-v0",
            "EleutherAI/pythia-160m-deduped-v0",
            "EleutherAI/pythia-6.9b-deduped-v0",
            "EleutherAI/pythia-70m-deduped",
            "EleutherAI/pythia-70m",
            "EleutherAI/pythia-2.8b-deduped",
            "EleutherAI/pythia-1b",
            "EleutherAI/pythia-410m-deduped",
            "EleutherAI/pythia-160m-deduped",
            "EleutherAI/pythia-160m-v0",
            "EleutherAI/pythia-1.4b-deduped-v0",
            "EleutherAI/pythia-1.4b",
            "EleutherAI/pythia-410m",
            "EleutherAI/pythia-intervention-410m-deduped",
            "EleutherAI/gpt-neo-125m",
            "EleutherAI/gpt-neo-2.7B",
            "EleutherAI/gpt-neo-1.3B",
            "EleutherAI/pythia-160m",
            "EleutherAI/gpt-neox-20b",
            "EleutherAI/gpt-j-6b",
            "EleutherAI/pythia-2.8b",
            "EleutherAI/pythia-12b-deduped",
            "EleutherAI/pythia-6.9b-deduped",
            "EleutherAI/pythia-1.4b-deduped",
            "EleutherAI/pythia-12b",
            "EleutherAI/pythia-6.9b",
            "EleutherAI/polyglot-ko-12.8b",
            "EleutherAI/polyglot-ko-5.8b",
            "EleutherAI/polyglot-ko-3.8b",
            "EleutherAI/polyglot-ko-1.3b",
            "EleutherAI/pythia-intervention-6.9b-deduped",
            "EleutherAI/pythia-intervention-1.4b-deduped",
            "EleutherAI/pythia-intervention-70m-deduped",
            "EleutherAI/pythia-intervention-long-1.4b-deduped",
            "EleutherAI/pythia-70m-v0",
            "EleutherAI/pythia-1.4b-v0",
            "EleutherAI/pythia-6.9b-v0",
            "EleutherAI/pythia-12b-deduped-v0",
            "EleutherAI/pythia-12b-v0",
            "EleutherAI/pythia-160m-seed3",
            "EleutherAI/pythia-160m-seed2",
            "EleutherAI/pythia-160m-seed1",
            "EleutherAI/neox-ckpt-pythia-6.9b-deduped",
            "EleutherAI/pythia-160m-hiddendropout",
            "EleutherAI/pythia-160m-attndropout",
            "EleutherAI/pythia-160m-alldropout",
            "EleutherAI/pythia-6.9b-deduped-v0-seed42",
        ),
        "cerebras": (
            "cerebras/btlm-3b-8k-base",
            "cerebras/cerebras-GPT-13B",
            "cerebras/cerebras-GPT-6.7B",
            "cerebras/cerebras-GPT-2.7B",
            "cerebras/cerebras-GPT-1.3B",
            "cerebras/cerebras-GPT-590M",
            "cerebras/cerebras-GPT-256M",
            "cerebras/cerebras-GPT-111M",
        ),
        "open_assistant": (
            "OpenAssistant/codellama-13b-oasst-sft-v10",
            "OpenAssistant/llama2-70b-oasst-sft-v10",
            "OpenAssistant/llama2-13b-megacode2-oasst",
            "OpenAssistant/falcon-40b-megacode2-oasst",
            "OpenAssistant/pythia-12b-sft-v8-rlhf-2k-steps",
            "OpenAssistant/llama2-13b-orca-8k-3319",
            "OpenAssistant/falcon-7b-sft-mix-2000",
            "OpenAssistant/falcon-7b-sft-top1-696",
            "OpenAssistant/falcon-40b-sft-mix-1226",
            "OpenAssistant/falcon-40b-sft-top1-560",
            "OpenAssistant/pythia-12b-sft-v8-2.5k-steps",
            "OpenAssistant/pythia-12b-sft-v8-7k-steps",
            "OpenAssistant/pythia-12b-pre-v8-12.5k-steps",
            "OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5",
            "OpenAssistant/stablelm-7b-sft-v7-epoch-3",
            "OpenAssistant/oasst-sft-1-pythia-12b",
            "OpenAssistant/galactica-6.7b-finetuned",
        ),
        "databricks": (
            "databricks/dolly-v2-7b",
            "databricks/dolly-v2-3b",
            "databricks/dolly-v2-12b",
            "databricks/dolly-v1-6b",
        ),
        "nomic_ai": (
            "nomic-ai/gpt4all-falcon",
            "nomic-ai/gpt4all-j",
            "nomic-ai/gpt4all-mpt",
            "nomic-ai/gpt4all-13b-snoozy",
            "nomic-ai/gpt4all-j-lora",
        ),
        "blinkdl": (
            "BlinkDL/rwkv-5-world",
            "BlinkDL/rwkv-4-world",
            "BlinkDL/rwkv-4-raven",
            "BlinkDL/rwkv-4-pile-7b",
            "BlinkDL/rwkv-4-pile-14b",
            "BlinkDL/rwkv-4-novel",
            "BlinkDL/rwkv-4-pileplus",
            "BlinkDL/rwkv-4-pile-430m",
            "BlinkDL/rwkv-4-pile-3b",
            "BlinkDL/rwkv-4-pile-1b5",
            "BlinkDL/rwkv-4-pile-169m",
            "BlinkDL/rwkv-3-pile-1b5",
            "BlinkDL/rwkv-3-pile-430m",
            "BlinkDL/rwkv-2-pile-430m",
            "BlinkDL/rwkv-3-pile-169m",
        ),
        "aisquared": (
            "aisquared/dlite-dais-2023",
            "aisquared/chopt-1_3b",
            "aisquared/chopt-350m",
            "aisquared/chopt-125m",
            "aisquared/chopt-2_7b",
            "aisquared/dlite-v2-1_5b",
            "aisquared/dlite-v2-774m",
            "aisquared/dlite-v2-355m",
            "aisquared/dlite-v2-124m",
            "aisquared/dlite-v1-355m",
            "aisquared/dlite-v1-774m",
            "aisquared/dlite-v1-1_5b",
            "aisquared/dlite-v1-124m",
            "aisquared/chopt-research-350m",
            "aisquared/chopt-research-125m",
            "aisquared/chopt-research-2_7b",
            "aisquared/chopt-research-1_3b",
        ),
        "bloom": (
            "bigscience/bloom-3b-intermediate",
            "bigscience/bloom",
            "bigscience/bloomz-p3",
            "bigscience/bloomz-mt",
            "bigscience/bloomz-7b1-mt",
            "bigscience/bloom-1b7-intermediate",
            "bigscience/bloom-560m-intermediate",
            "bigscience/bloomz-560m",
            "bigscience/bloomz-1b1",
            "bigscience/bloomz-1b7",
            "bigscience/bloomz-3b",
            "bigscience/bloomz-7b1",
            "bigscience/bloomz",
            "bigscience/bloom-1b7",
            "bigscience/bloom-560m",
            "bigscience/bloom-3b",
            "bigscience/bigscience-small-testing",
            "bigscience/distill-bloom-1b3",
            "bigscience/bloom-1b1",
            "bigscience/distill-bloom-1b3-10x",
            "bigscience/test-bloomd",
            "bigscience/test-bloomd-6b3",
            "bigscience/bloom-7b1",
            "bigscience/bloom-petals",
            "bigscience/bloom-1b1-intermediate",
            "bigscience/bloom-7b1-intermediate",
            "bigscience/bloom-7b1-petals",
            "bigscience/bloomz-petals",
            "bigscience/bloomz-7b1-p3",
        ),
        "tiiuae": (
            "tiiuae/falcon-180B",
            "tiiuae/falcon-180B-chat",
            "tiiuae/falcon-40b",
            "tiiuae/falcon-7b",
            "tiiuae/falcon-7b-instruct",
            "tiiuae/falcon-40b-instruct",
            "tiiuae/falcon-rw-7b",
            "tiiuae/falcon-rw-1b",
        ),
        "openlm": (
            "openlm-research/open_llama_3b_v2",
            "openlm-research/open_llama_7b_v2",
            "openlm-research/open_llama_13b",
            "openlm-research/open_llama_7b",
            "openlm-research/open_llama_3b",
        ),
        "stabilityai": (
            "stabilityai/StableBeluga-7B",
            "stabilityai/StableBeluga-13B",
            "stabilityai/StableBeluga2",
            "stabilityai/stablelm-base-alpha-3b-v2",
            "stabilityai/stablelm-base-alpha-7b-v2",
            "stabilityai/japanese-stablelm-instruct-alpha-7b",
            "stabilityai/japanese-stablelm-base-alpha-7b",
            "stabilityai/stablecode-completion-alpha-3b-4k",
            "stabilityai/stablecode-instruct-alpha-3b",
            "stabilityai/stablecode-completion-alpha-3b",
            "stabilityai/StableBeluga1-Delta",
            "stabilityai/stablelm-base-alpha-3b",
            "stabilityai/stablelm-base-alpha-7b",
            "stabilityai/stablelm-tuned-alpha-3b",
            "stabilityai/stablelm-tuned-alpha-7b",
        ),
        "lmsys": (
            "lmsys/vicuna-13b-v1.5-16k",
            "lmsys/vicuna-13b-v1.5",
            "lmsys/vicuna-7b-v1.5-16k",
            "lmsys/longchat-7b-v1.5-32k",
            "lmsys/vicuna-7b-v1.5",
            "lmsys/vicuna-7b-v1.3",
            "lmsys/vicuna-13b-v1.3",
            "lmsys/vicuna-7b-v1.1",
            "lmsys/vicuna-13b-v1.1",
            "lmsys/vicuna-13b-delta-v0",
            "lmsys/vicuna-7b-delta-v0",
            "lmsys/vicuna-13b-delta-v1.1",
            "lmsys/vicuna-7b-delta-v1.1",
            "lmsys/vicuna-33b-v1.3",
            "lmsys/longchat-13b-16k",
            "lmsys/longchat-7b-16k",
        ),
        "together_computer": (
            "togethercomputer/Llama-2-7B-32K-Instruct",
            "togethercomputer/RedPajama-INCITE-7B-Instruct",
            "togethercomputer/LLaMA-2-7B-32K",
            "togethercomputer/RedPajama-INCITE-7B-Base",
            "togethercomputer/RedPajama-INCITE-7B-Chat",
            "togethercomputer/RedPajama-INCITE-Chat-3B-v1",
            "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
            "togethercomputer/RedPajama-INCITE-Base-3B-v1",
            "togethercomputer/GPT-NeoXT-Chat-Base-20B",
            "togethercomputer/Pythia-Chat-Base-7B",
            "togethercomputer/GPT-JT-Moderation-6B",
            "togethercomputer/GPT-JT-6B-v1",
            "togethercomputer/GPT-JT-6B-v0",
        ),
        "mosaic_ml": (
            "mosaicml/mpt-7b-chat",
            "mosaicml/mpt-30b-chat",
            "mosaicml/mpt-7b-8k-chat",
            "mosaicml/mpt-7b-instruct",
            "mosaicml/mpt-7b-8k-instruct",
            "mosaicml/mpt-7b-8k",
            "mosaicml/mpt-30b",
            "mosaicml/mpt-7b",
            "mosaicml/mpt-7b-storywriter",
            "mosaicml/mpt-30b-instruct",
            "mosaicml/mpt-1b-redpajama-200b",
            "mosaicml/mpt-1b-redpajama-200b-dolly",
        ),
        # NOTE: the key is spelled with a zero ("h20ai") although the model
        # ids use "h2oai"; return_models() hands out the same key, so the
        # spelling must not be "corrected" on one side only.
        "h20ai": (
            "h2oai/h2ogpt-16k-codellama-7b-python",
            "h2oai/h2ogpt-16k-codellama-7b-instruct",
            "h2oai/h2ogpt-16k-codellama-7b",
            "h2oai/h2ogpt-16k-codellama-34b-python",
            "h2oai/h2ogpt-16k-codellama-34b-instruct",
            "h2oai/h2ogpt-16k-codellama-13b-python",
            "h2oai/h2ogpt-16k-codellama-13b-instruct",
            "h2oai/h2ogpt-16k-codellama-13b",
            "h2oai/h2ogpt-16k-codellama-34b",
            "h2oai/h2ogpt-4096-llama2-13b-chat",
            "h2oai/h2ogpt-4096-llama2-70b-chat",
            "h2oai/h2ogpt-4096-llama2-7b-chat",
            "h2oai/h2ogpt-4096-llama2-13b",
            "h2oai/h2ogpt-4096-llama2-7b",
            "h2oai/h2ogpt-4096-llama2-70b",
            "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2",
            "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v3",
            "h2oai/h2ogpt-research-oasst1-llama-65b",
            "h2oai/h2ogpt-gm-oasst1-en-xgen-7b-8k",
            "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-3b",
            "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b",
            "h2oai/h2ogpt-oasst1-falcon-40b",
            "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
            "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-13b",
            "h2oai/h2ogpt-oig-oasst1-falcon-40b",
            "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b",
            "h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b",
            "h2oai/h2ogpt-oasst1-512-12b",
            "h2oai/h2ogpt-oig-oasst1-256-6_9b",
            "h2oai/h2ogpt-oig-oasst1-512-6_9b",
            "h2oai/h2ogpt-research-oig-oasst1-512-30b",
            "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-700bt",
            "h2oai/h2ogpt-gm-oasst1-en-1024-open-llama-7b-preview-400bt",
            "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2",
            "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt",
            "h2oai/h2ogpt-gm-oasst1-en-1024-12b",
            "h2oai/h2ogpt-gm-oasst1-en-1024-20b",
            "h2oai/h2ogpt-gm-oasst1-multilang-1024-20b",
            "h2oai/h2ogpt-oasst1-512-20b",
        ),
        "facebook": (
            "facebook/xglm-4.5B",
            "facebook/galactica-125m",
            "facebook/opt-iml-1.3b",
            "facebook/opt-iml-max-1.3b",
            "facebook/opt-iml-max-30b",
            "facebook/opt-iml-30b",
            "facebook/galactica-120b",
            "facebook/galactica-30b",
            "facebook/galactica-6.7b",
            "facebook/galactica-1.3b",
            "facebook/opt-66b",
            "facebook/opt-30b",
            "facebook/opt-13b",
            "facebook/opt-6.7b",
            "facebook/opt-2.7b",
            "facebook/opt-1.3b",
            "facebook/opt-350m",
            "facebook/opt-125m",
            "facebook/incoder-1B",
            "facebook/incoder-6B",
            "facebook/xglm-7.5B",
            "facebook/xglm-564M",
            "facebook/xglm-2.9B",
            "facebook/xglm-1.7B",
        ),
        "microsoft": (
            "microsoft/dolly-v2-7b-olive-optimized",
            "microsoft/lts-gpt2-sm",
            "microsoft/BioGPT-Large",
            "microsoft/BioGPT-Large-PubMedQA",
            "microsoft/biogpt",
            "microsoft/git-large-vatex",
            "microsoft/git-base-msrvtt-qa",
            "microsoft/git-base-vatex",
            "microsoft/Promptist",
            "microsoft/CodeGPT-small-py",
            "microsoft/CodeGPT-small-py-adaptedGPT2",
            "microsoft/CodeGPT-small-java",
            "microsoft/CodeGPT-small-java-adaptedGPT2",
            "microsoft/phi-1",
            "microsoft/phi-1_5",
        ),
        "langboat": (
            "Langboat/bloom-1b4-zh",
            "Langboat/ReGPT-125M-200G",
            "Langboat/bloom-6b4-zh",
            "Langboat/bloom-2b5-zh",
            "Langboat/bloom-800m-zh",
            "Langboat/bloom-389m-zh",
            "Langboat/mengzi-gpt-neo-base",
        ),
        "allenai": (
            "allenai/bhaskara",
            "allenai/open-instruct-pythia-6.9b-tulu",
            "allenai/open-instruct-opt-6.7b-tulu",
            "allenai/open-instruct-sni-13b",
            "allenai/open-instruct-flan-v2-13b",
            "allenai/open-instruct-dolly-13b",
            "allenai/open-instruct-self-instruct-13b",
            "allenai/open-instruct-self-instruct-7b",
            "allenai/open-instruct-oasst1-13b",
            "allenai/open-instruct-oasst1-7b",
            "allenai/open-instruct-sni-7b",
            "allenai/open-instruct-baize-13b",
            "allenai/open-instruct-baize-7b",
            "allenai/open-instruct-cot-13b",
            "allenai/open-instruct-cot-7b",
            "allenai/open-instruct-gpt4-alpaca-13b",
            "allenai/open-instruct-gpt4-alpaca-7b",
            "allenai/open-instruct-human-mix-7b",
            "allenai/open-instruct-human-mix-13b",
            "allenai/open-instruct-sharegpt-7b",
            "allenai/open-instruct-sharegpt-13b",
            "allenai/open-instruct-sharegpt-30b",
            "allenai/open-instruct-human-mix-30b",
            "allenai/tulu-7b",
            "allenai/tulu-13b",
            "allenai/tulu-30b",
            "allenai/open-instruct-code-alpaca-13b",
            "allenai/open-instruct-code-alpaca-7b",
            "allenai/open-instruct-unnatural-instructions-13b",
            "allenai/open-instruct-unnatural-instructions-7b",
            "allenai/open-instruct-stanford-alpaca-13b",
            "allenai/open-instruct-stanford-alpaca-7b",
            "allenai/open-instruct-flan-v2-7b",
            "allenai/open-instruct-dolly-7b",
            "allenai/tulu-65b",
            "allenai/open-instruct-sharegpt-65b",
            "allenai/open-instruct-human-mix-65b",
        ),
        "deciai": (
            "Deci/DeciLM-6b",
            "Deci/DeciLM-6b-instruct",
            "Deci/DeciCoder-1b",
        ),
        "salesforce": (
            "Salesforce/codegen25-7b-multi",
            "Salesforce/xgen-7b-4k-base",
            "Salesforce/xgen-7b-8k-inst",
            "Salesforce/xgen-7b-8k-base",
            "Salesforce/codegen25-7b-mono",
            "Salesforce/codegen25-7b-instruct",
            "Salesforce/ctrl",
            "Salesforce/codegen2-16B",
            "Salesforce/codegen2-7B",
            "Salesforce/codegen2-3_7B",
            "Salesforce/codegen2-1B",
            "Salesforce/codegen-350M-mono",
            "Salesforce/codegen-350M-multi",
            "Salesforce/codegen-16B-nl",
            "Salesforce/codegen-2B-mono",
            "Salesforce/codegen-16B-multi",
            "Salesforce/codegen-2B-multi",
            "Salesforce/codegen-6B-nl",
            "Salesforce/codegen-2B-nl",
            "Salesforce/codegen-350M-nl",
            "Salesforce/codegen-6B-multi",
            "Salesforce/codegen-6B-mono",
            "Salesforce/codegen-16B-mono",
        ),
    }

    # Only the literal True switches to counting mode; model_parent is not
    # consulted in that case.
    if count is True:
        return sum(len(models) for models in model_dict.values())

    return model_dict[model_parent]