Commit c5183c8 by oliver-aizip · 1 Parent(s): 491e00d

add a ton of models and update arena dataset
Files changed (2):
  1. utils/arena_df.csv +0 -0
  2. utils/models.py +13 -2
utils/arena_df.csv CHANGED
The diff for this file is too large to render. See raw diff
 
utils/models.py CHANGED
@@ -18,7 +18,14 @@ models = {
     "Phi-4-mini-instruct": "microsoft/phi-4-mini-instruct",
     #"Cogito-v1-preview-llama-3b": "deepcogito/cogito-v1-preview-llama-3b",
     "IBM Granite-3.3-2b-instruct": "ibm-granite/granite-3.3-2b-instruct",
-    "Bitnet-b1.58-2B4T": "microsoft/bitnet-b1.58-2B-4T"
+    #"Bitnet-b1.58-2B4T": "microsoft/bitnet-b1.58-2B-4T",
+    "MiniCPM3-RAG-LoRA": "openbmb/MiniCPM3-RAG-LoRA",
+    "Qwen3-0.6b": "qwen/qwen3-0.6b",
+    "Qwen3-1.7b": "qwen/qwen3-1.7b",
+    "Qwen3-4b": "qwen/qwen3-4b",
+    "SmolLM2-1.7b-Instruct": "huggingfacetb/smolllm2-1.7b-instruct",
+    "EXAONE-3.5-2.4B-instruct": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct",
+    "OLMo-2-1B-Instruct": "allenai/OLMo-2-0425-1B-Instruct",
 
 }
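Each entry above maps a display name to a Hugging Face repo id. For reference, a minimal sketch of loading one of the newly added models (repo id exactly as committed; standard transformers loading, not code from this repo):

from transformers import AutoModelForCausalLM, AutoTokenizer

# models["Qwen3-0.6b"] resolves to the repo id added in this commit
repo_id = "qwen/qwen3-0.6b"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)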
 
@@ -94,6 +101,10 @@ def run_inference(model_name, context, question):
 
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     result = ""
+    model_kwargs = {}  # make sure qwen3 doesn't use thinking
+    if "qwen3" in model_name.lower():  # Making it case-insensitive and checking for substring
+        print(f"Recognized {model_name} as a Qwen3 model. Setting enable_thinking=False.")
+        model_kwargs["enable_thinking"] = False
 
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left", token=True)
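The added block pre-computes model_kwargs so that Qwen3 models skip thinking mode. For context, Qwen3's documented switch is the enable_thinking flag on the chat template; a minimal sketch of that usage (repo id from the dict above, message content hypothetical):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("qwen/qwen3-0.6b")
messages = [{"role": "user", "content": "Hello"}]
# enable_thinking=False puts Qwen3 in non-thinking mode at template time
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)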
@@ -114,10 +125,10 @@
         model=model_name,
         tokenizer=tokenizer,
         device_map='auto',
-        max_length=512,
         do_sample=True,
         temperature=0.6,
         top_p=0.9,
+        model_kwargs=model_kwargs,
     )
 
     text_input = format_rag_prompt(question, context, accepts_sys)
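This hunk drops the fixed max_length=512 cap and forwards model_kwargs into the pipeline constructor, which transformers passes through to from_pretrained. A sketch of the resulting call shape; the task name and the generation-length cap are assumptions, not shown in the diff:

from transformers import pipeline

pipe = pipeline(
    "text-generation",  # task assumed; not visible in the diff
    model=model_name,
    tokenizer=tokenizer,
    device_map="auto",
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    model_kwargs=model_kwargs,  # {} for most models, {"enable_thinking": False} for Qwen3
)
outputs = pipe(text_input, max_new_tokens=512)  # illustrative replacement for max_length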
 