rashmi committed on
Commit
f4d8307
1 Parent(s): 700d0e6
Files changed (2)
  1. app.py +56 -6
  2. model_finetuned/config.pth +0 -3
app.py CHANGED
@@ -34,7 +34,7 @@ title = "H2O AI Predict the LLM"
 
 description =" The objective of this [competition](https://www.kaggle.com/competitions/h2oai-predict-the-llm) was to \
 detect which out of 7 possible LLM models produced a particular response. \n\n\
- This demo is utilizing finetuned HuggingFaceH4/zephyr-7b-beta model for a multiclass classification task. \
+ This demo is utilizing finetuned HuggingFaceH4/zephyr-7b-beta model for a multiclass classification task. \n\n \
 Our team's solution is [here](https://www.kaggle.com/competitions/h2oai-predict-the-llm/discussion/453728)"
 
 title = title + "\n" + description
@@ -163,11 +163,61 @@ model = CustomModel()
 ### End Load the model
 
 def do_inference(full_text):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model_paths = [
+        'model_finetuned/HuggingFaceH4-zephyr-7b-beta_fold0_best.pth']
+
+    # config_path = ("/home/rashmi/Documents/kaggle/h2oai_predict_llm/src/models_exp56/config.pth")
+
+    def prepare_input(cfg, text):
+        inputs = cfg.tokenizer.encode_plus(
+            text,
+            return_tensors=None,
+            add_special_tokens=True,
+            max_length=CFG.max_len,
+            pad_to_max_length=True,
+            truncation="longest_first",
+        )
+        for k, v in inputs.items():
+            inputs[k] = torch.tensor(v, dtype=torch.long)
+        return inputs
+
+    # model = CustomModel()
+    state = torch.load(model_paths[0], map_location=torch.device("cpu"))
+    model.load_state_dict(state["model"]) # ,strict=False)
+    model.eval()
+    model.to(device)
+
+    inputs = prepare_input(CFG, full_text)
+    inputs["input_ids"] = inputs["input_ids"].reshape(1, -1).to(device)
+    inputs["attention_mask"] = inputs["attention_mask"].reshape(1, -1).to(device)
 
-
-    return "result"
-
-
+    with torch.no_grad():
+        with torch.cuda.amp.autocast(
+            enabled=True, dtype=torch.float16, cache_enabled=True
+        ):
+            y_preds = model(inputs)
+        y_preds = y_preds.detach().to("cpu").numpy().astype(np.float32)
+        y_preds = torch.softmax(torch.tensor(y_preds), 1).numpy()
+
+    result = np.argmax(y_preds)
+
+    if result == 0:
+        return "0. llama2-70b-chat"
+    elif result == 1:
+        return "1. wizardLM-13b"
+    elif result == 2:
+        return "2. llama2-13b-chat"
+    elif result == 3:
+        return "3. wizardLM-70b"
+    elif result == 4:
+        return "4. llama2-7b-chat"
+    elif result == 5:
+        return "5. tinyllama-1b-chat"
+    elif result == 6:
+        return "6. mistral-7b-openorca"
+    else:
+        return "Error"
 
 
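The new `do_inference` body relies on a `CFG` object (tokenizer plus `max_len`) and the `CustomModel` instance created earlier in app.py (`model = CustomModel()` in the hunk context above), neither of which appears in this diff. A minimal sketch of what those definitions are assumed to look like, with the backbone inferred from the checkpoint filename and a 7-way head matching the labels returned above; the actual app.py may differ:

```python
# Hypothetical scaffolding assumed by the new do_inference(); not part of this commit.
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModel, AutoTokenizer


class CFG:
    # Assumed settings: the tokenizer matches the finetuned backbone and
    # max_len matches the value used when the fold-0 checkpoint was trained.
    backbone = "HuggingFaceH4/zephyr-7b-beta"
    max_len = 512
    tokenizer = AutoTokenizer.from_pretrained(backbone)


class CustomModel(nn.Module):
    # Assumed architecture: zephyr-7b-beta backbone plus a linear head over
    # 7 classes, one per candidate LLM listed in do_inference().
    def __init__(self, num_classes=7):
        super().__init__()
        self.config = AutoConfig.from_pretrained(CFG.backbone)
        self.backbone = AutoModel.from_pretrained(CFG.backbone, config=self.config)
        self.head = nn.Linear(self.config.hidden_size, num_classes)

    def forward(self, inputs):
        out = self.backbone(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
        )
        # Mean-pool token embeddings before the classification head;
        # the finetuned model may pool differently.
        pooled = out.last_hidden_state.mean(dim=1)
        return self.head(pooled)
```

Note that the checkpoint is loaded inside `do_inference` on every call, which keeps Space startup light at the cost of re-reading the `.pth` file for each prediction.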
 
@@ -175,7 +225,7 @@ def do_inference(full_text):
 def do_submit(question, response):
     full_text = question + " " + response
     result = do_inference(full_text)
-    return "result"
+    return result
 
 @spaces.GPU
 def greet():
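With `do_inference` now returning the predicted source model as a string and `do_submit` passing that result through instead of the old placeholder, the remaining piece is the UI that calls `do_submit`. That wiring is not part of this commit; a rough sketch of how a Gradio Blocks layout could hook it up, reusing `title` and `do_submit` from app.py but with purely illustrative component names:

```python
# Illustrative Gradio wiring for the Space; not taken from this commit.
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown(title)  # title already has the competition description appended
    question = gr.Textbox(label="Question")
    response = gr.Textbox(label="LLM response")
    predicted = gr.Textbox(label="Predicted source model")
    submit = gr.Button("Submit")
    # do_submit() concatenates question + response and returns the label from do_inference()
    submit.click(do_submit, inputs=[question, response], outputs=predicted)

demo.launch()
```

The `@spaces.GPU` decorator visible on `greet()` suggests the Space allocates a GPU for decorated calls via the `spaces` package.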
 
model_finetuned/config.pth DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7a170d96950730d29ea3f6fdc76b3beb9bc9806126ee0be945cffbc12419d2c9
-size 3356