rashmi committed on
Commit
f4d8307
1 Parent(s): 700d0e6
Files changed (2)
  1. app.py +56 -6
  2. model_finetuned/config.pth +0 -3
app.py CHANGED
@@ -34,7 +34,7 @@ title = "H2O AI Predict the LLM"
 
 description =" The objective of this [competition](https://www.kaggle.com/competitions/h2oai-predict-the-llm) was to \
 detect which out of 7 possible LLM models produced a particular response. \n\n\
- This demo is utilizing finetuned HuggingFaceH4/zephyr-7b-beta model for a multiclass classification task. \
+ This demo is utilizing finetuned HuggingFaceH4/zephyr-7b-beta model for a multiclass classification task. \n\n \
 Our team's solution is [here](https://www.kaggle.com/competitions/h2oai-predict-the-llm/discussion/453728)"
 
 title = title + "\n" + description
@@ -163,11 +163,61 @@ model = CustomModel()
 ### End Load the model
 
 def do_inference(full_text):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model_paths = [
+        'model_finetuned/HuggingFaceH4-zephyr-7b-beta_fold0_best.pth']
+
+    # config_path = ("/home/rashmi/Documents/kaggle/h2oai_predict_llm/src/models_exp56/config.pth")
+
+    def prepare_input(cfg, text):
+        inputs = cfg.tokenizer.encode_plus(
+            text,
+            return_tensors=None,
+            add_special_tokens=True,
+            max_length=CFG.max_len,
+            pad_to_max_length=True,
+            truncation="longest_first",
+        )
+        for k, v in inputs.items():
+            inputs[k] = torch.tensor(v, dtype=torch.long)
+        return inputs
+
+    # model = CustomModel()
+    state = torch.load(model_paths[0], map_location=torch.device("cpu"))
+    model.load_state_dict(state["model"]) # ,strict=False)
+    model.eval()
+    model.to(device)
+
+    inputs = prepare_input(CFG, full_text)
+    inputs["input_ids"] = inputs["input_ids"].reshape(1, -1).to(device)
+    inputs["attention_mask"] = inputs["attention_mask"].reshape(1, -1).to(device)
 
-
-    return "result"
-
-
+    with torch.no_grad():
+        with torch.cuda.amp.autocast(
+            enabled=True, dtype=torch.float16, cache_enabled=True
+        ):
+            y_preds = model(inputs)
+        y_preds = y_preds.detach().to("cpu").numpy().astype(np.float32)
+        y_preds = torch.softmax(torch.tensor(y_preds), 1).numpy()
+
+    result = np.argmax(y_preds)
+
+    if result == 0:
+        return "0. llama2-70b-chat"
+    elif result == 1:
+        return "1. wizardLM-13b"
+    elif result == 2:
+        return "2. llama2-13b-chat"
+    elif result == 3:
+        return "3. wizardLM-70b"
+    elif result == 4:
+        return "4. llama2-7b-chat"
+    elif result == 5:
+        return "5. tinyllama-1b-chat"
+    elif result == 6:
+        return "6. mistral-7b-openorca"
+    else:
+        return "Error"
 
 
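The new `do_inference` body relies on a `CFG` object (tokenizer plus `max_len`) and the `CustomModel` instance created earlier in app.py (`model = CustomModel()` in the hunk context above), neither of which appears in this diff. A minimal sketch of what those definitions are assumed to look like, with the backbone inferred from the checkpoint filename and a 7-way head matching the labels returned above; the actual app.py may differ:

```python
# Hypothetical scaffolding assumed by the new do_inference(); not part of this commit.
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModel, AutoTokenizer


class CFG:
    # Assumed settings: the tokenizer matches the finetuned backbone and
    # max_len matches the value used when the fold-0 checkpoint was trained.
    backbone = "HuggingFaceH4/zephyr-7b-beta"
    max_len = 512
    tokenizer = AutoTokenizer.from_pretrained(backbone)


class CustomModel(nn.Module):
    # Assumed architecture: zephyr-7b-beta backbone plus a linear head over
    # 7 classes, one per candidate LLM listed in do_inference().
    def __init__(self, num_classes=7):
        super().__init__()
        self.config = AutoConfig.from_pretrained(CFG.backbone)
        self.backbone = AutoModel.from_pretrained(CFG.backbone, config=self.config)
        self.head = nn.Linear(self.config.hidden_size, num_classes)

    def forward(self, inputs):
        out = self.backbone(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
        )
        # Mean-pool token embeddings before the classification head;
        # the finetuned model may pool differently.
        pooled = out.last_hidden_state.mean(dim=1)
        return self.head(pooled)
```

Note that the checkpoint is loaded inside `do_inference` on every call, which keeps Space startup light at the cost of re-reading the `.pth` file for each prediction.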
 
@@ -175,7 +225,7 @@ def do_inference(full_text):
 def do_submit(question, response):
     full_text = question + " " + response
     result = do_inference(full_text)
-    return "result"
+    return result
 
 @spaces.GPU
 def greet():
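With `do_inference` now returning the predicted source model as a string and `do_submit` passing that result through instead of the old placeholder, the remaining piece is the UI that calls `do_submit`. That wiring is not part of this commit; a rough sketch of how a Gradio Blocks layout could hook it up, reusing `title` and `do_submit` from app.py but with purely illustrative component names:

```python
# Illustrative Gradio wiring for the Space; not taken from this commit.
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown(title)  # title already has the competition description appended
    question = gr.Textbox(label="Question")
    response = gr.Textbox(label="LLM response")
    predicted = gr.Textbox(label="Predicted source model")
    submit = gr.Button("Submit")
    # do_submit() concatenates question + response and returns the label from do_inference()
    submit.click(do_submit, inputs=[question, response], outputs=predicted)

demo.launch()
```

The `@spaces.GPU` decorator visible on `greet()` suggests the Space allocates a GPU for decorated calls via the `spaces` package.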
 
model_finetuned/config.pth DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7a170d96950730d29ea3f6fdc76b3beb9bc9806126ee0be945cffbc12419d2c9
-size 3356