File size: 2,587 Bytes
7cf0ad9 192a496 7cf0ad9 192a496 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
---
license: cc-by-nc-sa-4.0
language:
- 'no'
---
# Model Card
NorGPT-369M-NO-BoolQ-peft is trained on top of the [NorGPT-369M](https://huggingface.co/NorGLM/NorGPT-369M) model on the [NO-BoolQ](https://huggingface.co/datasets/NorGLM/NO-BoolQ) dataset.
Data format:
```
input: {passage}[SEP]{question}
label: {True, False} -> {1,0}
```
## Run the Model
```python
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Run on the GPU when one is available, otherwise fall back to the CPU.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

source_model_id = "NorGLM/NorGPT-369M"
peft_model_id = "NorGLM/NorGPT-369M-NO-BoolQ-peft"

# Adapter configuration for the PEFT checkpoint; it is not referenced again
# in this snippet.
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(source_model_id, device_map='balanced')

# The tokenizer comes from the base model, with an explicit maximum length.
tokenizer_max_len = 2048
tokenizer_config = {
    'pretrained_model_name_or_path': source_model_id,
    'max_len': tokenizer_max_len,
}
tokenizer = AutoTokenizer.from_pretrained(**tokenizer_config)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Load the fine-tuned adapter on top of the base model.
model = PeftModel.from_pretrained(model, peft_model_id)
```
## Inference Example
Load the model to evaluate on the validation set:
```python
def getDataSetFromFiles(df):
    """Convert a NO-BoolQ pandas frame into a Hugging Face ``Dataset``.

    A ``text`` column of the form ``"<passage> [SEP] <question>"`` is built,
    the ``idx``, ``passage`` and ``question`` columns are dropped, and the
    ``label`` column is mapped to integers (``True -> 1``, ``False -> 0``).
    The input frame itself is left unmodified.

    Args:
        df: DataFrame with ``idx``, ``passage``, ``question`` and ``label``
            columns.

    Returns:
        A ``datasets.Dataset`` created from the converted frame.
    """
    # Imported locally because ``Dataset`` is not imported anywhere else in
    # the snippets shown in this card.
    from datasets import Dataset

    # Column-wise concatenation yields the same "<passage> [SEP] <question>"
    # strings as joining each row, without a Python-level call per row.
    text = df["passage"].astype(str) + " [SEP] " + df["question"].astype(str)

    # drop() and assign() both return new frames, so the caller's frame is
    # not changed; ``text`` is appended as the last column.
    converted = df.drop(columns=["idx", "passage", "question"]).assign(text=text)
    converted["label"] = converted["label"].map({True: 1, False: 0})
    return Dataset.from_pandas(converted)
# These names are used below but are not imported by the earlier snippet.
# ``torch``, ``torch_device``, ``model`` and ``tokenizer`` are expected to be
# in scope from the "Run the Model" snippet.
import numpy as np
from datasets import load_dataset
from sklearn.metrics import accuracy_score, f1_score

print("--LOADING EVAL DATAS---")
# The loaded validation file is read from the "train" key of the result.
eval_data = load_dataset("NorGLM/NO-BoolQ", data_files="val.jsonl")
eval_data = getDataSetFromFiles(eval_data["train"].to_pandas())

print("--MAKING PREDICTIONS---")
model.eval()

y_true = []
y_pred = []
for count, data in enumerate(eval_data, start=1):
    # Progress indicator: print the running count every 100 examples.
    if count % 100 == 0:
        print(count)

    inputs = tokenizer(data['text'], return_tensors="pt").to(torch_device)
    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        logits = model(**inputs).logits

    # NOTE(review): argmax() without ``dim`` is taken over the flattened
    # logits tensor. That equals the class id only if the logits have shape
    # (1, num_labels); the setup snippet loads AutoModelForCausalLM, so
    # confirm the adapter actually produces classification logits.
    predicted_class_id = logits.argmax().item()

    y_true.append(data['label'])
    y_pred.append(predicted_class_id)

print(y_pred)
print(f"Lenght of true_values: {len(y_true)}")
print(f"Lenght of predicted_values: {len(y_pred)}")

y_true = np.array(y_true)
y_pred = np.array(y_pred)

F_score = f1_score(y_true, y_pred, average="macro")
print(f"F1 score: {F_score}")

accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy}")
```
## Note
More training details will be released soon!