File size: 948 Bytes
280d87f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import torch
from transformers import (AutoTokenizer, BertForTokenClassification,
                          get_linear_schedule_with_warmup)

# Default location of the JSON text-annotation dataset, given as a relative
# path (so it is resolved against the process working directory).
DEFAULT_TEXT_ANNOTATION_FILE = "Datasets/Query/datasets_text.json"

# Identifier of the pretrained checkpoint handed to the tokenizer loader below.
pretrain_model_name = "vinai/phobert-base-v2"
# Created once at import time, so importing this module triggers the tokenizer
# load (presumably from the Hugging Face hub or a local cache - confirm).
tokenizer = AutoTokenizer.from_pretrained(pretrain_model_name)

# Training hyperparameters
batch_size = 64  # samples drawn per batch
epochs = 50  # number of training epochs

# Run on the GPU when torch reports CUDA support, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Optimizer settings (presumably learning rate, epsilon and weight decay -
# confirm against the training script that consumes them).
lr = 5e-5
eps = 1e-8
weight_decay = 1e-5

# Paths for saving/loading the trained model and for the test response tags.
# All paths use forward slashes: they work on Windows and POSIX alike, contain
# no backslash escape sequences, and keep model_saved_path pointing at the
# same directory as model_load_path so a saved model can be loaded back.
model_saved_path = "Model_API/Saved_Model/key_ner_new_data_method"
model_load_path = "Model_API/Saved_Model/key_ner_new_data_method"
# NOTE: the "respone" spelling is kept so existing importers keep working.
save_respone_tags_path = "Datasets/Query/answer_test.json"
# Location of the ONNX model file (going by its name and extension).
onnx_path = "Model_API/Saved_Model/key_ner.onnx"