Text Generation
Transformers
Safetensors
English
deberta
reward_model
reward-model
RLHF
evaluation
llm
instruction
reranking
Inference Endpoints
File size: 508 Bytes
7b9ee76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
{
    "ranker_type": "pairranker",
    "model_type": "deberta",
    "model_name": "microsoft/deberta-v3-large",
    "cache_dir": "./hf_models/deberta-v3-large/",
    "load_checkpoint": null,
    "source_maxlength": 1224,
    "candidate_maxlength": 412,
    "n_tasks": 1,
    "num_pos": 5,
    "num_neg": 5,
    "sub_sampling_mode": "all_pair",
    "sub_sampling_ratio": 0.4,
    "loss_type": "instructgpt",
    "reduce_type": "linear",
    "inference_mode": "bubble",
    "drop_out": 0.05,
    "fp16": true
}