Text Generation
Transformers
Safetensors
English
deberta
reward_model
reward-model
RLHF
evaluation
llm
instruction
reranking
Inference Endpoints
PairRM / ranker_config.json
Dongfu Jiang
Upload 9 files
7b9ee76
{
  "ranker_type": "pairranker",
  "model_type": "deberta",
  "model_name": "microsoft/deberta-v3-large",
  "cache_dir": "./hf_models/deberta-v3-large/",
  "load_checkpoint": null,
  "source_maxlength": 1224,
  "candidate_maxlength": 412,
  "n_tasks": 1,
  "num_pos": 5,
  "num_neg": 5,
  "sub_sampling_mode": "all_pair",
  "sub_sampling_ratio": 0.4,
  "loss_type": "instructgpt",
  "reduce_type": "linear",
  "inference_mode": "bubble",
  "drop_out": 0.05,
  "fp16": true
}