Text Generation
Transformers
PyTorch
English
llama
text-generation-inference
Inference Endpoints
winglian committed on
Commit
4c55828
1 Parent(s): 678c148

Create configs/openorca.yml

Browse files
Files changed (1) hide show
  1. configs/openorca.yml +67 -0
configs/openorca.yml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: huggyllama/llama-13b
2
+ base_model_config: huggyllama/llama-13b
3
+ model_type: LlamaForCausalLM
4
+ tokenizer_type: LlamaTokenizer
5
+ load_in_8bit: false
6
+ load_in_4bit: false
7
+ strict: false
8
+ push_dataset_to_hub: openaccess-ai-collective
9
+ hf_use_auth_token: true
10
+ datasets:
11
+   - path: Open-Orca/oo-gpt4-200k
12
+     type: alpaca_w_system.load_open_orca
13
+ dataset_prepared_path: last_run_prepared
14
+ val_set_size: 0.05
15
+ adapter:
16
+ lora_model_dir:
17
+ sequence_len: 2048
18
+ max_packed_sequence_len:
19
+ lora_r:
20
+ lora_alpha:
21
+ lora_dropout:
22
+ lora_target_modules:
23
+ lora_target_linear:
24
+ lora_fan_in_fan_out:
25
+ wandb_project: open-orca-13b
26
+ wandb_watch:
27
+ wandb_run_id:
28
+ wandb_log_model:
29
+ output_dir: ./open-orca-13b
30
+ gradient_accumulation_steps: 1
31
+ micro_batch_size: 8
32
+ num_epochs: 5
33
+ optimizer: adamw_bnb_8bit
34
+ torchdistx_path:
35
+ lr_scheduler: cosine
36
+ learning_rate: 0.00005
37
+ train_on_inputs: false
38
+ group_by_length: true
39
+ bf16: true
40
+ fp16: false
41
+ tf32: true
42
+ gradient_checkpointing: true
43
+ early_stopping_patience:
44
+ resume_from_checkpoint:
45
+ local_rank:
46
+ logging_steps: 1
47
+ xformers_attention: true
48
+ flash_attention:
49
+ gptq_groupsize:
50
+ gptq_model_v1:
51
+ warmup_steps: 150
52
+ eval_steps: 495
53
+ save_steps: 2970
54
+ debug:
55
+ deepspeed:
56
+ weight_decay: 0.03
57
+ fsdp:
58
+ - full_shard
59
+ - auto_wrap
60
+ fsdp_config:
61
+   fsdp_offload_params: true
62
+   fsdp_state_dict_type: FULL_STATE_DICT
63
+   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
64
+ special_tokens:
65
+   bos_token: "<s>"
66
+   eos_token: "</s>"
67
+   unk_token: "<unk>"