jordiclive committed on
Commit
2069da8
1 Parent(s): 1ea9dde

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -51,7 +51,7 @@ The model was trained with flash attention and gradient checkpointing and deepsp
51
  - Batch size: 128
52
  - Max Length: 2048
53
  - Learning rate: 5e-5
54
- - Lora _r_: 16
55
  - Lora Alpha: 32
56
 
57
  ## Prompting
@@ -80,7 +80,7 @@ from transformers import GenerationConfig
80
 
81
  device = "cuda" if torch.cuda.is_available() else "cpu"
82
  dtype = torch.float16
83
- repo_id = "jordiclive/alpaca_gpt4-dolly_15k-vicuna-lora-30b-r64"
84
  base_model = "decapoda-research/llama-30b-hf"
85
 
86
  # Model Loading
 
51
  - Batch size: 128
52
  - Max Length: 2048
53
  - Learning rate: 5e-5
54
+ - Lora _r_: 64
55
  - Lora Alpha: 32
56
 
57
  ## Prompting
 
80
 
81
  device = "cuda" if torch.cuda.is_available() else "cpu"
82
  dtype = torch.float16
83
+ repo_id = "jordiclive/lora-llama-33B-alpaca_gpt4-dolly_15k-vicuna-r64"
84
  base_model = "decapoda-research/llama-30b-hf"
85
 
86
  # Model Loading