Vivek
/

gptneo_storycloze

Inference Endpoints

Model card · Files and versions · Training metrics · Community

Vivek committed on Jul 27, 2021

Commit

853d225

·

1 Parent(s): 0972377

3e-7 lr update

Files changed (2) hide show

.DS_Store +0 -0
src/gptneo_story.py +1 -1

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

src/gptneo_story.py CHANGED Viewed

@@ -75,7 +75,7 @@ print('The overall batch size (both for training and eval) is', total_batch_size
 num_train_steps = len(train_dataset) // total_batch_size * num_train_epochs
 num_validation_steps=len(validation_dataset)//total_batch_size*num_train_epochs
-learning_rate_function = optax.linear_schedule(init_value=learning_rate, end_value=0, transition_steps=num_train_steps)
 class TrainState(train_state.TrainState):
   logits_function:Callable=flax.struct.field(pytree_node=False)

 num_train_steps = len(train_dataset) // total_batch_size * num_train_epochs
 num_validation_steps=len(validation_dataset)//total_batch_size*num_train_epochs
+learning_rate_function = optax.linear_schedule(init_value=learning_rate, end_value=3e-7, transition_steps=num_train_steps)
 class TrainState(train_state.TrainState):
   logits_function:Callable=flax.struct.field(pytree_node=False)