Image-Text-to-Text
Transformers
Safetensors
English
qwen2_5_vl
ocr
vision
qwen2.5-vl
pdf
document-understanding
conversational
text-generation-inference
Instructions to use rootsautomation/GutenOCR-7B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use rootsautomation/GutenOCR-7B with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="rootsautomation/GutenOCR-7B")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)
```

```python
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("rootsautomation/GutenOCR-7B")
model = AutoModelForImageTextToText.from_pretrained("rootsautomation/GutenOCR-7B")
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use rootsautomation/GutenOCR-7B with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "rootsautomation/GutenOCR-7B"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "rootsautomation/GutenOCR-7B",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker
docker model run hf.co/rootsautomation/GutenOCR-7B
- SGLang
How to use rootsautomation/GutenOCR-7B with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "rootsautomation/GutenOCR-7B" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "rootsautomation/GutenOCR-7B",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
Use Docker images
```bash
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "rootsautomation/GutenOCR-7B" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "rootsautomation/GutenOCR-7B",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Describe this image in one sentence."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
                        }
                    }
                ]
            }
        ]
    }'
```
- Docker Model Runner
How to use rootsautomation/GutenOCR-7B with Docker Model Runner:
docker model run hf.co/rootsautomation/GutenOCR-7B
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.125, | |
| "eval_steps": 500, | |
| "global_step": 2080, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0, | |
| "eval_loss": 0.5193641781806946, | |
| "eval_runtime": 15.6757, | |
| "eval_samples_per_second": 2.041, | |
| "eval_steps_per_second": 0.255, | |
| "step": 0 | |
| }, | |
| { | |
| "epoch": 0.0006009615384615385, | |
| "grad_norm": 55.403633140710234, | |
| "learning_rate": 9e-09, | |
| "loss": 0.5113, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.001201923076923077, | |
| "grad_norm": 23.47426110450185, | |
| "learning_rate": 1.8999999999999998e-08, | |
| "loss": 0.4385, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0018028846153846155, | |
| "grad_norm": 10.224385416254647, | |
| "learning_rate": 2.9e-08, | |
| "loss": 0.4586, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.002403846153846154, | |
| "grad_norm": 20.74032508741799, | |
| "learning_rate": 3.9e-08, | |
| "loss": 0.4234, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0030048076923076925, | |
| "grad_norm": 21.168707763774822, | |
| "learning_rate": 4.9e-08, | |
| "loss": 0.526, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.003605769230769231, | |
| "grad_norm": 22.648636815908606, | |
| "learning_rate": 5.899999999999999e-08, | |
| "loss": 0.5596, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.004206730769230769, | |
| "grad_norm": 13.508051762249275, | |
| "learning_rate": 6.900000000000001e-08, | |
| "loss": 0.4981, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.004807692307692308, | |
| "grad_norm": 23.403551137207025, | |
| "learning_rate": 7.899999999999999e-08, | |
| "loss": 0.5008, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.005408653846153846, | |
| "grad_norm": 48.6142950712791, | |
| "learning_rate": 8.899999999999999e-08, | |
| "loss": 0.4585, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.006009615384615385, | |
| "grad_norm": 8.884428417822688, | |
| "learning_rate": 9.9e-08, | |
| "loss": 0.4388, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.006610576923076923, | |
| "grad_norm": 9.705307595727518, | |
| "learning_rate": 1.09e-07, | |
| "loss": 0.4471, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.007211538461538462, | |
| "grad_norm": 30.306963311413714, | |
| "learning_rate": 1.19e-07, | |
| "loss": 0.4293, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0078125, | |
| "grad_norm": 14.67235522750005, | |
| "learning_rate": 1.29e-07, | |
| "loss": 0.4955, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.008413461538461538, | |
| "grad_norm": 41.261340367060896, | |
| "learning_rate": 1.3900000000000001e-07, | |
| "loss": 0.5347, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.009014423076923076, | |
| "grad_norm": 28.145775622131612, | |
| "learning_rate": 1.49e-07, | |
| "loss": 0.4671, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.009615384615384616, | |
| "grad_norm": 13.92342254703078, | |
| "learning_rate": 1.59e-07, | |
| "loss": 0.4318, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.010216346153846154, | |
| "grad_norm": 42.72542828466139, | |
| "learning_rate": 1.69e-07, | |
| "loss": 0.4232, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.010817307692307692, | |
| "grad_norm": 18.910952118646854, | |
| "learning_rate": 1.7899999999999997e-07, | |
| "loss": 0.5434, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.01141826923076923, | |
| "grad_norm": 9.945569021169186, | |
| "learning_rate": 1.8899999999999999e-07, | |
| "loss": 0.5091, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.01201923076923077, | |
| "grad_norm": 12.064577086348633, | |
| "learning_rate": 1.99e-07, | |
| "loss": 0.4811, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.012620192307692308, | |
| "grad_norm": 21.22554429640729, | |
| "learning_rate": 2.0899999999999998e-07, | |
| "loss": 0.486, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.013221153846153846, | |
| "grad_norm": 51.114586227567436, | |
| "learning_rate": 2.19e-07, | |
| "loss": 0.5489, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.013822115384615384, | |
| "grad_norm": 33.767338939816305, | |
| "learning_rate": 2.29e-07, | |
| "loss": 0.5006, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.014423076923076924, | |
| "grad_norm": 20.989645421924674, | |
| "learning_rate": 2.3899999999999996e-07, | |
| "loss": 0.5026, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.015024038461538462, | |
| "grad_norm": 46.94572882286708, | |
| "learning_rate": 2.4899999999999997e-07, | |
| "loss": 0.4662, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.015625, | |
| "grad_norm": 16.1857675192416, | |
| "learning_rate": 2.59e-07, | |
| "loss": 0.4428, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.01622596153846154, | |
| "grad_norm": 22.356923049427976, | |
| "learning_rate": 2.69e-07, | |
| "loss": 0.466, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.016826923076923076, | |
| "grad_norm": 27.919776058404576, | |
| "learning_rate": 2.79e-07, | |
| "loss": 0.5299, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.017427884615384616, | |
| "grad_norm": 20.374817697720214, | |
| "learning_rate": 2.8899999999999995e-07, | |
| "loss": 0.5575, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.018028846153846152, | |
| "grad_norm": 11.1596095155468, | |
| "learning_rate": 2.9899999999999996e-07, | |
| "loss": 0.5476, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.018629807692307692, | |
| "grad_norm": 18.15970681497743, | |
| "learning_rate": 3.09e-07, | |
| "loss": 0.521, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.019230769230769232, | |
| "grad_norm": 7.209018602994909, | |
| "learning_rate": 3.19e-07, | |
| "loss": 0.4647, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.019831730769230768, | |
| "grad_norm": 14.837068682221664, | |
| "learning_rate": 3.29e-07, | |
| "loss": 0.4864, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.020432692307692308, | |
| "grad_norm": 14.235741132745929, | |
| "learning_rate": 3.39e-07, | |
| "loss": 0.4107, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.021033653846153848, | |
| "grad_norm": 27.277651971922182, | |
| "learning_rate": 3.4899999999999996e-07, | |
| "loss": 0.4536, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.021634615384615384, | |
| "grad_norm": 17.10480760395017, | |
| "learning_rate": 3.5899999999999997e-07, | |
| "loss": 0.4289, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.022235576923076924, | |
| "grad_norm": 29.929679141274484, | |
| "learning_rate": 3.69e-07, | |
| "loss": 0.4647, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.02283653846153846, | |
| "grad_norm": 10.398612915945172, | |
| "learning_rate": 3.79e-07, | |
| "loss": 0.5167, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.0234375, | |
| "grad_norm": 9.769808118666157, | |
| "learning_rate": 3.89e-07, | |
| "loss": 0.4793, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.02403846153846154, | |
| "grad_norm": 18.64004530242777, | |
| "learning_rate": 3.99e-07, | |
| "loss": 0.4865, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.024639423076923076, | |
| "grad_norm": 17.699474376401422, | |
| "learning_rate": 4.0899999999999997e-07, | |
| "loss": 0.4727, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.025240384615384616, | |
| "grad_norm": 11.83869387760017, | |
| "learning_rate": 4.19e-07, | |
| "loss": 0.4803, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.025841346153846152, | |
| "grad_norm": 52.6903160822148, | |
| "learning_rate": 4.29e-07, | |
| "loss": 0.4587, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.026442307692307692, | |
| "grad_norm": 8.207347232374364, | |
| "learning_rate": 4.39e-07, | |
| "loss": 0.4849, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.027043269230769232, | |
| "grad_norm": 30.073558497443987, | |
| "learning_rate": 4.49e-07, | |
| "loss": 0.5023, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.027644230769230768, | |
| "grad_norm": 10.373186832780206, | |
| "learning_rate": 4.59e-07, | |
| "loss": 0.4538, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.028245192307692308, | |
| "grad_norm": 25.960320061465094, | |
| "learning_rate": 4.689999999999999e-07, | |
| "loss": 0.4712, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.028846153846153848, | |
| "grad_norm": 12.862144045179527, | |
| "learning_rate": 4.79e-07, | |
| "loss": 0.4704, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.029447115384615384, | |
| "grad_norm": 21.599196101058833, | |
| "learning_rate": 4.89e-07, | |
| "loss": 0.4695, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.030048076923076924, | |
| "grad_norm": 26.447052889694493, | |
| "learning_rate": 4.99e-07, | |
| "loss": 0.4574, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.03064903846153846, | |
| "grad_norm": 19.343061960700307, | |
| "learning_rate": 5.09e-07, | |
| "loss": 0.4259, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.03125, | |
| "grad_norm": 55.39351897669441, | |
| "learning_rate": 5.19e-07, | |
| "loss": 0.4362, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.031850961538461536, | |
| "grad_norm": 9.138204788385957, | |
| "learning_rate": 5.29e-07, | |
| "loss": 0.4519, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.03245192307692308, | |
| "grad_norm": 9.17406599995409, | |
| "learning_rate": 5.39e-07, | |
| "loss": 0.4639, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.033052884615384616, | |
| "grad_norm": 42.607781411989706, | |
| "learning_rate": 5.490000000000001e-07, | |
| "loss": 0.4302, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.03365384615384615, | |
| "grad_norm": 10.368759637182924, | |
| "learning_rate": 5.590000000000001e-07, | |
| "loss": 0.4107, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.034254807692307696, | |
| "grad_norm": 12.31592718573613, | |
| "learning_rate": 5.69e-07, | |
| "loss": 0.4403, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.03485576923076923, | |
| "grad_norm": 13.670395980874881, | |
| "learning_rate": 5.79e-07, | |
| "loss": 0.4294, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.03545673076923077, | |
| "grad_norm": 16.653414422369462, | |
| "learning_rate": 5.89e-07, | |
| "loss": 0.439, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.036057692307692304, | |
| "grad_norm": 10.215056425825546, | |
| "learning_rate": 5.989999999999999e-07, | |
| "loss": 0.4069, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.03665865384615385, | |
| "grad_norm": 31.589724664408692, | |
| "learning_rate": 6.089999999999999e-07, | |
| "loss": 0.4354, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.037259615384615384, | |
| "grad_norm": 21.847510353862578, | |
| "learning_rate": 6.189999999999999e-07, | |
| "loss": 0.4638, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.03786057692307692, | |
| "grad_norm": 8.076334096727454, | |
| "learning_rate": 6.289999999999999e-07, | |
| "loss": 0.4312, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.038461538461538464, | |
| "grad_norm": 22.52385975163401, | |
| "learning_rate": 6.389999999999999e-07, | |
| "loss": 0.4364, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.0390625, | |
| "grad_norm": 13.204088595346095, | |
| "learning_rate": 6.49e-07, | |
| "loss": 0.4231, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.039663461538461536, | |
| "grad_norm": 33.19312446450432, | |
| "learning_rate": 6.59e-07, | |
| "loss": 0.4301, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.04026442307692308, | |
| "grad_norm": 9.715044908453299, | |
| "learning_rate": 6.69e-07, | |
| "loss": 0.4201, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.040865384615384616, | |
| "grad_norm": 10.574912363532471, | |
| "learning_rate": 6.79e-07, | |
| "loss": 0.4547, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.04146634615384615, | |
| "grad_norm": 22.39620841168723, | |
| "learning_rate": 6.889999999999999e-07, | |
| "loss": 0.4493, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.042067307692307696, | |
| "grad_norm": 8.058186609469454, | |
| "learning_rate": 6.989999999999999e-07, | |
| "loss": 0.3926, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.04266826923076923, | |
| "grad_norm": 21.022586695480996, | |
| "learning_rate": 7.089999999999999e-07, | |
| "loss": 0.4393, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.04326923076923077, | |
| "grad_norm": 12.66012747128014, | |
| "learning_rate": 7.189999999999999e-07, | |
| "loss": 0.4359, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.043870192307692304, | |
| "grad_norm": 5.937649569363215, | |
| "learning_rate": 7.289999999999999e-07, | |
| "loss": 0.4277, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.04447115384615385, | |
| "grad_norm": 16.798833074370908, | |
| "learning_rate": 7.389999999999999e-07, | |
| "loss": 0.446, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.045072115384615384, | |
| "grad_norm": 26.4182799886464, | |
| "learning_rate": 7.489999999999999e-07, | |
| "loss": 0.4737, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.04567307692307692, | |
| "grad_norm": 13.91407808362797, | |
| "learning_rate": 7.59e-07, | |
| "loss": 0.4796, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.046274038461538464, | |
| "grad_norm": 16.734955748274775, | |
| "learning_rate": 7.69e-07, | |
| "loss": 0.4678, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.046875, | |
| "grad_norm": 11.89547913614258, | |
| "learning_rate": 7.79e-07, | |
| "loss": 0.4536, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.047475961538461536, | |
| "grad_norm": 16.860857694776378, | |
| "learning_rate": 7.89e-07, | |
| "loss": 0.422, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.04807692307692308, | |
| "grad_norm": 20.294362719824335, | |
| "learning_rate": 7.99e-07, | |
| "loss": 0.4675, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.048677884615384616, | |
| "grad_norm": 32.163806042850815, | |
| "learning_rate": 8.09e-07, | |
| "loss": 0.4162, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.04927884615384615, | |
| "grad_norm": 8.835869696589675, | |
| "learning_rate": 8.189999999999999e-07, | |
| "loss": 0.4531, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.049879807692307696, | |
| "grad_norm": 27.004489539541865, | |
| "learning_rate": 8.289999999999999e-07, | |
| "loss": 0.46, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.05048076923076923, | |
| "grad_norm": 10.039120612814386, | |
| "learning_rate": 8.389999999999999e-07, | |
| "loss": 0.4351, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.05108173076923077, | |
| "grad_norm": 25.890820231883552, | |
| "learning_rate": 8.489999999999999e-07, | |
| "loss": 0.4477, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.051682692307692304, | |
| "grad_norm": 21.84065301973945, | |
| "learning_rate": 8.59e-07, | |
| "loss": 0.4028, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.05228365384615385, | |
| "grad_norm": 17.894672539322194, | |
| "learning_rate": 8.69e-07, | |
| "loss": 0.4231, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.052884615384615384, | |
| "grad_norm": 33.13377776596334, | |
| "learning_rate": 8.79e-07, | |
| "loss": 0.4483, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.05348557692307692, | |
| "grad_norm": 57.77962521420785, | |
| "learning_rate": 8.89e-07, | |
| "loss": 0.4528, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.054086538461538464, | |
| "grad_norm": 14.605643747393765, | |
| "learning_rate": 8.99e-07, | |
| "loss": 0.4183, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.0546875, | |
| "grad_norm": 30.921239622195746, | |
| "learning_rate": 9.09e-07, | |
| "loss": 0.4299, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.055288461538461536, | |
| "grad_norm": 14.136816366550216, | |
| "learning_rate": 9.19e-07, | |
| "loss": 0.4105, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.05588942307692308, | |
| "grad_norm": 11.231190902163389, | |
| "learning_rate": 9.29e-07, | |
| "loss": 0.4185, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.056490384615384616, | |
| "grad_norm": 33.801234798562184, | |
| "learning_rate": 9.389999999999999e-07, | |
| "loss": 0.4541, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.05709134615384615, | |
| "grad_norm": 17.846337242420834, | |
| "learning_rate": 9.489999999999999e-07, | |
| "loss": 0.4178, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.057692307692307696, | |
| "grad_norm": 19.667393686818674, | |
| "learning_rate": 9.589999999999998e-07, | |
| "loss": 0.4446, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.05829326923076923, | |
| "grad_norm": 10.919623781198071, | |
| "learning_rate": 9.69e-07, | |
| "loss": 0.4745, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.05889423076923077, | |
| "grad_norm": 27.87766553645535, | |
| "learning_rate": 9.789999999999999e-07, | |
| "loss": 0.4779, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.059495192307692304, | |
| "grad_norm": 32.8338113927267, | |
| "learning_rate": 9.89e-07, | |
| "loss": 0.4962, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.06009615384615385, | |
| "grad_norm": 54.56857085663595, | |
| "learning_rate": 9.989999999999999e-07, | |
| "loss": 0.4523, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.060697115384615384, | |
| "grad_norm": 67.84354578257332, | |
| "learning_rate": 9.994245524296674e-07, | |
| "loss": 0.3837, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.06129807692307692, | |
| "grad_norm": 23.915401977778863, | |
| "learning_rate": 9.987851662404092e-07, | |
| "loss": 0.4318, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.061899038461538464, | |
| "grad_norm": 18.692398541022605, | |
| "learning_rate": 9.981457800511507e-07, | |
| "loss": 0.4429, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.0625, | |
| "grad_norm": 8.96899175552455, | |
| "learning_rate": 9.975063938618924e-07, | |
| "loss": 0.4265, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.06310096153846154, | |
| "grad_norm": 67.76708100008346, | |
| "learning_rate": 9.968670076726342e-07, | |
| "loss": 0.4004, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.06370192307692307, | |
| "grad_norm": 14.95483642619465, | |
| "learning_rate": 9.962276214833759e-07, | |
| "loss": 0.4134, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.06430288461538461, | |
| "grad_norm": 11.936205292399153, | |
| "learning_rate": 9.955882352941176e-07, | |
| "loss": 0.4206, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.06490384615384616, | |
| "grad_norm": 8.144208338195012, | |
| "learning_rate": 9.949488491048593e-07, | |
| "loss": 0.4, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.0655048076923077, | |
| "grad_norm": 19.659029674044277, | |
| "learning_rate": 9.94309462915601e-07, | |
| "loss": 0.3855, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.06610576923076923, | |
| "grad_norm": 14.97116772003002, | |
| "learning_rate": 9.936700767263426e-07, | |
| "loss": 0.402, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.06670673076923077, | |
| "grad_norm": 147.02140982947978, | |
| "learning_rate": 9.930306905370843e-07, | |
| "loss": 0.481, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.0673076923076923, | |
| "grad_norm": 43.55065984134183, | |
| "learning_rate": 9.92391304347826e-07, | |
| "loss": 0.4902, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.06790865384615384, | |
| "grad_norm": 7.879535552657374, | |
| "learning_rate": 9.917519181585678e-07, | |
| "loss": 0.4542, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.06850961538461539, | |
| "grad_norm": 8.151701209609962, | |
| "learning_rate": 9.911125319693095e-07, | |
| "loss": 0.414, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.06911057692307693, | |
| "grad_norm": 14.637959734877429, | |
| "learning_rate": 9.904731457800513e-07, | |
| "loss": 0.3782, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.06971153846153846, | |
| "grad_norm": 4.501693875744606, | |
| "learning_rate": 9.898337595907928e-07, | |
| "loss": 0.3849, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.0703125, | |
| "grad_norm": 35.230763684058516, | |
| "learning_rate": 9.891943734015345e-07, | |
| "loss": 0.4404, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.07091346153846154, | |
| "grad_norm": 12.442031715529945, | |
| "learning_rate": 9.885549872122762e-07, | |
| "loss": 0.4576, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.07151442307692307, | |
| "grad_norm": 18.8069021556737, | |
| "learning_rate": 9.879156010230177e-07, | |
| "loss": 0.4832, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.07211538461538461, | |
| "grad_norm": 15.855907967671428, | |
| "learning_rate": 9.872762148337595e-07, | |
| "loss": 0.4352, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.07271634615384616, | |
| "grad_norm": 14.369715804290616, | |
| "learning_rate": 9.866368286445012e-07, | |
| "loss": 0.4028, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.0733173076923077, | |
| "grad_norm": 177.80186681538973, | |
| "learning_rate": 9.85997442455243e-07, | |
| "loss": 0.4571, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.07391826923076923, | |
| "grad_norm": 11.84969824290971, | |
| "learning_rate": 9.853580562659845e-07, | |
| "loss": 0.4467, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.07451923076923077, | |
| "grad_norm": 68.42006681557945, | |
| "learning_rate": 9.847186700767262e-07, | |
| "loss": 0.4505, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.0751201923076923, | |
| "grad_norm": 16.098276335781833, | |
| "learning_rate": 9.84079283887468e-07, | |
| "loss": 0.4319, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.07572115384615384, | |
| "grad_norm": 16.956837398008382, | |
| "learning_rate": 9.834398976982096e-07, | |
| "loss": 0.4748, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.07632211538461539, | |
| "grad_norm": 13.890105636051898, | |
| "learning_rate": 9.828005115089514e-07, | |
| "loss": 0.4738, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 14.52112888511607, | |
| "learning_rate": 9.821611253196931e-07, | |
| "loss": 0.4436, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.07752403846153846, | |
| "grad_norm": 14.482587862628632, | |
| "learning_rate": 9.815217391304348e-07, | |
| "loss": 0.4213, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.078125, | |
| "grad_norm": 15.66954308966829, | |
| "learning_rate": 9.808823529411764e-07, | |
| "loss": 0.4889, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.07872596153846154, | |
| "grad_norm": 33.3147760442157, | |
| "learning_rate": 9.80242966751918e-07, | |
| "loss": 0.4395, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.07932692307692307, | |
| "grad_norm": 31.576191017602305, | |
| "learning_rate": 9.796035805626598e-07, | |
| "loss": 0.3946, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.07992788461538461, | |
| "grad_norm": 15.62093132771886, | |
| "learning_rate": 9.789641943734016e-07, | |
| "loss": 0.4729, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.08052884615384616, | |
| "grad_norm": 51.855714781154056, | |
| "learning_rate": 9.783248081841433e-07, | |
| "loss": 0.4652, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.0811298076923077, | |
| "grad_norm": 84.97065453956941, | |
| "learning_rate": 9.77685421994885e-07, | |
| "loss": 0.4229, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.08173076923076923, | |
| "grad_norm": 25.452407688499903, | |
| "learning_rate": 9.770460358056265e-07, | |
| "loss": 0.4064, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.08233173076923077, | |
| "grad_norm": 18.263105399640835, | |
| "learning_rate": 9.764066496163683e-07, | |
| "loss": 0.4564, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.0829326923076923, | |
| "grad_norm": 24.334822911726036, | |
| "learning_rate": 9.7576726342711e-07, | |
| "loss": 0.4071, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.08353365384615384, | |
| "grad_norm": 11.382524106662059, | |
| "learning_rate": 9.751278772378515e-07, | |
| "loss": 0.4332, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.08413461538461539, | |
| "grad_norm": 54.06425057509178, | |
| "learning_rate": 9.744884910485932e-07, | |
| "loss": 0.4505, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.08473557692307693, | |
| "grad_norm": 5.076664593001253, | |
| "learning_rate": 9.73849104859335e-07, | |
| "loss": 0.4453, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.08533653846153846, | |
| "grad_norm": 21.527761337764254, | |
| "learning_rate": 9.732097186700767e-07, | |
| "loss": 0.4719, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.0859375, | |
| "grad_norm": 36.50870455739431, | |
| "learning_rate": 9.725703324808182e-07, | |
| "loss": 0.4512, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.08653846153846154, | |
| "grad_norm": 12.540139357753157, | |
| "learning_rate": 9.7193094629156e-07, | |
| "loss": 0.4548, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.08713942307692307, | |
| "grad_norm": 15.096306703319206, | |
| "learning_rate": 9.712915601023017e-07, | |
| "loss": 0.5067, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.08774038461538461, | |
| "grad_norm": 88.89595612518276, | |
| "learning_rate": 9.706521739130434e-07, | |
| "loss": 0.469, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.08834134615384616, | |
| "grad_norm": 24.05245110004213, | |
| "learning_rate": 9.700127877237851e-07, | |
| "loss": 0.4336, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.0889423076923077, | |
| "grad_norm": 7.772235228925758, | |
| "learning_rate": 9.693734015345269e-07, | |
| "loss": 0.39, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.08954326923076923, | |
| "grad_norm": 49.178387681040576, | |
| "learning_rate": 9.687340153452686e-07, | |
| "loss": 0.4151, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.09014423076923077, | |
| "grad_norm": 9.880025918953056, | |
| "learning_rate": 9.680946291560101e-07, | |
| "loss": 0.4437, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.0907451923076923, | |
| "grad_norm": 84.04802965246304, | |
| "learning_rate": 9.674552429667519e-07, | |
| "loss": 0.4544, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.09134615384615384, | |
| "grad_norm": 23.879300812095252, | |
| "learning_rate": 9.668158567774936e-07, | |
| "loss": 0.4778, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.09194711538461539, | |
| "grad_norm": 8.74377800352843, | |
| "learning_rate": 9.661764705882353e-07, | |
| "loss": 0.4424, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.09254807692307693, | |
| "grad_norm": 25.340394685074983, | |
| "learning_rate": 9.65537084398977e-07, | |
| "loss": 0.4457, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.09314903846153846, | |
| "grad_norm": 24.377319461101425, | |
| "learning_rate": 9.648976982097188e-07, | |
| "loss": 0.4145, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.09375, | |
| "grad_norm": 27.44647420729, | |
| "learning_rate": 9.642583120204603e-07, | |
| "loss": 0.3954, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.09435096153846154, | |
| "grad_norm": 9.525464831385005, | |
| "learning_rate": 9.63618925831202e-07, | |
| "loss": 0.3716, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.09495192307692307, | |
| "grad_norm": 27.349989017536107, | |
| "learning_rate": 9.629795396419438e-07, | |
| "loss": 0.3668, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.09555288461538461, | |
| "grad_norm": 19.197820909162925, | |
| "learning_rate": 9.623401534526855e-07, | |
| "loss": 0.4735, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.09615384615384616, | |
| "grad_norm": 14.821828935472933, | |
| "learning_rate": 9.61700767263427e-07, | |
| "loss": 0.4551, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.0967548076923077, | |
| "grad_norm": 10.183698117047507, | |
| "learning_rate": 9.610613810741687e-07, | |
| "loss": 0.4463, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.09735576923076923, | |
| "grad_norm": 17.003317993340907, | |
| "learning_rate": 9.604219948849105e-07, | |
| "loss": 0.4501, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.09795673076923077, | |
| "grad_norm": 16.527884738622046, | |
| "learning_rate": 9.59782608695652e-07, | |
| "loss": 0.4274, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.0985576923076923, | |
| "grad_norm": 96.03639510226907, | |
| "learning_rate": 9.591432225063937e-07, | |
| "loss": 0.4002, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.09915865384615384, | |
| "grad_norm": 21.560400331772723, | |
| "learning_rate": 9.585038363171354e-07, | |
| "loss": 0.3857, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.09975961538461539, | |
| "grad_norm": 22.502753235352646, | |
| "learning_rate": 9.578644501278772e-07, | |
| "loss": 0.3972, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.10036057692307693, | |
| "grad_norm": 13.923835321348774, | |
| "learning_rate": 9.57225063938619e-07, | |
| "loss": 0.4161, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.10096153846153846, | |
| "grad_norm": 13.89020798215834, | |
| "learning_rate": 9.565856777493606e-07, | |
| "loss": 0.4409, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.1015625, | |
| "grad_norm": 93.30579652904981, | |
| "learning_rate": 9.559462915601024e-07, | |
| "loss": 0.4413, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.10216346153846154, | |
| "grad_norm": 10.306967482471663, | |
| "learning_rate": 9.553069053708439e-07, | |
| "loss": 0.4306, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.10276442307692307, | |
| "grad_norm": 16.35022677130188, | |
| "learning_rate": 9.546675191815856e-07, | |
| "loss": 0.4361, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.10336538461538461, | |
| "grad_norm": 68.65163238192922, | |
| "learning_rate": 9.540281329923273e-07, | |
| "loss": 0.4508, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.10396634615384616, | |
| "grad_norm": 14.691319864310291, | |
| "learning_rate": 9.533887468030691e-07, | |
| "loss": 0.4637, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.1045673076923077, | |
| "grad_norm": 11.655266762752866, | |
| "learning_rate": 9.527493606138107e-07, | |
| "loss": 0.4239, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.10516826923076923, | |
| "grad_norm": 26.007719217225308, | |
| "learning_rate": 9.521099744245524e-07, | |
| "loss": 0.4427, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.10576923076923077, | |
| "grad_norm": 31.093138088222684, | |
| "learning_rate": 9.51470588235294e-07, | |
| "loss": 0.459, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.1063701923076923, | |
| "grad_norm": 8.469416417535411, | |
| "learning_rate": 9.508312020460358e-07, | |
| "loss": 0.402, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.10697115384615384, | |
| "grad_norm": 16.06555093014836, | |
| "learning_rate": 9.501918158567774e-07, | |
| "loss": 0.4276, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.10757211538461539, | |
| "grad_norm": 46.21222096469417, | |
| "learning_rate": 9.495524296675191e-07, | |
| "loss": 0.4177, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.10817307692307693, | |
| "grad_norm": 11.561230412768193, | |
| "learning_rate": 9.489130434782609e-07, | |
| "loss": 0.3993, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.10877403846153846, | |
| "grad_norm": 33.55933631376849, | |
| "learning_rate": 9.482736572890026e-07, | |
| "loss": 0.4206, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.109375, | |
| "grad_norm": 11.530474264624262, | |
| "learning_rate": 9.476342710997442e-07, | |
| "loss": 0.4378, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.10997596153846154, | |
| "grad_norm": 28.092472805389466, | |
| "learning_rate": 9.469948849104858e-07, | |
| "loss": 0.4033, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.11057692307692307, | |
| "grad_norm": 12.87483056607121, | |
| "learning_rate": 9.463554987212276e-07, | |
| "loss": 0.3855, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.11117788461538461, | |
| "grad_norm": 12.951129649825642, | |
| "learning_rate": 9.457161125319693e-07, | |
| "loss": 0.4156, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.11177884615384616, | |
| "grad_norm": 10.238507527434807, | |
| "learning_rate": 9.450767263427109e-07, | |
| "loss": 0.4417, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.1123798076923077, | |
| "grad_norm": 8.449261275213267, | |
| "learning_rate": 9.444373401534527e-07, | |
| "loss": 0.4499, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.11298076923076923, | |
| "grad_norm": 33.54997769262099, | |
| "learning_rate": 9.437979539641944e-07, | |
| "loss": 0.3718, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.11358173076923077, | |
| "grad_norm": 163.06822397421263, | |
| "learning_rate": 9.43158567774936e-07, | |
| "loss": 0.3994, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.1141826923076923, | |
| "grad_norm": 747.6560259343341, | |
| "learning_rate": 9.425191815856776e-07, | |
| "loss": 0.41, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.11478365384615384, | |
| "grad_norm": 20.948270060235018, | |
| "learning_rate": 9.418797953964194e-07, | |
| "loss": 0.4012, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.11538461538461539, | |
| "grad_norm": 16.592630584743493, | |
| "learning_rate": 9.412404092071611e-07, | |
| "loss": 0.4071, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.11598557692307693, | |
| "grad_norm": 6.4533503474115985, | |
| "learning_rate": 9.406010230179028e-07, | |
| "loss": 0.3927, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.11658653846153846, | |
| "grad_norm": 9.481676872361602, | |
| "learning_rate": 9.399616368286445e-07, | |
| "loss": 0.4486, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.1171875, | |
| "grad_norm": 22.879429418595755, | |
| "learning_rate": 9.393222506393862e-07, | |
| "loss": 0.4487, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.11778846153846154, | |
| "grad_norm": 36.32090030167348, | |
| "learning_rate": 9.386828644501278e-07, | |
| "loss": 0.4866, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.11838942307692307, | |
| "grad_norm": 15.994462091464001, | |
| "learning_rate": 9.380434782608695e-07, | |
| "loss": 0.4853, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.11899038461538461, | |
| "grad_norm": 16.476880149018157, | |
| "learning_rate": 9.374040920716112e-07, | |
| "loss": 0.4713, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.11959134615384616, | |
| "grad_norm": 1316.107870553675, | |
| "learning_rate": 9.367647058823529e-07, | |
| "loss": 0.4386, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.1201923076923077, | |
| "grad_norm": 20.762433776167253, | |
| "learning_rate": 9.361253196930946e-07, | |
| "loss": 0.4609, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.12079326923076923, | |
| "grad_norm": 33.600636933197194, | |
| "learning_rate": 9.354859335038364e-07, | |
| "loss": 0.4346, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.12139423076923077, | |
| "grad_norm": 18.47987496120014, | |
| "learning_rate": 9.34846547314578e-07, | |
| "loss": 0.3968, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.1219951923076923, | |
| "grad_norm": 12.493741832264165, | |
| "learning_rate": 9.342071611253196e-07, | |
| "loss": 0.3862, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.12259615384615384, | |
| "grad_norm": 8.016162557709496, | |
| "learning_rate": 9.335677749360613e-07, | |
| "loss": 0.3913, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.12319711538461539, | |
| "grad_norm": 556.4941548489389, | |
| "learning_rate": 9.329283887468031e-07, | |
| "loss": 0.4127, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.12379807692307693, | |
| "grad_norm": 25.506919407129853, | |
| "learning_rate": 9.322890025575447e-07, | |
| "loss": 0.4349, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.12439903846153846, | |
| "grad_norm": 21.228728941085222, | |
| "learning_rate": 9.316496163682864e-07, | |
| "loss": 0.4603, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 9.977415169528852, | |
| "learning_rate": 9.310102301790282e-07, | |
| "loss": 0.4158, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "eval_loss": 0.4175701141357422, | |
| "eval_runtime": 13.8604, | |
| "eval_samples_per_second": 2.309, | |
| "eval_steps_per_second": 0.289, | |
| "step": 2080 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 16640, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.037184756875264e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |