sanchit-gandhi (HF staff) committed on
Commit 67b9f84
1 Parent(s): f74be82

2hx8pk65: saving weights and logs of step 20k

flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d4bbb8026d3a4c9acb651189cbf65ab582eb2284bbcae68d0c6512395b962329
+ oid sha256:1b20c6ce9070a647fc4b56ff847349b2a6ad959d336f8591e71fc135e07d67a9
  size 2353616717
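For reference, the change above swaps the Git LFS pointer for flax_model.msgpack: `oid` is the SHA-256 digest of the actual weights file and `size` its length in bytes, so the new checkpoint replaces the old one at an identical ~2.35 GB. A minimal sketch (the local download path is an assumption, not part of the commit) that verifies a file against the new pointer:

```python
import hashlib

MODEL_PATH = "flax_model.msgpack"  # assumed local download path
EXPECTED_OID = "1b20c6ce9070a647fc4b56ff847349b2a6ad959d336f8591e71fc135e07d67a9"
EXPECTED_SIZE = 2353616717

sha = hashlib.sha256()
size = 0
with open(MODEL_PATH, "rb") as f:
    # Stream in 1 MiB chunks: the checkpoint is ~2.35 GB, too large to read at once.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: got {size} bytes"
assert sha.hexdigest() == EXPECTED_OID, "oid mismatch: file differs from pointer"
```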
nohup.out CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220828_085247-2hx8pk65/files/media/table/eval/step_20k_20000_a0a50c5d8793ca99e464.table.json ADDED
@@ -0,0 +1 @@
+ {"columns": ["id", "label_str", "beam_1", "beam_2", "beam_3", "beam_4", "beam_5"], "data": [["2277-149896-0000", "he was in a fevered state of mind owing to the blight his wife's action threatened to cast upon his entire future", "he was in a fevered state of mind owing to the blight his wife's action threatened to cast upon his entire future", "he was in a fevered state of mind owing to the blight his wife's action threatened to cast upon his entire future", "he was in a feverred state of mind owing to the blight his wife's action threatened to cast upon his entire future", "he was in a fevered state of mind owing to the blight his wife's action threatened to cast up on his entire future", "he was in a fever'd state of mind owing to the blight his wife's action threatened to cast upon his entire future"], ["2277-149896-0001", "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did", "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did", "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did", "he would have to pay her the money which she could now regularly demand or there would be trouble it did not matter what he did", "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did a", "he would have to pay her the money which he would now regularly demand or there would be trouble it did not matter what he did"], ["2277-149896-0002", "hurstwood walked the floor mentally arranging the chief points of his situation", "hurstwood walked the floor mentally arranging the chief points of his situation", "hurstwood walked to the floor mentally arranging the chief points of his situation", "hurstwood walked the floor mentally arranging the chief points in his situation", "hurstwood walked the floor mentally arranging the chief points of this situation", "hurst wood walked the floor mentally arranging the chief points of his situation"], ["2277-149896-0003", "he also thought of his managerial position", "he also thought of his managerial position", "he also thought of this managerial position", "he also thought in his managerial position", "he also thought of his managerial position and", "he also thought of his administrative position"], ["2277-149896-0004", "how would the papers talk about it", "how would the papers talk about it", "how would the papers talk about it and", "how would the papers talk about it i", "how would the papers talk about it a", "how would the papers talk about it the"], ["2277-149896-0005", "many little wrinkles gathered between his eyes as he contemplated this and his brow moistened", "many little wrinkles gathered between his eyes as he contemplated this and his brow moistened", "many little wrinkles gathered between his eyes as he contemplated this his brow moistened", "many little wrinkles gathered between his eyes as he contemplated this and his brow moistened", "many little wrinkles gathered between his eyes as he contemplated this this and his brow moistened", "many little wrinkles gathered between his eyes as he considered this and his brow moistened"], ["2277-149896-0006", "he could arrange that satisfactorily for carrie would be glad to wait if necessary", "he could arrange that satisfactorily for carrie would be glad to wait if necessary", "he could arrange that 
satisfactorily for carry would be glad to wait if necessary", "he could arrange it satisfactorily for carrie would be glad to wait if necessary", "he could arrange the satisfactorily for carrie would be glad to wait if necessary", "he could arrange this satisfactorily for carrie would be glad to wait if necessary"], ["2277-149896-0007", "he would see how things turned out to morrow and then he would talk to her they were going to meet as usual", "he would see how things turned out to morrow and then he would talk to her they were going to meet as usual", "he would see how things turned out tomorrow and then he would talk to her they were going to meet as usual", "he would see how things turned out tonight and then he would talk to her they were going to meet as usual", "he would see how things turned out to morrow then he would talk to her they were going to meet as usual", "he would see how things turned out today and then he would talk to her they were going to meet as usual"], ["2277-149896-0008", "for some reason he felt as if something might come that way and was relieved when all the envelopes had been scanned and nothing suspicious noticed", "for some reason he felt as if something might come that way and was relieved when all the envelopes had been scanned and nothing suspicious noticed", "for some reason he felt as if something might come that way and was relieved when all the envelopes had been scanned and nothing suspicious notice", "for some reason he felt as if something might come that way and was relieved when all the envelopes had been screened and nothing suspicious noticed", "for some reason he felt as if something might come this way and was relieved when all the envelopes had been scanned and nothing suspicious noticed", "for some reason he felt as if something might come that way and was relieved when all of the envelopes had been scanned and nothing suspicious noticed"], ["2277-149896-0009", "while the danger had not lessened it had not as yet materialised and with him no news was good news", "while the danger had not lessened it had not as yet materialized and with him no news was good news", "while the danger had not lessened it had not as yet materialised and with him no news was good news", "while the danger had not lessened it had not as yet materialized and with him no night was good news", "while the danger had not lessened it had not as yet materialized and with him no news was good night", "while the danger had not lessened it had not as yet materialized and with him no noise was good news"], ["2277-149896-0010", "so little did he consider drouet that it never once occurred to him to worry about his finding out", "so little did he consider drue that it never once occurred to him to worry about his finding out", "so little did he consider drouet that it never once occurred to him to worry about his finding out", "so little did he consider druda that it never once occurred to him to worry about his finding out", "so little did he consider drude that it never once occurred to him to worry about his finding out", "so little did he consider drua that it never once occurred to him to worry about his finding out"], ["2277-149896-0011", "he grew restless as he ruminated and then decided that perhaps it was nothing", "he grew restless as he ruminated and then decided that perhaps it was nothing", "he grew restless as he ruminated and then decided that perhaps it was nothing", "he grew restless as he ruminated then decided that perhaps it was nothing", "he grew 
restless as he ruminated and then decided that perhaps it was nothing i", "he grew restless as he ruminated and then decided that perhaps it was nothing and"], ["2277-149896-0012", "she had not been able to get away this morning", "she had not been able to get away this morning", "she had not been able to get away this morning and", "she had not been able to get away this morning a", "she had not been able to get away this morning i", "she had not been able to get away this morning the"], ["2277-149896-0013", "he would get one to day it would probably be on his desk when he got back he would look for it at once", "he would get one to day it would probably be on his desk when he got back he would look for it at once", "he would get one today it would probably be on his desk when he got back he would look for it at once", "he would get one to day it would probably be on his deck when he got back he would look for it at once", "he would get one to day it would probably be in his desk when he got back he would look for it at once", "he would get one tomorrow it would probably be on his desk when he got back he would look for it at once"], ["2277-149896-0014", "after a time he gave up waiting and drearily headed for the madison car", "after a time he gave up waiting and drearily headed for the madison car", "after a time he gave up waiting and drearily headed for the maddison car", "after a time he gave up waiting and drearily headed for the madison cart", "after a time he gave up waiting and driarily headed for the madison car", "after a time he gave up waiting and drearily headed for the madison park"], ["2277-149896-0015", "he went in and examined his letters but there was nothing from carrie", "he went in and examined his letters but there was nothing from carrie", "he went in and examined his letters but there was nothing from perry", "he went in and examined his letters but there was nothing from kerry", "he went in and examined his letters but there was nothing from carry", "he went in and examined his letters but there was nothing from carey"], ["2277-149896-0016", "fortunately there was nothing from his wife either", "fortunately there was nothing from his wife either", "fortunately there was nothing from his wife either", "fortunately there was nothing from his wife either a", "fortunately there was nothing from this wife either", "fortunately there was nothing from his wife either and"], ["2277-149896-0017", "at one thirty he went to rector's for lunch and when he returned a messenger was waiting for him", "at one thirty he went to rector's for lunch and when he returned a messenger was waiting for him", "at one thirty he went to rector's for lunch and when he returned a messenger was waiting for him", "at one hundred he went to rector's for lunch and when he returned a messenger was waiting for him", "at one thirty he went to rectors for lunch and when he returned a messenger was waiting for him", "at one thirty he went to regters for lunch and when he returned a messenger was waiting for him"], ["2277-149896-0018", "his first impulse was to write but four words in reply go to the devil", "his first impulse was to write but four words in reply go to the devil", "his first impulse was to write but four words in reply go to the devil and", "his first impulse was to write but four words in reply go to the devil i", "his first impulse was to write but four words in reply go to a devil", "his first impulse was to write but four words in reply go to the devil oh"], ["2277-149896-0019", 
"but he compromised by telling the boy that there would be no reply", "but he compromised by telling the boy that there would be no reply", "but he compromised by telling the boy there would be no reply", "but he compromised by telling the boy that there would be no reply i", "but he comprised by telling the boy that there would be no reply", "but he compromised by telling the boy that there would be no reply and"], ["2277-149896-0020", "then he sat down in his chair and gazed without seeing contemplating the result of his work", "then he sat down in his chair and gazed without seeing contemplating the result of his work", "then he sate down in his chair and gazed without seeing contemplating the result of his work", "then he sat down in his chair and gazed without seeing contemplating the result of this work", "then he sat down in his chair and gazed without seeing contemplating the result of his works", "then he sat down in his chair and gazed without seeing contemplated the result of his work"], ["2277-149896-0021", "what would she do about that the confounded wretch", "what would she do about that the confounded wretch", "what would she do about that that confounded wretch", "what would she do about that the confounded wretch and", "what could she do about that the confounded wretch", "what would she do about that the confounded wretch oh"], ["2277-149896-0022", "later however his old discretion asserted itself", "later however his old discretion asserted itself", "later however his old discretion asserted itself and", "later however his older discretion asserted itself", "later however this old discretion asserted itself", "later however his old discretion asserted itself a"], ["2277-149896-0023", "something had to be done a climax was near and she would not sit idle", "something had to be done a climax was near and she would not sit idle", "something had to be done the climax was near and she would not sit idle", "something had to be done a climax was near and she would not sit idle and", "something had to be done a climax was near she would not sit idle", "some thing had to be done a climax was near and she would not sit idle"], ["2277-149896-0024", "he knew her well enough to know that when she had decided upon a plan she would follow it up", "he knew her well enough to know that when she had decided upon a plan she would follow it up", "he knew her well enough to know that when she decided upon a plan she would follow it up", "he knew her well enough to know that when she had decided upon a plan she would follow it up and", "he knew her well enough to know that when she had decided on a plan she would follow it up", "he knew her well enough to know that when she had decided upon a plan she would follow it up i"], ["2277-149896-0025", "he arose from his chair and went and looked out into the street", "he arose from his chair and went and looked out into the street", "he rose from his chair and went and looked out into the street", "he arose from his chair and went and looked out into the street and", "he arose from his chair and went and looked out into the street a", "he arose from his chair went and looked out into the street"], ["2277-149896-0026", "the long drizzle had begun pedestrians had turned up collars and trousers at the bottom", "the long drizzle had begun pedestrians had turned up collars and trousers at the bottom", "the long drizzle had begun pedestrians had turned up collars and trowsers at the bottom", "the long drizzle had begun petersians had turned up collars 
and trousers at the bottom", "the long drizzle had begun pedestrians had turned up collars and trousers at the bottom", "the long drizzle had begun pedestrians had turned up collars or trousers at the bottom"], ["2277-149896-0027", "hurstwood almost exclaimed out loud at the insistency of this thing", "hurstwood almost exclaimed out loud at the insistency of this thing", "hurstwood almost exclaimed out loud at the insistence of this thing", "hurstwood almost exclaimed out loud at the insistency of this thing", "hurstwood almost exclaimed out loud at the insstancy of this thing", "hurstwood almost exclaimed out aloud at the insistency of this thing"], ["2277-149896-0028", "he put on his hat and looked around for his umbrella", "he put on his hat and looked around for his umbrella", "he put on his hat and looked round for his umbrella", "he put on his hat and looked around for his umbrella a", "he put on his hat and looked around for his umbrella and", "he put on his hat and looked about for his umbrella"], ["2277-149896-0029", "he would have some arrangement of this thing", "he would have some arrangement of this thing", "he would have some arrangement of the thing", "he would have some arrangements of this thing", "he would have some arrangement of this thing and", "he would have some arrangement of his thing"], ["2277-149896-0030", "he began to wish that he had compromised in some way or other that he had sent the money perhaps he could do it up here", "he began to wish that he had compromised in some way or other that he had sent the money perhaps he could do it up here", "he began to wish that he had compromised some way or other that he had sent the money perhaps he could do it up here", "he began to wish he had compromised in some way or other that he had sent the money perhaps he could do it up here", "he began to wish that he had compromised in some way or other that he had sent the money perhaps he would do it up here", "he began to wish that he had compromised in some way or another that he had sent the money perhaps he could do it up here"], ["2277-149896-0031", "he would go in and see anyhow he would have no row", "he would go in and see anyhow he would have no row", "he would go in and see anyhow he would have no rue", "he would go in and see anyhow he would have no raoul", "he would go in and see anyhow he would have no row he would have no row", "he would go in and see anything he would have no row"], ["2277-149896-0032", "by the time he reached his own street he was keenly alive to the difficulties of his situation and wished over and over that some solution would offer itself that he could see his way out", "by the time he reached his own street he was keenly alive to the difficulties of his situation and wished over and over that some solution would offer itself that he could see his way out", "by the time he reached his own street he was keenly alive to the difficulties of this situation and wished over and over that some solution would offer itself that he could see his way out", "by the time he reached his own street he was keenly alive to the difficulties of his situation and wished over and over that some solution would offer itself' that he could see his way out", "by the time he reached his own streets he was keenly alive to the difficulties of his situation and wished over and over that some solution would offer itself that he could see his way out", "by the time he reached his own street he was keenly alive to the difficulties of his situation and wished over and 
over that some solution would offer itself he could see his way out"], ["2277-149896-0033", "then he rang the bell no answer", "then he rang the bell no answer", "then he rang the bell no answer i", "then he rang the bell no answer to", "then he rang the bell no answer and", "then he rang the bell no answer he"], ["2277-149896-0034", "he rang again this time harder still no answer", "he rang again this time harder still no answer", "he rang again this time harder still no answer i", "he rang again this time harder still no answer and", "he rang again this time harder still no answer a", "he rang again this time harder still no answer to"], ["2277-149897-0000", "when hurstwood got back to his office again he was in a greater quandary than ever", "when hurstwood got back to his office again he was in a greater quandary than ever", "when hurstwood got back to his office again he was in a greater quondary than ever", "when hurstwood got back to his office again he was in a greater quadrille than ever", "when hurstwood went back to his office again he was in a greater quandary than ever", "when hurstwood got back to his office again he was in a greater quandery than ever"], ["2277-149897-0001", "he could hardly realise how it had all come about", "he could hardly realize how it had all come about", "he could hardly realise how it had all come about", "he could hardly realize how it had all come about and", "he could hardly realise how it had all come about", "he could hardly realise how it had all come about and"], ["2277-149897-0002", "no letter had come no word of any kind and yet here it was late in the evening and she had agreed to meet him that morning", "no letter had come no word of any kind and yet here it was late in the evening and she had agreed to meet him that morning", "no letter had come no word of any kind and yet here it was late in the evening she had agreed to meet him that morning", "no letter had come no word of anything kind and yet here it was late in the evening and she had agreed to meet him that morning", "no letter had come no word of any kind and yet here it was late in evening and she had agreed to meet him that morning", "no letter had come no word of any kind and yet here it was late at the evening and she had agreed to meet him that morning"], ["2277-149897-0003", "he saw that in the excitement of recent events he had not formulated a plan upon that score", "he saw that in the excitement of recent events he had not formulated a plan upon that score", "he saw that in the excitement of recent events he had not formulated a plan upon the score", "he saw that in the excitement of recent events he had not formulated a plan on that score", "he saw that in the excitement of recent events he had not formulated a plan upon his score", "he saw that in the excitement of recent events he had not formulated a plan upon that score a"], ["2277-149897-0004", "he was getting some vague comfort out of a good cigar but it was no panacea for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no panacea for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no panatia for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no pannacia for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no panamia for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no panama for 
the ill which affected him"], ["2277-149897-0005", "it was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up", "it was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up", "it was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he had an envelope placed in it the requested amount and slowly sealed it up", "it was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he got an envelope placed in it the required amount and slowly sealed it up", "it was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he got an envelope placed in the requested amount and slowly sealed it up", "was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up"], ["2277-149897-0006", "then he called harry the boy of all work around the place", "then he called harry the boy of all work around the place", "then he called harry the boy of all work round the place", "then he called henry the boy of all work around the place", "then he called harry the boy of all work around the place and", "then he called harry the boy of all work around the place a"], ["2277-149897-0007", "you take this to this address he said handing him the envelope and give it to missus hurstwood yes sir said the boy", "you take this to this address he said handing him the envelope and give it to missus hurstwood yes sir said the boy", "you take this to this address he said handing him the envelope and give it to the missus hurstwood yes sir said the boy", "you take this to this address he said handing him the umbrella and give it to missus hurstwood yes sir said the boy", "you take this to this address he said handing him the envelope and give it to missus hurstwood yes sir said the boy i", "you take this to this addressed he said handing him the envelope and give it to missus hurstwood yes sir said the boy"], ["2277-149897-0008", "any answer i guess not", "any answer i guess not", "any answer i guess not and", "any answer i guess not i guess not", "any answer i guess not to", "any answer i guess not i'll not"], ["2277-149897-0009", "the boy hastened away and the manager fell to his musings", "the boy hastened away and the manager fell to his musings", "the boy hastened away and the manager fell into his musings", "the boy hastened away and the manager fell in his musings", "the boy hastened away and the manager fell to his musings and", "the boy hastened away and the manager fell to his musings'"], ["2277-149897-0010", "he was beaten for to night and he might just as well make the best of it", "he was beaten for to night and he might just as well make the best of it", "he was beaten for tonight and he might just as well make the best of it", "he was beaten for to night and might just as well make the best of it", "he was beaten for tomorrow and he might just as well make the best of it", "he was beaten for to night and he might just as well make best of it"], ["2277-149897-0011", "she would take the envelope and know that she had triumphed", "she would take the envelope and know 
that she had triumphed", "she would take the envelope and know what she had triumphed", "she would take the envelope and know whether she had triumphed", "she would take a envelope and know that she had triumphed", "she would take the envelope and know that she had triumphed and"], ["2277-149897-0012", "if he only had that letter back he wouldn't send it", "if he only had that letter back he wouldn't send it", "if he only had the letter back he wouldn't send it", "if he only had that letter back he wouldn't send it i", "if he only had had that letter back he wouldn't send it", "if he only had that letter back he wouldn't send it i don't"], ["2277-149897-0013", "for relief he arose and joined in conversation with a few friends who were drinking", "for relief he arose and joined in the conversation with a few friends who were drinking", "for relief he arose and joined in the conversation with the few friends who were drinking", "for relief he rose and joined in the conversation with a few friends who were drinking", "for relief he rose and joined in the conversation with the few friends who were drinking", "for relief he arose and joined in his conversation with a few friends who were drinking"], ["2277-149897-0014", "all the time his thoughts would run out to his home and see the scene being therein enacted", "all the time his thoughts would run out to his home and see the scene being therein enacted", "all the time his thoughts would run out to his home and see the scene being therein enacted", "all the time his thoughts would run out to his home and see the scene being thereon enacted", "all the time his thoughts would run out to his home and see the scene being there in enacted", "all this time his thoughts would run out to his home and see the scene being therein enacted"]]}
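The single added line above is the serialized form of a wandb table: a `columns` list plus row-major `data`, one row per LibriSpeech utterance with the reference transcript and the top five beam hypotheses. A hedged sketch of how such a table is typically built and logged (the project name and log key are illustrative, not taken from the training script):

```python
import wandb

# Column layout mirrors the JSON above: utterance id, reference
# transcript, then five beam-search hypotheses.
columns = ["id", "label_str"] + [f"beam_{i}" for i in range(1, 6)]

rows = [
    [
        "2277-149896-0000",                       # LibriSpeech-style utterance id
        "he was in a fevered state of mind ...",  # reference (truncated here)
        *["<beam hypothesis>"] * 5,               # placeholder predictions
    ],
]

run = wandb.init(project="flax-speech-recognition")  # project name is an assumption
run.log({"eval/step_20k": wandb.Table(columns=columns, data=rows)})
run.finish()
```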
wandb/run-20220828_085247-2hx8pk65/files/output.log CHANGED
@@ -12998,5 +12998,10310 @@ To disable this warning, you can either:
  - Avoid using `tokenizers` before the fork if possible
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
  To disable this warning, you can either:
  - Avoid using `tokenizers` before the fork if possible
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ Training...: 28% 1214/4393 [1:58:17<384:10:13, 435.05s/it]
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ return jax.tree_map(/4393 [1:58:17<384:10:13, 435.05s/it]
+ return jax.tree_map(lambda x: x[0], tree)7, 4.59s/it]
+ run_flax_speech_recognition_seq2seq.py:336: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t)
+ Step... (10000/50000 | Eval Loss: 0.6138997077941895 | Eval wer: 0.05543913826697548 | Eval cer: 0.039964500651745845 |): 17% 2/12 [13:29:59<57:16:35, 20619.59s/it]
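The FutureWarning captured just above also names its own fix: call tree_map via `jax.tree_util` instead of the deprecated top-level alias. A minimal sketch of the two call sites visible in the log, rewritten accordingly (the function names are hypothetical; only the lambdas come from the log):

```python
import jax
import jax.numpy as jnp

def unreplicate(tree):
    # Take the first device's copy of each leaf in a replicated pytree.
    return jax.tree_util.tree_map(lambda x: x[0], tree)

def to_fp32(t):
    # Upcast bfloat16 leaves to float32; leave other dtypes untouched.
    return jax.tree_util.tree_map(
        lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t
    )
```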
+
14129
+
14130
+
14131
+
14132
+
14133
+
14134
+
14135
+
14136
+
14137
+
14138
+
14139
+
14140
+
14141
+
14142
+
14143
+
14144
+
14145
+
14146
+
14147
+
14148
+
14149
+
14150
+
14151
+
14152
+
14153
+
14154
+
14155
+
14156
+
14157
+
14158
+
14159
+
14160
+
14161
+
14162
+
14163
+
14164
+
14165
+
14166
+
14167
+
14168
+
14169
+
14170
+
14171
+
14172
+
14173
+
14174
+
14175
+
14176
+
14177
+
14178
+
14179
+
14180
+
14181
+
14182
+
14183
+
14184
+
14185
+
14186
+
14187
+
14188
+
14189
+
14190
+
14191
+
14192
+
14193
+
14194
+
14195
+
14196
+
14197
+
14198
+
14199
+
14200
+
14201
+
14202
+
14203
+
14204
+
14205
+
14206
+
14207
+
14208
+
14209
+
14210
+
14211
+
14212
+
14213
+
14214
+
14215
+
14216
+
14217
+
14218
+
14219
+
14220
+
14221
+
14222
+
14223
+
14224
+
14225
+
14226
+
14227
+
14228
+
14229
+
14230
+
14231
+
14232
+
14233
+
14234
+
14235
+
14236
+
14237
+
14238
+
14239
+
14240
+
14241
+
14242
+
14243
+
14244
+
14245
+
14246
+
14247
+
14248
+
14249
+
14250
+
14251
+
14252
+
14253
+
14254
+
14255
+
14256
+
14257
+
14258
+
14259
+
14260
+
14261
+
14262
+
14263
+
14264
+
14265
+
14266
+
14267
+
14268
+
14269
+
14270
+
14271
+
14272
+
14273
+
14274
+
14275
+
14276
+
14277
+
14278
+
14279
+
14280
+
14281
+
14282
+
14283
+
14284
+
14285
+
14286
+
14287
+
14288
+
14289
+
14290
+
14291
+
14292
+
14293
+
14294
+
14295
+
14296
+
14297
+
14298
+
14299
+
14300
+
14301
+
14302
+
14303
+
14304
+
14305
+
14306
+
14307
+
14308
+
14309
+
14310
+
14311
+
14312
+
14313
+
14314
+
14315
+
14316
+
14317
+
14318
+
14319
+
14320
+
14321
+
14322
+
14323
+
14324
+
14325
+
14326
+
14327
+
14328
+
14329
+
14330
+
14331
+
14332
+
14333
+
14334
+
14335
+
14336
+
14337
+
14338
+
14339
+
14340
+
14341
+
14342
+
14343
+
14344
+
14345
+
14346
+
14347
+
14348
+
14349
+
14350
+
14351
+
14352
+
14353
+
14354
+
14355
+
14356
+
14357
+
14358
+
14359
+
14360
+
14361
+
14362
+
14363
+
14364
+
14365
+
14366
+
14367
+
14368
+
14369
+
14370
+
14371
+
14372
+
14373
+
14374
+
14375
+
14376
+
14377
+
14378
+
14379
+
14380
+
14381
+
14382
+
14383
+
14384
+
14385
+
14386
+
14387
+
14388
+
14389
+
14390
+
14391
+
14392
+
14393
+
14394
+
14395
+
14396
+
14397
+
14398
+
14399
+
14400
+
14401
+
14402
+
14403
+
14404
+
14405
+
14406
+
14407
+
14408
+
14409
+
14410
+
14411
+
14412
+
14413
+
14414
+
14415
+
14416
+
14417
+
14418
+
14419
+
14420
+
14421
+
14422
+
14423
+
14424
+
14425
+
14426
+
14427
+
14428
+
14429
+
14430
+
14431
+
14432
+
14433
+
14434
+
14435
+
14436
+
14437
+
14438
+
14439
+
14440
+
14441
+
14442
+
14443
+
14444
+
14445
+
14446
+
14447
+
14448
+
14449
+
14450
+
14451
+
14452
+
14453
+
14454
+
14455
+
14456
+
14457
+
14458
+
14459
+
14460
+
14461
+
14462
+
14463
+
14464
+
14465
+
14466
+
14467
+
14468
+
14469
+
14470
+
14471
+
14472
+
14473
+
14474
+
14475
+
14476
+
14477
+
14478
+
14479
+
14480
+
14481
+
14482
+
14483
+
14484
+
14485
+
14486
+
14487
+
14488
+
14489
+
14490
+
14491
+
14492
+
14493
+
14494
+
14495
+
14496
+
14497
+
14498
+
14499
+
14500
+
14501
+
14502
+
14503
+
14504
+
14505
+
14506
+
14507
+
14508
+
14509
+
14510
+
14511
+
14512
+
14513
+
14514
+
14515
+
14516
+
14517
+
14518
+
14519
+
14520
+
14521
+
14522
+
14523
+
14524
+
14525
+
14526
+
14527
+
14528
+
14529
+
14530
+
14531
+
14532
+
14533
+
14534
+
14535
+
14536
+
14537
+
14538
+
14539
+
14540
+
14541
+
14542
+
14543
+
14544
+
14545
+
14546
+
14547
+
14548
+
14549
+
14550
+
14551
+
14552
+
14553
+
14554
+
14555
+
14556
+
14557
+
14558
+
14559
+
14560
+
14561
+
14562
+
14563
+
14564
+
14565
+
14566
+
14567
+
14568
+
14569
+
14570
+
14571
+
14572
+
14573
+
14574
+
14575
+
14576
+
14577
+
14578
+
14579
+
14580
+
14581
+
14582
+
14583
+
14584
+
14585
+
14586
+
14587
+
14588
+
14589
+
14590
+
14591
+
14592
+
14593
+
14594
+
14595
+
14596
+
14597
+
14598
+
14599
+
14600
+
14601
+
14602
+
14603
+
14604
+
14605
+
14606
+
14607
+
14608
+
14609
+
14610
+
14611
+
14612
+
14613
+
14614
+
14615
+
14616
+
14617
+
14618
+
14619
+
14620
+
14621
+
14622
+
14623
+
14624
+
14625
+
14626
+
14627
+
14628
+
14629
+
14630
+
14631
+
14632
+
14633
+
14634
+
14635
+
14636
+
14637
+
14638
+
14639
+
14640
+
14641
+
14642
+
14643
+
14644
+
14645
+
14646
+
14647
+
14648
+
14649
+
14650
+
14651
+
14652
+
14653
+
14654
+
14655
+
14656
+
14657
+
14658
+
14659
+
14660
+
14661
+
14662
+
14663
+
14664
+
14665
+
14666
+
14667
+
14668
+
14669
+
14670
+
14671
+
14672
+
14673
+
14674
+
14675
+
14676
+
14677
+
14678
+
14679
+
14680
+
14681
+
14682
+
14683
+
14684
+
14685
+
14686
+
14687
+
14688
+
14689
+
14690
+
14691
+
14692
+
14693
+
14694
+
14695
+
14696
+
14697
+
14698
+
14699
+
14700
+
14701
+
14702
+
14703
+
14704
+
14705
+
14706
+
14707
+
14708
+
14709
+
14710
+
14711
+
14712
+
14713
+
14714
+
14715
+
14716
+
14717
+
14718
+
14719
+
14720
+
14721
+
14722
+
14723
+
14724
+
14725
+
14726
+
14727
+
14728
+
14729
+
14730
+
14731
+
14732
+
14733
+
14734
+
14735
+
14736
+
14737
+
14738
+ Training...: 67% 2964/4393 [4:15:20<2:21:10, 5.93s/it]
+ Step... (10000/50000 | Eval Loss: 0.6138997077941895 | Eval wer: 0.05543913826697548 | Eval cer: 0.039964500651745845 |)
+ Step... (10025 | Loss: 0.08830500394105911, Learning Rate: 8.075959340203553e-05, Gradient Norm: 0.5834270119667053)
+ Step... (10050 | Loss: 0.09379326552152634, Learning Rate: 8.070908370427787e-05, Gradient Norm: 0.5947138667106628)
+ Step... (10075 | Loss: 0.09823241084814072, Learning Rate: 8.065858128247783e-05, Gradient Norm: 0.6362175345420837)
+ Step... (10100 | Loss: 0.06507033109664917, Learning Rate: 8.060807886067778e-05, Gradient Norm: 0.44929641485214233)
+ Step... (10125 | Loss: 0.08986574411392212, Learning Rate: 8.055756916292012e-05, Gradient Norm: 0.5437695384025574)
+ Step... (10150 | Loss: 0.07754779607057571, Learning Rate: 8.050706674112007e-05, Gradient Norm: 0.5932506918907166)
+ Step... (10175 | Loss: 0.06109367311000824, Learning Rate: 8.045656431932002e-05, Gradient Norm: 0.48796671628952026)
+ Step... (10200 | Loss: 0.049698445945978165, Learning Rate: 8.040605462156236e-05, Gradient Norm: 0.8630101680755615)
+ Step... (10225 | Loss: 0.10865950584411621, Learning Rate: 8.035555219976231e-05, Gradient Norm: 0.8655366897583008)
+ Step... (10250 | Loss: 0.09409575164318085, Learning Rate: 8.030504977796227e-05, Gradient Norm: 0.4608936905860901)
+ Step... (10275 | Loss: 0.05792190134525299, Learning Rate: 8.02545400802046e-05, Gradient Norm: 0.48416876792907715)
+ Step... (10300 | Loss: 0.07172185927629471, Learning Rate: 8.020403765840456e-05, Gradient Norm: 0.4808516502380371)
+ Step... (10325 | Loss: 0.11542685329914093, Learning Rate: 8.015353523660451e-05, Gradient Norm: 0.5415076017379761)
+ Step... (10350 | Loss: 0.09445095807313919, Learning Rate: 8.010302553884685e-05, Gradient Norm: 0.7509729266166687)
+ Step... (10375 | Loss: 0.0917729064822197, Learning Rate: 8.00525231170468e-05, Gradient Norm: 0.7841768264770508)
+ Step... (10400 | Loss: 0.09692507982254028, Learning Rate: 8.000202069524676e-05, Gradient Norm: 0.4939047396183014)
+ Step... (10425 | Loss: 0.10967368632555008, Learning Rate: 7.99515109974891e-05, Gradient Norm: 0.6144236922264099)
+ Step... (10450 | Loss: 0.08952171355485916, Learning Rate: 7.990100857568905e-05, Gradient Norm: 0.4610723853111267)
+ Step... (10475 | Loss: 0.09783611446619034, Learning Rate: 7.9850506153889e-05, Gradient Norm: 1.0754166841506958)
+ Step... (10500 | Loss: 0.09750466048717499, Learning Rate: 7.979999645613134e-05, Gradient Norm: 1.0963139533996582)
+ Step... (10525 | Loss: 0.12679395079612732, Learning Rate: 7.974949403433129e-05, Gradient Norm: 0.8020402789115906)
+ Step... (10550 | Loss: 0.09429805725812912, Learning Rate: 7.969899161253124e-05, Gradient Norm: 1.0350173711776733)
+ Step... (10575 | Loss: 0.10793081670999527, Learning Rate: 7.964848191477358e-05, Gradient Norm: 0.6100561618804932)
+ Step... (10600 | Loss: 0.08640165627002716, Learning Rate: 7.959797949297354e-05, Gradient Norm: 0.7656195163726807)
+ Step... (10625 | Loss: 0.08220628648996353, Learning Rate: 7.954747707117349e-05, Gradient Norm: 0.5055153965950012)
+ Step... (10650 | Loss: 0.05531294271349907, Learning Rate: 7.949696737341583e-05, Gradient Norm: 0.3716430068016052)
+ Step... (10675 | Loss: 0.10128755122423172, Learning Rate: 7.944646495161578e-05, Gradient Norm: 0.6751567125320435)
+ Step... (10700 | Loss: 0.09419593960046768, Learning Rate: 7.939596252981573e-05, Gradient Norm: 0.5146047472953796)
+ Step... (10725 | Loss: 0.09003331512212753, Learning Rate: 7.934545283205807e-05, Gradient Norm: 0.6622697710990906)
+ Step... (10750 | Loss: 0.08137448877096176, Learning Rate: 7.929495041025802e-05, Gradient Norm: 0.4655037224292755)
+ Step... (10775 | Loss: 0.11204398423433304, Learning Rate: 7.924444798845798e-05, Gradient Norm: 0.795628011226654)
+ Step... (10800 | Loss: 0.07448437809944153, Learning Rate: 7.919393829070032e-05, Gradient Norm: 0.8185940384864807)
+ Step... (10825 | Loss: 0.08662033081054688, Learning Rate: 7.914343586890027e-05, Gradient Norm: 1.1144791841506958)
+ Step... (10850 | Loss: 0.08633825182914734, Learning Rate: 7.909292617114261e-05, Gradient Norm: 0.5531063675880432)
+ Step... (10875 | Loss: 0.08835204690694809, Learning Rate: 7.904242374934256e-05, Gradient Norm: 0.5773115158081055)
+ Step... (10900 | Loss: 0.07958956062793732, Learning Rate: 7.899192132754251e-05, Gradient Norm: 0.456502765417099)
+ Step... (10925 | Loss: 0.08420322835445404, Learning Rate: 7.894141162978485e-05, Gradient Norm: 0.6213755011558533)
+ Step... (10950 | Loss: 0.09074170887470245, Learning Rate: 7.88909092079848e-05, Gradient Norm: 0.5302470326423645)
+ Step... (10975 | Loss: 0.12342730909585953, Learning Rate: 7.884039951022714e-05, Gradient Norm: 0.6465903520584106)
+ Step... (11000 | Loss: 0.08631566911935806, Learning Rate: 7.87898970884271e-05, Gradient Norm: 0.8472545146942139)
+ Step... (11025 | Loss: 0.09519127011299133, Learning Rate: 7.873938739066944e-05, Gradient Norm: 1.0324699878692627)
+ Step... (11050 | Loss: 0.07494750618934631, Learning Rate: 7.868888496886939e-05, Gradient Norm: 0.47489726543426514)
+ Step... (11075 | Loss: 0.08446040004491806, Learning Rate: 7.863838254706934e-05, Gradient Norm: 0.4666726291179657)
+ Step... (11100 | Loss: 0.11089751124382019, Learning Rate: 7.858787284931168e-05, Gradient Norm: 0.5858631134033203)
+ Step... (11125 | Loss: 0.08997974544763565, Learning Rate: 7.853737042751163e-05, Gradient Norm: 0.5574468970298767)
+ Step... (11150 | Loss: 0.0976918414235115, Learning Rate: 7.848686800571159e-05, Gradient Norm: 0.7729601263999939)
+ Step... (11175 | Loss: 0.11596120148897171, Learning Rate: 7.843635830795392e-05, Gradient Norm: 0.6145332455635071)
+ Step... (11200 | Loss: 0.14628462493419647, Learning Rate: 7.838585588615388e-05, Gradient Norm: 0.7486353516578674)
+ Step... (11225 | Loss: 0.08686906844377518, Learning Rate: 7.833535346435383e-05, Gradient Norm: 0.6828690767288208)
+ Step... (11250 | Loss: 0.08807458728551865, Learning Rate: 7.828484376659617e-05, Gradient Norm: 0.48905521631240845)
+ Step... (11275 | Loss: 0.0733414962887764, Learning Rate: 7.823434134479612e-05, Gradient Norm: 0.6179749965667725)
+ Step... (11300 | Loss: 0.08363909274339676, Learning Rate: 7.818383892299607e-05, Gradient Norm: 0.5266070365905762)
+ Step... (11325 | Loss: 0.09379070997238159, Learning Rate: 7.813332922523841e-05, Gradient Norm: 2.2078020572662354)
+ Step... (11350 | Loss: 0.12219930440187454, Learning Rate: 7.808282680343837e-05, Gradient Norm: 0.7328922748565674)
+ Step... (11375 | Loss: 0.12322746962308884, Learning Rate: 7.803232438163832e-05, Gradient Norm: 0.7159468531608582)
+ Step... (11400 | Loss: 0.07447610795497894, Learning Rate: 7.798181468388066e-05, Gradient Norm: 0.4881691336631775)
+ Step... (11425 | Loss: 0.11930616945028305, Learning Rate: 7.793131226208061e-05, Gradient Norm: 0.7435767650604248)
+ Step... (11450 | Loss: 0.12145199626684189, Learning Rate: 7.788080984028056e-05, Gradient Norm: 0.5640356540679932)
+ Step... (11475 | Loss: 0.062346503138542175, Learning Rate: 7.78303001425229e-05, Gradient Norm: 0.7859042882919312)
+ Step... (11500 | Loss: 0.07021258771419525, Learning Rate: 7.777979772072285e-05, Gradient Norm: 0.4334392547607422)
+ Step... (11525 | Loss: 0.09780385345220566, Learning Rate: 7.772929529892281e-05, Gradient Norm: 0.5772568583488464)
+ Step... (11550 | Loss: 0.07436276227235794, Learning Rate: 7.767878560116515e-05, Gradient Norm: 0.450169175863266)
+ Step... (11575 | Loss: 0.08741279691457748, Learning Rate: 7.76282831793651e-05, Gradient Norm: 0.5794440507888794)
+ Step... (11600 | Loss: 0.07495800405740738, Learning Rate: 7.757777348160744e-05, Gradient Norm: 0.43371060490608215)
+ Step... (11625 | Loss: 0.11196848750114441, Learning Rate: 7.752727105980739e-05, Gradient Norm: 0.6372781991958618)
+ Step... (11650 | Loss: 0.08077137917280197, Learning Rate: 7.747676863800734e-05, Gradient Norm: 13.57120132446289)
+ Step... (11675 | Loss: 0.12308752536773682, Learning Rate: 7.742625894024968e-05, Gradient Norm: 0.7368566393852234)
+ Step... (11700 | Loss: 0.057600561529397964, Learning Rate: 7.737575651844963e-05, Gradient Norm: 0.3913513720035553)
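
Every `Step...` line above shares one fixed layout: step number, then training loss, learning rate, and gradient norm. A minimal sketch for pulling those fields out of the raw log, e.g. to plot the loss curve without the wandb UI; the regex and the `parse_step_line` helper are illustrative assumptions matching the exact format shown in this diff, not part of the training script:

```python
import re

# Matches "Step... (N | Loss: x, Learning Rate: y, Gradient Norm: z)"
# exactly as the lines appear in this log (assumed format).
STEP_RE = re.compile(
    r"Step\.\.\. \((?P<step>\d+) \| "
    r"Loss: (?P<loss>[0-9.eE+-]+), "
    r"Learning Rate: (?P<lr>[0-9.eE+-]+), "
    r"Gradient Norm: (?P<grad_norm>[0-9.eE+-]+)\)"
)

def parse_step_line(line):
    """Return step/loss/lr/grad_norm as numbers for one log line, else None."""
    m = STEP_RE.search(line)
    if m is None:
        return None
    return {
        "step": int(m.group("step")),
        "loss": float(m.group("loss")),
        "lr": float(m.group("lr")),
        "grad_norm": float(m.group("grad_norm")),
    }

line = ("Step... (10025 | Loss: 0.08830500394105911, "
        "Learning Rate: 8.075959340203553e-05, Gradient Norm: 0.5834270119667053)")
print(parse_step_line(line))
```

Parsing the log this way also makes outliers easy to spot, such as the isolated gradient-norm spike at step 11650 (13.57 against a baseline well under 2.5).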
+ Step... (10000/50000 | Eval Loss: 0.6138997077941895 | Eval wer: 0.05543913826697548 | Eval cer: 0.039964500651745845 |): 25% 3/12 [17:36:17<53:07:59, 21253.24s/it]
+ Step... (11750 | Loss: 0.07048530131578445, Learning Rate: 7.727474439889193e-05, Gradient Norm: 0.4489003121852875)
+ Step... (11775 | Loss: 0.1157061755657196, Learning Rate: 7.722424197709188e-05, Gradient Norm: 0.7761936187744141)
+ Step... (11800 | Loss: 0.07189354300498962, Learning Rate: 7.717373955529183e-05, Gradient Norm: 0.4329056441783905)
+ Step... (11825 | Loss: 0.10324515402317047, Learning Rate: 7.712322985753417e-05, Gradient Norm: 0.5789346098899841)
+ Step... (11850 | Loss: 0.07645975798368454, Learning Rate: 7.707272743573412e-05, Gradient Norm: 1.8985731601715088)
+ Step... (11875 | Loss: 0.10933918505907059, Learning Rate: 7.702221773797646e-05, Gradient Norm: 0.5749704837799072)
+ Step... (11900 | Loss: 0.09483492374420166, Learning Rate: 7.697171531617641e-05, Gradient Norm: 0.49331286549568176)
+ Step... (11925 | Loss: 0.07966803759336472, Learning Rate: 7.692121289437637e-05, Gradient Norm: 0.5429652333259583)
+ Step... (11950 | Loss: 0.08023077994585037, Learning Rate: 7.68707031966187e-05, Gradient Norm: 1.0837703943252563)
+ Step... (11975 | Loss: 0.13338258862495422, Learning Rate: 7.682020077481866e-05, Gradient Norm: 0.6416175961494446)
+ Step... (12000 | Loss: 0.044519901275634766, Learning Rate: 7.676969835301861e-05, Gradient Norm: 0.41103145480155945)
+ Step... (12025 | Loss: 0.11081162840127945, Learning Rate: 7.671918865526095e-05, Gradient Norm: 0.6642196774482727)
+ Step... (12050 | Loss: 0.0988469272851944, Learning Rate: 7.66686862334609e-05, Gradient Norm: 0.48495012521743774)
+ Step... (12075 | Loss: 0.0654635801911354, Learning Rate: 7.661818381166086e-05, Gradient Norm: 0.4860776960849762)
+ Step... (12100 | Loss: 0.08458191901445389, Learning Rate: 7.65676741139032e-05, Gradient Norm: 0.4967586100101471)
+ Step... (12125 | Loss: 0.08746153116226196, Learning Rate: 7.651717169210315e-05, Gradient Norm: 0.5080762505531311)
+ Step... (12150 | Loss: 0.10310745984315872, Learning Rate: 7.64666692703031e-05, Gradient Norm: 0.5726480484008789)
+ Step... (12175 | Loss: 0.10443349927663803, Learning Rate: 7.641615957254544e-05, Gradient Norm: 0.6160019636154175)
+ Step... (12200 | Loss: 0.08901195973157883, Learning Rate: 7.636565715074539e-05, Gradient Norm: 0.43671485781669617)
+ Step... (12225 | Loss: 0.06880508363246918, Learning Rate: 7.631514745298773e-05, Gradient Norm: 0.7653963565826416)
+ Step... (12250 | Loss: 0.07732104510068893, Learning Rate: 7.626464503118768e-05, Gradient Norm: 0.6781657338142395)
+ Step... (12275 | Loss: 0.12528778612613678, Learning Rate: 7.621413533343002e-05, Gradient Norm: 0.7398132085800171)
+ Step... (12300 | Loss: 0.06724114716053009, Learning Rate: 7.616363291162997e-05, Gradient Norm: 0.6260006427764893)
+ Step... (12325 | Loss: 0.06048065423965454, Learning Rate: 7.611312321387231e-05, Gradient Norm: 0.46516552567481995)
+ Step... (12350 | Loss: 0.07060635089874268, Learning Rate: 7.606262079207227e-05, Gradient Norm: 0.500216543674469)
+ Step... (12375 | Loss: 0.09021947532892227, Learning Rate: 7.601211837027222e-05, Gradient Norm: 0.5638901591300964)
+ Step... (12400 | Loss: 0.05326595529913902, Learning Rate: 7.596160867251456e-05, Gradient Norm: 0.3534490168094635)
+ Step... (12425 | Loss: 0.07285761088132858, Learning Rate: 7.591110625071451e-05, Gradient Norm: 0.5041208863258362)
+ Step... (12450 | Loss: 0.10327259451150894, Learning Rate: 7.586060382891446e-05, Gradient Norm: 0.6328871846199036)
+ Step... (12475 | Loss: 0.15358465909957886, Learning Rate: 7.58100941311568e-05, Gradient Norm: 0.8533681035041809)
+ Step... (12500 | Loss: 0.08255607634782791, Learning Rate: 7.575959170935676e-05, Gradient Norm: 0.6552234292030334)
+ Step... (12525 | Loss: 0.12218253314495087, Learning Rate: 7.570908928755671e-05, Gradient Norm: 0.5760861039161682)
+ Step... (12550 | Loss: 0.07060109823942184, Learning Rate: 7.565857958979905e-05, Gradient Norm: 0.41352447867393494)
+ Step... (12575 | Loss: 0.06469850987195969, Learning Rate: 7.5608077167999e-05, Gradient Norm: 0.46946582198143005)
+ Step... (12600 | Loss: 0.08960758149623871, Learning Rate: 7.555757474619895e-05, Gradient Norm: 0.6394600868225098)
+ Step... (12625 | Loss: 0.07029081881046295, Learning Rate: 7.550706504844129e-05, Gradient Norm: 0.4810219407081604)
+ Step... (12650 | Loss: 0.07843610644340515, Learning Rate: 7.545656262664124e-05, Gradient Norm: 0.4629886746406555)
+ Step... (12675 | Loss: 0.0552670955657959, Learning Rate: 7.54060602048412e-05, Gradient Norm: 0.57724928855896)
+ Step... (12700 | Loss: 0.08477842062711716, Learning Rate: 7.535555050708354e-05, Gradient Norm: 0.4033758044242859)
+ Step... (12725 | Loss: 0.11607497185468674, Learning Rate: 7.530504808528349e-05, Gradient Norm: 0.5471760630607605)
+ Step... (12750 | Loss: 0.10754743963479996, Learning Rate: 7.525454566348344e-05, Gradient Norm: 0.67010098695755)
+ Step... (12775 | Loss: 0.07305696606636047, Learning Rate: 7.520403596572578e-05, Gradient Norm: 0.5921106934547424)
+ Step... (12800 | Loss: 0.07606924325227737, Learning Rate: 7.515353354392573e-05, Gradient Norm: 0.4610716700553894)
+ Step... (12825 | Loss: 0.05552862957119942, Learning Rate: 7.510303112212569e-05, Gradient Norm: 0.9628118872642517)
+ Step... (12850 | Loss: 0.07874363660812378, Learning Rate: 7.505252142436802e-05, Gradient Norm: 0.4508274495601654)
+ Step... (12875 | Loss: 0.06021196022629738, Learning Rate: 7.500201900256798e-05, Gradient Norm: 0.45127421617507935)
+ Step... (12900 | Loss: 0.07773559540510178, Learning Rate: 7.495151658076793e-05, Gradient Norm: 0.7120715379714966)
+ Step... (12925 | Loss: 0.1171378344297409, Learning Rate: 7.490100688301027e-05, Gradient Norm: 0.8830618858337402)
+ Step... (12950 | Loss: 0.06144741550087929, Learning Rate: 7.485050446121022e-05, Gradient Norm: 0.3950470983982086)
+ Step... (12975 | Loss: 0.053323205560445786, Learning Rate: 7.480000203941017e-05, Gradient Norm: 0.5374091267585754)
+ Step... (13000 | Loss: 0.06425655633211136, Learning Rate: 7.474949234165251e-05, Gradient Norm: 0.690994918346405)
+ Step... (13025 | Loss: 0.09800120443105698, Learning Rate: 7.469898991985247e-05, Gradient Norm: 0.5918130278587341)
+ Step... (13050 | Loss: 0.07990693300962448, Learning Rate: 7.464848749805242e-05, Gradient Norm: 0.3634057343006134)
+ Step... (13075 | Loss: 0.09834995120763779, Learning Rate: 7.459797780029476e-05, Gradient Norm: 0.535110354423523)
+ Step... (13100 | Loss: 0.10143288224935532, Learning Rate: 7.454747537849471e-05, Gradient Norm: 0.4474470913410187)
+ Step... (13125 | Loss: 0.0711093470454216, Learning Rate: 7.449697295669466e-05, Gradient Norm: 0.4224787950515747)
+ Step... (13150 | Loss: 0.07919856905937195, Learning Rate: 7.4446463258937e-05, Gradient Norm: 0.5301010012626648)
+ Step... (13175 | Loss: 0.08455699682235718, Learning Rate: 7.439596083713695e-05, Gradient Norm: 0.7841456532478333)
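
Reading the learning rate off consecutive lines shows a constant drop of about 5.05e-08 every 25 steps (roughly 2.02e-09 per step); extrapolating that slope forward reaches zero almost exactly at step 50000, the final step of the run, and extrapolating backward puts the peak near 1e-4 a few hundred steps in. That is consistent with a linear warmup followed by linear decay to zero. A sketch of such a schedule in optax, where the peak value and warmup length are back-fitted assumptions, not values read from the actual training config:

```python
import optax

# Assumed, not confirmed: back-fitted from the decay slope in the log above.
PEAK_LR = 1e-4
WARMUP_STEPS = 500
TOTAL_STEPS = 50_000

lr_schedule = optax.join_schedules(
    schedules=[
        # Linear warmup from 0 to the peak...
        optax.linear_schedule(init_value=0.0, end_value=PEAK_LR,
                              transition_steps=WARMUP_STEPS),
        # ...then linear decay back to 0 over the remaining steps.
        optax.linear_schedule(init_value=PEAK_LR, end_value=0.0,
                              transition_steps=TOTAL_STEPS - WARMUP_STEPS),
    ],
    boundaries=[WARMUP_STEPS],
)

print(lr_schedule(11750))  # ~7.727e-05, close to the logged value at step 11750
```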
+ Training...: 40% 1745/4393 [2:16:50<2:06:04, 2.86s/it]
+ Step... (13200 | Loss: 0.03330820053815842, Learning Rate: 7.434545841533691e-05, Gradient Norm: 0.2786265015602112)
+ Step... (13225 | Loss: 0.07699587941169739, Learning Rate: 7.429494871757925e-05, Gradient Norm: 0.5894073247909546)
+ Step... (13250 | Loss: 0.05665012076497078, Learning Rate: 7.42444462957792e-05, Gradient Norm: 0.37814077734947205)
+ Step... (13275 | Loss: 0.03803393617272377, Learning Rate: 7.419394387397915e-05, Gradient Norm: 0.5187913179397583)
+ Step... (13300 | Loss: 0.05835753306746483, Learning Rate: 7.414343417622149e-05, Gradient Norm: 0.4063417911529541)
+ Step... (13325 | Loss: 0.0394291952252388, Learning Rate: 7.409293175442144e-05, Gradient Norm: 0.4015607237815857)
+ Step... (13350 | Loss: 0.047713086009025574, Learning Rate: 7.404242205666378e-05, Gradient Norm: 0.4759356379508972)
+ Step... (13375 | Loss: 0.05229204520583153, Learning Rate: 7.399191963486373e-05, Gradient Norm: 0.6608598232269287)
+ Step... (13400 | Loss: 0.05382212623953819, Learning Rate: 7.394141721306369e-05, Gradient Norm: 0.486329585313797)
+ Step... (13425 | Loss: 0.05246791988611221, Learning Rate: 7.389090751530603e-05, Gradient Norm: 0.9899137616157532)
+ Step... (13450 | Loss: 0.08576538413763046, Learning Rate: 7.384040509350598e-05, Gradient Norm: 0.48174503445625305)
+ Step... (13475 | Loss: 0.04886705428361893, Learning Rate: 7.378989539574832e-05, Gradient Norm: 0.5763976573944092)
+ Step... (13500 | Loss: 0.06871508061885834, Learning Rate: 7.373939297394827e-05, Gradient Norm: 0.45707303285598755)
+ Step... (13525 | Loss: 0.06088908761739731, Learning Rate: 7.368888327619061e-05, Gradient Norm: 2.149138927459717)
+ Step... (13550 | Loss: 0.03509918600320816, Learning Rate: 7.363838085439056e-05, Gradient Norm: 0.34174251556396484)
+ Step... (13575 | Loss: 0.05192377790808678, Learning Rate: 7.358787843259051e-05, Gradient Norm: 0.6240000128746033)
+ Step... (13600 | Loss: 0.08757586777210236, Learning Rate: 7.353736873483285e-05, Gradient Norm: 0.4689755439758301)
+ Step... (13625 | Loss: 0.0971912071108818, Learning Rate: 7.34868663130328e-05, Gradient Norm: 0.7313928008079529)
+ Step... (13650 | Loss: 0.05947166308760643, Learning Rate: 7.343636389123276e-05, Gradient Norm: 0.4908653795719147)
+ Step... (13675 | Loss: 0.04817585647106171, Learning Rate: 7.33858541934751e-05, Gradient Norm: 0.6736827492713928)
+ Step... (13700 | Loss: 0.06351016461849213, Learning Rate: 7.333534449571744e-05, Gradient Norm: 0.5939023494720459)
+ Step... (13725 | Loss: 0.07226695865392685, Learning Rate: 7.3284849349875e-05, Gradient Norm: 1.5131006240844727)
+ Step... (13750 | Loss: 0.07176526635885239, Learning Rate: 7.323433965211734e-05, Gradient Norm: 0.49994346499443054)
+ Step... (13775 | Loss: 0.06307905912399292, Learning Rate: 7.318382995435968e-05, Gradient Norm: 1.0378952026367188)
+ Step... (13800 | Loss: 0.07338855415582657, Learning Rate: 7.313333480851725e-05, Gradient Norm: 0.5438399314880371)
+ Step... (13825 | Loss: 0.07098106294870377, Learning Rate: 7.308282511075959e-05, Gradient Norm: 0.7232956886291504)
+ Step... (13850 | Loss: 0.07737265527248383, Learning Rate: 7.303231541300192e-05, Gradient Norm: 0.45953837037086487)
+ Step... (13875 | Loss: 0.052374329417943954, Learning Rate: 7.298182026715949e-05, Gradient Norm: 0.7172098755836487)
+ Step... (13900 | Loss: 0.04204665124416351, Learning Rate: 7.293131056940183e-05, Gradient Norm: 1.2757434844970703)
+ Step... (13925 | Loss: 0.0753527358174324, Learning Rate: 7.288080087164417e-05, Gradient Norm: 0.6898155808448792)
+ Step... (13950 | Loss: 0.061371222138404846, Learning Rate: 7.283030572580174e-05, Gradient Norm: 0.4435370862483978)
+ Step... (13975 | Loss: 0.07333303987979889, Learning Rate: 7.277979602804407e-05, Gradient Norm: 0.7842553853988647)
+ Step... (14000 | Loss: 0.062362585216760635, Learning Rate: 7.272928633028641e-05, Gradient Norm: 0.7911753058433533)
+ Step... (14025 | Loss: 0.052322275936603546, Learning Rate: 7.267879118444398e-05, Gradient Norm: 0.6138515472412109)
+ Step... (14050 | Loss: 0.06605450809001923, Learning Rate: 7.262828148668632e-05, Gradient Norm: 0.44087859988212585)
+ Step... (14075 | Loss: 0.06452997773885727, Learning Rate: 7.257777178892866e-05, Gradient Norm: 0.606708288192749)
+ Step... (14100 | Loss: 0.06375371664762497, Learning Rate: 7.252726936712861e-05, Gradient Norm: 0.6149083971977234)
+ Step... (14125 | Loss: 0.0548078753054142, Learning Rate: 7.247676694532856e-05, Gradient Norm: 0.6855599284172058)
+ Step... (14150 | Loss: 0.05743681266903877, Learning Rate: 7.24262572475709e-05, Gradient Norm: 0.4224940836429596)
+ Step... (14175 | Loss: 0.04027267172932625, Learning Rate: 7.237575482577085e-05, Gradient Norm: 0.5648296475410461)
+ Step... (14200 | Loss: 0.039936624467372894, Learning Rate: 7.232525240397081e-05, Gradient Norm: 1.8949949741363525)
+ Step... (14225 | Loss: 0.09349177032709122, Learning Rate: 7.227474270621315e-05, Gradient Norm: 0.7991975545883179)
+ Step... (14250 | Loss: 0.047797802835702896, Learning Rate: 7.22242402844131e-05, Gradient Norm: 0.3861495554447174)
+ Step... (14275 | Loss: 0.07225294411182404, Learning Rate: 7.217373786261305e-05, Gradient Norm: 0.6475846767425537)
+ Step... (14300 | Loss: 0.05031309649348259, Learning Rate: 7.212322816485539e-05, Gradient Norm: 0.40261930227279663)
+ Step... (14325 | Loss: 0.04167238622903824, Learning Rate: 7.207272574305534e-05, Gradient Norm: 0.6085657477378845)
+ Step... (14350 | Loss: 0.06230664998292923, Learning Rate: 7.20222233212553e-05, Gradient Norm: 0.39848047494888306)
+ Step... (14375 | Loss: 0.05305255204439163, Learning Rate: 7.197171362349764e-05, Gradient Norm: 0.7188547253608704)
+ Step... (14400 | Loss: 0.07000914216041565, Learning Rate: 7.192121120169759e-05, Gradient Norm: 1.0084856748580933)
+ Step... (14425 | Loss: 0.08697624504566193, Learning Rate: 7.187070877989754e-05, Gradient Norm: 0.8690377473831177)
+ Step... (14450 | Loss: 0.06024784967303276, Learning Rate: 7.182019908213988e-05, Gradient Norm: 0.5216410756111145)
+ Step... (14475 | Loss: 0.05281940847635269, Learning Rate: 7.176969666033983e-05, Gradient Norm: 0.5339590907096863)
+ Step... (14500 | Loss: 0.052328869700431824, Learning Rate: 7.171919423853979e-05, Gradient Norm: 0.34157058596611023)
+ Step... (14525 | Loss: 0.08468654006719589, Learning Rate: 7.166868454078212e-05, Gradient Norm: 0.8269289135932922)
+ Step... (14550 | Loss: 0.05382496491074562, Learning Rate: 7.161818211898208e-05, Gradient Norm: 0.6192730069160461)
+ Step... (14575 | Loss: 0.07302450388669968, Learning Rate: 7.156767969718203e-05, Gradient Norm: 0.7902158498764038)
+ Step... (14600 | Loss: 0.10266885161399841, Learning Rate: 7.151716999942437e-05, Gradient Norm: 0.5735669732093811)
+ Step... (14625 | Loss: 0.0325029231607914, Learning Rate: 7.146666757762432e-05, Gradient Norm: 0.43449780344963074)
+ Step... (14650 | Loss: 0.07611238211393356, Learning Rate: 7.141616515582427e-05, Gradient Norm: 0.4062472879886627)
+ Step... (14675 | Loss: 0.0571739599108696, Learning Rate: 7.136565545806661e-05, Gradient Norm: 0.5664569735527039)
+ Step... (14700 | Loss: 0.06980940699577332, Learning Rate: 7.131515303626657e-05, Gradient Norm: 0.4309927821159363)
+ Step... (14725 | Loss: 0.06994973868131638, Learning Rate: 7.12646433385089e-05, Gradient Norm: 0.7621283531188965)
+ Step... (14750 | Loss: 0.051209256052970886, Learning Rate: 7.121414091670886e-05, Gradient Norm: 0.3452795445919037)
+ Step... (14775 | Loss: 0.09154806286096573, Learning Rate: 7.11636312189512e-05, Gradient Norm: 0.899725615978241)
+ Step... (14800 | Loss: 0.03553222864866257, Learning Rate: 7.111312879715115e-05, Gradient Norm: 0.3117157220840454)
+ Step... (14825 | Loss: 0.06072215735912323, Learning Rate: 7.106261909939349e-05, Gradient Norm: 0.7525957822799683)
+ Step... (14850 | Loss: 0.08820261061191559, Learning Rate: 7.101211667759344e-05, Gradient Norm: 0.525641679763794)
+ Step... (14875 | Loss: 0.05997798219323158, Learning Rate: 7.096161425579339e-05, Gradient Norm: 0.5963910818099976)
+ Step... (14900 | Loss: 0.041395582258701324, Learning Rate: 7.091110455803573e-05, Gradient Norm: 0.37330615520477295)
18033
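
The learning-rate column in the steps above falls by roughly 2.02e-9 per step, which is consistent with a linear decay from a peak near 1e-4 down to zero over the 50,000 total training steps reported further down in this log. A minimal sketch of such a warmup-plus-linear-decay schedule; the peak value and warmup length are assumptions inferred from the logged numbers, not taken from this run's training script:

def linear_decay_lr(step, peak_lr=1e-4, warmup_steps=500, total_steps=50_000):
    """Hypothetical schedule reproducing the logged values to within ~1 step."""
    if step < warmup_steps:  # linear warmup up to the assumed peak
        return peak_lr * step / warmup_steps
    frac = (step - warmup_steps) / (total_steps - warmup_steps)
    return peak_lr * max(0.0, 1.0 - frac)  # linear decay to zero at total_steps

print(linear_decay_lr(14_000))  # ~7.2727e-05, vs. 7.272928...e-05 logged at step 14000
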
+ Training...: 80% 3521/4393 [4:36:08<1:23:05, 5.72s/it]
+ Step... (14950 | Loss: 0.07344218343496323, Learning Rate: 7.081009971443564e-05, Gradient Norm: 0.568691074848175)
+ Step... (14975 | Loss: 0.06512783467769623, Learning Rate: 7.075959001667798e-05, Gradient Norm: 0.6864513754844666)
+ Step... (15000 | Loss: 0.04500700533390045, Learning Rate: 7.070908759487793e-05, Gradient Norm: 0.416413277387619)
+ Step... (15025 | Loss: 0.047238558530807495, Learning Rate: 7.065858517307788e-05, Gradient Norm: 0.572498619556427)
+ Step... (15050 | Loss: 0.07442279160022736, Learning Rate: 7.060807547532022e-05, Gradient Norm: 0.557904839515686)
+ Step... (15075 | Loss: 0.041969139128923416, Learning Rate: 7.055757305352017e-05, Gradient Norm: 0.5410632491111755)
+ Step... (15100 | Loss: 0.0562279112637043, Learning Rate: 7.050707063172013e-05, Gradient Norm: 0.407163143157959)
+ Step... (15125 | Loss: 0.07544904947280884, Learning Rate: 7.045656093396246e-05, Gradient Norm: 0.8896680474281311)
+ Step... (15150 | Loss: 0.0593624971807003, Learning Rate: 7.040605851216242e-05, Gradient Norm: 0.6524192094802856)
+ Step... (15175 | Loss: 0.07540460675954819, Learning Rate: 7.035555609036237e-05, Gradient Norm: 0.7891389727592468)
+ Step... (15200 | Loss: 0.06489015370607376, Learning Rate: 7.030504639260471e-05, Gradient Norm: 0.3985648453235626)
+ Step... (15225 | Loss: 0.04916614294052124, Learning Rate: 7.025454397080466e-05, Gradient Norm: 1.7696620225906372)
+ Step... (15250 | Loss: 0.06432841718196869, Learning Rate: 7.020404154900461e-05, Gradient Norm: 0.45687729120254517)
+ Step... (15275 | Loss: 0.0661030113697052, Learning Rate: 7.015353185124695e-05, Gradient Norm: 0.7411028146743774)
+ Step... (15300 | Loss: 0.05507814139127731, Learning Rate: 7.01030294294469e-05, Gradient Norm: 0.4231397807598114)
+ Step... (15325 | Loss: 0.058054737746715546, Learning Rate: 7.005252700764686e-05, Gradient Norm: 0.6334179043769836)
+ Step... (15350 | Loss: 0.08492830395698547, Learning Rate: 7.00020173098892e-05, Gradient Norm: 0.558548092842102)
+ Step... (15375 | Loss: 0.05771920830011368, Learning Rate: 6.995151488808915e-05, Gradient Norm: 0.6630625128746033)
+ Step... (15400 | Loss: 0.05129191651940346, Learning Rate: 6.99010124662891e-05, Gradient Norm: 0.43164342641830444)
+ Step... (15425 | Loss: 0.058036696165800095, Learning Rate: 6.985050276853144e-05, Gradient Norm: 0.6926518082618713)
+ Step... (15450 | Loss: 0.09404358267784119, Learning Rate: 6.98000003467314e-05, Gradient Norm: 1.4935879707336426)
+ Step... (15475 | Loss: 0.045152828097343445, Learning Rate: 6.974949792493135e-05, Gradient Norm: 0.6178054213523865)
+ Step... (15500 | Loss: 0.04112134501338005, Learning Rate: 6.969898822717369e-05, Gradient Norm: 0.34816980361938477)
+ Step... (15525 | Loss: 0.05875265598297119, Learning Rate: 6.964848580537364e-05, Gradient Norm: 0.6528817415237427)
+ Step... (15550 | Loss: 0.056780021637678146, Learning Rate: 6.959798338357359e-05, Gradient Norm: 0.5109397172927856)
+ Step... (15575 | Loss: 0.07518711686134338, Learning Rate: 6.954747368581593e-05, Gradient Norm: 0.8571561574935913)
+ Step... (15600 | Loss: 0.06897410750389099, Learning Rate: 6.949696398805827e-05, Gradient Norm: 0.5484206080436707)
+ Step... (15625 | Loss: 0.06446701288223267, Learning Rate: 6.944646884221584e-05, Gradient Norm: 0.6554291844367981)
+ Step... (15650 | Loss: 0.0701836496591568, Learning Rate: 6.939595914445817e-05, Gradient Norm: 0.5167053937911987)
+ Step... (15675 | Loss: 0.08150122314691544, Learning Rate: 6.934544944670051e-05, Gradient Norm: 0.7669651508331299)
+ Step... (15700 | Loss: 0.05067530646920204, Learning Rate: 6.929495430085808e-05, Gradient Norm: 0.36807140707969666)
+ Step... (15725 | Loss: 0.06164156273007393, Learning Rate: 6.924444460310042e-05, Gradient Norm: 0.5428230166435242)
+ Step... (15750 | Loss: 0.061378516256809235, Learning Rate: 6.919393490534276e-05, Gradient Norm: 0.39144206047058105)
+ Step... (15775 | Loss: 0.08585003763437271, Learning Rate: 6.914343975950032e-05, Gradient Norm: 0.8241029977798462)
+ Step... (15800 | Loss: 0.061143457889556885, Learning Rate: 6.909293006174266e-05, Gradient Norm: 0.4439859390258789)
+ Step... (15825 | Loss: 0.10255448520183563, Learning Rate: 6.9042420363985e-05, Gradient Norm: 0.8529015779495239)
+ Step... (15850 | Loss: 0.0470162034034729, Learning Rate: 6.899192521814257e-05, Gradient Norm: 0.35999107360839844)
+ Step... (15875 | Loss: 0.06504394859075546, Learning Rate: 6.894141552038491e-05, Gradient Norm: 0.618028998374939)
+ Step... (15900 | Loss: 0.05916808173060417, Learning Rate: 6.889090582262725e-05, Gradient Norm: 0.3941057324409485)
+ Step... (15925 | Loss: 0.05540382117033005, Learning Rate: 6.88404034008272e-05, Gradient Norm: 0.7572468519210815)
+ Step... (15950 | Loss: 0.06875719875097275, Learning Rate: 6.878990097902715e-05, Gradient Norm: 0.44904303550720215)
+ Step... (15975 | Loss: 0.0626574382185936, Learning Rate: 6.873939128126949e-05, Gradient Norm: 0.8361836075782776)
+ Step... (16000 | Loss: 0.06566054373979568, Learning Rate: 6.868888885946944e-05, Gradient Norm: 0.4539177715778351)
+ Step... (16025 | Loss: 0.04651724547147751, Learning Rate: 6.863837916171178e-05, Gradient Norm: 0.5512727499008179)
+ Step... (16050 | Loss: 0.07435376197099686, Learning Rate: 6.858787673991174e-05, Gradient Norm: 0.48684749007225037)
+ Step... (16075 | Loss: 0.07451683282852173, Learning Rate: 6.853737431811169e-05, Gradient Norm: 0.7406129240989685)
+ Step... (16100 | Loss: 0.06162319332361221, Learning Rate: 6.848686462035403e-05, Gradient Norm: 0.5421441197395325)
+ Step... (16125 | Loss: 0.05605998635292053, Learning Rate: 6.843636219855398e-05, Gradient Norm: 0.9254444241523743)
+ Step... (16150 | Loss: 0.06836410611867905, Learning Rate: 6.838585250079632e-05, Gradient Norm: 0.4331052601337433)
+ Step... (16175 | Loss: 0.04999904707074165, Learning Rate: 6.833535007899627e-05, Gradient Norm: 0.6736144423484802)
+ Step... (16200 | Loss: 0.08609673380851746, Learning Rate: 6.828484038123861e-05, Gradient Norm: 0.5675368905067444)
+ Step... (16225 | Loss: 0.07071548700332642, Learning Rate: 6.823433795943856e-05, Gradient Norm: 0.7580640912055969)
+ Step... (16250 | Loss: 0.0654497742652893, Learning Rate: 6.818383553763852e-05, Gradient Norm: 0.39117610454559326)
+ Step... (16275 | Loss: 0.0967632606625557, Learning Rate: 6.813332583988085e-05, Gradient Norm: 0.8968567848205566)
+ Step... (16300 | Loss: 0.05536797270178795, Learning Rate: 6.80828234180808e-05, Gradient Norm: 0.35227692127227783)
+ Step... (16325 | Loss: 0.07816967368125916, Learning Rate: 6.803232099628076e-05, Gradient Norm: 0.7050227522850037)
+ Step... (16350 | Loss: 0.04643617197871208, Learning Rate: 6.79818112985231e-05, Gradient Norm: 0.5755912065505981)
+ Step... (16375 | Loss: 0.09213122725486755, Learning Rate: 6.793130887672305e-05, Gradient Norm: 0.743951141834259)
+ Step... (16400 | Loss: 0.06681258976459503, Learning Rate: 6.7880806454923e-05, Gradient Norm: 0.4667063057422638)
+ Step... (16425 | Loss: 0.0660945326089859, Learning Rate: 6.783029675716534e-05, Gradient Norm: 0.7734231352806091)
+ Step... (16450 | Loss: 0.05684332177042961, Learning Rate: 6.77797943353653e-05, Gradient Norm: 0.42356690764427185)
+ Step... (16475 | Loss: 0.09014812111854553, Learning Rate: 6.772929191356525e-05, Gradient Norm: 0.6372724771499634)
+ Step... (16500 | Loss: 0.04981550574302673, Learning Rate: 6.767878221580759e-05, Gradient Norm: 0.355785995721817)
+ Step... (16525 | Loss: 0.05794210731983185, Learning Rate: 6.762827979400754e-05, Gradient Norm: 0.5047518014907837)
+ Step... (16550 | Loss: 0.07783947139978409, Learning Rate: 6.757777737220749e-05, Gradient Norm: 0.462705135345459)
+ Step... (16575 | Loss: 0.10741002857685089, Learning Rate: 6.752726767444983e-05, Gradient Norm: 0.9183163642883301)
+ Step... (16600 | Loss: 0.0607137456536293, Learning Rate: 6.747676525264978e-05, Gradient Norm: 1.044562578201294)
+ Step... (16625 | Loss: 0.04184560477733612, Learning Rate: 6.742626283084974e-05, Gradient Norm: 0.5442230105400085)
+ Step... (16650 | Loss: 0.056308675557374954, Learning Rate: 6.737575313309208e-05, Gradient Norm: 0.5075458884239197)
+ Step... (10000/50000 | Eval Loss: 0.6138997077941895 | Eval wer: 0.05543913826697548 | Eval cer: 0.039964500651745845 |): 33% 4/12 [23:20:47<46:43:02, 21022.76s/it]
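
The Eval wer / Eval cer figures in the line above are conventionally computed as Levenshtein edit distance over words (WER) or characters (CER), summed across the evaluation set and divided by the total reference length. A generic reference implementation for context, not the evaluation code actually used by this run:

def edit_distance(ref, hyp):
    """Levenshtein distance between two sequences, classic dynamic programming."""
    prev = list(range(len(hyp) + 1))
    for i, r in enumerate(ref, start=1):
        cur = [i]
        for j, h in enumerate(hyp, start=1):
            cur.append(min(prev[j] + 1,              # deletion
                           cur[j - 1] + 1,           # insertion
                           prev[j - 1] + (r != h)))  # substitution
        prev = cur
    return prev[-1]

def error_rate(refs, hyps, unit="word"):
    """WER (unit='word') or CER (unit='char') over reference/hypothesis pairs."""
    split = (lambda s: s.split()) if unit == "word" else list
    errors = sum(edit_distance(split(r), split(h)) for r, h in zip(refs, hyps))
    total = sum(len(split(r)) for r in refs)
    return errors / total

print(error_rate(["hello world"], ["hello word"]))  # 0.5: one substitution in two words
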
+ Training...: 0% 0/4393 [00:00<?, ?it/s]
+ Step... (16700 | Loss: 0.05502559617161751, Learning Rate: 6.727474828949198e-05, Gradient Norm: 0.41517171263694763)
+ Step... (16725 | Loss: 0.08551020920276642, Learning Rate: 6.722423859173432e-05, Gradient Norm: 0.8342790603637695)
+ Step... (16750 | Loss: 0.04508410021662712, Learning Rate: 6.717373616993427e-05, Gradient Norm: 0.3625466823577881)
+ Step... (16775 | Loss: 0.08115522563457489, Learning Rate: 6.712323374813423e-05, Gradient Norm: 0.877686083316803)
+ Step... (16800 | Loss: 0.08558391034603119, Learning Rate: 6.707272405037656e-05, Gradient Norm: 0.4395720064640045)
+ Step... (16825 | Loss: 0.1271662563085556, Learning Rate: 6.702222162857652e-05, Gradient Norm: 2.5135364532470703)
+ Step... (16850 | Loss: 0.07428053021430969, Learning Rate: 6.697171920677647e-05, Gradient Norm: 0.7400687336921692)
+ Step... (16875 | Loss: 0.03849925845861435, Learning Rate: 6.692120950901881e-05, Gradient Norm: 0.6532626748085022)
+ Step... (16900 | Loss: 0.046999331563711166, Learning Rate: 6.687070708721876e-05, Gradient Norm: 0.3451361656188965)
+ Step... (16925 | Loss: 0.09734851866960526, Learning Rate: 6.682020466541871e-05, Gradient Norm: 1.0243014097213745)
+ Step... (16950 | Loss: 0.06578902155160904, Learning Rate: 6.676969496766105e-05, Gradient Norm: 0.34928444027900696)
+ Step... (16975 | Loss: 0.0750298947095871, Learning Rate: 6.6719192545861e-05, Gradient Norm: 0.6673861742019653)
+ Step... (17000 | Loss: 0.06695280969142914, Learning Rate: 6.666869012406096e-05, Gradient Norm: 0.5307708978652954)
+ Step... (17025 | Loss: 0.07898154109716415, Learning Rate: 6.66181804263033e-05, Gradient Norm: 0.809246838092804)
+ Step... (17050 | Loss: 0.0646851509809494, Learning Rate: 6.656767800450325e-05, Gradient Norm: 0.478837788105011)
+ Step... (17075 | Loss: 0.04616394266486168, Learning Rate: 6.65171755827032e-05, Gradient Norm: 0.7529241442680359)
+ Step... (17100 | Loss: 0.08262103796005249, Learning Rate: 6.646666588494554e-05, Gradient Norm: 0.4371437430381775)
+ Step... (17125 | Loss: 0.060473013669252396, Learning Rate: 6.64161634631455e-05, Gradient Norm: 0.7513884902000427)
+ Step... (17150 | Loss: 0.043580979108810425, Learning Rate: 6.636566104134545e-05, Gradient Norm: 0.3903401792049408)
+ Step... (17175 | Loss: 0.07462380826473236, Learning Rate: 6.631515134358779e-05, Gradient Norm: 0.6930056214332581)
+ Step... (17200 | Loss: 0.06158146262168884, Learning Rate: 6.626464892178774e-05, Gradient Norm: 0.5356814861297607)
+ Step... (17225 | Loss: 0.08772927522659302, Learning Rate: 6.621413922403008e-05, Gradient Norm: 0.8890146017074585)
+ Step... (17250 | Loss: 0.06375957280397415, Learning Rate: 6.616363680223003e-05, Gradient Norm: 0.3810139000415802)
+ Step... (17275 | Loss: 0.10558323562145233, Learning Rate: 6.611312710447237e-05, Gradient Norm: 0.8314210176467896)
+ Step... (17300 | Loss: 0.049320243299007416, Learning Rate: 6.606262468267232e-05, Gradient Norm: 0.36878520250320435)
+ Step... (17325 | Loss: 0.08733182400465012, Learning Rate: 6.601211498491466e-05, Gradient Norm: 0.8897705674171448)
+ Step... (17350 | Loss: 0.06899122893810272, Learning Rate: 6.596161256311461e-05, Gradient Norm: 0.40162110328674316)
+ Step... (17375 | Loss: 0.03307316452264786, Learning Rate: 6.591111014131457e-05, Gradient Norm: 0.5850245356559753)
+ Step... (17400 | Loss: 0.05450913682579994, Learning Rate: 6.58606004435569e-05, Gradient Norm: 0.40691012144088745)
+ Step... (17425 | Loss: 0.06244771555066109, Learning Rate: 6.581009802175686e-05, Gradient Norm: 0.6149278879165649)
+ Step... (17450 | Loss: 0.08476860821247101, Learning Rate: 6.575959559995681e-05, Gradient Norm: 0.5099309086799622)
+ Step... (17475 | Loss: 0.03256306052207947, Learning Rate: 6.570908590219915e-05, Gradient Norm: 0.3710366487503052)
+ Step... (17500 | Loss: 0.04107552394270897, Learning Rate: 6.56585834803991e-05, Gradient Norm: 0.32978546619415283)
+ Step... (17525 | Loss: 0.12821224331855774, Learning Rate: 6.560808105859905e-05, Gradient Norm: 0.8722378015518188)
+ Training...: 39% 1727/4393 [2:16:55<3:53:42, 5.26s/it]
+ Step... (17575 | Loss: 0.05494557321071625, Learning Rate: 6.550706893904135e-05, Gradient Norm: 0.3551705777645111)
+ Step... (17600 | Loss: 0.0307022575289011, Learning Rate: 6.54565665172413e-05, Gradient Norm: 0.3670017421245575)
+ Step... (17625 | Loss: 0.05794623866677284, Learning Rate: 6.540605681948364e-05, Gradient Norm: 0.5661908388137817)
+ Step... (17650 | Loss: 0.06338990479707718, Learning Rate: 6.535555439768359e-05, Gradient Norm: 0.9140436053276062)
+ Step... (17675 | Loss: 0.05870993807911873, Learning Rate: 6.530505197588354e-05, Gradient Norm: 0.4363075792789459)
+ Step... (17700 | Loss: 0.060519590973854065, Learning Rate: 6.525454227812588e-05, Gradient Norm: 0.5052237510681152)
+ Step... (17725 | Loss: 0.04964590072631836, Learning Rate: 6.520403258036822e-05, Gradient Norm: 0.534722089767456)
+ Step... (17750 | Loss: 0.04412442818284035, Learning Rate: 6.515353743452579e-05, Gradient Norm: 0.38217034935951233)
+ Step... (17775 | Loss: 0.07080918550491333, Learning Rate: 6.510302773676813e-05, Gradient Norm: 0.5178171396255493)
+ Step... (17800 | Loss: 0.042954571545124054, Learning Rate: 6.505251803901047e-05, Gradient Norm: 0.5687723159790039)
+ Step... (17825 | Loss: 0.1522335410118103, Learning Rate: 6.500202289316803e-05, Gradient Norm: 1.4223401546478271)
+ Step... (17850 | Loss: 0.025624962523579597, Learning Rate: 6.495151319541037e-05, Gradient Norm: 0.3468577265739441)
+ Step... (17875 | Loss: 0.0511438213288784, Learning Rate: 6.490100349765271e-05, Gradient Norm: 0.5264740586280823)
+ Step... (17900 | Loss: 0.03367552533745766, Learning Rate: 6.485050835181028e-05, Gradient Norm: 0.33524781465530396)
+ Step... (17925 | Loss: 0.06573169678449631, Learning Rate: 6.479999865405262e-05, Gradient Norm: 1.7670120000839233)
+ Step... (17950 | Loss: 0.030942659825086594, Learning Rate: 6.474948895629495e-05, Gradient Norm: 0.4611154794692993)
+ Step... (17975 | Loss: 0.050055235624313354, Learning Rate: 6.469899381045252e-05, Gradient Norm: 0.44552916288375854)
+ Step... (18000 | Loss: 0.04747422784566879, Learning Rate: 6.464848411269486e-05, Gradient Norm: 0.7189703583717346)
+ Step... (18025 | Loss: 0.05041767656803131, Learning Rate: 6.45979744149372e-05, Gradient Norm: 0.4791944622993469)
+ Step... (18050 | Loss: 0.038698114454746246, Learning Rate: 6.454747926909477e-05, Gradient Norm: 0.3972455859184265)
+ Step... (18075 | Loss: 0.06088162586092949, Learning Rate: 6.44969695713371e-05, Gradient Norm: 0.4667831063270569)
+ Step... (18100 | Loss: 0.037376631051301956, Learning Rate: 6.444645987357944e-05, Gradient Norm: 0.4643656611442566)
+ Step... (18125 | Loss: 0.061981990933418274, Learning Rate: 6.439596472773701e-05, Gradient Norm: 0.42906495928764343)
+ Step... (18150 | Loss: 0.055675018578767776, Learning Rate: 6.434545502997935e-05, Gradient Norm: 0.5672094821929932)
+ Step... (18175 | Loss: 0.04679860919713974, Learning Rate: 6.429494533222169e-05, Gradient Norm: 0.3329954743385315)
+ Step... (18200 | Loss: 0.03453730419278145, Learning Rate: 6.424444291042164e-05, Gradient Norm: 0.5234297513961792)
+ Step... (18225 | Loss: 0.040372561663389206, Learning Rate: 6.419394048862159e-05, Gradient Norm: 0.45973172783851624)
+ Step... (18250 | Loss: 0.03428987041115761, Learning Rate: 6.414343079086393e-05, Gradient Norm: 0.3846223056316376)
+ Step... (18275 | Loss: 0.04736689478158951, Learning Rate: 6.409292836906388e-05, Gradient Norm: 0.4310668706893921)
+ Step... (18300 | Loss: 0.04380634054541588, Learning Rate: 6.404242594726384e-05, Gradient Norm: 0.37499678134918213)
+ Step... (18325 | Loss: 0.04469010978937149, Learning Rate: 6.399191624950618e-05, Gradient Norm: 0.3715912103652954)
+ Step... (18350 | Loss: 0.041518259793519974, Learning Rate: 6.394141382770613e-05, Gradient Norm: 0.5135173201560974)
+ Step... (18375 | Loss: 0.050326090306043625, Learning Rate: 6.389091140590608e-05, Gradient Norm: 0.5566602945327759)
+ Step... (18400 | Loss: 0.03716479241847992, Learning Rate: 6.384040170814842e-05, Gradient Norm: 0.3112817704677582)
+ Step... (18425 | Loss: 0.047295041382312775, Learning Rate: 6.378989928634837e-05, Gradient Norm: 0.3363286554813385)
+ Step... (18450 | Loss: 0.031067634001374245, Learning Rate: 6.373939686454833e-05, Gradient Norm: 0.3189374506473541)
+ Step... (18475 | Loss: 0.06171252578496933, Learning Rate: 6.368888716679066e-05, Gradient Norm: 0.733445405960083)
+ Step... (18500 | Loss: 0.02312381938099861, Learning Rate: 6.363838474499062e-05, Gradient Norm: 0.2859801948070526)
+ Step... (18525 | Loss: 0.05208982527256012, Learning Rate: 6.358787504723296e-05, Gradient Norm: 0.4195255637168884)
+ Step... (18550 | Loss: 0.04370296373963356, Learning Rate: 6.353737262543291e-05, Gradient Norm: 0.3430011570453644)
+ Step... (18575 | Loss: 0.04457544535398483, Learning Rate: 6.348686292767525e-05, Gradient Norm: 0.4072102904319763)
+ Step... (18600 | Loss: 0.053745806217193604, Learning Rate: 6.34363605058752e-05, Gradient Norm: 0.9405742287635803)
+ Step... (18625 | Loss: 0.05881518870592117, Learning Rate: 6.338585808407515e-05, Gradient Norm: 0.551983118057251)
+ Step... (18650 | Loss: 0.028300169855356216, Learning Rate: 6.333534838631749e-05, Gradient Norm: 0.26978495717048645)
+ Step... (18675 | Loss: 0.0804310217499733, Learning Rate: 6.328484596451744e-05, Gradient Norm: 0.4554004371166229)
+ Step... (18700 | Loss: 0.04370126873254776, Learning Rate: 6.323433626675978e-05, Gradient Norm: 0.3332192897796631)
+ Step... (18725 | Loss: 0.046010829508304596, Learning Rate: 6.318383384495974e-05, Gradient Norm: 0.34220054745674133)
+ Step... (18750 | Loss: 0.03679228201508522, Learning Rate: 6.313333142315969e-05, Gradient Norm: 0.41170281171798706)
+ Step... (18775 | Loss: 0.07983490824699402, Learning Rate: 6.308282172540203e-05, Gradient Norm: 0.6871702075004578)
+ Step... (18800 | Loss: 0.033616289496421814, Learning Rate: 6.303231930360198e-05, Gradient Norm: 0.3212784230709076)
+ Step... (18825 | Loss: 0.04366293549537659, Learning Rate: 6.298181688180193e-05, Gradient Norm: 0.512840986251831)
+ Step... (18850 | Loss: 0.043552931398153305, Learning Rate: 6.293130718404427e-05, Gradient Norm: 0.43920889496803284)
+ Step... (18875 | Loss: 0.06454277783632278, Learning Rate: 6.288080476224422e-05, Gradient Norm: 0.4973304569721222)
+ Step... (18900 | Loss: 0.04304179176688194, Learning Rate: 6.283030234044418e-05, Gradient Norm: 0.368775874376297)
+ Step... (18925 | Loss: 0.050083015114068985, Learning Rate: 6.277979264268652e-05, Gradient Norm: 0.34253236651420593)
+ Step... (18950 | Loss: 0.03213053569197655, Learning Rate: 6.272929022088647e-05, Gradient Norm: 0.3148396611213684)
+ Step... (18975 | Loss: 0.04660707339644432, Learning Rate: 6.267878779908642e-05, Gradient Norm: 0.4746305048465729)
+ Step... (19000 | Loss: 0.06602873653173447, Learning Rate: 6.262827810132876e-05, Gradient Norm: 0.6007050275802612)
+ Step... (19025 | Loss: 0.042278241366147995, Learning Rate: 6.257777567952871e-05, Gradient Norm: 0.467171311378479)
+ Step... (19050 | Loss: 0.03172172233462334, Learning Rate: 6.252727325772867e-05, Gradient Norm: 0.30905085802078247)
+ Step... (19075 | Loss: 0.06033332645893097, Learning Rate: 6.2476763559971e-05, Gradient Norm: 0.3641526401042938)
+ Step... (19100 | Loss: 0.0643489882349968, Learning Rate: 6.242626113817096e-05, Gradient Norm: 0.5142189860343933)
+ Step... (19125 | Loss: 0.052221208810806274, Learning Rate: 6.237575871637091e-05, Gradient Norm: 1.0921443700790405)
+ Step... (19150 | Loss: 0.03727109357714653, Learning Rate: 6.232524901861325e-05, Gradient Norm: 1.8303654193878174)
+ Step... (19175 | Loss: 0.04352663457393646, Learning Rate: 6.22747465968132e-05, Gradient Norm: 0.4804896116256714)
+ Step... (19200 | Loss: 0.08508438616991043, Learning Rate: 6.222424417501315e-05, Gradient Norm: 5.189798355102539)
+ Step... (19225 | Loss: 0.03526046872138977, Learning Rate: 6.21737344772555e-05, Gradient Norm: 0.3649713695049286)
+ Step... (19250 | Loss: 0.052900392562150955, Learning Rate: 6.212323205545545e-05, Gradient Norm: 0.48944270610809326)
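
Every Step... line above shares one fixed layout, so the metrics can be pulled back out of the raw log with a short script. A small sketch that matches only the exact format shown here; the nohup.out filename is assumed:

import re

# Matches "Step... (N | Loss: x, Learning Rate: y, Gradient Norm: z)" as logged above.
STEP_RE = re.compile(
    r"Step\.\.\. \((\d+) \| Loss: ([\d.e-]+), "
    r"Learning Rate: ([\d.e-]+), Gradient Norm: ([\d.e-]+)\)"
)

def parse_log(lines):
    """Yield (step, loss, learning_rate, grad_norm) tuples; other lines are skipped."""
    for line in lines:
        m = STEP_RE.search(line)
        if m:
            step, loss, lr, norm = m.groups()
            yield int(step), float(loss), float(lr), float(norm)

# e.g.: rows = list(parse_log(open("nohup.out")))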
+
22924
+
22925
+
22926
+
22927
+
22928
+
22929
+
22930
+
22931
+
22932
+
22933
+
22934
+
22935
+
22936
+
22937
+
22938
+
22939
+
22940
+
22941
+
22942
+
22943
+
22944
+
22945
+
22946
+
22947
+
22948
+
22949
+
22950
+
22951
+
22952
+
22953
+
22954
+
22955
+
22956
+
22957
+
22958
+
22959
+
22960
+
22961
+
22962
+
22963
+
22964
+
22965
+
22966
+
22967
+
22968
+
22969
+
22970
+
22971
+
22972
+
22973
+
22974
+
22975
+
22976
+
22977
+
22978
+
22979
+
22980
+
22981
+
22982
+
22983
+
22984
+
22985
+
22986
+
22987
+
22988
+
22989
+
22990
+
22991
+
22992
+
22993
+
22994
+
22995
+
22996
+
22997
+
22998
+
22999
+
23000
+
23001
+
23002
+
23003
+
23004
+
23005
+
23006
+
23007
+
23008
+
23009
+
23010
+
23011
+
23012
+
23013
+
23014
+
23015
+
23016
+
23017
+
23018
+
23019
+
23020
+
23021
+
23022
+
23023
+
23024
+
23025
+
23026
+
23027
+
23028
+
23029
+
23030
+
23031
+
23032
+
23033
+
23034
+
23035
+
23036
+
23037
+
23038
+
23039
+
23040
+
23041
+
23042
+
23043
+
23044
+
23045
+
23046
+
23047
+
23048
+
23049
+
23050
+
23051
+
23052
+
23053
+
23054
+
23055
+
23056
+
23057
+
23058
+
23059
+
23060
+
23061
+
23062
+
23063
+
23064
+
23065
+
23066
+
23067
+
23068
+
23069
+
23070
+
23071
+
23072
+
23073
+
23074
+
23075
+
23076
+
23077
+
23078
+
23079
+
23080
+
23081
+
23082
+
23083
+
23084
+
23085
+
23086
+
23087
+
23088
+
23089
+
23090
+
23091
+
23092
+
23093
+
23094
+
23095
+
23096
+
23097
+
23098
+
23099
+
23100
+
23101
+
23102
+
23103
+
23104
+
23105
+
23106
+
23107
+
23108
+
23109
+
23110
+
23111
+
23112
+
23113
+
23114
+
23115
+
23116
+
23117
+
23118
+
23119
+
23120
+
23121
+
23122
+
23123
+
23124
+
23125
+
23126
+
23127
+
23128
+
23129
+
23130
+
23131
+
23132
+
23133
+
23134
+
23135
+
23136
+
23137
+
23138
+
23139
+
23140
+
23141
+
23142
+
23143
+
23144
+
23145
+
23146
+
23147
+
23148
+
23149
+
23150
+
23151
+
23152
+
23153
+
23154
+
23155
+
23156
+
23157
+
23158
+ Training...: 55% 2427/4393 [3:12:16<2:54:00, 5.31s/it]
+ Step... (19300 | Loss: 0.04498684033751488, Learning Rate: 6.202221993589774e-05, Gradient Norm: 0.3940765857696533)
+ Step... (19325 | Loss: 0.04594220966100693, Learning Rate: 6.197171751409769e-05, Gradient Norm: 0.3983670473098755)
+ Step... (19350 | Loss: 0.05320899561047554, Learning Rate: 6.192121509229764e-05, Gradient Norm: 0.43670183420181274)
+ Step... (19375 | Loss: 0.0990297943353653, Learning Rate: 6.187070539453998e-05, Gradient Norm: 0.5052773952484131)
+ Step... (19400 | Loss: 0.05028213933110237, Learning Rate: 6.182020297273993e-05, Gradient Norm: 0.4826207756996155)
+ Step... (19425 | Loss: 0.062484271824359894, Learning Rate: 6.176970055093989e-05, Gradient Norm: 0.4715140461921692)
+ Step... (19450 | Loss: 0.03580809384584427, Learning Rate: 6.171919085318223e-05, Gradient Norm: 0.34322232007980347)
+ Step... (19475 | Loss: 0.06652875244617462, Learning Rate: 6.166868843138218e-05, Gradient Norm: 3.6110024452209473)
+ Step... (19500 | Loss: 0.04379062354564667, Learning Rate: 6.161818600958213e-05, Gradient Norm: 0.582844078540802)
+ Step... (19525 | Loss: 0.05458759889006615, Learning Rate: 6.156767631182447e-05, Gradient Norm: 0.347485214471817)
+ Step... (19550 | Loss: 0.040893107652664185, Learning Rate: 6.151717389002442e-05, Gradient Norm: 0.35938355326652527)
+ Step... (19575 | Loss: 0.04561559855937958, Learning Rate: 6.146667146822438e-05, Gradient Norm: 0.4187242388725281)
+ Step... (19600 | Loss: 0.029457340016961098, Learning Rate: 6.141616177046672e-05, Gradient Norm: 0.34712833166122437)
+ Step... (19625 | Loss: 0.040251851081848145, Learning Rate: 6.136565934866667e-05, Gradient Norm: 0.2937728762626648)
+ Step... (19650 | Loss: 0.04119085520505905, Learning Rate: 6.131515692686662e-05, Gradient Norm: 0.35085585713386536)
+ Step... (19675 | Loss: 0.06086919829249382, Learning Rate: 6.126464722910896e-05, Gradient Norm: 0.399850457906723)
+ Step... (19700 | Loss: 0.05186273902654648, Learning Rate: 6.121414480730891e-05, Gradient Norm: 0.3567661643028259)
+ Step... (19725 | Loss: 0.05835634842514992, Learning Rate: 6.116363510955125e-05, Gradient Norm: 0.7113293409347534)
+ Step... (19750 | Loss: 0.031978655606508255, Learning Rate: 6.11131326877512e-05, Gradient Norm: 0.34440669417381287)
+ Step... (19775 | Loss: 0.03358345478773117, Learning Rate: 6.106262298999354e-05, Gradient Norm: 0.3868075907230377)
+ Step... (19800 | Loss: 0.05269327387213707, Learning Rate: 6.10121242061723e-05, Gradient Norm: 0.44065412878990173)
+ Step... (19825 | Loss: 0.044944509863853455, Learning Rate: 6.096161450841464e-05, Gradient Norm: 0.33990970253944397)
+ Step... (19850 | Loss: 0.049920689314603806, Learning Rate: 6.091110481065698e-05, Gradient Norm: 0.332441508769989)
+ Step... (19875 | Loss: 0.05676373839378357, Learning Rate: 6.086060966481455e-05, Gradient Norm: 0.46614909172058105)
+ Step... (19900 | Loss: 0.0489303357899189, Learning Rate: 6.0810099967056885e-05, Gradient Norm: 0.3743494153022766)
+ Step... (19925 | Loss: 0.04911625757813454, Learning Rate: 6.0759590269299224e-05, Gradient Norm: 0.3483722507953644)
+ Step... (19950 | Loss: 0.05078908056020737, Learning Rate: 6.070909512345679e-05, Gradient Norm: 0.34993499517440796)
+ Step... (19975 | Loss: 0.04709053412079811, Learning Rate: 6.065858542569913e-05, Gradient Norm: 0.3195936977863312)
+ Training...: 55% 2427/4393 [3:12:22<2:54:00, 5.31s/it]
+ /home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/jax_utils.py:312: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ return jax.tree_map(pad, tree)
+ /home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/jax_utils.py:321: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ return out if static_return else jax.tree_map(unpad, out)
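The FutureWarnings above come from the old top-level alias `jax.tree_map`; the replacement lives in `jax.tree_util` with the same call signature. A one-line illustration of the migration (the example pytree is made up):

```python
import jax
import jax.numpy as jnp

# Deprecated spelling: jax.tree_map(fn, tree)
# Current spelling, identical behaviour:
tree = {"pad": jnp.zeros((2, 3)), "mask": jnp.ones(4)}  # example pytree
shifted = jax.tree_util.tree_map(lambda x: x + 1, tree)
```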
+ [84 empty progress-bar lines omitted]
+ device_metrics = jax.tree_map(lambda x: x[0], device_metrics)
+ /home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/training/common_utils.py:45: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ return jax.tree_map(stack_args, *forest)
+ run_flax_speech_recognition_seq2seq.py:1392: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ eval_metrics = jax.tree_map(jnp.mean, eval_metrics)
+ Step... (10000/50000 | Eval Loss: 0.6138997077941895 | E
+ run_flax_speech_recognition_seq2seq.py:1425: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ params = jax.device_get(jax.tree_map(lambda x: x[0], state.params))
+ Configuration saved in /home/sanchitgandhi/flax-wav2vec2-2-bart-large-ls-960h-black-box/config.json
+ tcmalloc: large alloc 2586787840 bytes == 0x3b7a32000 @ 0x7f7cba873680 0x7f7cba893bdd 0x7f7b690721ff 0x7f7b6908142c 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6907c164 0x7f7b6907c91e 0x505166 0x56bbfa 0x569dba 0x5f6eb3 0x56cc1f 0x569dba 0x5f6eb3 0x56cc1f 0x5f6cd6 0x56bacd 0x569dba 0x50bca0 0x56cc1f 0x569dba 0x5f6eb3 0x56bacd 0x569dba 0x5f6eb3
+ tcmalloc: large alloc 2353618944 bytes == 0x452526000 @ 0x7f7cba873680 0x7f7cba894824 0x5fb391 0x7f7b6907c209 0x7f7b6907c91e 0x505166 0x56bbfa 0x569dba 0x5f6eb3 0x56cc1f 0x569dba 0x5f6eb3 0x56cc1f 0x5f6cd6 0x56bacd 0x569dba 0x50bca0 0x56cc1f 0x569dba 0x5f6eb3 0x56bacd 0x569dba 0x5f6eb3 0x56bacd 0x569dba 0x6902a7 0x67f951 0x67f9cf 0x67fa71 0x681b97 0x6b9d32
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ Model weights saved in /home/sanchitgandhi/flax-wav2vec2-2-bart-large-ls-960h-black-box/flax_model.msgpack
+ tokenizer config file saved in ./tokenizer_config.json
+ Special tokens file saved in ./special_tokens_map.json
+ [the huggingface/tokenizers fork warning above repeats four more times, the last truncated]
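The repeated fork warning names its own fix: pin `TOKENIZERS_PARALLELISM` before any fork happens. A minimal sketch, assuming the variable is set before `transformers`/`tokenizers` is first imported:

```python
import os

# Silence the huggingface/tokenizers fork warning by disabling
# tokenizer-internal parallelism before the process forks.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

from transformers import AutoTokenizer  # import only after the env var is set
```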
wandb/run-20220828_085247-2hx8pk65/files/wandb-summary.json CHANGED
@@ -1 +1 @@
- {"train/decoder_grad_norm": 0.5876523852348328, "train/decoder_param_norm": 1057.45703125, "train/encoder_grad_norm": 0.38440409302711487, "train/encoder_param_norm": 2316.3564453125, "train/grad_norm": 0.7022120952606201, "layer_grad_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 0.10323784500360489}, "embed_tokens": {"embedding": 0.16808316111564636}, "layernorm_embedding": {"bias": 0.03703528642654419, "scale": 0.060806743800640106}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 1.75027107616188e-05, "kernel": 0.030463965609669685}, "out_proj": {"bias": 0.024376848712563515, "kernel": 0.08760593086481094}, "q_proj": {"bias": 0.0016024636570364237, "kernel": 0.034829143434762955}, "v_proj": {"bias": 0.04787713289260864, "kernel": 0.07169140875339508}}, "encoder_attn_layer_norm": {"bias": 0.03529948368668556, "scale": 0.0380270853638649}, "fc1": {"bias": 0.013248836621642113, "kernel": 0.33658137917518616}, "fc2": {"bias": 0.030859898775815964, "kernel": 0.2677602767944336}, "final_layer_norm": {"bias": 0.1120176762342453, "scale": 0.05825764685869217}, "self_attn": {"k_proj": {"bias": 6.563532224390656e-06, "kernel": 0.047542572021484375}, "out_proj": {"bias": 0.068998321890831, "kernel": 0.15063460171222687}, "q_proj": {"bias": 0.003958633169531822, "kernel": 0.05425203591585159}, "v_proj": {"bias": 0.07329808175563812, "kernel": 0.198069229722023}}, "self_attn_layer_norm": {"bias": 0.023308640345931053, "scale": 0.030806636437773705}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 0.04864540696144104, "kernel": 0.133722722530365}}, "1": {"conv": {"bias": 0.04470941796898842, "kernel": 0.09400613605976105}}, "2": {"conv": {"bias": 0.05692768096923828, "kernel": 0.1417897492647171}}}}, "encoder": {"layer_norm": {"bias": 0.16896693408489227, "scale": 0.08190205693244934}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 5.699832854588749e-06, "kernel": 0.03451818600296974}, "out_proj": {"bias": 0.004949449095875025, "kernel": 0.0711507499217987}, "q_proj": {"bias": 0.006232084706425667, "kernel": 0.03630899265408516}, "v_proj": {"bias": 0.021894006058573723, "kernel": 0.0699479877948761}}, "feed_forward": {"intermediate_dense": {"bias": 0.010628663003444672, "kernel": 0.08824677765369415}, "output_dense": {"bias": 0.0046577295288443565, "kernel": 0.07864432781934738}}, "final_layer_norm": {"bias": 0.053700175136327744, "scale": 0.06233147531747818}, "layer_norm": {"bias": 0.09289932250976562, "scale": 0.07505689561367035}}}, "pos_conv_embed": {"conv": {"bias": 0.001811191556043923, "weight_g": 0.04629991203546524, "weight_v": 0.05902065336704254}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 0.01040860079228878, "scale": 0.009696024470031261}, "projection": {"bias": 0.002452271291986108, "kernel": 0.06397733092308044}}, "masked_spec_embed": 
0.0}}, "layer_param_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 58.57985305786133}, "embed_tokens": {"embedding": 628.9428100585938}, "layernorm_embedding": {"bias": 2.4099645614624023, "scale": 13.944293022155762}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 47.96258544921875, "kernel": 330.1817932128906}, "out_proj": {"bias": 6.197176456451416, "kernel": 226.72259521484375}, "q_proj": {"bias": 20.796918869018555, "kernel": 337.1412658691406}, "v_proj": {"bias": 3.727905035018921, "kernel": 230.9994354248047}}, "encoder_attn_layer_norm": {"bias": 10.427277565002441, "scale": 56.38846206665039}, "fc1": {"bias": 25.47351837158203, "kernel": 339.21954345703125}, "fc2": {"bias": 7.897115707397461, "kernel": 243.82398986816406}, "final_layer_norm": {"bias": 4.000784873962402, "scale": 63.70562744140625}, "self_attn": {"k_proj": {"bias": 59.513954162597656, "kernel": 278.91595458984375}, "out_proj": {"bias": 3.8339650630950928, "kernel": 131.7364501953125}, "q_proj": {"bias": 32.09528732299805, "kernel": 282.0332336425781}, "v_proj": {"bias": 2.626418352127075, "kernel": 140.15884399414062}}, "self_attn_layer_norm": {"bias": 8.851421356201172, "scale": 84.72929382324219}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 0.5224539637565613, "kernel": 58.06698226928711}}, "1": {"conv": {"bias": 0.6238547563552856, "kernel": 55.76792907714844}}, "2": {"conv": {"bias": 0.8834269046783447, "kernel": 55.83806610107422}}}}, "encoder": {"layer_norm": {"bias": 0.2885725498199463, "scale": 4.501636505126953}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 19.359642028808594, "kernel": 551.2367553710938}, "out_proj": {"bias": 16.819419860839844, "kernel": 703.838134765625}, "q_proj": {"bias": 40.78517532348633, "kernel": 543.7529907226562}, "v_proj": {"bias": 15.60958194732666, "kernel": 695.4569091796875}}, "feed_forward": {"intermediate_dense": {"bias": 24.515138626098633, "kernel": 1373.99365234375}, "output_dense": {"bias": 20.76974868774414, "kernel": 1299.6435546875}}, "final_layer_norm": {"bias": 32.476783752441406, "scale": 141.65736389160156}, "layer_norm": {"bias": 7.329699516296387, "scale": 45.53441619873047}}}, "pos_conv_embed": {"conv": {"bias": 15.283638954162598, "weight_g": 21.029205322265625, "weight_v": 212.9462127685547}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.5982058644294739, "kernel": 8.08896541595459}, "layer_norm": {"bias": 10.069783210754395, "scale": 10.451257705688477}}, "1": {"conv": {"bias": 4.74075174331665, "kernel": 90.8435287475586}, "layer_norm": {"bias": 6.922820091247559, "scale": 19.5467586517334}}, "2": {"conv": {"bias": 6.7732415199279785, "kernel": 146.13897705078125}, "layer_norm": {"bias": 9.044225692749023, "scale": 19.424888610839844}}, "3": {"conv": {"bias": 5.224758148193359, "kernel": 159.10508728027344}, "layer_norm": {"bias": 8.319666862487793, "scale": 17.64743423461914}}, "4": {"conv": {"bias": 4.434978008270264, "kernel": 157.35813903808594}, "layer_norm": {"bias": 9.193974494934082, "scale": 15.562357902526855}}, "5": {"conv": {"bias": 5.297643661499023, "kernel": 131.1835174560547}, "layer_norm": {"bias": 10.735219955444336, "scale": 13.812533378601074}}, "6": {"conv": {"bias": 5.615579128265381, "kernel": 136.41822814941406}, "layer_norm": {"bias": 12.515308380126953, "scale": 11.152680397033691}}}}, "feature_projection": {"layer_norm": {"bias": 9.422893524169922, "scale": 27.84585189819336}, "projection": {"bias": 
4.289161682128906, "kernel": 88.30554962158203}}, "masked_spec_embed": 26.247730255126953}}, "train/learning_rate": 8.086059824563563e-05, "train/loss": 0.1043805480003357, "train/param_norm": 2546.3154296875, "_timestamp": 1661727380, "_runtime": 50613, "_step": 9975}
 
+ {"train/decoder_grad_norm": 0.24299649894237518, "train/decoder_param_norm": 1060.7144775390625, "train/encoder_grad_norm": 0.20758813619613647, "train/encoder_param_norm": 2320.3076171875, "train/grad_norm": 0.3195936977863312, "layer_grad_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 0.015636462718248367}, "embed_tokens": {"embedding": 0.08405420929193497}, "layernorm_embedding": {"bias": 0.006145514082163572, "scale": 0.004968359600752592}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 6.667326488241088e-06, "kernel": 0.014865289442241192}, "out_proj": {"bias": 0.01036052592098713, "kernel": 0.049787431955337524}, "q_proj": {"bias": 0.0007108663558028638, "kernel": 0.015181932598352432}, "v_proj": {"bias": 0.019716547802090645, "kernel": 0.03999572619795799}}, "encoder_attn_layer_norm": {"bias": 0.015086976811289787, "scale": 0.016625721007585526}, "fc1": {"bias": 0.0057020955719053745, "kernel": 0.14046090841293335}, "fc2": {"bias": 0.0147400489076972, "kernel": 0.12780840694904327}, "final_layer_norm": {"bias": 0.032790299504995346, "scale": 0.03732256218791008}, "self_attn": {"k_proj": {"bias": 2.526882781239692e-06, "kernel": 0.01344823744148016}, "out_proj": {"bias": 0.020945662632584572, "kernel": 0.0472058430314064}, "q_proj": {"bias": 0.001085575670003891, "kernel": 0.014002838172018528}, "v_proj": {"bias": 0.020695650950074196, "kernel": 0.06064840778708458}}, "self_attn_layer_norm": {"bias": 0.009373994544148445, "scale": 0.011083677411079407}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 0.02808896079659462, "kernel": 0.06710191071033478}}, "1": {"conv": {"bias": 0.02258674055337906, "kernel": 0.046897199004888535}}, "2": {"conv": {"bias": 0.02590387687087059, "kernel": 0.07143399119377136}}}}, "encoder": {"layer_norm": {"bias": 0.09637241810560226, "scale": 0.0566645972430706}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 2.509430260033696e-06, "kernel": 0.019797371700406075}, "out_proj": {"bias": 0.0026633520610630512, "kernel": 0.042062435299158096}, "q_proj": {"bias": 0.003061411203816533, "kernel": 0.01928599737584591}, "v_proj": {"bias": 0.011404206976294518, "kernel": 0.04114246740937233}}, "feed_forward": {"intermediate_dense": {"bias": 0.006081512663513422, "kernel": 0.05225696042180061}, "output_dense": {"bias": 0.002437157789245248, "kernel": 0.045792415738105774}}, "final_layer_norm": {"bias": 0.031238090246915817, "scale": 0.03307477384805679}, "layer_norm": {"bias": 0.04994071274995804, "scale": 0.04129469022154808}}}, "pos_conv_embed": {"conv": {"bias": 0.0008167774649336934, "weight_g": 0.0036110610235482454, "weight_v": 0.012701401486992836}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 0.0036688754335045815, "scale": 0.004511996638029814}, "projection": {"bias": 0.0011850270675495267, "kernel": 
0.033120427280664444}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 58.625732421875}, "embed_tokens": {"embedding": 628.6316528320312}, "layernorm_embedding": {"bias": 2.4100914001464844, "scale": 13.898597717285156}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 47.98827362060547, "kernel": 330.9457092285156}, "out_proj": {"bias": 6.15840482711792, "kernel": 227.3690948486328}, "q_proj": {"bias": 20.846906661987305, "kernel": 337.8941650390625}, "v_proj": {"bias": 3.635528802871704, "kernel": 231.59678649902344}}, "encoder_attn_layer_norm": {"bias": 10.790207862854004, "scale": 57.176551818847656}, "fc1": {"bias": 25.858234405517578, "kernel": 343.7656555175781}, "fc2": {"bias": 7.86165714263916, "kernel": 246.748291015625}, "final_layer_norm": {"bias": 3.946004867553711, "scale": 63.51469421386719}, "self_attn": {"k_proj": {"bias": 59.53274917602539, "kernel": 279.572998046875}, "out_proj": {"bias": 3.8042359352111816, "kernel": 132.37606811523438}, "q_proj": {"bias": 32.1674919128418, "kernel": 282.6742248535156}, "v_proj": {"bias": 2.5869321823120117, "kernel": 140.7692108154297}}, "self_attn_layer_norm": {"bias": 8.874273300170898, "scale": 84.6916732788086}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 0.9373133182525635, "kernel": 60.65754699707031}}, "1": {"conv": {"bias": 1.0780301094055176, "kernel": 58.4447135925293}}, "2": {"conv": {"bias": 1.3075517416000366, "kernel": 58.37184524536133}}}}, "encoder": {"layer_norm": {"bias": 0.29216912388801575, "scale": 4.3043694496154785}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 19.379070281982422, "kernel": 552.49365234375}, "out_proj": {"bias": 16.84808349609375, "kernel": 704.6502075195312}, "q_proj": {"bias": 40.86162567138672, "kernel": 545.0123291015625}, "v_proj": {"bias": 15.593074798583984, "kernel": 696.20166015625}}, "feed_forward": {"intermediate_dense": {"bias": 24.4946231842041, "kernel": 1376.0777587890625}, "output_dense": {"bias": 20.806180953979492, "kernel": 1302.1185302734375}}, "final_layer_norm": {"bias": 32.5095329284668, "scale": 141.84854125976562}, "layer_norm": {"bias": 7.295251846313477, "scale": 45.60984420776367}}}, "pos_conv_embed": {"conv": {"bias": 15.245718002319336, "weight_g": 21.039236068725586, "weight_v": 213.549072265625}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.5982058644294739, "kernel": 8.08896541595459}, "layer_norm": {"bias": 10.069783210754395, "scale": 10.451257705688477}}, "1": {"conv": {"bias": 4.74075174331665, "kernel": 90.8435287475586}, "layer_norm": {"bias": 6.922820091247559, "scale": 19.5467586517334}}, "2": {"conv": {"bias": 6.7732415199279785, "kernel": 146.13897705078125}, "layer_norm": {"bias": 9.044225692749023, "scale": 19.424888610839844}}, "3": {"conv": {"bias": 5.224758148193359, "kernel": 159.10508728027344}, "layer_norm": {"bias": 8.319666862487793, "scale": 17.64743423461914}}, "4": {"conv": {"bias": 4.434978008270264, "kernel": 157.35813903808594}, "layer_norm": {"bias": 9.193974494934082, "scale": 15.562357902526855}}, "5": {"conv": {"bias": 5.297643661499023, "kernel": 131.1835174560547}, "layer_norm": {"bias": 10.735219955444336, "scale": 13.812533378601074}}, "6": {"conv": {"bias": 5.615579128265381, "kernel": 136.41822814941406}, "layer_norm": {"bias": 12.515308380126953, "scale": 11.152680397033691}}}}, "feature_projection": {"layer_norm": {"bias": 9.315728187561035, "scale": 
27.725435256958008}, "projection": {"bias": 4.307735443115234, "kernel": 88.24262237548828}}, "masked_spec_embed": 26.247730255126953}}, "train/learning_rate": 6.065858542569913e-05, "train/loss": 0.04709053412079811, "train/param_norm": 2551.262939453125, "_timestamp": 1661775899, "_runtime": 99132, "_step": 19975, "eval/loss": 0.6138997077941895, "eval/wer": 0.05543913826697548, "eval/cer": 0.039964500651745845, "eval/step_10k": {"_type": "table-file", "sha256": "8b44e8a00a036a18ffdf81b4d076c8bf849ea6649001c69e94fa439b14f110ee", "size": 26434, "artifact_path": "wandb-client-artifact://18m0dj4hts3yiat04x5pvmncavkjapd5wb8bznb37vw8c0lqna3m2yjd1wtdrfstuoo7ejt2sphvjo0zuw1e5ne5d3qbkd7c1fylclfggig6us5tsmsj2uum5pchx48n:latest/eval/step_10k.table.json", "_latest_artifact_path": "wandb-client-artifact://18m0dj4hts3yiat04x5pvmncavkjapd5wb8bznb37vw8c0lqna3m2yjd1wtdrfstuoo7ejt2sphvjo0zuw1e5ne5d3qbkd7c1fylclfggig6us5tsmsj2uum5pchx48n:latest/eval/step_10k.table.json", "path": "media/table/eval/step_10k_10000_8b44e8a00a036a18ffdf.table.json", "ncols": 7, "nrows": 50}}
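A small consistency check on the summary above: `train/grad_norm` equals the root-sum-square of `train/decoder_grad_norm` and `train/encoder_grad_norm`, i.e. what you would get by taking one global norm over all parameters at once. Verifiable directly from the logged numbers:

```python
import math

# Values copied from the "+" wandb-summary.json line above.
decoder_grad_norm = 0.24299649894237518
encoder_grad_norm = 0.20758813619613647

print(math.hypot(decoder_grad_norm, encoder_grad_norm))
# -> ~0.319593..., matching train/grad_norm = 0.3195936977863312
```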
wandb/run-20220828_085247-2hx8pk65/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220828_085247-2hx8pk65/run-2hx8pk65.wandb CHANGED
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
- oid sha256:094e92de49c7288ddfac32754880e9359cb30d1406e2d3bdff46b108a8c651aa
- size 4469804

version https://git-lfs.github.com/spec/v1
+ oid sha256:b64db33311cf3882e158023908e58b8d0a72a15e4fe5d8fa86d30c5ba22a41e2
+ size 8691435