sanchit-gandhi (HF staff) committed
Commit 58efde9
1 Parent(s): 1983ed9

2hx8pk65: saving weights and logs of step 40k

.gitattributes CHANGED
@@ -29,4 +29,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
- *.wandb filter=lfs diff=lfs merge=lfs -text
+ *.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20220828_085247-2hx8pk65/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:32e9052e8f2cf429458f533122ce50b5a0fc3cfc6e3096288daefda2766c5f0b
+ oid sha256:8387a279a508da48c4b9b41b29ad79013e3f168689d639a6ed6588c9f0a4ed1d
  size 2353616717
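The entry above is a Git LFS pointer file: only the SHA-256 oid and byte size of the new weights change, while the actual 2.35 GB object lives in LFS storage. A downloaded copy can be checked against the pointer with a streaming hash. This is a minimal sketch, assuming the weights file has already been fetched to a local path (the path is illustrative, not part of the commit):

```python
import hashlib

# Hypothetical local path to the downloaded LFS object; adjust as needed.
WEIGHTS_PATH = "flax_model.msgpack"
# oid and size taken from the new pointer file in the diff above.
EXPECTED_OID = "8387a279a508da48c4b9b41b29ad79013e3f168689d639a6ed6588c9f0a4ed1d"
EXPECTED_SIZE = 2353616717

def verify_lfs_object(path: str, oid: str, size: int) -> bool:
    """Stream the file and check its SHA-256 and byte count against the pointer."""
    digest = hashlib.sha256()
    n_bytes = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            digest.update(chunk)
            n_bytes += len(chunk)
    return digest.hexdigest() == oid and n_bytes == size

if __name__ == "__main__":
    print(verify_lfs_object(WEIGHTS_PATH, EXPECTED_OID, EXPECTED_SIZE))
```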
nohup.out CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220828_085247-2hx8pk65/files/media/table/eval/step_40k_40000_1c187b6d789cca3b4ec0.table.json ADDED
@@ -0,0 +1 @@
+ {"columns": ["id", "label_str", "beam_1", "beam_2", "beam_3", "beam_4", "beam_5"], "data": [["2277-149896-0000", "he was in a fevered state of mind owing to the blight his wife's action threatened to cast upon his entire future", "he was in a fevered state of mind owing to the blight his wife's action threatened to cast upon his entire future", "he was in a fevered state of mind owing to the blight his wife's action threatened to cast upon his entire future", "he was in a fevered state of mind owing to the blight his wife's action threatn'd to cast upon his entire future", "he was in a fovered state of mind owing to the blight his wife's action threatened to cast upon his entire future", "he was in a heavered state of mind owing to the blight his wife's action threatened to cast upon his entire future"], ["2277-149896-0001", "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did", "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did", "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did it", "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did", "he would have to pay her her the money which she would now regularly demand or there would be trouble it did not matter what he did", "he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he said"], ["2277-149896-0002", "hurstwood walked the floor mentally arranging the chief points of his situation", "hurstwood walked to the floor mentally arranging the chief points of his situation", "hurstwood walked the floor mentally arranging the chief points of his situation", "hurstwood walked to the floor mentally arranging the chief points of his situation", "hurstwood walked to the floor mentally arranging the chief points in his situation", "hurstwood walked the floor mentally arranging the chief points of his situation"], ["2277-149896-0003", "he also thought of his managerial position", "he also thought of his managerial position", "he also thought of this managerial position", "he also thought of his managemental position", "he also thought of the managerial position", "he also thought of his managerial position and"], ["2277-149896-0004", "how would the papers talk about it", "how would the papers talk about it", "how would the papers talk about it and", "how would the papers talk about it you", "how would the papers talk about it can't be", "how would the papers talk about it can't you"], ["2277-149896-0005", "many little wrinkles gathered between his eyes as he contemplated this and his brow moistened", "many little wrinkles gathered between his eyes as he contemplated this and his brow moistened", "many little wrinkles gathered between his eyes as he considered this and his brow moistened", "many little wrinkles gathering between his eyes as he contemplated this and his brow moistened", "many little wrinkles gathered between his eyes as he contemented this and his brow moistened", "many little wrinkles gather between his eyes as he contemplated this and his brow moistened"], ["2277-149896-0006", "he could arrange that satisfactorily for carrie would be glad to wait if necessary", "he could arrange that satisfactorily for carrie would be glad to wait if necessary", "he could 
arrange that satisfactorily for carry would be glad to wait if necessary", "he could arrange that satisfactorily for cary would be glad to wait if necessary", "he could arrange this satisfactorily for carrie would be glad to wait if necessary", "he could arrange the satisfactorily for carrie would be glad to wait if necessary"], ["2277-149896-0007", "he would see how things turned out to morrow and then he would talk to her they were going to meet as usual", "he would see how things turned out to morrow and then he would talk to her they were going to meet as usual", "he would see how things turned out tomorrow and then he would talk to her they were going to meet as usual", "he would see how things turned out to morrow and then he would talk to her they were going to meet as usual a", "he would see how things turned out to morrow and then he would talk to her they were looking to meet as usual", "he could see how things turned out to morrow and then he would talk to her they were going to meet as usual"], ["2277-149896-0008", "for some reason he felt as if something might come that way and was relieved when all the envelopes had been scanned and nothing suspicious noticed", "for some reason he felt as if something might come that way and was relieved when all the envelopes had been scanned and nothing suspicious noticed", "for some reason he felt as if something might come that way and was relieved while all the envelopes had been scanned and nothing suspicious noticed", "for some reason he felt as if something might come that way and was relieved when all the envelopes had been screened and nothing suspicious noticed", "for some reason he felt as if something might come that way and was relieved when all the envelopes have been scanned and nothing suspicious noticed", "for some reason he felt as if something might come that way and was relieved when all the envelopes had been scanned and nothing suspicious notice"], ["2277-149896-0009", "while the danger had not lessened it had not as yet materialised and with him no news was good news", "while the danger had not lessened it had not as yet materialized and with him no news was good news", "while the danger had not lessened it had not as yet materialised and with him no news was good news", "while the danger had not lessened it had not as yet materialized and with him no news was good news and", "while the danger had not lessened it had not as yet materialized and with him no news was good news a", "while the danger had not lessened it had not as yet materialized and with him no news was good news i"], ["2277-149896-0010", "so little did he consider drouet that it never once occurred to him to worry about his finding out", "so little did he consider drouet that it never once occurred to him to worry about his finding out", "so little did he consider drue that it never once occurred to him to worry about his finding out", "so little did he consider dru that it never once occurred to him to worry about his finding out", "so little did he consider druma that it never once occurred to him to worry about his finding out", "so little did he consider druda that it never once occurred to him to worry about his finding out"], ["2277-149896-0011", "he grew restless as he ruminated and then decided that perhaps it was nothing", "he grew restless as he ruminated and then decided that perhaps it was nothing", "he grew restless as he ruminated and then decided that perhaps it was nothing", "he grew restless as he ruminated and then decided that perhaps 
it was nothing and", "he grew restless as he ruminated and then decided that perhaps it was nothing i", "he grew restless as he ruminated and then decided that perhaps it was nothing had"], ["2277-149896-0012", "she had not been able to get away this morning", "she had not been able to get away this morning", "she had not been able to get away this morning and", "she had not been able to get away this morning i", "she had not been able to get away this morning a", "she had not been able to get away this morning to"], ["2277-149896-0013", "he would get one to day it would probably be on his desk when he got back he would look for it at once", "he would get one to day it would probably be on his desk when he got back he would look for it at once", "he would get one to day it would probably be on his desk when he got back he would look for it at once and", "he would get one to day it could probably be on his desk when he got back he would look for it at once", "he would get one to day it would probably be upon his desk when he got back he would look for it at once", "he would get one to day it would probably be on his desk when we got back he would look for it at once"], ["2277-149896-0014", "after a time he gave up waiting and drearily headed for the madison car", "after a time he gave up waiting and drearily headed for the madison car", "after a time he gave up waiting and drearily heading for the madison car", "after a time he gave up waiting and desirily headed for the madison car", "after a time he gave up waiting and drearily headed for the madison car and", "after a time he gave up waiting and dearily headed for the madison car"], ["2277-149896-0015", "he went in and examined his letters but there was nothing from carrie", "he went in and examined his letters but there was nothing from carrie", "he went in and examined his letters but there was nothing from kerry", "he went in and examined his letters but there was nothing from carry", "he went in and examined his letters but there was nothing from carey", "he went in and examined his letters but there was nothing from perry"], ["2277-149896-0016", "fortunately there was nothing from his wife either", "fortunately there was nothing from his wife either", "fortunately there was nothing from his wife either and", "fortunately there was nothing from his wife either", "fortunately there was nothing from this wife either", "fortunately there was nothing from his wife neither"], ["2277-149896-0017", "at one thirty he went to rector's for lunch and when he returned a messenger was waiting for him", "at one thirty he went to rector's for lunch and when he returned a messenger was waiting for him", "at one thirty he went to rectors for lunch and when he returned a messenger was waiting for him", "at one thirty he went to rectors for lunch and when he returned a messenger was waiting for him", "at one thirty he went to restor's for lunch and when he returned a messenger was waiting for him", "at one thirty he went to repertors for lunch and when he returned a messenger was waiting for him"], ["2277-149896-0018", "his first impulse was to write but four words in reply go to the devil", "his first impulse was to write but four words in reply go to the devil", "his first impulse was to write but four words in reply go to the devil and", "his first impulse was to write but four words in reply go to the devil'", "his first instinct was to write but four words in reply go to the devil", "his first impulse was to write but fourwords in reply go to the 
devil"], ["2277-149896-0019", "but he compromised by telling the boy that there would be no reply", "but he compromised by telling the boy that there would be no reply", "but he compromised by telling the boy that there would be no reply i", "but he compromised by telling the boy there would be no reply", "but he compromised by telling the boy that there would be no reply and", "but hecompromised by telling the boy that there would be no reply"], ["2277-149896-0020", "then he sat down in his chair and gazed without seeing contemplating the result of his work", "then he sat down in his chair and gazed without seeing contemplating the result of his work", "then he sat down in his chair and gazed without seeing contemplating the results of his work", "then he sat down in his chair and gazed without seeing contemplating the result of his works", "then he sat down in his chair and gazed without seeing contemplated the result of his work", "than he sat down in his chair and gazed without seeing contemplating the result of his work"], ["2277-149896-0021", "what would she do about that the confounded wretch", "what would she do about that the confounded wretch", "what would she do about that that confounded wretch", "what would she do about that the confounded wretch and", "what would she do about that the confounded wretch that", "what would she do about that the con founded wretch"], ["2277-149896-0022", "later however his old discretion asserted itself", "later however his old discretion asserted itself", "later however his old discretion asserted itself and", "later however his old discretion ascertained itself", "later however his old discretion asserted itself", "later however his old discretion asserted itself if"], ["2277-149896-0023", "something had to be done a climax was near and she would not sit idle", "something had to be done a climax was near and she would not sit idle", "something had to be done a nexus was near and she would not sit idle", "something had to be done a climax was near as she would not sit idle", "something had to be done a climax was near she would not sit idle", "something had to be done a climax was near and she would not sit idle"], ["2277-149896-0024", "he knew her well enough to know that when she had decided upon a plan she would follow it up", "he knew her well enough to know that when she had decided upon a plan she would follow it up", "he knew her well enough to know that when she had decided upon the plan she would follow it up", "he knew her well enough to know that when she had decided upon a plan she would follow it up i", "he knew her well enough to know that when she had decided upon a plan she would follow it up and", "he knew her well enough to know that when she had decided upon a plan she would follow it up to"], ["2277-149896-0025", "he arose from his chair and went and looked out into the street", "he arose from his chair and went and looked out into the street", "he arose from his chair and went and looked out into the street and", "he arose from his chair and went and looked out into the streets", "he rose from his chair and went and looked out into the street", "he arose from his chair and went and looked out into the street a"], ["2277-149896-0026", "the long drizzle had begun pedestrians had turned up collars and trousers at the bottom", "the long drizzle had begun pedestrians had turned up collars and trousers at the bottom", "the long drizzle had begun pedestrian had turned up collars and trousers at the bottom", "the long drizzle had 
begun pedestrians had turned up collars and trousers at the bottom", "the long drizzle had begun domestians had turned up collars and trousers at the bottom", "the long drizzle had begun vegetables had turned up collars and trousers at the bottom"], ["2277-149896-0027", "hurstwood almost exclaimed out loud at the insistency of this thing", "hurstwood almost exclaimed out loud at the insistence of this thing", "hurstwood almost exclaimed out loud at the insistency of this thing", "hurstwood almost exclaimed out loud at the insistency of this thing", "hurstwood almost exclaimed out loud at the insistance of this thing", "hurstwood almost exclaimed out loud at the insistence of this thing"], ["2277-149896-0028", "he put on his hat and looked around for his umbrella", "he put on his hat and looked around for his umbrella", "he put on his hat and looked round for his umbrella", "he put on his hat and looked around for his umbrella and", "he put on his hat and looked toward for his umbrella", "he put on his hat and looked about for his umbrella"], ["2277-149896-0029", "he would have some arrangement of this thing", "he would have some arrangement of this thing", "he would have some arrangement of the thing", "he would have some arrangement of this thing and", "he would have some arrangement of the thing and", "he would have some arrangement of this thing i"], ["2277-149896-0030", "he began to wish that he had compromised in some way or other that he had sent the money perhaps he could do it up here", "he began to wish that he had compromised in some way or other that he had sent the money perhaps he could do it up here", "he began to wish that he had compromised in some way or other that he had sent the money perhaps he could do it up here and", "he began to wish that he had compromised in some way or other that he had set the money perhaps he could do it up here", "he began to wish that he had compromised in some way or other that he had sented the money perhaps he could do it up here", "he began to wish that he had compromised in some way or other that he had sent the money perhaps he could do it up here but"], ["2277-149896-0031", "he would go in and see anyhow he would have no row", "he would go in and see anyhow he would have no row", "he would go in and see anyhow he would have no row and", "he would go in and see anyhow he would have no row he", "he would go in and see anyhow he would have no row he would", "he would go in and see anyhow he would have no row a"], ["2277-149896-0032", "by the time he reached his own street he was keenly alive to the difficulties of his situation and wished over and over that some solution would offer itself that he could see his way out", "by the time he reached his own street he was keenly alive to the difficulties of his situation and wished over and over that some solution would offer itself that he could see his way out", "by the time he reached his own street he was keenly alive to the difficulties of this situation and wished over and over that some solution would offer itself that he could see his way out", "by the time we reached his own street he was keenly alive to the difficulties of his situation and wished over and over that some solution would offer itself that he could see his way out", "by the time he reached this own street he was keenly alive to the difficulties of his situation and wished over and over that some solution would offer itself that he could see his way out", ""], ["2277-149896-0033", "then he rang the bell no answer", "then 
he rang the bell no answer", "then he rang the bell no answer i", "than he rang the bell no answer", "then he rang the bell no answer to", "then he rang the bell no answer and"], ["2277-149896-0034", "he rang again this time harder still no answer", "he rang again this time harder still no answer", "he rang again this time harder still no answer i", "he rang again this time harder still no answer and", "he rang again this time harder still no answer a", "he ring again this time harder still no answer"], ["2277-149897-0000", "when hurstwood got back to his office again he was in a greater quandary than ever", "when hurstwood got back to his office again he was in a greater quandary than ever", "when hurstwood got back to his office again he was in a greater quadrary than ever", "when hurstwood went back to his office again he was in a greater quandary than ever", "when hurstwood got back to his office again he was in a greater quadrille than ever", "when hurstwood got back to his office again he was in a greater quandary than ever and"], ["2277-149897-0001", "he could hardly realise how it had all come about", "he could hardly realize how it had all come about", "he could hardly realise how it had all come about", "he could hardly realize how it has all come about", "he could hardly realize how it had all come about and", "he could hardly realize how it had all done about"], ["2277-149897-0002", "no letter had come no word of any kind and yet here it was late in the evening and she had agreed to meet him that morning", "no letter had come no word of any kind and yet here it was late in the evening and she had agreed to meet him that morning", "no letter had come no word of any kind and yet here it was late in the evening as she had agreed to meet him that morning", "no letter had come no word of any kind and yet here it was late in the evening and she had agreed to meet him that morning and", "no letter had come no word of any kind and yet here it was late in the afternoon and she had agreed to meet him that morning", "no letter had come no word of any kind and yet here it was late in the evening and she had agreed to meet him that morning i"], ["2277-149897-0003", "he saw that in the excitement of recent events he had not formulated a plan upon that score", "he saw that in the excitement of recent events he had not formulated a plan upon that score", "he saw that in the excitement of recent events he had not formulated a plan upon the score", "he saw that in the excitement of recent events he had not formulated a plan upon that score and", "he saw that in the excitement of recent events he had not formulated a plan upon that score a", "he saw that in the excitement of recent events he had not demonstrated a plan upon that score"], ["2277-149897-0004", "he was getting some vague comfort out of a good cigar but it was no panacea for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no panacea for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no panatia for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no panatism for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no panegy for the ill which affected him", "he was getting some vague comfort out of a good cigar but it was no pannity for the ill which affected him"], ["2277-149897-0005", "it was with great opposition after two or three hours of the most 
urgent mental affirmation and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up", "it was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up", "it was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up and", "it was with great opposition after two or three hours of the most urgent mental acknowledgement and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up", "it was with great opposition after two or three hours of the most urgent personal affirmation and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up", "it was with great opposition after two or three hours of the most urgent mental affirmation and denial that at last he got an envelope placed in it the requested amount and slowly sealed it up a"], ["2277-149897-0006", "then he called harry the boy of all work around the place", "then he called harry the boy of all work around the place", "then he called harry the boy of all work around the place and", "then called harry the boy of all work around the place", "then he called harry the boy of all work around the place i", "then he called harry the boy of all work round the place"], ["2277-149897-0007", "you take this to this address he said handing him the envelope and give it to missus hurstwood yes sir said the boy", "you take this to this address he said handing him the envelope and give it to missus hurstwood yes sir said the boy", "you take this to this address he said handing him the envelope and give it to missus hurstwood yes sir said the boy i", "you take this to this address he said handing him the envelope and give it to missus hurstwood yes sir said the boy oh", "you take this to this address he said handing him the envelope and give it to missus hurstwood yes sir said the boy a", "you take this to this address he said handing him an envelope and give it to missus hurstwood yes sir said the boy"], ["2277-149897-0008", "any answer i guess not", "any answer i guess not", "any answer i guess not he", "any answer i guess not you", "any answer i guess not i", "any answer i guess not to"], ["2277-149897-0009", "the boy hastened away and the manager fell to his musings", "the boy hastened away and the manager fell to his musings", "the boy hastened away and the director fell to his musings", "the boy hastened away and the major fell to his musings", "the boy hurried away and the manager fell to his musings", "the boy hastily away and the manager fell to his musings"], ["2277-149897-0010", "he was beaten for to night and he might just as well make the best of it", "he was beaten for to night and he might just as well make the best of it", "he was beaten for to night and we might just as well make the best of it", "he was beaten for to night and i might just as well make the best of it", "he was beaten for tonight and he might just as well make the best of it", "he was beaten for to night and the might just as well make the best of it"], ["2277-149897-0011", "she would take the envelope and know that she had triumphed", "she would take the envelope and know that she had triumphed", "she would take the envelope and know that she had triumphed and", "she would take the envelope and 
know that she had triumphed eight", "she would take a envelope and know that she had triumphed", "she would take the envelope and know that she had triumphed i"], ["2277-149897-0012", "if he only had that letter back he wouldn't send it", "if he only had that letter back he wouldn't send it", "if he only had that letter back he wouldn't send it if", "if he only had that letter back he wouldn't send itif", "if he only had that letter back he wouldn't send it", "if he only held that letter back he wouldn't send it"], ["2277-149897-0013", "for relief he arose and joined in conversation with a few friends who were drinking", "for relief he arose and joined in the conversation with a few friends who were drinking", "for relief he arose and joined in the conversation with the few friends who were drinking", "for relief he arose and rejoined in the conversation with a few friends who were drinking", "for relief he arose and joined in the conversation with a few friends who were drinking and", "for relief he arose and joined in the conversation with a few friends who were drinking for"], ["2277-149897-0014", "all the time his thoughts would run out to his home and see the scene being therein enacted", "all the time his thoughts would run out to his home and see the scene being therein enacted", "all the time his thoughts would run out to his house and see the scene being therein enacted", "all this time his thoughts would run out to his home and see the scene being therein enacted", "all the time his thought would run out to his home and see the scene being therein enacted", "all the time his thoughts would run out to his home and see this scene being therein enacted"]]}
wandb/run-20220828_085247-2hx8pk65/files/output.log CHANGED
@@ -33613,5 +33613,10358 @@ To disable this warning, you can either:
  - Avoid using `tokenizers` before the fork if possible
  - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
  huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
- To disable this warning, you can either:
- - Avoid using `tokenizers` before the fork if possible
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ Training...: 83% 3642/4393 [4:55:39<32:14:49, 154.58s/it]
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ return jax.tree_map(/4393 [4:55:39<32:14:49, 154.58s/it]
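The repeated tokenizers warning above is advisory: the Rust tokenizer's thread pool does not survive a process fork, so the library disables parallelism in the child to avoid deadlocks. Per the warning text itself, one fix is to set the environment variable before the first fork. A minimal sketch (where exactly it belongs depends on the training script; here it simply runs at import time, before any data-loader workers are spawned):

```python
import os

# Must be set before `tokenizers` spawns its thread pool and before any
# process fork (e.g. multiprocessing data loaders), per the warning above.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
```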
+ return jax.tree_map(lambda x: x[0], tree)7, 5.87s/it]
+ run_flax_speech_recognition_seq2seq.py:336: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ return jax.tree_map(lambda x: x.astype(jnp.float32) if x.dtype == jnp.bfloat16 else x, t)
+ Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 50% 6/12 [40:00:43<35:02:00, 21020.10s/it]
34380
+ Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 58% 7/12 [40:58:25<29:19:56, 21119.31s/it]
34381
+ Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |)
34382
+ Step... (30025 | Loss: 0.01562521792948246, Learning Rate: 4.0355556848226115e-05, Gradient Norm: 0.265262246131897)
34383
+ Step... (30050 | Loss: 0.016519010066986084, Learning Rate: 4.0305047150468454e-05, Gradient Norm: 0.39625251293182373)
34384
+ Step... (30075 | Loss: 0.016090065240859985, Learning Rate: 4.0254544728668407e-05, Gradient Norm: 0.24512924253940582)
34385
+ Step... (30100 | Loss: 0.020717907696962357, Learning Rate: 4.020404230686836e-05, Gradient Norm: 0.36649051308631897)
34386
+ Step... (30125 | Loss: 0.023311305791139603, Learning Rate: 4.01535326091107e-05, Gradient Norm: 0.48252689838409424)
34387
+ Step... (30150 | Loss: 0.020284006372094154, Learning Rate: 4.010303018731065e-05, Gradient Norm: 0.4051229953765869)
34388
+ Step... (30175 | Loss: 0.03670956566929817, Learning Rate: 4.0052527765510604e-05, Gradient Norm: 0.3055880665779114)
34389
+ Step... (30200 | Loss: 0.016665274277329445, Learning Rate: 4.000201806775294e-05, Gradient Norm: 0.3359684348106384)
34390
+ Step... (30225 | Loss: 0.021576257422566414, Learning Rate: 3.9951515645952895e-05, Gradient Norm: 0.2677457928657532)
34391
+ Step... (30250 | Loss: 0.018543612211942673, Learning Rate: 3.990100958617404e-05, Gradient Norm: 0.3655332922935486)
34392
+ Step... (30275 | Loss: 0.02315855212509632, Learning Rate: 3.985050352639519e-05, Gradient Norm: 0.2440711408853531)
34393
+ Step... (30300 | Loss: 0.012810557149350643, Learning Rate: 3.979999746661633e-05, Gradient Norm: 0.29266688227653503)
34394
+ Step... (30325 | Loss: 0.016653001308441162, Learning Rate: 3.9749495044816285e-05, Gradient Norm: 0.22393402457237244)
34395
+ Step... (30350 | Loss: 0.01203943882137537, Learning Rate: 3.969898898503743e-05, Gradient Norm: 0.2736056447029114)
34396
+ Step... (30375 | Loss: 0.018077773973345757, Learning Rate: 3.964848292525858e-05, Gradient Norm: 0.2008410394191742)
34397
+ Step... (30400 | Loss: 0.03966773301362991, Learning Rate: 3.959798050345853e-05, Gradient Norm: 0.3925747871398926)
34398
+ Step... (30425 | Loss: 0.031882643699645996, Learning Rate: 3.954747080570087e-05, Gradient Norm: 0.39469194412231445)
34399
+ Step... (30450 | Loss: 0.014567309990525246, Learning Rate: 3.949696838390082e-05, Gradient Norm: 0.2972516715526581)
34400
+ Step... (30475 | Loss: 0.026796311140060425, Learning Rate: 3.9446465962100774e-05, Gradient Norm: 0.3355867564678192)
34401
+ Step... (30500 | Loss: 0.0361606702208519, Learning Rate: 3.939595626434311e-05, Gradient Norm: 0.407830148935318)
34402
+ Step... (30525 | Loss: 0.03200415521860123, Learning Rate: 3.9345453842543066e-05, Gradient Norm: 0.5616704821586609)
34403
+ Step... (30550 | Loss: 0.014776033349335194, Learning Rate: 3.929495142074302e-05, Gradient Norm: 0.3266984522342682)
34404
+ Step... (30575 | Loss: 0.012889200821518898, Learning Rate: 3.924444172298536e-05, Gradient Norm: 0.1923571079969406)
34405
+ Step... (30600 | Loss: 0.013984130695462227, Learning Rate: 3.919393930118531e-05, Gradient Norm: 0.3364371657371521)
34406
+ Step... (30625 | Loss: 0.029455211013555527, Learning Rate: 3.914343687938526e-05, Gradient Norm: 0.34443628787994385)
34407
+ Step... (30650 | Loss: 0.03519538417458534, Learning Rate: 3.90929271816276e-05, Gradient Norm: 0.5291494131088257)
34408
+ Step... (30675 | Loss: 0.02681455761194229, Learning Rate: 3.9042424759827554e-05, Gradient Norm: 0.30602386593818665)
34409
+ Step... (30700 | Loss: 0.02662944421172142, Learning Rate: 3.899192233802751e-05, Gradient Norm: 0.36101633310317993)
34410
+ Step... (30725 | Loss: 0.01868010126054287, Learning Rate: 3.8941412640269846e-05, Gradient Norm: 0.336725115776062)
34411
+ Step... (30750 | Loss: 0.05492250248789787, Learning Rate: 3.88909102184698e-05, Gradient Norm: 0.7654430270195007)
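The metric lines above are enough to pin down the schedule: the learning rate falls by a constant ~2.02e-09 per step (4.0356e-05 at step 30025 vs 4.0305e-05 at step 30050), which extrapolates to zero at roughly the final step, 50 000. Below is a minimal sketch of a schedule consistent with those values, assuming an optax-style linear decay; the back-extrapolated peak value and the absence of warmup are assumptions, not taken from the log.

```python
# Minimal sketch: a linear decay consistent with the logged LR values.
# Assumptions (not from the log): optax is the schedule library, warmup is
# ignored, and the peak LR is back-extrapolated from the per-step slope.
import optax

TOTAL_STEPS = 50_000          # from the "(30000/50000 ...)" progress line
SLOPE = 2.0204e-09            # per-step LR drop fitted from consecutive log lines

lr_schedule = optax.linear_schedule(
    init_value=SLOPE * TOTAL_STEPS,  # ~1.01e-04 back-extrapolated peak (assumed)
    end_value=0.0,
    transition_steps=TOTAL_STEPS,
)

print(lr_schedule(30_025))  # ~4.036e-05, matching the logged 4.0356e-05 closely
```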
36127
+ Training...: 40% 1748/4393 [2:19:42<1:36:57, 2.20s/it]
36128
+ Step... (30775 | Loss: 0.020074449479579926, Learning Rate: 3.884040779666975e-05, Gradient Norm: 0.35103708505630493)
36129
+ Step... (30800 | Loss: 0.009022093378007412, Learning Rate: 3.878989809891209e-05, Gradient Norm: 0.4046609401702881)
36130
+ Step... (30825 | Loss: 0.010481682606041431, Learning Rate: 3.873939567711204e-05, Gradient Norm: 0.39897775650024414)
36131
+ Step... (30850 | Loss: 0.0015329535817727447, Learning Rate: 3.868888961733319e-05, Gradient Norm: 0.08926476538181305)
36132
+ Step... (30875 | Loss: 0.018157506361603737, Learning Rate: 3.8638383557554334e-05, Gradient Norm: 0.33348050713539124)
36133
+ Step... (30900 | Loss: 0.0076045086607337, Learning Rate: 3.858787749777548e-05, Gradient Norm: 0.35109609365463257)
36134
+ Step... (30925 | Loss: 0.018489006906747818, Learning Rate: 3.8537371437996626e-05, Gradient Norm: 0.2586105465888977)
36135
+ Step... (30950 | Loss: 0.029866378754377365, Learning Rate: 3.848686537821777e-05, Gradient Norm: 0.9838259220123291)
36136
+ Step... (30975 | Loss: 0.025535719469189644, Learning Rate: 3.8436362956417724e-05, Gradient Norm: 0.26122814416885376)
36137
+ Step... (31000 | Loss: 0.010486635379493237, Learning Rate: 3.838585689663887e-05, Gradient Norm: 0.8068628311157227)
36138
+ Step... (31025 | Loss: 0.015530271455645561, Learning Rate: 3.8335350836860016e-05, Gradient Norm: 0.1934879869222641)
36139
+ Step... (31050 | Loss: 0.010574892163276672, Learning Rate: 3.828484841505997e-05, Gradient Norm: 0.5335045456886292)
36140
+ Step... (31075 | Loss: 0.02766241505742073, Learning Rate: 3.823433871730231e-05, Gradient Norm: 0.33427202701568604)
36141
+ Step... (31100 | Loss: 0.004194389563053846, Learning Rate: 3.818383629550226e-05, Gradient Norm: 0.21223211288452148)
36142
+ Step... (31125 | Loss: 0.012914509512484074, Learning Rate: 3.813333387370221e-05, Gradient Norm: 0.22572757303714752)
36143
+ Step... (31150 | Loss: 0.002646281151100993, Learning Rate: 3.808282417594455e-05, Gradient Norm: 0.20129801332950592)
36144
+ Step... (31175 | Loss: 0.018720488995313644, Learning Rate: 3.8032321754144505e-05, Gradient Norm: 0.33454686403274536)
36145
+ Step... (31200 | Loss: 0.0021898970007896423, Learning Rate: 3.798181933234446e-05, Gradient Norm: 0.2922018766403198)
36146
+ Step... (31225 | Loss: 0.018139390274882317, Learning Rate: 3.7931309634586796e-05, Gradient Norm: 0.284376323223114)
36147
+ Step... (31250 | Loss: 0.017836367711424828, Learning Rate: 3.788080721278675e-05, Gradient Norm: 0.7448089718818665)
36148
+ Step... (31275 | Loss: 0.009362353943288326, Learning Rate: 3.78303047909867e-05, Gradient Norm: 0.23159761726856232)
36149
+ Step... (31300 | Loss: 0.005309374537318945, Learning Rate: 3.777979509322904e-05, Gradient Norm: 0.3577989637851715)
36150
+ Step... (31325 | Loss: 0.023352516815066338, Learning Rate: 3.772929267142899e-05, Gradient Norm: 0.3282865583896637)
36151
+ Step... (31350 | Loss: 0.011359583586454391, Learning Rate: 3.7678790249628946e-05, Gradient Norm: 0.4796968102455139)
36152
+ Step... (31375 | Loss: 0.015349440276622772, Learning Rate: 3.7628280551871285e-05, Gradient Norm: 0.23296594619750977)
36153
+ Step... (31400 | Loss: 0.002398452255874872, Learning Rate: 3.757777813007124e-05, Gradient Norm: 0.17549781501293182)
36154
+ Step... (31425 | Loss: 0.01950138248503208, Learning Rate: 3.752727570827119e-05, Gradient Norm: 0.30373281240463257)
36155
+ Step... (31450 | Loss: 0.006116131786257029, Learning Rate: 3.747676601051353e-05, Gradient Norm: 0.35958558320999146)
36156
+ Step... (31475 | Loss: 0.022649751976132393, Learning Rate: 3.742626358871348e-05, Gradient Norm: 0.29181864857673645)
36157
+ Step... (31500 | Loss: 0.005045859608799219, Learning Rate: 3.737575752893463e-05, Gradient Norm: 0.2876945436000824)
36158
+ Step... (31525 | Loss: 0.011433024890720844, Learning Rate: 3.7325251469155774e-05, Gradient Norm: 0.2105947732925415)
36159
+ Step... (31550 | Loss: 0.017296401783823967, Learning Rate: 3.727474540937692e-05, Gradient Norm: 0.5196598172187805)
36160
+ Step... (31575 | Loss: 0.021925507113337517, Learning Rate: 3.722424298757687e-05, Gradient Norm: 0.3805218040943146)
36161
+ Step... (31600 | Loss: 0.00693158432841301, Learning Rate: 3.717373692779802e-05, Gradient Norm: 0.9232239127159119)
36162
+ Step... (31625 | Loss: 0.011155126616358757, Learning Rate: 3.7123230868019164e-05, Gradient Norm: 0.2306728959083557)
36163
+ Step... (31650 | Loss: 0.0032064933329820633, Learning Rate: 3.7072728446219116e-05, Gradient Norm: 0.18296311795711517)
36164
+ Step... (31675 | Loss: 0.015599323436617851, Learning Rate: 3.7022218748461455e-05, Gradient Norm: 0.2689945101737976)
36165
+ Step... (31700 | Loss: 0.003943402785807848, Learning Rate: 3.697171632666141e-05, Gradient Norm: 0.2862476110458374)
36166
+ Step... (31725 | Loss: 0.014135945588350296, Learning Rate: 3.692121390486136e-05, Gradient Norm: 0.29760271310806274)
36167
+ Step... (31750 | Loss: 0.001614759094081819, Learning Rate: 3.68707042071037e-05, Gradient Norm: 0.11080089956521988)
36168
+ Step... (31775 | Loss: 0.024603702127933502, Learning Rate: 3.682020178530365e-05, Gradient Norm: 0.485177606344223)
36169
+ Step... (31800 | Loss: 0.004279211163520813, Learning Rate: 3.6769699363503605e-05, Gradient Norm: 0.2789261043071747)
36170
+ Step... (31825 | Loss: 0.008554738014936447, Learning Rate: 3.6719189665745944e-05, Gradient Norm: 0.17841897904872894)
36171
+ Step... (31850 | Loss: 0.004655478522181511, Learning Rate: 3.66686872439459e-05, Gradient Norm: 0.441360741853714)
36172
+ Step... (31875 | Loss: 0.010068932548165321, Learning Rate: 3.661818482214585e-05, Gradient Norm: 0.18529221415519714)
36173
+ Step... (31900 | Loss: 0.01598585955798626, Learning Rate: 3.656767512438819e-05, Gradient Norm: 0.7631728649139404)
36174
+ Step... (31925 | Loss: 0.01710684411227703, Learning Rate: 3.651717270258814e-05, Gradient Norm: 0.2801308333873749)
36175
+ Step... (31950 | Loss: 0.005609472282230854, Learning Rate: 3.646666300483048e-05, Gradient Norm: 0.4520111083984375)
36176
+ Step... (31975 | Loss: 0.010884484276175499, Learning Rate: 3.641616058303043e-05, Gradient Norm: 0.21241894364356995)
36177
+ Step... (32000 | Loss: 0.009471398778259754, Learning Rate: 3.6365658161230385e-05, Gradient Norm: 0.5816971659660339)
36178
+ Step... (32025 | Loss: 0.021492963656783104, Learning Rate: 3.6315148463472724e-05, Gradient Norm: 0.2569616734981537)
36179
+ Step... (32050 | Loss: 0.002289405558258295, Learning Rate: 3.626464604167268e-05, Gradient Norm: 0.11815373599529266)
36180
+ Step... (32075 | Loss: 0.02590622566640377, Learning Rate: 3.621414361987263e-05, Gradient Norm: 0.3947508633136749)
36181
+ Step... (32100 | Loss: 0.007713155820965767, Learning Rate: 3.616363392211497e-05, Gradient Norm: 0.457485556602478)
36182
+ Step... (32125 | Loss: 0.02284613437950611, Learning Rate: 3.611313150031492e-05, Gradient Norm: 0.31568753719329834)
36183
+ Step... (32150 | Loss: 0.006654662545770407, Learning Rate: 3.606262544053607e-05, Gradient Norm: 0.39326000213623047)
36184
+ Step... (32175 | Loss: 0.01354904007166624, Learning Rate: 3.601211938075721e-05, Gradient Norm: 0.2626039683818817)
36185
+ Step... (32200 | Loss: 0.01198299415409565, Learning Rate: 3.596161332097836e-05, Gradient Norm: 0.5191692113876343)
36186
+ Step... (32225 | Loss: 0.0203788373619318, Learning Rate: 3.591111089917831e-05, Gradient Norm: 0.29200589656829834)
36187
+ Step... (32250 | Loss: 0.011300805024802685, Learning Rate: 3.586060483939946e-05, Gradient Norm: 0.3954571485519409)
36188
+ Step... (32275 | Loss: 0.02609907276928425, Learning Rate: 3.58100987796206e-05, Gradient Norm: 0.4498150944709778)
36189
+ Step... (32300 | Loss: 0.006780035328119993, Learning Rate: 3.5759596357820556e-05, Gradient Norm: 0.4498085379600525)
36190
+ Step... (32325 | Loss: 0.021178090944886208, Learning Rate: 3.5709086660062894e-05, Gradient Norm: 0.39719340205192566)
36191
+ Step... (32350 | Loss: 0.016519315540790558, Learning Rate: 3.565858423826285e-05, Gradient Norm: 0.5781014561653137)
36192
+ Step... (32375 | Loss: 0.016620468348264694, Learning Rate: 3.56080818164628e-05, Gradient Norm: 0.2802444398403168)
36193
+ Step... (32400 | Loss: 0.022222617641091347, Learning Rate: 3.555757211870514e-05, Gradient Norm: 0.7580689787864685)
36194
+ Step... (32425 | Loss: 0.0225432887673378, Learning Rate: 3.550706969690509e-05, Gradient Norm: 0.288802832365036)
36195
+ Step... (32450 | Loss: 0.0061545781791210175, Learning Rate: 3.5456567275105044e-05, Gradient Norm: 0.3461533784866333)
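Each training line also reports a gradient norm alongside the loss. A minimal sketch of how such a scalar is conventionally computed over a JAX gradient pytree follows; the toy `grads` dict is hypothetical, and the explicit sum-of-squares formulation (rather than any particular helper from this training script) is an assumption.

```python
# Minimal sketch: global L2 norm of a gradient pytree, the quantity a
# "Gradient Norm: ..." log field conventionally reports.
# `grads` is a hypothetical toy pytree standing in for jax.grad output.
import jax
import jax.numpy as jnp

grads = {"kernel": jnp.ones((2, 3)), "bias": jnp.zeros((3,))}

leaves = jax.tree_util.tree_leaves(grads)
grad_norm = jnp.sqrt(sum(jnp.sum(jnp.square(g)) for g in leaves))
print(grad_norm)  # sqrt(6) ~ 2.449 for this toy pytree
```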
37887
+ Training...: 79% 3474/4393 [4:37:39<1:27:37, 5.72s/it]
37888
+ Step... (32500 | Loss: 0.003240741789340973, Learning Rate: 3.5355555155547336e-05, Gradient Norm: 0.2734076678752899)
37889
+ Step... (32525 | Loss: 0.022595498710870743, Learning Rate: 3.530505273374729e-05, Gradient Norm: 0.2993549406528473)
37890
+ Step... (32550 | Loss: 0.004729455336928368, Learning Rate: 3.525454303598963e-05, Gradient Norm: 0.36609122157096863)
37891
+ Step... (32575 | Loss: 0.018012160435318947, Learning Rate: 3.520404061418958e-05, Gradient Norm: 0.3467451333999634)
37892
+ Step... (32600 | Loss: 0.003656194545328617, Learning Rate: 3.515353819238953e-05, Gradient Norm: 0.1817796230316162)
37893
+ Step... (32625 | Loss: 0.02160341665148735, Learning Rate: 3.510302849463187e-05, Gradient Norm: 0.2465333789587021)
37894
+ Step... (32650 | Loss: 0.006482965312898159, Learning Rate: 3.5052526072831824e-05, Gradient Norm: 0.4499094486236572)
37895
+ Step... (32675 | Loss: 0.020122623071074486, Learning Rate: 3.500202365103178e-05, Gradient Norm: 0.38773903250694275)
37896
+ Step... (32700 | Loss: 0.008678543381392956, Learning Rate: 3.4951513953274116e-05, Gradient Norm: 0.5408302545547485)
37897
+ Step... (32725 | Loss: 0.030759282410144806, Learning Rate: 3.490101153147407e-05, Gradient Norm: 0.468974232673645)
37898
+ Step... (32750 | Loss: 0.017723778262734413, Learning Rate: 3.4850505471695215e-05, Gradient Norm: 1.1028417348861694)
37899
+ Step... (32775 | Loss: 0.019855264574289322, Learning Rate: 3.479999941191636e-05, Gradient Norm: 0.7205868363380432)
37900
+ Step... (32800 | Loss: 0.009921908378601074, Learning Rate: 3.4749493352137506e-05, Gradient Norm: 0.6433566808700562)
37901
+ Step... (32825 | Loss: 0.008554498665034771, Learning Rate: 3.469899093033746e-05, Gradient Norm: 0.17429663240909576)
37902
+ Step... (32850 | Loss: 0.015867536887526512, Learning Rate: 3.4648484870558605e-05, Gradient Norm: 1.0128272771835327)
37903
+ Step... (32875 | Loss: 0.01661548763513565, Learning Rate: 3.459797881077975e-05, Gradient Norm: 0.27105745673179626)
37904
+ Step... (32900 | Loss: 0.012580855749547482, Learning Rate: 3.45474763889797e-05, Gradient Norm: 0.6393226385116577)
37905
+ Step... (32925 | Loss: 0.02611595205962658, Learning Rate: 3.449696669122204e-05, Gradient Norm: 0.34206777811050415)
37906
+ Step... (32950 | Loss: 0.01171598955988884, Learning Rate: 3.4446464269421995e-05, Gradient Norm: 0.6070415377616882)
37907
+ Step... (32975 | Loss: 0.0251434575766325, Learning Rate: 3.439595457166433e-05, Gradient Norm: 0.315672367811203)
37908
+ Step... (33000 | Loss: 0.0021658502519130707, Learning Rate: 3.4345452149864286e-05, Gradient Norm: 0.10065610706806183)
37909
+ Step... (33025 | Loss: 0.01859498955309391, Learning Rate: 3.429494972806424e-05, Gradient Norm: 0.31704211235046387)
37910
+ Step... (33050 | Loss: 0.009487439878284931, Learning Rate: 3.424444003030658e-05, Gradient Norm: 1.1141239404678345)
37911
+ Step... (33075 | Loss: 0.019629456102848053, Learning Rate: 3.419393760850653e-05, Gradient Norm: 0.3015685975551605)
37912
+ Step... (33100 | Loss: 0.01052027940750122, Learning Rate: 3.414343518670648e-05, Gradient Norm: 0.6721656918525696)
37913
+ Step... (33125 | Loss: 0.016070451587438583, Learning Rate: 3.409292548894882e-05, Gradient Norm: 0.2360723912715912)
37914
+ Step... (33150 | Loss: 0.005491696763783693, Learning Rate: 3.4042423067148775e-05, Gradient Norm: 0.2844001054763794)
37915
+ Step... (33175 | Loss: 0.0110780606046319, Learning Rate: 3.399192064534873e-05, Gradient Norm: 0.22559884190559387)
37916
+ Step... (33200 | Loss: 0.012108217924833298, Learning Rate: 3.3941410947591066e-05, Gradient Norm: 0.6203787326812744)
37917
+ Step... (33225 | Loss: 0.016109203919768333, Learning Rate: 3.389090852579102e-05, Gradient Norm: 0.2231585830450058)
37918
+ Step... (33250 | Loss: 0.02048683539032936, Learning Rate: 3.384040610399097e-05, Gradient Norm: 1.3308199644088745)
37919
+ Step... (33275 | Loss: 0.007731268182396889, Learning Rate: 3.378989640623331e-05, Gradient Norm: 0.16010697185993195)
37920
+ Step... (33300 | Loss: 0.004303706344217062, Learning Rate: 3.3739393984433264e-05, Gradient Norm: 0.34767386317253113)
37921
+ Step... (33325 | Loss: 0.01468588039278984, Learning Rate: 3.3688891562633216e-05, Gradient Norm: 0.24376437067985535)
37922
+ Step... (33350 | Loss: 0.004447631072252989, Learning Rate: 3.3638381864875555e-05, Gradient Norm: 0.6799517869949341)
37923
+ Step... (33375 | Loss: 0.02182612009346485, Learning Rate: 3.358787944307551e-05, Gradient Norm: 0.2627018392086029)
37924
+ Step... (33400 | Loss: 0.007039026822894812, Learning Rate: 3.3537373383296654e-05, Gradient Norm: 0.5744321942329407)
37925
+ Step... (33425 | Loss: 0.020231476053595543, Learning Rate: 3.34868673235178e-05, Gradient Norm: 0.31140726804733276)
37926
+ Step... (33450 | Loss: 0.019542209804058075, Learning Rate: 3.343636490171775e-05, Gradient Norm: 0.839359700679779)
37927
+ Step... (33475 | Loss: 0.012925517745316029, Learning Rate: 3.33858588419389e-05, Gradient Norm: 0.2737915813922882)
37928
+ Step... (33500 | Loss: 0.012760810554027557, Learning Rate: 3.3335352782160044e-05, Gradient Norm: 0.8377636075019836)
37929
+ Step... (33525 | Loss: 0.020903266966342926, Learning Rate: 3.328484672238119e-05, Gradient Norm: 0.5242891311645508)
37930
+ Step... (33550 | Loss: 0.008186263032257557, Learning Rate: 3.323434430058114e-05, Gradient Norm: 0.4458857774734497)
37931
+ Step... (33575 | Loss: 0.008030146360397339, Learning Rate: 3.318383460282348e-05, Gradient Norm: 0.15442919731140137)
37932
+ Step... (33600 | Loss: 0.009593255817890167, Learning Rate: 3.3133332181023434e-05, Gradient Norm: 0.4650934636592865)
37933
+ Step... (33625 | Loss: 0.027050640434026718, Learning Rate: 3.308282975922339e-05, Gradient Norm: 0.30105096101760864)
37934
+ Step... (33650 | Loss: 0.013253802433609962, Learning Rate: 3.3032320061465725e-05, Gradient Norm: 0.6894952058792114)
37935
+ Step... (33675 | Loss: 0.00922253169119358, Learning Rate: 3.298181763966568e-05, Gradient Norm: 0.20608296990394592)
37936
+ Step... (33700 | Loss: 0.005614001303911209, Learning Rate: 3.293131521786563e-05, Gradient Norm: 0.4169163405895233)
37937
+ Step... (33725 | Loss: 0.026930734515190125, Learning Rate: 3.288080552010797e-05, Gradient Norm: 0.4555964469909668)
37938
+ Step... (33750 | Loss: 0.0053314161486923695, Learning Rate: 3.283030309830792e-05, Gradient Norm: 0.3469780683517456)
37939
+ Step... (33775 | Loss: 0.0124602559953928, Learning Rate: 3.2779800676507875e-05, Gradient Norm: 0.2316969931125641)
37940
+ Step... (33800 | Loss: 0.004806062206625938, Learning Rate: 3.2729290978750214e-05, Gradient Norm: 0.4230194687843323)
37941
+ Step... (33825 | Loss: 0.02302425727248192, Learning Rate: 3.267878855695017e-05, Gradient Norm: 0.31783807277679443)
37942
+ Step... (33850 | Loss: 0.009264129213988781, Learning Rate: 3.262828613515012e-05, Gradient Norm: 0.4420391917228699)
37943
+ Step... (33875 | Loss: 0.015245446003973484, Learning Rate: 3.257777643739246e-05, Gradient Norm: 0.19797156751155853)
37944
+ Step... (33900 | Loss: 0.011483004316687584, Learning Rate: 3.252727401559241e-05, Gradient Norm: 0.6684927940368652)
37945
+ Step... (33925 | Loss: 0.014820012263953686, Learning Rate: 3.2476771593792364e-05, Gradient Norm: 0.2446964681148529)
37946
+ Step... (33950 | Loss: 0.011935788206756115, Learning Rate: 3.24262618960347e-05, Gradient Norm: 0.6529338359832764)
37947
+ Step... (33975 | Loss: 0.030441801995038986, Learning Rate: 3.2375759474234655e-05, Gradient Norm: 0.4021615982055664)
37948
+ Step... (34000 | Loss: 0.004538531880825758, Learning Rate: 3.2325249776476994e-05, Gradient Norm: 0.24466592073440552)
37949
+ Step... (34025 | Loss: 0.02973652072250843, Learning Rate: 3.227474735467695e-05, Gradient Norm: 0.5382617115974426)
37950
+ Step... (34050 | Loss: 0.0059531498700380325, Learning Rate: 3.222424129489809e-05, Gradient Norm: 0.5172224640846252)
37951
+ Step... (34075 | Loss: 0.021076254546642303, Learning Rate: 3.217373523511924e-05, Gradient Norm: 0.3070044219493866)
37952
+ Step... (34100 | Loss: 0.014772494323551655, Learning Rate: 3.212323281331919e-05, Gradient Norm: 0.5799047946929932)
37953
+ Step... (34125 | Loss: 0.019663726910948753, Learning Rate: 3.207272675354034e-05, Gradient Norm: 0.27861204743385315)
37954
+ Step... (34150 | Loss: 0.005554813891649246, Learning Rate: 3.202222069376148e-05, Gradient Norm: 0.4016672372817993)
37955
+ Step... (34175 | Loss: 0.02114006131887436, Learning Rate: 3.197171463398263e-05, Gradient Norm: 0.3323518931865692)
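Because the raw nohup.out interleaves these metric lines with blank progress-bar redraws, recovering the training curves means parsing the `Step... (N | Loss: ..., Learning Rate: ..., Gradient Norm: ...)` format shown above. A minimal sketch follows; the regex mirrors the lines in this diff, while the file path and function name are illustrative.

```python
# Minimal sketch: recover (step, loss, lr, grad_norm) tuples from the
# "Step... (N | Loss: ..., Learning Rate: ..., Gradient Norm: ...)" lines.
# The eval summary lines use a different format and are skipped.
import re

PATTERN = re.compile(
    r"Step\.\.\. \((\d+) \| Loss: ([\d.eE+-]+), "
    r"Learning Rate: ([\d.eE+-]+), Gradient Norm: ([\d.eE+-]+)\)"
)

def parse_log(path="nohup.out"):
    records = []
    with open(path) as f:
        for line in f:
            m = PATTERN.search(line)
            if m:
                step, loss, lr, gnorm = m.groups()
                records.append((int(step), float(loss), float(lr), float(gnorm)))
    return records

# e.g. steps, losses = zip(*[(r[0], r[1]) for r in parse_log()])
```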
+ Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 67% 8/12 [46:48:52<23:25:59, 21089.92s/it]
+ Step... (34225 | Loss: 0.01761227659881115, Learning Rate: 3.187070251442492e-05, Gradient Norm: 0.3862716555595398)
+ Step... (34250 | Loss: 0.0024810461327433586, Learning Rate: 3.182020009262487e-05, Gradient Norm: 0.16885358095169067)
+ Step... (34275 | Loss: 0.020597655326128006, Learning Rate: 3.1769697670824826e-05, Gradient Norm: 0.26309433579444885)
+ Step... (34300 | Loss: 0.002395742107182741, Learning Rate: 3.1719187973067164e-05, Gradient Norm: 0.09939858317375183)
+ Step... (34325 | Loss: 0.01582499034702778, Learning Rate: 3.166868555126712e-05, Gradient Norm: 0.40641799569129944)
+ Step... (34350 | Loss: 0.006080330815166235, Learning Rate: 3.161818312946707e-05, Gradient Norm: 0.4112752676010132)
+ Step... (34375 | Loss: 0.012621372006833553, Learning Rate: 3.156767343170941e-05, Gradient Norm: 0.20963077247142792)
+ Step... (34400 | Loss: 0.020280960947275162, Learning Rate: 3.151717100990936e-05, Gradient Norm: 0.8154411315917969)
+ Step... (34425 | Loss: 0.02049107477068901, Learning Rate: 3.1466668588109314e-05, Gradient Norm: 0.27243077754974365)
+ Step... (34450 | Loss: 0.008309825323522091, Learning Rate: 3.141615889035165e-05, Gradient Norm: 0.4491531252861023)
+ Step... (34475 | Loss: 0.021880406886339188, Learning Rate: 3.1365656468551606e-05, Gradient Norm: 3.2739105224609375)
+ Step... (34500 | Loss: 0.007439988665282726, Learning Rate: 3.131515404675156e-05, Gradient Norm: 0.544802725315094)
+ Step... (34525 | Loss: 0.02172919735312462, Learning Rate: 3.12646443489939e-05, Gradient Norm: 0.28622761368751526)
+ Step... (34550 | Loss: 0.006562023889273405, Learning Rate: 3.121414192719385e-05, Gradient Norm: 0.3183799386024475)
+ Step... (34575 | Loss: 0.010621322318911552, Learning Rate: 3.11636395053938e-05, Gradient Norm: 0.3131429851055145)
+ Step... (34600 | Loss: 0.013357987627387047, Learning Rate: 3.111312980763614e-05, Gradient Norm: 0.48718786239624023)
+ Step... (34625 | Loss: 0.017189813777804375, Learning Rate: 3.1062627385836095e-05, Gradient Norm: 0.2213728129863739)
+ Step... (34650 | Loss: 0.002611755859106779, Learning Rate: 3.101212132605724e-05, Gradient Norm: 0.1833813488483429)
+ Step... (34675 | Loss: 0.026818081736564636, Learning Rate: 3.0961615266278386e-05, Gradient Norm: 0.32487785816192627)
+ Step... (34700 | Loss: 0.009743205271661282, Learning Rate: 3.091111284447834e-05, Gradient Norm: 0.591576337814331)
+ Step... (34725 | Loss: 0.015863729640841484, Learning Rate: 3.0860606784699485e-05, Gradient Norm: 0.3375481367111206)
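The `Step...` records above each carry four scalars: global step, training loss, learning rate, and gradient norm. Below is a minimal sketch of pulling those fields out of the raw log for plotting; the regex, the `parse_steps` helper, and the `nohup.out` path are assumptions based on the line format shown here, not part of the training script itself.

import re

# Matches lines like:
#   Step... (34225 | Loss: 0.0176..., Learning Rate: 3.1870...e-05, Gradient Norm: 0.3862...)
# Pattern and field names are inferred from the log format above, not taken
# from the training script.
STEP_RE = re.compile(
    r"Step\.\.\. \((?P<step>\d+) \| "
    r"Loss: (?P<loss>[\d.eE+-]+), "
    r"Learning Rate: (?P<lr>[\d.eE+-]+), "
    r"Gradient Norm: (?P<gnorm>[\d.eE+-]+)\)"
)

def parse_steps(lines):
    """Yield (step, loss, lr, grad_norm) tuples from raw log lines."""
    for line in lines:
        m = STEP_RE.search(line)
        if m:
            yield (int(m["step"]), float(m["loss"]),
                   float(m["lr"]), float(m["gnorm"]))

with open("nohup.out") as f:  # hypothetical path to this very log
    records = list(parse_steps(f))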
+ Training...: 39% 1705/4393 [2:15:22<4:00:59, 5.38s/it]
+ Step... (34775 | Loss: 0.018730444833636284, Learning Rate: 3.0759594665141776e-05, Gradient Norm: 0.3146304488182068)
+ Step... (34800 | Loss: 0.0131835350766778, Learning Rate: 3.070909224334173e-05, Gradient Norm: 0.6344761252403259)
+ Step... (34825 | Loss: 0.02017289213836193, Learning Rate: 3.065858254558407e-05, Gradient Norm: 0.2658047080039978)
+ Step... (34850 | Loss: 0.011610460467636585, Learning Rate: 3.060808012378402e-05, Gradient Norm: 0.5844695568084717)
+ Step... (34875 | Loss: 0.020308807492256165, Learning Rate: 3.055757770198397e-05, Gradient Norm: 0.3984432518482208)
+ Step... (34900 | Loss: 0.008994593285024166, Learning Rate: 3.0507069823215716e-05, Gradient Norm: 0.6693220138549805)
+ Step... (34925 | Loss: 0.024976585060358047, Learning Rate: 3.0456565582426265e-05, Gradient Norm: 0.31148794293403625)
+ Step... (34950 | Loss: 0.01379681471735239, Learning Rate: 3.0406063160626218e-05, Gradient Norm: 0.6182345151901245)
+ Step... (34975 | Loss: 0.015182623639702797, Learning Rate: 3.0355553462868556e-05, Gradient Norm: 0.24722100794315338)
+ Step... (35000 | Loss: 0.0074134087190032005, Learning Rate: 3.030505104106851e-05, Gradient Norm: 0.35679754614830017)
+ Step... (35025 | Loss: 0.012640486471354961, Learning Rate: 3.0254548619268462e-05, Gradient Norm: 0.3233649730682373)
+ Step... (35050 | Loss: 0.003796222386881709, Learning Rate: 3.02040389215108e-05, Gradient Norm: 0.23635932803153992)
+ Step... (35075 | Loss: 0.015530078671872616, Learning Rate: 3.0153536499710754e-05, Gradient Norm: 0.3063891530036926)
+ Step... (35100 | Loss: 0.00287941237911582, Learning Rate: 3.0103026801953092e-05, Gradient Norm: 0.12883423268795013)
+ Step... (35125 | Loss: 0.018001878634095192, Learning Rate: 3.0052524380153045e-05, Gradient Norm: 0.2948234975337982)
+ Step... (35150 | Loss: 0.01038078498095274, Learning Rate: 3.0002020139363594e-05, Gradient Norm: 0.16426806151866913)
+ Step... (35175 | Loss: 0.010202213190495968, Learning Rate: 2.9951512260595337e-05, Gradient Norm: 0.24624192714691162)
+ Step... (35200 | Loss: 0.014662917703390121, Learning Rate: 2.990100983879529e-05, Gradient Norm: 0.9740239977836609)
+ Step... (35225 | Loss: 0.010558203794062138, Learning Rate: 2.985050559800584e-05, Gradient Norm: 0.2638781666755676)
+ Step... (35250 | Loss: 0.015992023050785065, Learning Rate: 2.979999771923758e-05, Gradient Norm: 0.25827088952064514)
+ Step... (35275 | Loss: 0.005929975304752588, Learning Rate: 2.974949347844813e-05, Gradient Norm: 0.20532174408435822)
+ Step... (35300 | Loss: 0.01789812184870243, Learning Rate: 2.9698991056648083e-05, Gradient Norm: 0.22047241032123566)
+ Step... (35325 | Loss: 0.01096875499933958, Learning Rate: 2.964848135889042e-05, Gradient Norm: 0.7241865396499634)
+ Step... (35350 | Loss: 0.013526245020329952, Learning Rate: 2.9597978937090375e-05, Gradient Norm: 0.2711643576622009)
+ Step... (35375 | Loss: 0.00827824231237173, Learning Rate: 2.9547476515290327e-05, Gradient Norm: 0.18412812054157257)
+ Step... (35400 | Loss: 0.01867770217359066, Learning Rate: 2.9496966817532666e-05, Gradient Norm: 0.32241126894950867)
+ Step... (35425 | Loss: 0.007050258107483387, Learning Rate: 2.944646439573262e-05, Gradient Norm: 0.15257257223129272)
+ Step... (35450 | Loss: 0.017707733437418938, Learning Rate: 2.9395960154943168e-05, Gradient Norm: 0.2753700911998749)
+ Step... (35475 | Loss: 0.005322085693478584, Learning Rate: 2.934545227617491e-05, Gradient Norm: 0.17496798932552338)
+ Step... (35500 | Loss: 0.009057429619133472, Learning Rate: 2.9294949854374863e-05, Gradient Norm: 0.25817573070526123)
+ Step... (35525 | Loss: 0.013614678755402565, Learning Rate: 2.9244445613585413e-05, Gradient Norm: 0.26426923274993896)
+ Step... (35550 | Loss: 0.010442844592034817, Learning Rate: 2.9193937734817155e-05, Gradient Norm: 0.2709563076496124)
+ Step... (35575 | Loss: 0.005399641115218401, Learning Rate: 2.9143433494027704e-05, Gradient Norm: 0.1638725847005844)
+ Step... (35600 | Loss: 0.00854143314063549, Learning Rate: 2.9092931072227657e-05, Gradient Norm: 0.14298127591609955)
+ Step... (35625 | Loss: 0.010740221478044987, Learning Rate: 2.9042421374469995e-05, Gradient Norm: 0.21739308536052704)
+ Step... (35650 | Loss: 0.013804024085402489, Learning Rate: 2.899191895266995e-05, Gradient Norm: 0.2226768285036087)
+ Step... (35675 | Loss: 0.0050653815269470215, Learning Rate: 2.89414165308699e-05, Gradient Norm: 0.12603415548801422)
+ Step... (35700 | Loss: 0.0073559898883104324, Learning Rate: 2.889090683311224e-05, Gradient Norm: 0.18728569149971008)
+ Step... (35725 | Loss: 0.014252002350986004, Learning Rate: 2.8840404411312193e-05, Gradient Norm: 0.2678786516189575)
+ Step... (35750 | Loss: 0.01553369965404272, Learning Rate: 2.8789900170522742e-05, Gradient Norm: 0.2801755666732788)
+ Step... (35775 | Loss: 0.007932987995445728, Learning Rate: 2.8739392291754484e-05, Gradient Norm: 0.15107274055480957)
+ Step... (35800 | Loss: 0.009900908917188644, Learning Rate: 2.8688889869954437e-05, Gradient Norm: 0.18677286803722382)
+ Step... (35825 | Loss: 0.008868378587067127, Learning Rate: 2.8638385629164986e-05, Gradient Norm: 0.21733544766902924)
+ Step... (35850 | Loss: 0.014373034238815308, Learning Rate: 2.858787775039673e-05, Gradient Norm: 0.321470707654953)
+ Step... (35875 | Loss: 0.008444367907941341, Learning Rate: 2.8537373509607278e-05, Gradient Norm: 0.20683307945728302)
+ Step... (35900 | Loss: 0.008994083851575851, Learning Rate: 2.848687108780723e-05, Gradient Norm: 0.15626375377178192)
+ Step... (35925 | Loss: 0.008749552071094513, Learning Rate: 2.843636139004957e-05, Gradient Norm: 0.5908063054084778)
+ Step... (35950 | Loss: 0.011478588916361332, Learning Rate: 2.8385858968249522e-05, Gradient Norm: 0.2266269028186798)
+ Step... (35975 | Loss: 0.01196364313364029, Learning Rate: 2.8335356546449475e-05, Gradient Norm: 0.29479607939720154)
+ Step... (36000 | Loss: 0.011318921111524105, Learning Rate: 2.8284846848691814e-05, Gradient Norm: 0.17441242933273315)
+ Step... (36025 | Loss: 0.015252816490828991, Learning Rate: 2.8234344426891766e-05, Gradient Norm: 0.3211612403392792)
+ Step... (36050 | Loss: 0.0149453179910779, Learning Rate: 2.8183840186102316e-05, Gradient Norm: 0.2780107855796814)
+ Step... (36075 | Loss: 0.013392834924161434, Learning Rate: 2.8133332307334058e-05, Gradient Norm: 0.30323371291160583)
+ Step... (36100 | Loss: 0.008990200236439705, Learning Rate: 2.808282988553401e-05, Gradient Norm: 0.1986197680234909)
+ Step... (36125 | Loss: 0.007589063141494989, Learning Rate: 2.803232018777635e-05, Gradient Norm: 0.16976489126682281)
+ Step... (36150 | Loss: 0.018465880304574966, Learning Rate: 2.7981817765976302e-05, Gradient Norm: 3.223573923110962)
+ Step... (36175 | Loss: 0.017651699483394623, Learning Rate: 2.793131352518685e-05, Gradient Norm: 0.3118066191673279)
+ Step... (36200 | Loss: 0.006813254207372665, Learning Rate: 2.7880805646418594e-05, Gradient Norm: 0.17273718118667603)
+ Step... (36225 | Loss: 0.017117872834205627, Learning Rate: 2.7830301405629143e-05, Gradient Norm: 0.25965094566345215)
+ Step... (36250 | Loss: 0.0120441485196352, Learning Rate: 2.7779798983829096e-05, Gradient Norm: 0.3207753300666809)
+ Step... (36275 | Loss: 0.011607665568590164, Learning Rate: 2.7729289286071435e-05, Gradient Norm: 0.22257204353809357)
+ Step... (36300 | Loss: 0.020630035549402237, Learning Rate: 2.7678786864271387e-05, Gradient Norm: 0.45128902792930603)
+ Step... (36325 | Loss: 0.005392152350395918, Learning Rate: 2.762828444247134e-05, Gradient Norm: 0.1612943410873413)
+ Step... (36350 | Loss: 0.014236119575798512, Learning Rate: 2.757777474471368e-05, Gradient Norm: 0.37850603461265564)
+ Step... (36375 | Loss: 0.015626532956957817, Learning Rate: 2.7527272322913632e-05, Gradient Norm: 2.135969638824463)
+ Step... (36400 | Loss: 0.011072629131376743, Learning Rate: 2.7476769901113585e-05, Gradient Norm: 0.16433218121528625)
+ Step... (36425 | Loss: 0.014065195806324482, Learning Rate: 2.7426260203355923e-05, Gradient Norm: 0.26176708936691284)
+ Step... (36450 | Loss: 0.017493167892098427, Learning Rate: 2.7375757781555876e-05, Gradient Norm: 0.24977616965770721)
+ Step... (36475 | Loss: 0.009039318189024925, Learning Rate: 2.7325253540766425e-05, Gradient Norm: 0.1969795823097229)
+ Step... (36500 | Loss: 0.020493078976869583, Learning Rate: 2.7274745661998168e-05, Gradient Norm: 0.3413366675376892)
+ Step... (36525 | Loss: 0.011266868561506271, Learning Rate: 2.7224241421208717e-05, Gradient Norm: 0.2362344115972519)
+ Step... (36550 | Loss: 0.010742920450866222, Learning Rate: 2.717373899940867e-05, Gradient Norm: 0.23466497659683228)
+ Step... (36575 | Loss: 0.011458583176136017, Learning Rate: 2.712322930165101e-05, Gradient Norm: 0.20105010271072388)
+ Step... (36600 | Loss: 0.00982366781681776, Learning Rate: 2.707272687985096e-05, Gradient Norm: 0.19666042923927307)
+ Step... (36625 | Loss: 0.010288187302649021, Learning Rate: 2.7022224458050914e-05, Gradient Norm: 0.26822808384895325)
+ Step... (36650 | Loss: 0.011828682385385036, Learning Rate: 2.6971714760293253e-05, Gradient Norm: 0.20932193100452423)
+ Step... (36675 | Loss: 0.01743757352232933, Learning Rate: 2.6921212338493206e-05, Gradient Norm: 0.33511021733283997)
+ Step... (36700 | Loss: 0.02315790392458439, Learning Rate: 2.6870708097703755e-05, Gradient Norm: 0.41521745920181274)
+ Step... (36725 | Loss: 0.01171206310391426, Learning Rate: 2.6820200218935497e-05, Gradient Norm: 0.30450528860092163)
+ Step... (36750 | Loss: 0.01481856219470501, Learning Rate: 2.676969779713545e-05, Gradient Norm: 0.6513252854347229)
+ Step... (36775 | Loss: 0.010270772501826286, Learning Rate: 2.6719193556346e-05, Gradient Norm: 0.44218534231185913)
+ Step... (36800 | Loss: 0.01345136296004057, Learning Rate: 2.666868567757774e-05, Gradient Norm: 0.2729993760585785)
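Between any two logged checkpoints above, the learning rate falls at a constant ~2.02e-09 per step, and extrapolating that line reaches zero at step 50,000, the horizon shown in the eval progress line. A small sketch that checks the linear decay directly against the logged values; the two anchor points are copied from this log, while the schedule's actual peak value and warmup length are not recoverable from it and are deliberately left out.

# Fit a line through two logged (step, learning-rate) pairs from this log:
a_step, a_lr = 34225, 3.187070251442492e-05
b_step, b_lr = 39300, 2.1618181563098915e-05
slope = (a_lr - b_lr) / (b_step - a_step)  # ~2.0202e-09 per step

def lr_at(step):
    """Linear interpolation/extrapolation through the two anchors."""
    return a_lr - slope * (step - a_step)

# Reproduces an independent logged value (step 36400) to float32 precision...
assert abs(lr_at(36400) - 2.7476769901113585e-05) < 1e-10
# ...and decays to ~0 at the 50000-step horizon from the eval line.
assert abs(lr_at(50000)) < 1e-07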
+ Training...: 78% 3405/4393 [4:30:03<1:28:38, 5.38s/it]
+ Step... (36850 | Loss: 0.020339932292699814, Learning Rate: 2.6567679014988244e-05, Gradient Norm: 0.28321829438209534)
+ Step... (36875 | Loss: 0.010295059531927109, Learning Rate: 2.6517169317230582e-05, Gradient Norm: 0.22561118006706238)
+ Step... (36900 | Loss: 0.02300870791077614, Learning Rate: 2.6466666895430535e-05, Gradient Norm: 0.3506458103656769)
+ Step... (36925 | Loss: 0.008988190442323685, Learning Rate: 2.6416164473630488e-05, Gradient Norm: 0.22951672971248627)
+ Step... (36950 | Loss: 0.017710519954562187, Learning Rate: 2.6365654775872827e-05, Gradient Norm: 0.2859334647655487)
+ Step... (36975 | Loss: 0.011881349608302116, Learning Rate: 2.631515235407278e-05, Gradient Norm: 0.28052568435668945)
+ Step... (37000 | Loss: 0.016069943085312843, Learning Rate: 2.626464811328333e-05, Gradient Norm: 0.22248497605323792)
+ Step... (37025 | Loss: 0.008865948766469955, Learning Rate: 2.621414023451507e-05, Gradient Norm: 0.223648339509964)
+ Step... (37050 | Loss: 0.013166670687496662, Learning Rate: 2.6163637812715024e-05, Gradient Norm: 0.24581901729106903)
+ Step... (37075 | Loss: 0.010862416587769985, Learning Rate: 2.6113133571925573e-05, Gradient Norm: 0.21010161936283112)
+ Step... (37100 | Loss: 0.012013047933578491, Learning Rate: 2.6062625693157315e-05, Gradient Norm: 0.2584199607372284)
+ Step... (37125 | Loss: 0.007369975559413433, Learning Rate: 2.6012121452367865e-05, Gradient Norm: 0.32532358169555664)
+ Step... (37150 | Loss: 0.018205538392066956, Learning Rate: 2.5961613573599607e-05, Gradient Norm: 0.3200891315937042)
+ Step... (37175 | Loss: 0.008243363350629807, Learning Rate: 2.5911109332810156e-05, Gradient Norm: 0.2219228297472)
+ Step... (37200 | Loss: 0.007743628229945898, Learning Rate: 2.586060691101011e-05, Gradient Norm: 0.20197027921676636)
+ Step... (37225 | Loss: 0.009823272004723549, Learning Rate: 2.5810097213252448e-05, Gradient Norm: 0.16621631383895874)
+ Step... (37250 | Loss: 0.01478399708867073, Learning Rate: 2.57595947914524e-05, Gradient Norm: 0.21830250322818756)
+ Step... (37275 | Loss: 0.009942734614014626, Learning Rate: 2.5709092369652353e-05, Gradient Norm: 0.23925411701202393)
+ Step... (37300 | Loss: 0.02040395885705948, Learning Rate: 2.5658582671894692e-05, Gradient Norm: 0.27813541889190674)
+ Step... (37325 | Loss: 0.00806287582963705, Learning Rate: 2.5608080250094645e-05, Gradient Norm: 0.18820592761039734)
+ Step... (37350 | Loss: 0.012719778344035149, Learning Rate: 2.5557577828294598e-05, Gradient Norm: 0.22716785967350006)
+ Step... (37375 | Loss: 0.01238447055220604, Learning Rate: 2.5507068130536936e-05, Gradient Norm: 0.3181142807006836)
+ Step... (37400 | Loss: 0.008483604528009892, Learning Rate: 2.545656570873689e-05, Gradient Norm: 0.14901268482208252)
+ Step... (37425 | Loss: 0.009233653545379639, Learning Rate: 2.540606146794744e-05, Gradient Norm: 0.23794111609458923)
+ Step... (37450 | Loss: 0.018127061426639557, Learning Rate: 2.535555358917918e-05, Gradient Norm: 0.27508270740509033)
+ Step... (37475 | Loss: 0.010165028274059296, Learning Rate: 2.530504934838973e-05, Gradient Norm: 0.22427453100681305)
+ Step... (37500 | Loss: 0.010916335508227348, Learning Rate: 2.5254546926589683e-05, Gradient Norm: 0.18438170850276947)
+ Step... (37525 | Loss: 0.009773856028914452, Learning Rate: 2.520403722883202e-05, Gradient Norm: 0.23069195449352264)
+ Step... (37550 | Loss: 0.016379348933696747, Learning Rate: 2.5153534807031974e-05, Gradient Norm: 0.2591167390346527)
+ Step... (37575 | Loss: 0.014079141430556774, Learning Rate: 2.5103032385231927e-05, Gradient Norm: 0.2219134420156479)
+ Step... (37600 | Loss: 0.010436130687594414, Learning Rate: 2.5052522687474266e-05, Gradient Norm: 0.20184342563152313)
+ Step... (37625 | Loss: 0.02243305929005146, Learning Rate: 2.500202026567422e-05, Gradient Norm: 0.385381817817688)
+ Step... (37650 | Loss: 0.009557241573929787, Learning Rate: 2.495151784387417e-05, Gradient Norm: 0.18861576914787292)
+ Step... (37675 | Loss: 0.02001246064901352, Learning Rate: 2.490100814611651e-05, Gradient Norm: 0.3057927191257477)
+ Step... (37700 | Loss: 0.015565990470349789, Learning Rate: 2.4850505724316463e-05, Gradient Norm: 0.26939934492111206)
+ Step... (37725 | Loss: 0.010391747578978539, Learning Rate: 2.4800001483527012e-05, Gradient Norm: 0.2478579878807068)
+ Step... (37750 | Loss: 0.012761175632476807, Learning Rate: 2.4749493604758754e-05, Gradient Norm: 0.2503322958946228)
+ Step... (37775 | Loss: 0.007083362899720669, Learning Rate: 2.4698989363969304e-05, Gradient Norm: 0.20680712163448334)
+ Step... (37800 | Loss: 0.01457024272531271, Learning Rate: 2.4648486942169257e-05, Gradient Norm: 0.21773581206798553)
+ Step... (37825 | Loss: 0.016917739063501358, Learning Rate: 2.4597977244411595e-05, Gradient Norm: 0.4584696590900421)
+ Step... (37850 | Loss: 0.020319297909736633, Learning Rate: 2.4547474822611548e-05, Gradient Norm: 0.28668245673179626)
+ Step... (37875 | Loss: 0.007732781581580639, Learning Rate: 2.44969724008115e-05, Gradient Norm: 0.22831612825393677)
+ Step... (37900 | Loss: 0.010798927396535873, Learning Rate: 2.444646270305384e-05, Gradient Norm: 0.19562046229839325)
+ Step... (37925 | Loss: 0.0118245929479599, Learning Rate: 2.4395960281253792e-05, Gradient Norm: 0.378909707069397)
+ Step... (37950 | Loss: 0.011480288580060005, Learning Rate: 2.434545604046434e-05, Gradient Norm: 0.18696899712085724)
+ Step... (37975 | Loss: 0.01095297746360302, Learning Rate: 2.4294948161696084e-05, Gradient Norm: 0.20788218080997467)
+ Step... (38000 | Loss: 0.014074883423745632, Learning Rate: 2.4244445739896037e-05, Gradient Norm: 0.2136598378419876)
+ Step... (38025 | Loss: 0.00818438921123743, Learning Rate: 2.4193941499106586e-05, Gradient Norm: 0.18253900110721588)
+ Step... (38050 | Loss: 0.016727542504668236, Learning Rate: 2.4143433620338328e-05, Gradient Norm: 0.2901366949081421)
+ Step... (38075 | Loss: 0.007837343961000443, Learning Rate: 2.4092929379548877e-05, Gradient Norm: 0.19501838088035583)
+ Step... (38100 | Loss: 0.008517307229340076, Learning Rate: 2.404242695774883e-05, Gradient Norm: 0.2721792459487915)
+ Step... (38125 | Loss: 0.009104839526116848, Learning Rate: 2.399191725999117e-05, Gradient Norm: 0.4130169749259949)
+ Step... (38150 | Loss: 0.0076513695530593395, Learning Rate: 2.3941414838191122e-05, Gradient Norm: 0.15972954034805298)
+ Step... (38175 | Loss: 0.012398313730955124, Learning Rate: 2.389090514043346e-05, Gradient Norm: 0.2811894714832306)
+ Step... (38200 | Loss: 0.01311738695949316, Learning Rate: 2.3840402718633413e-05, Gradient Norm: 0.20744727551937103)
+ Step... (38225 | Loss: 0.013343630358576775, Learning Rate: 2.3789900296833366e-05, Gradient Norm: 0.23322346806526184)
+ Step... (38250 | Loss: 0.013739443384110928, Learning Rate: 2.3739390599075705e-05, Gradient Norm: 0.3291863799095154)
+ Step... (38275 | Loss: 0.008902786299586296, Learning Rate: 2.3688888177275658e-05, Gradient Norm: 0.18816299736499786)
+ Step... (38300 | Loss: 0.029258565977215767, Learning Rate: 2.363838575547561e-05, Gradient Norm: 0.3861159086227417)
+ Step... (38325 | Loss: 0.009011116810142994, Learning Rate: 2.358787605771795e-05, Gradient Norm: 0.34315818548202515)
+ Step... (38350 | Loss: 0.012569929473102093, Learning Rate: 2.3537373635917902e-05, Gradient Norm: 0.23500430583953857)
+ Step... (38375 | Loss: 0.005166625138372183, Learning Rate: 2.348686939512845e-05, Gradient Norm: 0.17557674646377563)
+ Step... (38400 | Loss: 0.01499476283788681, Learning Rate: 2.3436361516360193e-05, Gradient Norm: 0.2289804071187973)
+ Step... (38425 | Loss: 0.006382503546774387, Learning Rate: 2.3385857275570743e-05, Gradient Norm: 0.1602679044008255)
+ Step... (38450 | Loss: 0.010056969709694386, Learning Rate: 2.3335354853770696e-05, Gradient Norm: 0.18118983507156372)
+ Step... (38475 | Loss: 0.008315986953675747, Learning Rate: 2.3284845156013034e-05, Gradient Norm: 0.18881313502788544)
+ Step... (38500 | Loss: 0.01467866450548172, Learning Rate: 2.3234342734212987e-05, Gradient Norm: 0.5424960255622864)
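The `Gradient Norm` field is most plausibly the global L2 norm over the full gradient pytree; the occasional spikes (around 2-3 against a ~0.2-0.5 baseline earlier in the log) are the kind of outliers gradient-clipping thresholds are set against. A JAX sketch of that quantity under this assumption; the toy `grads` tree is purely illustrative and nothing below is taken from the actual training script.

import jax
import jax.numpy as jnp

def global_norm(grads):
    """L2 norm over every leaf of a gradient pytree -- the usual
    quantity behind a 'Gradient Norm' log field (assumed, not confirmed
    by this log)."""
    leaves = jax.tree_util.tree_leaves(grads)
    return jnp.sqrt(sum(jnp.sum(jnp.square(g)) for g in leaves))

# Toy pytree standing in for real model gradients:
grads = {"b": jnp.zeros((3,)), "w": jnp.ones((2, 3))}
print(float(global_norm(grads)))  # 2.449... == sqrt(6)

optax exposes the same computation as `optax.global_norm`, which is also what `optax.clip_by_global_norm` measures against.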
43322
+ Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 75% 9/12 [52:36:57<17:31:17, 21025.84s/it]
+ Training...: 0% 0/4393 [00:00<?, ?it/s]
+ Step... (38550 | Loss: 0.010347037576138973, Learning Rate: 2.313333061465528e-05, Gradient Norm: 0.2250284105539322)
+ Step... (38575 | Loss: 0.0063611362129449844, Learning Rate: 2.308282819285523e-05, Gradient Norm: 0.1974717080593109)
+ Step... (38600 | Loss: 0.004289443604648113, Learning Rate: 2.3032325771055184e-05, Gradient Norm: 0.09970905631780624)
+ Step... (38625 | Loss: 0.011062589474022388, Learning Rate: 2.2981816073297523e-05, Gradient Norm: 0.2628202438354492)
+ Step... (38650 | Loss: 0.010620958171784878, Learning Rate: 2.2931313651497476e-05, Gradient Norm: 0.21071377396583557)
+ Step... (38675 | Loss: 0.007905133068561554, Learning Rate: 2.2880809410708025e-05, Gradient Norm: 0.1927347332239151)
+ Step... (38700 | Loss: 0.021522454917430878, Learning Rate: 2.2830301531939767e-05, Gradient Norm: 0.26387181878089905)
+ Step... (38725 | Loss: 0.01568610407412052, Learning Rate: 2.2779797291150317e-05, Gradient Norm: 0.3414469063282013)
+ Step... (38750 | Loss: 0.007739794906228781, Learning Rate: 2.272929486935027e-05, Gradient Norm: 0.20845206081867218)
+ Step... (38775 | Loss: 0.01500839926302433, Learning Rate: 2.2678785171592608e-05, Gradient Norm: 0.366758793592453)
+ Step... (38800 | Loss: 0.01572476327419281, Learning Rate: 2.262828274979256e-05, Gradient Norm: 0.23734962940216064)
+ Step... (38825 | Loss: 0.01844528131186962, Learning Rate: 2.2577780327992514e-05, Gradient Norm: 0.3101344704627991)
+ Step... (38850 | Loss: 0.022008061408996582, Learning Rate: 2.2527270630234852e-05, Gradient Norm: 0.8459998965263367)
+ Step... (38875 | Loss: 0.01427079550921917, Learning Rate: 2.2476768208434805e-05, Gradient Norm: 0.23986835777759552)
+ Step... (38900 | Loss: 0.010430174879729748, Learning Rate: 2.2426265786634758e-05, Gradient Norm: 0.2690977454185486)
+ Step... (38925 | Loss: 0.013039502315223217, Learning Rate: 2.2375756088877097e-05, Gradient Norm: 0.26950889825820923)
+ Step... (38950 | Loss: 0.011772986501455307, Learning Rate: 2.232525366707705e-05, Gradient Norm: 0.22706244885921478)
+ Step... (38975 | Loss: 0.012624364346265793, Learning Rate: 2.22747494262876e-05, Gradient Norm: 0.35651257634162903)
+ Step... (39000 | Loss: 0.01489238440990448, Learning Rate: 2.222424154751934e-05, Gradient Norm: 0.2117861658334732)
+ Step... (39025 | Loss: 0.02081136405467987, Learning Rate: 2.217373730672989e-05, Gradient Norm: 0.3005424439907074)
+ Step... (39050 | Loss: 0.007413056679069996, Learning Rate: 2.2123234884929843e-05, Gradient Norm: 0.14772680401802063)
+ Step... (39075 | Loss: 0.011335279792547226, Learning Rate: 2.2072725187172182e-05, Gradient Norm: 0.23882412910461426)
+ Step... (39100 | Loss: 0.020389100536704063, Learning Rate: 2.2022222765372135e-05, Gradient Norm: 0.2939780652523041)
+ Step... (39125 | Loss: 0.016199955716729164, Learning Rate: 2.1971720343572088e-05, Gradient Norm: 0.2736057639122009)
+ Step... (39150 | Loss: 0.01839425601065159, Learning Rate: 2.1921210645814426e-05, Gradient Norm: 0.3050045967102051)
+ Step... (39175 | Loss: 0.008332288824021816, Learning Rate: 2.187070822401438e-05, Gradient Norm: 0.18911497294902802)
+ Step... (39200 | Loss: 0.0066859484650194645, Learning Rate: 2.1820198526256718e-05, Gradient Norm: 0.16664181649684906)
+ Step... (39225 | Loss: 0.010230396874248981, Learning Rate: 2.176969610445667e-05, Gradient Norm: 0.29577866196632385)
+ Step... (39250 | Loss: 0.009517351165413857, Learning Rate: 2.1719193682656623e-05, Gradient Norm: 0.17758044600486755)
+ Step... (39275 | Loss: 0.006245317868888378, Learning Rate: 2.1668683984898962e-05, Gradient Norm: 0.19972631335258484)
+ Step... (39300 | Loss: 0.013634399510920048, Learning Rate: 2.1618181563098915e-05, Gradient Norm: 0.22748829424381256)
+ Step... (39325 | Loss: 0.006963997147977352, Learning Rate: 2.1567677322309464e-05, Gradient Norm: 0.2040238231420517)
+ Step... (39350 | Loss: 0.022512728348374367, Learning Rate: 2.1517169443541206e-05, Gradient Norm: 0.30836740136146545)
+ Step... (39375 | Loss: 0.011416936293244362, Learning Rate: 2.1466665202751756e-05, Gradient Norm: 0.2511473000049591)
+ Step... (39400 | Loss: 0.016352824866771698, Learning Rate: 2.141616278095171e-05, Gradient Norm: 0.2511434555053711)
+ Step... (39425 | Loss: 0.009906467981636524, Learning Rate: 2.1365653083194047e-05, Gradient Norm: 0.2822321951389313)
+ Step... (39450 | Loss: 0.011319736018776894, Learning Rate: 2.1315150661394e-05, Gradient Norm: 0.2532801330089569)
+ Step... (39475 | Loss: 0.008581236936151981, Learning Rate: 2.1264648239593953e-05, Gradient Norm: 0.1751236915588379)
+ Step... (39500 | Loss: 0.01655127853155136, Learning Rate: 2.121413854183629e-05, Gradient Norm: 0.23636147379875183)
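Across these logged updates the learning rate drops by roughly 5.05e-8 every 25 steps (about 2.02e-9 per step), which is consistent with a linear decay reaching zero at the run's 50k-step horizon. A minimal optax sketch of such a schedule; the peak value and warmup length are assumptions for illustration, not values taken from this log:

    import optax

    TOTAL_STEPS = 50_000
    WARMUP_STEPS = 500   # assumption, not shown in this log excerpt
    PEAK_LR = 1e-4       # assumption, not shown in this log excerpt

    # Linear warmup followed by linear decay to zero at TOTAL_STEPS.
    schedule = optax.join_schedules(
        schedules=[
            optax.linear_schedule(0.0, PEAK_LR, transition_steps=WARMUP_STEPS),
            optax.linear_schedule(PEAK_LR, 0.0, transition_steps=TOTAL_STEPS - WARMUP_STEPS),
        ],
        boundaries=[WARMUP_STEPS],
    )

    # Slope sanity check against the log: ~2.02e-9 per step
    # (2.3133e-05 at step 38550 vs 2.0255e-05 at step 39975).
    print(schedule(38_550), schedule(39_975))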
+ Training...: 11% 462/4393 [37:00<6:26:59, 5.91s/it]
+ Evaluating ...: 0% 0/85 [00:00<?, ?it/s]
+ Step... (39550 | Loss: 0.013957838527858257, Learning Rate: 2.1113133698236197e-05, Gradient Norm: 0.25816094875335693)
+ Step... (39575 | Loss: 0.003900324460119009, Learning Rate: 2.1062624000478536e-05, Gradient Norm: 0.15411750972270966)
+ Step... (39600 | Loss: 0.004219492897391319, Learning Rate: 2.101212157867849e-05, Gradient Norm: 0.1492801159620285)
+ Step... (39625 | Loss: 0.007128617260605097, Learning Rate: 2.0961617337889038e-05, Gradient Norm: 0.21547576785087585)
+ Step... (39650 | Loss: 0.012958021834492683, Learning Rate: 2.091110945912078e-05, Gradient Norm: 0.28005728125572205)
+ Step... (39675 | Loss: 0.006572413258254528, Learning Rate: 2.086060521833133e-05, Gradient Norm: 0.3049915134906769)
+ Step... (39700 | Loss: 0.008246519602835178, Learning Rate: 2.0810102796531282e-05, Gradient Norm: 0.2068808376789093)
+ Step... (39725 | Loss: 0.004701581783592701, Learning Rate: 2.075959309877362e-05, Gradient Norm: 0.23778201639652252)
+ Step... (39750 | Loss: 0.010084073059260845, Learning Rate: 2.0709090676973574e-05, Gradient Norm: 0.1955907642841339)
+ Step... (39775 | Loss: 0.0037632507737725973, Learning Rate: 2.0658588255173527e-05, Gradient Norm: 0.18372221291065216)
+ Step... (39800 | Loss: 0.009699777700006962, Learning Rate: 2.0608078557415865e-05, Gradient Norm: 0.19054998457431793)
+ Step... (39825 | Loss: 0.0035127829760313034, Learning Rate: 2.0557576135615818e-05, Gradient Norm: 0.17126810550689697)
+ Step... (39850 | Loss: 0.007305247243493795, Learning Rate: 2.050707371381577e-05, Gradient Norm: 0.15133488178253174)
+ Step... (39875 | Loss: 0.007563646882772446, Learning Rate: 2.045656401605811e-05, Gradient Norm: 0.2595842778682709)
+ Step... (39900 | Loss: 0.009252386167645454, Learning Rate: 2.0406061594258063e-05, Gradient Norm: 0.16340221464633942)
+ Step... (39925 | Loss: 0.009359311312437057, Learning Rate: 2.0355557353468612e-05, Gradient Norm: 0.15072058141231537)
+ Step... (39950 | Loss: 0.005342613440006971, Learning Rate: 2.0305049474700354e-05, Gradient Norm: 0.1631428301334381)
+ Step... (39975 | Loss: 0.008378183469176292, Learning Rate: 2.0254545233910903e-05, Gradient Norm: 0.23551413416862488)
+ /home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/jax_utils.py:312: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ return jax.tree_map(pad, tree)
+ /home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/jax_utils.py:321: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ return out if static_return else jax.tree_map(unpad, out)
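The two FutureWarnings above come from flax's jax_utils helpers, which pad and unpad pytree leaves around evaluation batches; the fix the warning suggests is a mechanical rename. A sketch of the pattern under that assumption (illustrative, not flax's actual source):

    import jax
    import jax.numpy as jnp

    def pad_leaves(tree, pad_to):
        """Pad the leading axis of every leaf in a pytree to `pad_to` rows."""
        def pad(x):
            widths = [(0, pad_to - x.shape[0])] + [(0, 0)] * (x.ndim - 1)
            return jnp.pad(x, widths)
        # Deprecated spelling: jax.tree_map(pad, tree)
        return jax.tree_util.tree_map(pad, tree)

    batch = {"inputs": jnp.ones((3, 5)), "labels": jnp.ones((3,))}
    padded = pad_leaves(batch, pad_to=4)  # every leaf now has leading dim 4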
+ device_metrics = jax.tree_map(lambda x: x[0], device_metrics)
+ /home/sanchitgandhi/hf/lib/python3.8/site-packages/flax/training/common_utils.py:45: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ return jax.tree_map(stack_args, *forest)
+ run_flax_speech_recognition_seq2seq.py:1392: FutureWarning: jax.tree_map is deprecated, and will be removed in a future release. Use jax.tree_util.tree_map instead.
+ eval_metrics = jax.tree_map(jnp.mean, eval_metrics)
+ Step... (30000/50000 | Eval Loss: 0.9876799583435059 | Eval wer: 0.04759016212639241 | Eval cer: 0.035079873533571844 |): 75% 9/12 [53:21:16<17:31:17, 21025.84s/it]
+ params = jax.device_get(jax.tree_map(lambda x: x[0], state.params))
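The `lambda x: x[0]` tree-maps above are the standard pmap unreplication idiom: every leaf carries a leading device axis and, after the cross-device reductions, all replicas hold identical values, so keeping index 0 and calling jax.device_get pulls a single host copy for logging or checkpointing. A minimal sketch (`replicated` stands in for the script's pmap-ed state, not its real type):

    import jax

    def unreplicate_to_host(replicated):
        # Drop the device axis -- replicas agree after psum/pmean --
        # then materialise the result in host memory.
        first = jax.tree_util.tree_map(lambda x: x[0], replicated)
        return jax.device_get(first)

    # e.g. params = unreplicate_to_host(state.params)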
+ Configuration saved in /home/sanchitgandhi/flax-wav2vec2-2-bart-large-ls-960h-black-box/config.json
+ tcmalloc: large alloc 2586787840 bytes == 0x46a526000 @ 0x7f7cba873680 0x7f7cba893bdd 0x7f7b690721ff 0x7f7b6908142c 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6908241d 0x7f7b6907c164 0x7f7b6907c91e 0x505166 0x56bbfa 0x569dba 0x5f6eb3 0x56cc1f 0x569dba 0x5f6eb3 0x56cc1f 0x5f6cd6 0x56bacd 0x569dba 0x50bca0 0x56cc1f 0x569dba 0x5f6eb3 0x56bacd 0x569dba 0x5f6eb3
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ Model weights saved in /home/sanchitgandhi/flax-wav2vec2-2-bart-large-ls-960h-black-box/flax_model.msgpack
+ tokenizer config file saved in ./tokenizer_config.json
+ Special tokens file saved in ./special_tokens_map.json
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ Adding files tracked by Git LFS: ['wandb/run-20220828_085247-2hx8pk65/logs/debug-internal.log']. This may take a bit of time if the files are large.
+ WARNING:huggingface_hub.repository:Adding files tracked by Git LFS: ['wandb/run-20220828_085247-2hx8pk65/logs/debug-internal.log']. This may take a bit of time if the files are large.
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
+ To disable this warning, you can either:
+ - Avoid using `tokenizers` before the fork if possible
+ - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
+ huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
  To disable this warning, you can either:
  - Avoid using `tokenizers` before the fork if possible
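The repeated huggingface/tokenizers message is emitted once per forked worker whenever the Rust tokenizer's thread pool has been touched before a fork. The remedy is the one the warning itself prints: pin the environment variable before any tokenizer work happens in the process. A sketch (the checkpoint name is illustrative, not taken from this log):

    import os

    # Must be set before the first tokenizer call in the process.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large")  # illustrative checkpoint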
wandb/run-20220828_085247-2hx8pk65/files/wandb-summary.json CHANGED
@@ -1 +1 @@
- {"train/decoder_grad_norm": 0.17611896991729736, "train/decoder_param_norm": 1062.4339599609375, "train/encoder_grad_norm": 0.1704455018043518, "train/encoder_param_norm": 2322.47119140625, "train/grad_norm": 0.24509090185165405, "layer_grad_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 0.006716505624353886}, "embed_tokens": {"embedding": 0.0642298087477684}, "layernorm_embedding": {"bias": 0.002760232426226139, "scale": 0.002035953802987933}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 4.814476142200874e-06, "kernel": 0.009996717795729637}, "out_proj": {"bias": 0.007056824862957001, "kernel": 0.035733651369810104}, "q_proj": {"bias": 0.00045872520422562957, "kernel": 0.010119627229869366}, "v_proj": {"bias": 0.014002962969243526, "kernel": 0.028897128999233246}}, "encoder_attn_layer_norm": {"bias": 0.010591115802526474, "scale": 0.01078032236546278}, "fc1": {"bias": 0.004002838861197233, "kernel": 0.09873048961162567}, "fc2": {"bias": 0.010417253710329533, "kernel": 0.09346094727516174}, "final_layer_norm": {"bias": 0.02925114333629608, "scale": 0.025131691247224808}, "self_attn": {"k_proj": {"bias": 1.495998731115833e-06, "kernel": 0.009474096819758415}, "out_proj": {"bias": 0.016716178506612778, "kernel": 0.03129136934876442}, "q_proj": {"bias": 0.0007107162964530289, "kernel": 0.00879898015409708}, "v_proj": {"bias": 0.01823665015399456, "kernel": 0.04575955122709274}}, "self_attn_layer_norm": {"bias": 0.006624831352382898, "scale": 0.0074623520486056805}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 0.024443458765745163, "kernel": 0.04797496274113655}}, "1": {"conv": {"bias": 0.018976185470819473, "kernel": 0.03335544094443321}}, "2": {"conv": {"bias": 0.02025711163878441, "kernel": 0.05017269030213356}}}}, "encoder": {"layer_norm": {"bias": 0.09079575538635254, "scale": 0.0370769128203392}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 2.546356427046703e-06, "kernel": 0.018089286983013153}, "out_proj": {"bias": 0.002156489295884967, "kernel": 0.034899111837148666}, "q_proj": {"bias": 0.0026696091517806053, "kernel": 0.017847701907157898}, "v_proj": {"bias": 0.009628637693822384, "kernel": 0.03381096571683884}}, "feed_forward": {"intermediate_dense": {"bias": 0.004856355953961611, "kernel": 0.04390779137611389}, "output_dense": {"bias": 0.001986933406442404, "kernel": 0.03847503662109375}}, "final_layer_norm": {"bias": 0.02509871870279312, "scale": 0.028251897543668747}, "layer_norm": {"bias": 0.043583061546087265, "scale": 0.03362990543246269}}}, "pos_conv_embed": {"conv": {"bias": 0.0006979878526180983, "weight_g": 0.002805550117045641, "weight_v": 0.011207042261958122}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 0.0032904886174947023, "scale": 0.003584273625165224}, "projection": {"bias": 0.0010678826365619898, "kernel": 
0.03113219328224659}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 58.647159576416016}, "embed_tokens": {"embedding": 628.4832763671875}, "layernorm_embedding": {"bias": 2.4181270599365234, "scale": 13.876160621643066}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 47.96188735961914, "kernel": 331.3609313964844}, "out_proj": {"bias": 6.137172698974609, "kernel": 227.73728942871094}, "q_proj": {"bias": 20.867891311645508, "kernel": 338.29461669921875}, "v_proj": {"bias": 3.5909812450408936, "kernel": 231.93850708007812}}, "encoder_attn_layer_norm": {"bias": 10.96327018737793, "scale": 57.53877639770508}, "fc1": {"bias": 26.014301300048828, "kernel": 345.999755859375}, "fc2": {"bias": 7.84496545791626, "kernel": 248.36624145507812}, "final_layer_norm": {"bias": 3.921175003051758, "scale": 63.50761413574219}, "self_attn": {"k_proj": {"bias": 59.543113708496094, "kernel": 279.90435791015625}, "out_proj": {"bias": 3.776594638824463, "kernel": 132.7462158203125}, "q_proj": {"bias": 32.19286346435547, "kernel": 283.0003662109375}, "v_proj": {"bias": 2.568603992462158, "kernel": 141.117431640625}}, "self_attn_layer_norm": {"bias": 8.895291328430176, "scale": 84.70443725585938}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 1.1401727199554443, "kernel": 62.00471878051758}}, "1": {"conv": {"bias": 1.3068256378173828, "kernel": 59.942413330078125}}, "2": {"conv": {"bias": 1.5166486501693726, "kernel": 59.78715133666992}}}}, "encoder": {"layer_norm": {"bias": 0.2932465970516205, "scale": 4.228818893432617}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 19.381807327270508, "kernel": 553.2210083007812}, "out_proj": {"bias": 16.85451316833496, "kernel": 705.1119995117188}, "q_proj": {"bias": 40.90638732910156, "kernel": 545.735107421875}, "v_proj": {"bias": 15.575094223022461, "kernel": 696.6250610351562}}, "feed_forward": {"intermediate_dense": {"bias": 24.463790893554688, "kernel": 1377.1727294921875}, "output_dense": {"bias": 20.80949592590332, "kernel": 1303.4677734375}}, "final_layer_norm": {"bias": 32.52007293701172, "scale": 141.95835876464844}, "layer_norm": {"bias": 7.280069828033447, "scale": 45.696510314941406}}}, "pos_conv_embed": {"conv": {"bias": 15.224443435668945, "weight_g": 21.051162719726562, "weight_v": 213.89393615722656}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.5982058644294739, "kernel": 8.08896541595459}, "layer_norm": {"bias": 10.069783210754395, "scale": 10.451257705688477}}, "1": {"conv": {"bias": 4.74075174331665, "kernel": 90.8435287475586}, "layer_norm": {"bias": 6.922820091247559, "scale": 19.5467586517334}}, "2": {"conv": {"bias": 6.7732415199279785, "kernel": 146.13897705078125}, "layer_norm": {"bias": 9.044225692749023, "scale": 19.424888610839844}}, "3": {"conv": {"bias": 5.224758148193359, "kernel": 159.10508728027344}, "layer_norm": {"bias": 8.319666862487793, "scale": 17.64743423461914}}, "4": {"conv": {"bias": 4.434978008270264, "kernel": 157.35813903808594}, "layer_norm": {"bias": 9.193974494934082, "scale": 15.562357902526855}}, "5": {"conv": {"bias": 5.297643661499023, "kernel": 131.1835174560547}, "layer_norm": {"bias": 10.735219955444336, "scale": 13.812533378601074}}, "6": {"conv": {"bias": 5.615579128265381, "kernel": 136.41822814941406}, "layer_norm": {"bias": 12.515308380126953, "scale": 11.152680397033691}}}}, "feature_projection": {"layer_norm": {"bias": 9.262188911437988, "scale": 
27.640396118164062}, "projection": {"bias": 4.317654132843018, "kernel": 88.17610931396484}}, "masked_spec_embed": 26.247730255126953}}, "train/learning_rate": 4.045656169182621e-05, "train/loss": 0.02121199481189251, "train/param_norm": 2553.945556640625, "_timestamp": 1661823749, "_runtime": 146982, "_step": 29975, "eval/loss": 1.021510124206543, "eval/wer": 0.05054961214661226, "eval/cer": 0.0362100285658818, "eval/step_10k": {"_type": "table-file", "sha256": "8b44e8a00a036a18ffdf81b4d076c8bf849ea6649001c69e94fa439b14f110ee", "size": 26434, "artifact_path": "wandb-client-artifact://18m0dj4hts3yiat04x5pvmncavkjapd5wb8bznb37vw8c0lqna3m2yjd1wtdrfstuoo7ejt2sphvjo0zuw1e5ne5d3qbkd7c1fylclfggig6us5tsmsj2uum5pchx48n:latest/eval/step_10k.table.json", "_latest_artifact_path": "wandb-client-artifact://18m0dj4hts3yiat04x5pvmncavkjapd5wb8bznb37vw8c0lqna3m2yjd1wtdrfstuoo7ejt2sphvjo0zuw1e5ne5d3qbkd7c1fylclfggig6us5tsmsj2uum5pchx48n:latest/eval/step_10k.table.json", "path": "media/table/eval/step_10k_10000_8b44e8a00a036a18ffdf.table.json", "ncols": 7, "nrows": 50}, "eval/step_20k": {"_type": "table-file", "sha256": "a0a50c5d8793ca99e4646f70c3624f8742c5285825bc1c59ab4083ac4de9d6e3", "size": 26657, "artifact_path": "wandb-client-artifact://13ri9hnxp93kf7dsdol2hs1j0v7bpkwwvujpi27awdck0fjm6vfog0dun9k9toif5xrt3cijlotddakikiw0bnbo3go679b4d2spq9c0w865vq0k9auiszkkbvev62fc:latest/eval/step_20k.table.json", "_latest_artifact_path": "wandb-client-artifact://13ri9hnxp93kf7dsdol2hs1j0v7bpkwwvujpi27awdck0fjm6vfog0dun9k9toif5xrt3cijlotddakikiw0bnbo3go679b4d2spq9c0w865vq0k9auiszkkbvev62fc:latest/eval/step_20k.table.json", "path": "media/table/eval/step_20k_20000_a0a50c5d8793ca99e464.table.json", "ncols": 7, "nrows": 50}}
 
+ {"train/decoder_grad_norm": 0.18293321132659912, "train/decoder_param_norm": 1063.0654296875, "train/encoder_grad_norm": 0.148331880569458, "train/encoder_param_norm": 2323.336669921875, "train/grad_norm": 0.23551413416862488, "layer_grad_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 0.008046639151871204}, "embed_tokens": {"embedding": 0.060666970908641815}, "layernorm_embedding": {"bias": 0.003096886444836855, "scale": 0.0024350089952349663}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 5.256703389022732e-06, "kernel": 0.011271456256508827}, "out_proj": {"bias": 0.007880721241235733, "kernel": 0.03872065246105194}, "q_proj": {"bias": 0.0004976371419616044, "kernel": 0.01105893962085247}, "v_proj": {"bias": 0.015226359479129314, "kernel": 0.030586158856749535}}, "encoder_attn_layer_norm": {"bias": 0.01159473042935133, "scale": 0.012393548153340816}, "fc1": {"bias": 0.004439335782080889, "kernel": 0.10113218426704407}, "fc2": {"bias": 0.011387723498046398, "kernel": 0.09879624843597412}, "final_layer_norm": {"bias": 0.026725012809038162, "scale": 0.026244478300213814}, "self_attn": {"k_proj": {"bias": 1.8891130366682773e-06, "kernel": 0.010783703066408634}, "out_proj": {"bias": 0.016153542324900627, "kernel": 0.039619628340005875}, "q_proj": {"bias": 0.0008692306582815945, "kernel": 0.01054653525352478}, "v_proj": {"bias": 0.01716933213174343, "kernel": 0.04919762164354324}}, "self_attn_layer_norm": {"bias": 0.007215098012238741, "scale": 0.008393414318561554}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 0.022141050547361374, "kernel": 0.049536462873220444}}, "1": {"conv": {"bias": 0.017486272379755974, "kernel": 0.03386249393224716}}, "2": {"conv": {"bias": 0.020221581682562828, "kernel": 0.05212334543466568}}}}, "encoder": {"layer_norm": {"bias": 0.07752241939306259, "scale": 0.03339512273669243}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 1.787670839803468e-06, "kernel": 0.016130739822983742}, "out_proj": {"bias": 0.001517767203040421, "kernel": 0.0303343553096056}, "q_proj": {"bias": 0.002082030288875103, "kernel": 0.01582256518304348}, "v_proj": {"bias": 0.00666964752599597, "kernel": 0.027823949232697487}}, "feed_forward": {"intermediate_dense": {"bias": 0.002947209170088172, "kernel": 0.03786110132932663}, "output_dense": {"bias": 0.0014471356989815831, "kernel": 0.03480111435055733}}, "final_layer_norm": {"bias": 0.014765393920242786, "scale": 0.01634989120066166}, "layer_norm": {"bias": 0.03281298279762268, "scale": 0.024696579203009605}}}, "pos_conv_embed": {"conv": {"bias": 0.0003536914009600878, "weight_g": 0.0017076540971174836, "weight_v": 0.0059150331653654575}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 0.00199363986030221, "scale": 0.0019663891289383173}, "projection": {"bias": 0.000548368610907346, "kernel": 
0.015020000748336315}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"decoder": {"model": {"decoder": {"embed_positions": {"embedding": 58.655296325683594}, "embed_tokens": {"embedding": 628.4320678710938}, "layernorm_embedding": {"bias": 2.423579692840576, "scale": 13.864307403564453}, "layers": {"FlaxBartDecoderLayers": {"encoder_attn": {"k_proj": {"bias": 47.95212173461914, "kernel": 331.52020263671875}, "out_proj": {"bias": 6.132379531860352, "kernel": 227.88941955566406}, "q_proj": {"bias": 20.880966186523438, "kernel": 338.4452819824219}, "v_proj": {"bias": 3.570829391479492, "kernel": 232.0838623046875}}, "encoder_attn_layer_norm": {"bias": 11.024534225463867, "scale": 57.68368148803711}, "fc1": {"bias": 26.054283142089844, "kernel": 346.79296875}, "fc2": {"bias": 7.841187953948975, "kernel": 248.94561767578125}, "final_layer_norm": {"bias": 3.914222002029419, "scale": 63.57343673706055}, "self_attn": {"k_proj": {"bias": 59.552547454833984, "kernel": 280.0233459472656}, "out_proj": {"bias": 3.7681872844696045, "kernel": 132.86544799804688}, "q_proj": {"bias": 32.200714111328125, "kernel": 283.1105651855469}, "v_proj": {"bias": 2.561887741088867, "kernel": 141.2297821044922}}, "self_attn_layer_norm": {"bias": 8.89633846282959, "scale": 84.71704864501953}}}}}}, "encoder": {"adapter": {"layers": {"0": {"conv": {"bias": 1.1991273164749146, "kernel": 62.47041320800781}}, "1": {"conv": {"bias": 1.3806315660476685, "kernel": 60.54581069946289}}, "2": {"conv": {"bias": 1.5864932537078857, "kernel": 60.361602783203125}}}}, "encoder": {"layer_norm": {"bias": 0.2923617959022522, "scale": 4.219600677490234}, "layers": {"FlaxWav2Vec2EncoderLayers": {"attention": {"k_proj": {"bias": 19.385311126708984, "kernel": 553.5303955078125}, "out_proj": {"bias": 16.84536361694336, "kernel": 705.300537109375}, "q_proj": {"bias": 40.93048858642578, "kernel": 546.0474243164062}, "v_proj": {"bias": 15.559945106506348, "kernel": 696.8070068359375}}, "feed_forward": {"intermediate_dense": {"bias": 24.44249725341797, "kernel": 1377.5845947265625}, "output_dense": {"bias": 20.795345306396484, "kernel": 1303.997314453125}}, "final_layer_norm": {"bias": 32.52366256713867, "scale": 142.01145935058594}, "layer_norm": {"bias": 7.272017478942871, "scale": 45.770084381103516}}}, "pos_conv_embed": {"conv": {"bias": 15.211658477783203, "weight_g": 21.0489501953125, "weight_v": 214.11346435546875}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.5982058644294739, "kernel": 8.08896541595459}, "layer_norm": {"bias": 10.069783210754395, "scale": 10.451257705688477}}, "1": {"conv": {"bias": 4.74075174331665, "kernel": 90.8435287475586}, "layer_norm": {"bias": 6.922820091247559, "scale": 19.5467586517334}}, "2": {"conv": {"bias": 6.7732415199279785, "kernel": 146.13897705078125}, "layer_norm": {"bias": 9.044225692749023, "scale": 19.424888610839844}}, "3": {"conv": {"bias": 5.224758148193359, "kernel": 159.10508728027344}, "layer_norm": {"bias": 8.319666862487793, "scale": 17.64743423461914}}, "4": {"conv": {"bias": 4.434978008270264, "kernel": 157.35813903808594}, "layer_norm": {"bias": 9.193974494934082, "scale": 15.562357902526855}}, "5": {"conv": {"bias": 5.297643661499023, "kernel": 131.1835174560547}, "layer_norm": {"bias": 10.735219955444336, "scale": 13.812533378601074}}, "6": {"conv": {"bias": 5.615579128265381, "kernel": 136.41822814941406}, "layer_norm": {"bias": 12.515308380126953, "scale": 11.152680397033691}}}}, "feature_projection": {"layer_norm": {"bias": 9.234997749328613, "scale": 
27.593135833740234}, "projection": {"bias": 4.323590278625488, "kernel": 88.11954498291016}}, "masked_spec_embed": 26.247730255126953}}, "train/learning_rate": 2.0254545233910903e-05, "train/loss": 0.008378183469176292, "train/param_norm": 2554.995361328125, "_timestamp": 1661871981, "_runtime": 195214, "_step": 39975, "eval/loss": 0.9876799583435059, "eval/wer": 0.04759016212639241, "eval/cer": 0.035079873533571844, "eval/step_10k": {"_type": "table-file", "sha256": "8b44e8a00a036a18ffdf81b4d076c8bf849ea6649001c69e94fa439b14f110ee", "size": 26434, "artifact_path": "wandb-client-artifact://18m0dj4hts3yiat04x5pvmncavkjapd5wb8bznb37vw8c0lqna3m2yjd1wtdrfstuoo7ejt2sphvjo0zuw1e5ne5d3qbkd7c1fylclfggig6us5tsmsj2uum5pchx48n:latest/eval/step_10k.table.json", "_latest_artifact_path": "wandb-client-artifact://18m0dj4hts3yiat04x5pvmncavkjapd5wb8bznb37vw8c0lqna3m2yjd1wtdrfstuoo7ejt2sphvjo0zuw1e5ne5d3qbkd7c1fylclfggig6us5tsmsj2uum5pchx48n:latest/eval/step_10k.table.json", "path": "media/table/eval/step_10k_10000_8b44e8a00a036a18ffdf.table.json", "ncols": 7, "nrows": 50}, "eval/step_20k": {"_type": "table-file", "sha256": "a0a50c5d8793ca99e4646f70c3624f8742c5285825bc1c59ab4083ac4de9d6e3", "size": 26657, "artifact_path": "wandb-client-artifact://13ri9hnxp93kf7dsdol2hs1j0v7bpkwwvujpi27awdck0fjm6vfog0dun9k9toif5xrt3cijlotddakikiw0bnbo3go679b4d2spq9c0w865vq0k9auiszkkbvev62fc:latest/eval/step_20k.table.json", "_latest_artifact_path": "wandb-client-artifact://13ri9hnxp93kf7dsdol2hs1j0v7bpkwwvujpi27awdck0fjm6vfog0dun9k9toif5xrt3cijlotddakikiw0bnbo3go679b4d2spq9c0w865vq0k9auiszkkbvev62fc:latest/eval/step_20k.table.json", "path": "media/table/eval/step_20k_20000_a0a50c5d8793ca99e464.table.json", "ncols": 7, "nrows": 50}, "eval/step_30k": {"_type": "table-file", "sha256": "509ad8614e16ae2800f100e82fd40356d3559e0425cf50e8e19c3155dc6c604e", "size": 26610, "artifact_path": "wandb-client-artifact://1v7mr89p2ldfoyyvrj6x6g0s8qpvgnioclm4puk5i0wphipeujkyna0xf7y63nt4a5qivkdzytt4lzhwuraoe53a5683ln1yzntyw9kh7vhcxz7oci70rpw62o1tz6ho:latest/eval/step_30k.table.json", "_latest_artifact_path": "wandb-client-artifact://1v7mr89p2ldfoyyvrj6x6g0s8qpvgnioclm4puk5i0wphipeujkyna0xf7y63nt4a5qivkdzytt4lzhwuraoe53a5683ln1yzntyw9kh7vhcxz7oci70rpw62o1tz6ho:latest/eval/step_30k.table.json", "path": "media/table/eval/step_30k_30000_509ad8614e16ae2800f1.table.json", "ncols": 7, "nrows": 50}}
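The summary diff above records the move from the 30k evaluation (eval/loss 1.0215, wer 0.0505, cer 0.0362) to the 40k one (eval/loss 0.9877, wer 0.0476, cer 0.0351). A minimal sketch for pulling those fields back out of the file, assuming only the path and keys visible in this diff:

    import json

    path = "wandb/run-20220828_085247-2hx8pk65/files/wandb-summary.json"
    with open(path) as f:
        summary = json.load(f)

    print(f"step {summary['_step']}: "
          f"eval loss {summary['eval/loss']:.4f}, "
          f"WER {summary['eval/wer']:.4%}, CER {summary['eval/cer']:.4%}")
    # New side of this diff -> step 39975: eval loss 0.9877, WER 4.7590%, CER 3.5080%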
wandb/run-20220828_085247-2hx8pk65/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220828_085247-2hx8pk65/run-2hx8pk65.wandb CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dbd52b7487584ca71d80fed0e3182d1bd195ab787ea787199645db598d730074
- size 12917901
+ oid sha256:704bf667df7d300df731d8d0bb3301cfbf4b05488ba632c3bc0bcf3bd69bea8c
+ size 17153865
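Only the pointer's oid and size change here: Git LFS stores the real blob out of band, and the pointer records its SHA-256 digest and byte count. A sketch for checking a locally materialised blob against the "+" side of the pointer, streamed in chunks so multi-GB files (like flax_model.msgpack above) need not fit in memory:

    import hashlib

    def lfs_oid(path, chunk_size=1 << 20):
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    # Expected: 704bf667df7d300df731d8d0bb3301cfbf4b05488ba632c3bc0bcf3bd69bea8c
    print(lfs_oid("wandb/run-20220828_085247-2hx8pk65/run-2hx8pk65.wandb"))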