Spaces:
Build error
Build error
bankholdup
committed on
Commit
•
dc60d3c
1
Parent(s):
41e6718
Update app.py
Browse files
app.py
CHANGED
@@ -13,13 +13,10 @@ def load_model():
|
|
13 |
model = GPT2LMHeadModel.from_pretrained(model_ckpt)
|
14 |
return tokenizer, model
|
15 |
|
16 |
-
def set_seed(
|
17 |
-
rd = np.random.randint(
|
18 |
-
print('seed =', rd)
|
19 |
np.random.seed(rd)
|
20 |
torch.manual_seed(rd)
|
21 |
-
if args.n_gpu > 0:
|
22 |
-
torch.cuda.manual_seed_all(rd)
|
23 |
|
24 |
title = st.title("Загрузка модели")
|
25 |
tokenizer, model = load_model()
|
@@ -28,38 +25,34 @@ context = st.text_input("Введите начало песни", "Как дел
|
|
28 |
generated_sequences = []
|
29 |
|
30 |
if st.button("Поехали", help="Может занять какое-то время"):
|
|
|
31 |
prompt_text = f"{context}"
|
32 |
encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")
|
33 |
output_sequences = model.generate(
|
34 |
input_ids=encoded_prompt,
|
35 |
-
max_length=
|
36 |
-
temperature=
|
37 |
top_k=50,
|
38 |
top_p=0.95,
|
39 |
repetition_penalty=1.0,
|
40 |
do_sample=True,
|
41 |
num_return_sequences=1,
|
42 |
)
|
43 |
-
|
44 |
-
# Remove the batch dimension when returning multiple sequences
|
45 |
if len(output_sequences.shape) > 2:
|
46 |
output_sequences.squeeze_()
|
47 |
|
48 |
for generated_sequence_idx, generated_sequence in enumerate(output_sequences):
|
49 |
print("ruGPT:".format(generated_sequence_idx + 1))
|
50 |
generated_sequence = generated_sequence.tolist()
|
51 |
-
|
52 |
-
# Decode text
|
53 |
text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
|
54 |
|
55 |
-
# Remove all text after the stop token
|
56 |
text = text[: text.find("</s>") if "</s>" else None]
|
57 |
-
|
58 |
-
# Add the prompt at the beginning of the sequence. Remove the excess text that was used for pre-processing
|
59 |
total_sequence = (
|
60 |
-
prompt_text + text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True)) :]
|
61 |
)
|
62 |
|
63 |
-
generated_sequences.append(total_sequence)
|
64 |
# os.system('clear')
|
65 |
st.write(total_sequence)
|
|
|
13 |
model = GPT2LMHeadModel.from_pretrained(model_ckpt)
|
14 |
return tokenizer, model
|
15 |
|
16 |
+
def set_seed(rng=100000):
|
17 |
+
rd = np.random.randint(rng)
|
|
|
18 |
np.random.seed(rd)
|
19 |
torch.manual_seed(rd)
|
|
|
|
|
20 |
|
21 |
title = st.title("Загрузка модели")
|
22 |
tokenizer, model = load_model()
|
|
|
25 |
generated_sequences = []
|
26 |
|
27 |
if st.button("Поехали", help="Может занять какое-то время"):
|
28 |
+
set_seed()
|
29 |
prompt_text = f"{context}"
|
30 |
encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt")
|
31 |
output_sequences = model.generate(
|
32 |
input_ids=encoded_prompt,
|
33 |
+
max_length=250 + len(encoded_prompt[0]),
|
34 |
+
temperature=1.95,
|
35 |
top_k=50,
|
36 |
top_p=0.95,
|
37 |
repetition_penalty=1.0,
|
38 |
do_sample=True,
|
39 |
num_return_sequences=1,
|
40 |
)
|
|
|
|
|
41 |
if len(output_sequences.shape) > 2:
|
42 |
output_sequences.squeeze_()
|
43 |
|
44 |
for generated_sequence_idx, generated_sequence in enumerate(output_sequences):
|
45 |
print("ruGPT:".format(generated_sequence_idx + 1))
|
46 |
generated_sequence = generated_sequence.tolist()
|
47 |
+
|
|
|
48 |
text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
|
49 |
|
|
|
50 |
text = text[: text.find("</s>") if "</s>" else None]
|
51 |
+
|
|
|
52 |
total_sequence = (
|
53 |
+
prompt_text + text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True)) :]
|
54 |
)
|
55 |
|
56 |
+
# generated_sequences.append(total_sequence)
|
57 |
# os.system('clear')
|
58 |
st.write(total_sequence)
|