SaviAnna committed on
Commit
78953d6
·
verified ·
1 Parent(s): aabd4e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -9
app.py CHANGED
@@ -31,16 +31,32 @@ def generate_text(model, tokenizer, prompt, max_len, temperature):
31
  input_ids = tokenizer.encode(prompt, return_tensors='pt')
32
 
33
  # Генерация текста
34
- output = model.generate(input_ids=input_ids,
35
- max_length=max_len,
36
- do_sample=True,
37
- temperature=temperature,
38
- top_k=50,
39
- top_p=0.6,
40
- no_repeat_ngram_size=3,
41
- num_return_sequences=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
 
 
 
44
  return generated_text
45
 
46
  # Streamlit приложение
 
31
  input_ids = tokenizer.encode(prompt, return_tensors='pt')
32
 
33
  # Генерация текста
34
+ # output = model.generate(input_ids=input_ids,
35
+ # max_length=max_len,
36
+ # do_sample=True,
37
+ # temperature=temperature,
38
+ # top_k=50,
39
+ # top_p=0.6,
40
+ # no_repeat_ngram_size=3,
41
+ # num_return_sequences=1,
42
+ # pad_token_id=tokenizer.eos_token_id)
43
+ output = model.generate(
44
+ input_ids,
45
+ max_length=max_len,
46
+ temperature=temperature, # Controls the diversity of the generated text
47
+ top_k=50, # Keeps only the top-k most likely words
48
+ top_p=0.9, # Nucleus sampling (cumulative probability)
49
+ repetition_penalty=1.2, # Penalty for repeating words or phrases
50
+ no_repeat_ngram_size=4, # Prevents repetition of 4-grams
51
+ do_sample=True, # Enables sampling for greater diversity
52
+ pad_token_id=tokenizer.eos_token_id
53
+ )[0]
55
 
56
+
57
+ #generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
58
+ #Decode the generated token IDs to text
59
+ generated_text = tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
60
  return generated_text
61
 
62
  # Streamlit приложение