AlexWortega
committed on
Commit
•
ec565ff
1
Parent(s):
787d54b
Update README.md
Browse files
README.md
CHANGED
@@ -48,13 +48,7 @@ tags:
|
|
48 |
|
49 |
# Quick Start
|
50 |
|
51 |
-
|
52 |
-
from transformers import pipeline
|
53 |
-
#в душе не ебу будет ли норм работать, ставлю жопу автора хф что токенайзер мисматчнет с моделью, вообще грузите по нормальному
|
54 |
-
pipe = pipeline(model='AlexWortega/instruct_rugptlarge')
|
55 |
-
pipe('''Как собрать питон код?''')
|
56 |
-
```
|
57 |
-
or
|
58 |
```python
|
59 |
from transformers import GPT2TokenizerFast,GPT2LMHeadModel
|
60 |
tokenizer = GPT2TokenizerFast.from_pretrained("AlexWortega/instruct_rugptlarge")
|
@@ -66,6 +60,31 @@ model = GPT2LMHeadModel.from_pretrained("AlexWortega/instruct_rugptlarge")
|
|
66 |
model.to(device)
|
67 |
|
68 |
model.resize_token_embeddings(len(tokenizer))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
```
|
70 |
обратите внимание, что лучшие параметры для генерации
|
71 |
```
|
|
|
48 |
|
49 |
# Quick Start
|
50 |
|
51 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
```python
|
53 |
from transformers import GPT2TokenizerFast,GPT2LMHeadModel
|
54 |
tokenizer = GPT2TokenizerFast.from_pretrained("AlexWortega/instruct_rugptlarge")
|
|
|
60 |
model.to(device)
|
61 |
|
62 |
model.resize_token_embeddings(len(tokenizer))
|
63 |
+
|
64 |
+
def generate_seqs(q,model, k=2):
|
65 |
+
gen_kwargs = {
|
66 |
+
"min_length": 20,
|
67 |
+
"max_new_tokens": 100,
|
68 |
+
"top_k": 50,
|
69 |
+
"top_p": 0.7,
|
70 |
+
"do_sample": True,
|
71 |
+
"early_stopping": True,
|
72 |
+
"no_repeat_ngram_size": 2,
|
73 |
+
"eos_token_id": tokenizer.eos_token_id,
|
74 |
+
"pad_token_id": tokenizer.eos_token_id,
|
75 |
+
"use_cache": True,
|
76 |
+
"repetition_penalty": 1.5,
|
77 |
+
"length_penalty": 1.2,
|
78 |
+
"num_beams": 4,
|
79 |
+
"num_return_sequences": k
|
80 |
+
}
|
81 |
+
q = q + '<instructionS>'
|
82 |
+
t = tokenizer.encode(q, return_tensors='pt').to(device)
|
83 |
+
g = model.generate(t, **gen_kwargs)
|
84 |
+
generated_sequences = tokenizer.batch_decode(g, skip_special_tokens=True)
|
85 |
+
|
86 |
+
return generated_sequences
|
87 |
+
|
88 |
```
|
89 |
обратите внимание, что лучшие параметры для генерации
|
90 |
```
|