Update README.md
Browse files
README.md
CHANGED
@@ -36,7 +36,8 @@ tokenizer = AutoTokenizer.from_pretrained("erfanzar/PGT-1B-2EP")
|
|
36 |
|
37 |
model = AutoModelForCausalLM.from_pretrained("erfanzar/PGT-1B-2EP",device_map='auto',load_in_8bit=True)
|
38 |
|
39 |
-
|
|
|
40 |
|
41 |
|
42 |
def ppp(text:str):
|
@@ -52,8 +53,11 @@ def generate(text,max_new_tokens:int=1024,use_ppp:bool=False,b_pair=False):
|
|
52 |
enc = tokenizer(text,return_tensors='pt',add_special_tokens=False)
|
53 |
text_r = text
|
54 |
enc = model.generate(enc.input_ids,max_new_tokens=1,pad_token_id=0)
|
55 |
-
text = tokenizer.decode(enc[0],skip_special_tokens=False)
|
56 |
-
|
|
|
|
|
|
|
57 |
break
|
58 |
else:
|
59 |
yield text[len(text_r):] if b_pair else text
|
|
|
36 |
|
37 |
model = AutoModelForCausalLM.from_pretrained("erfanzar/PGT-1B-2EP",device_map='auto',load_in_8bit=True)
|
38 |
|
39 |
+
|
40 |
+
def verify_text(txt: str) -> str:
    """Re-wrap *txt* so that no output line is wider than 110 characters.

    The input is split on newline characters and every piece is wrapped on
    its own with ``textwrap.fill``; the wrapped pieces are then joined back
    with newlines. Existing line breaks are therefore kept (an empty line
    stays empty) while over-long lines are folded.
    """
    # A distinct loop name avoids shadowing the ``txt`` parameter.
    return '\n'.join(textwrap.fill(line, width=110) for line in txt.split('\n'))
|
41 |
|
42 |
|
43 |
def ppp(text:str):
|
|
|
53 |
enc = tokenizer(text,return_tensors='pt',add_special_tokens=False)
|
54 |
text_r = text
|
55 |
enc = model.generate(enc.input_ids,max_new_tokens=1,pad_token_id=0)
|
56 |
+
text = tokenizer.decode(enc[0],skip_special_tokens=False)
|
57 |
+
text = text[:-4]+tokenizer.eos_token if text[-4:] == '\n\n\n\n' else text
|
58 |
+
|
59 |
+
if text.endswith(tokenizer.eos_token) or text.endswith('\n\n\n\n\n'):
|
60 |
+
yield text[len(text_r):] if b_pair else text
|
61 |
break
|
62 |
else:
|
63 |
yield text[len(text_r):] if b_pair else text
|