erfanzar committed
Commit ad8d01a
1 Parent(s): 0a5fa50

Update README.md

Files changed (1)
  1. README.md +7 -3
README.md CHANGED
@@ -36,7 +36,8 @@ tokenizer = AutoTokenizer.from_pretrained("erfanzar/PGT-1B-2EP")
 
 model = AutoModelForCausalLM.from_pretrained("erfanzar/PGT-1B-2EP",device_map='auto',load_in_8bit=True)
 
-verify_text = lambda txt : '\n'.join([textwrap.fill(txt, width=140) for txt in txt.split('\n')])
+
+verify_text = lambda txt : '\n'.join([textwrap.fill(txt, width=110) for txt in txt.split('\n')])
 
 
 def ppp(text:str):
@@ -52,8 +53,11 @@ def generate(text,max_new_tokens:int=1024,use_ppp:bool=False,b_pair=False):
         enc = tokenizer(text,return_tensors='pt',add_special_tokens=False)
         text_r = text
         enc = model.generate(enc.input_ids,max_new_tokens=1,pad_token_id=0)
-        text = tokenizer.decode(enc[0],skip_special_tokens=False).replace('\n\n\n\n',tokenizer.eos_token)
-        if text.endswith(tokenizer.eos_token) or text.endswith('\n\n\n\n'):
+        text = tokenizer.decode(enc[0],skip_special_tokens=False)
+        text = text[:-4]+tokenizer.eos_token if text[-4:] == '\n\n\n\n' else text
+
+        if text.endswith(tokenizer.eos_token) or text.endswith('\n\n\n\n\n'):
+            yield text[len(text_r):] if b_pair else text
             break
         else:
             yield text[len(text_r):] if b_pair else text
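
For context, here is a minimal sketch of how the touched README lines read together after this commit. Only the changed lines come from the diff above (the width=110 verify_text lambda, decoding before the eos substitution, and the extra yield before break); the enclosing generation loop, the exact indentation, and the omission of the ppp/use_ppp prompt helper are assumptions made for illustration, not the model card's exact code.

    # Sketch only: loop and setup are assumed; the changed lines mirror the diff above.
    import textwrap
    from transformers import AutoTokenizer, AutoModelForCausalLM

    tokenizer = AutoTokenizer.from_pretrained("erfanzar/PGT-1B-2EP")
    model = AutoModelForCausalLM.from_pretrained(
        "erfanzar/PGT-1B-2EP", device_map='auto', load_in_8bit=True
    )

    # wrap output lines at 110 columns for display (was 140 before this commit)
    verify_text = lambda txt: '\n'.join(textwrap.fill(line, width=110) for line in txt.split('\n'))

    def generate(text, max_new_tokens: int = 1024, b_pair=False):
        for _ in range(max_new_tokens):  # assumed loop; the diff only shows its body
            enc = tokenizer(text, return_tensors='pt', add_special_tokens=False)
            text_r = text
            enc = model.generate(enc.input_ids, max_new_tokens=1, pad_token_id=0)
            # changed in this commit: decode first, then map a trailing '\n\n\n\n' run to eos
            text = tokenizer.decode(enc[0], skip_special_tokens=False)
            text = text[:-4] + tokenizer.eos_token if text[-4:] == '\n\n\n\n' else text

            if text.endswith(tokenizer.eos_token) or text.endswith('\n\n\n\n\n'):
                # changed in this commit: emit the final chunk before stopping
                yield text[len(text_r):] if b_pair else text
                break
            else:
                yield text[len(text_r):] if b_pair else text

    # usage: stream and pretty-print the growing completion
    # for chunk in generate("PGT, tell me about yourself."):
    #     print(verify_text(chunk))

The net effect of the change is that the stop condition no longer swallows the last piece of text: the trailing newline run is rewritten to the eos token after decoding, and the final chunk is yielded before the loop breaks.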