Yeyito committed on
Commit
f6d7d3d
·
1 Parent(s): 98282cd

It was "and", not "or" 🙂

Browse files
detect-pretrain-code-contamination/src/run.py CHANGED
@@ -87,7 +87,7 @@ def sample_generation(sentence, model, tokenizer, args,data_name):
87
  input_ids = input_ids.to(model.device)
88
 
89
  output = None
90
- if data_name != "cais/mmlu" or data_name != "gsm8k":
91
  output = model.generate(input_ids, max_new_tokens=len(sentence.split())-half_sentence_index, min_new_tokens=1, num_return_sequences=args['num_z'], pad_token_id=tokenizer.eos_token_id, **args['generate_args'])
92
  else:
93
  output = model.generate(input_ids, max_new_tokens=(len(sentence.split())-half_sentence_index)/2, min_new_tokens=1, num_return_sequences=int(args['num_z']/2), pad_token_id=tokenizer.eos_token_id, **args['generate_args'])
 
87
  input_ids = input_ids.to(model.device)
88
 
89
  output = None
90
+ if data_name != "cais/mmlu" and data_name != "gsm8k":
91
  output = model.generate(input_ids, max_new_tokens=len(sentence.split())-half_sentence_index, min_new_tokens=1, num_return_sequences=args['num_z'], pad_token_id=tokenizer.eos_token_id, **args['generate_args'])
92
  else:
93
  output = model.generate(input_ids, max_new_tokens=(len(sentence.split())-half_sentence_index)/2, min_new_tokens=1, num_return_sequences=int(args['num_z']/2), pad_token_id=tokenizer.eos_token_id, **args['generate_args'])