Caasi/Kexin HUANG committed on
Commit
e711713
•
1 Parent(s): afe1a8c

commit from kexinhuang

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
models/config.json → config.json RENAMED
File without changes
models/configuration_internlm.py → configuration_internlm.py RENAMED
File without changes
infer.py CHANGED
@@ -16,6 +16,10 @@ from models.modeling_internlm import InternLMForSequenceClassification
16
  logger = logging.getLogger(__name__)
17
  logging.basicConfig(level=logging.INFO)
18
 
 
 
 
 
19
  def load_data_with_prompt(data_path, tokenizer, max_length, data_type='train'):
20
  tokenized_data = None
21
  logger.info(f"Tokenizing {data_type} data...")
@@ -89,8 +93,7 @@ def generate(args):
89
 
90
  categories = ['Fairness', 'Safety', 'Morality', 'Legality', 'Data protection']
91
 
92
- tokenizer = InternLMTokenizer.from_pretrained(args.model_name_or_path)
93
- model = InternLMForSequenceClassification.from_pretrained(args.model_name_or_path, use_cache=False)
94
  collate_fn = DataCollatorWithPadding(tokenizer)
95
  model.to('cuda')
96
  model.eval()
@@ -151,7 +154,6 @@ def generate_score(data_path):
151
 
152
  if __name__ == '__main__':
153
  parser = argparse.ArgumentParser()
154
- parser.add_argument('--model_name_or_path', type=str, default='./models')
155
  parser.add_argument('--data_path', type=str, default='./data/Flames_1k_Chinese_InternLM2_7B.jsonl') # Modify the path of data to be evaluated
156
  parser.add_argument('--max_length', type=int, default=512)
157
  parser.add_argument('--val_bsz_per_gpu', type=int, default=16)
 
16
  logger = logging.getLogger(__name__)
17
  logging.basicConfig(level=logging.INFO)
18
 
19
+ tokenizer = InternLMTokenizer.from_pretrained("CaasiHUANG/flames-scorer", trust_remote_code=True)
20
+ model = InternLMForSequenceClassification.from_pretrained("CaasiHUANG/flames-scorer", trust_remote_code=True)
21
+
22
+
23
  def load_data_with_prompt(data_path, tokenizer, max_length, data_type='train'):
24
  tokenized_data = None
25
  logger.info(f"Tokenizing {data_type} data...")
 
93
 
94
  categories = ['Fairness', 'Safety', 'Morality', 'Legality', 'Data protection']
95
 
96
+
 
97
  collate_fn = DataCollatorWithPadding(tokenizer)
98
  model.to('cuda')
99
  model.eval()
 
154
 
155
  if __name__ == '__main__':
156
  parser = argparse.ArgumentParser()
 
157
  parser.add_argument('--data_path', type=str, default='./data/Flames_1k_Chinese_InternLM2_7B.jsonl') # Modify the path of data to be evaluated
158
  parser.add_argument('--max_length', type=int, default=512)
159
  parser.add_argument('--val_bsz_per_gpu', type=int, default=16)
models/modeling_internlm.py → modeling_internlm.py RENAMED
File without changes
models/.DS_Store DELETED
Binary file (6.15 kB)
 
models/pytorch_model-00001-of-00002.bin → pytorch_model-00001-of-00002.bin RENAMED
File without changes
models/pytorch_model-00002-of-00002.bin → pytorch_model-00002-of-00002.bin RENAMED
File without changes
models/pytorch_model.bin.index.json → pytorch_model.bin.index.json RENAMED
File without changes
models/special_tokens_map.json → special_tokens_map.json RENAMED
File without changes
models/tokenization_internlm.py → tokenization_internlm.py RENAMED
File without changes
models/tokenizer.model → tokenizer.model RENAMED
File without changes