DSXiangLi committed on
Commit
1cabc1d
1 Parent(s): 15fc122
main.py CHANGED
@@ -30,5 +30,11 @@ if __name__ == '__main__':
30
  openai_key ='a'
31
  n_human=2
32
  n_machine=1
33
- n_instruct=5
34
  instance = SELF(seed_file, openai_key, n_human, n_machine, n_instruct, None)
 
 
 
 
 
 
 
30
  openai_key ='a'
31
  n_human=2
32
  n_machine=1
33
+ n_instruct=4
34
  instance = SELF(seed_file, openai_key, n_human, n_machine, n_instruct, None)
35
+
36
+ scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=False, tokenizer=ChineseTokenizer())
37
+ inst_tokens = scorer._tokenizer.tokenize('诊断患者')
38
+ with Pool(os.cpu_count()) as p:
39
+ rouge_scores = p.map(partial(rouge_scorer._score_lcs,inst_tokens), instance.all_instruction_tokens)
40
+ rouge_l = [score.fmeasure for score in rouge_scores]
self/__pycache__/generate.cpython-38.pyc CHANGED
Binary files a/self/__pycache__/generate.cpython-38.pyc and b/self/__pycache__/generate.cpython-38.pyc differ
 
self/__pycache__/prompt.cpython-38.pyc CHANGED
Binary files a/self/__pycache__/prompt.cpython-38.pyc and b/self/__pycache__/prompt.cpython-38.pyc differ
 
self/generate.py CHANGED
@@ -1,4 +1,6 @@
1
  # -*-coding:utf-8 -*-
 
 
2
  import re
3
  import numpy as np
4
  import os
@@ -65,8 +67,8 @@ class SELF(object):
65
  prompt = self_prompt
66
  self.chain = LLMChain(llm=self.llm, prompt=PromptTemplate.from_template(prompt))
67
  self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=False, tokenizer=ChineseTokenizer())
68
- self.all_instruction = self.human_instruction_data + self.machine_instruction_data
69
- self.all_instruction_tokens = [self.scorer._tokenizer.tokenize(i['instruction']) for i in
70
  self.all_instruction]
71
 
72
  @property
@@ -138,7 +140,7 @@ class SELF(object):
138
  for inst in instruction_data:
139
  inst_tokens = self.scorer._tokenizer.tokenize(inst['instruction'])
140
  with Pool(os.cpu_count()) as p:
141
- rouge_scores = p.map(partial(rouge_scorer._score_lcs, self.all_instruction_tokens), inst_tokens)
142
  rouge_l = [score.fmeasure for score in rouge_scores]
143
  print(rouge_scores)
144
  print(rouge_l)
 
1
  # -*-coding:utf-8 -*-
2
+ import os
3
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
  import re
5
  import numpy as np
6
  import os
 
67
  prompt = self_prompt
68
  self.chain = LLMChain(llm=self.llm, prompt=PromptTemplate.from_template(prompt))
69
  self.scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=False, tokenizer=ChineseTokenizer())
70
+ self.all_instruction = [i['instruction'] for i in self.human_instruction_data + self.machine_instruction_data]
71
+ self.all_instruction_tokens = [self.scorer._tokenizer.tokenize(i) for i in
72
  self.all_instruction]
73
 
74
  @property
 
140
  for inst in instruction_data:
141
  inst_tokens = self.scorer._tokenizer.tokenize(inst['instruction'])
142
  with Pool(os.cpu_count()) as p:
143
+ rouge_scores = p.map(partial(rouge_scorer._score_lcs, inst_tokens), self.all_instruction_tokens)
144
  rouge_l = [score.fmeasure for score in rouge_scores]
145
  print(rouge_scores)
146
  print(rouge_l)