ayushi0430 committed
Commit 6ef9a7a
1 Parent(s): 1fba336
requirements.txt CHANGED
@@ -11,5 +11,5 @@ python-dateutil==2.8.2
 requests==2.28.2
 tqdm==4.65.0
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@a79a7c33d04d7090b94256d2fcd631c5e6fd8631#egg=lm-eval
-lamini
+lamini==2.1.4a3
 openai
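The only change here pins lamini to the 2.1.4a3 pre-release instead of floating to whatever version pip resolves. A minimal sketch (not part of the commit) of a runtime check that the pinned build actually got installed, using only the standard library:

# Hypothetical check script; only the "lamini==2.1.4a3" pin comes from the diff.
from importlib.metadata import version, PackageNotFoundError

try:
    installed = version("lamini")
    assert installed == "2.1.4a3", f"expected lamini==2.1.4a3, found {installed}"
    print(f"lamini {installed} installed")
except PackageNotFoundError:
    raise SystemExit("lamini is not installed; run pip install -r requirements.txt")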
src/backend/harness_evaluator.py CHANGED
@@ -28,5 +28,6 @@ class HarnessEvaluator:
             batch_size=self.batch_size,
             num_fewshot=0,
             limit=MAX_EXAMPLES,
+            random_seed=42
         )
         return results
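The new random_seed=42 argument fixes the seed for the harness run, so the MAX_EXAMPLES-sized subset chosen under limit stays the same from one evaluation to the next. A small illustration of the idea (assumed behavior, not the harness's actual sampling code):

import random

def pick_examples(examples, limit, seed=42):
    # With a fixed seed, the same `limit`-sized subset comes back on every run,
    # which keeps scores comparable across evaluations.
    rng = random.Random(seed)
    return rng.sample(examples, limit)

assert pick_examples(list(range(100)), 5) == pick_examples(list(range(100)), 5)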
src/backend/lamini_eval/evaluators/icd_evaluator.py CHANGED
@@ -118,8 +118,8 @@ class ICDAnswerGenerator(GenerationNode):
         )
         prompt += "You are answering questions about the ICD 11 standard. "
         prompt += "Include the most relevant ICD11 code in your answer. "
-        prompt += "Every code starts with a letter, followed by a two digit number. "
-        prompt += "In the ICD-11, the next level of the hierarchy is indicated in the code by a dot and a single number or letter from A to Z."
+        # prompt += "Every code starts with a letter, followed by a two digit number. "
+        # prompt += "In the ICD-11, the next level of the hierarchy is indicated in the code by a dot and a single number or letter from A to Z."
         prompt += "A customer asks\n"
         prompt += chunk.data["question"]
         prompt += " [/INST]"
src/backend/lamini_eval/models/gpt4_model.py CHANGED
@@ -48,7 +48,8 @@ class GPT4Model(LM):
         obj = TruthfulQAEvaluator()
         prompt = obj.get_prompt(question)
         try:
-            response = self.__call__(prompt=prompt, output_type=str)
+            result = self.__call__(prompt=prompt, output_type={"answer": "str"})
+            response = result['answer']
         except Exception as e:
             print("Error fetching gpt4 response: ", e)
             # select random answer
@@ -59,8 +60,8 @@ class GPT4Model(LM):
         prompt = obj.get_prompt(question)
         try:
             op_type = {"explanation": "str", "answer": "str"}
-            answer = self.__call__(prompt=prompt, output_type=op_type)
-            response = f"({answer['answer']})"
+            result = self.__call__(prompt=prompt, output_type=op_type)
+            response = f"({result['answer']})"
         except Exception as e:
             print("Error fetching gpt4 response: ", e)
             # select random answer
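Both code paths now request a structured dict from the model and read its "answer" field, rather than asking for a bare string in the TruthfulQA case. A sketch of the shared pattern (call_model stands in for self.__call__; the helper name is made up):

import random

def answer_with_fallback(call_model, prompt, choices):
    # Ask for a structured answer; fall back to a random choice on failure,
    # mirroring the "select random answer" branch in the surrounding code.
    try:
        result = call_model(prompt=prompt, output_type={"answer": "str"})
        return result["answer"]
    except Exception as e:
        print("Error fetching gpt4 response: ", e)
        return random.choice(choices)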
src/backend/run_eval_suite.py CHANGED
@@ -23,7 +23,6 @@ async def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, bat
                          batch_size, device, no_cache, limit, write_out=True,
                          output_base_path='logs')
     lamini_results = await custom_evaluator.evaluate()
-    task_names = [task_names[0]]

     print(f"Selected Harness Tasks: {task_names}")
     harness_evaluator = HarnessEvaluator(eval_request.model, eval_request.revision, eval_request.precision,
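Dropping task_names = [task_names[0]] means every selected task now reaches the HarnessEvaluator instead of only the first one. A tiny before/after sketch with a made-up task list:

task_names = ["truthfulqa_mc", "mmlu_clinical_knowledge"]  # hypothetical selection
# previously: task_names = [task_names[0]] kept only "truthfulqa_mc"
print(f"Selected Harness Tasks: {task_names}")  # now both tasks are evaluated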