ayushi0430 committed
Commit • 6ef9a7a
1 Parent(s): 1fba336
cleanup
requirements.txt CHANGED
@@ -11,5 +11,5 @@ python-dateutil==2.8.2
 requests==2.28.2
 tqdm==4.65.0
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@a79a7c33d04d7090b94256d2fcd631c5e6fd8631#egg=lm-eval
-lamini
+lamini==2.1.4a3
 openai
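This hunk pins the lamini dependency to the pre-release 2.1.4a3 instead of floating on whatever version pip resolves. A quick post-install sanity check (a minimal sketch; the "lamini" distribution name is taken from requirements.txt) confirms the pin took effect:

    from importlib.metadata import version

    # Verify the pinned pre-release is what actually got installed.
    assert version("lamini") == "2.1.4a3"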
src/backend/harness_evaluator.py CHANGED
@@ -28,5 +28,6 @@ class HarnessEvaluator:
             batch_size=self.batch_size,
             num_fewshot=0,
             limit=MAX_EXAMPLES,
+            random_seed=42
         )
         return results
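Passing random_seed=42 fixes which examples are sampled under limit=MAX_EXAMPLES, so repeated evaluation runs score the same subset. Where a seed kwarg is not available, the same effect can be approximated by seeding the global RNGs up front (a minimal sketch; set_seed is a hypothetical helper, not part of this repo):

    import random
    import numpy as np

    def set_seed(seed: int = 42) -> None:
        # Seed the RNGs that drive example selection so that runs
        # limited to MAX_EXAMPLES are reproducible across invocations.
        random.seed(seed)
        np.random.seed(seed)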
src/backend/lamini_eval/evaluators/icd_evaluator.py CHANGED
@@ -118,8 +118,8 @@ class ICDAnswerGenerator(GenerationNode):
         )
         prompt += "You are answering questions about the ICD 11 standard. "
         prompt += "Include the most relevant ICD11 code in your answer. "
-        prompt += "Every code starts with a letter, followed by a two digit number. "
-        prompt += "In the ICD-11, the next level of the hierarchy is indicated in the code by a dot and a single number or letter from A to Z."
+        # prompt += "Every code starts with a letter, followed by a two digit number. "
+        # prompt += "In the ICD-11, the next level of the hierarchy is indicated in the code by a dot and a single number or letter from A to Z."
         prompt += "A customer asks\n"
         prompt += chunk.data["question"]
         prompt += " [/INST]"
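With the two code-syntax hints commented out, the generator no longer describes the ICD-11 code format to the model, and the assembled prompt reduces to roughly the following (a sketch that inlines the diff's string concatenation; build_icd_prompt is a hypothetical name, and the surrounding GenerationNode plumbing is omitted):

    def build_icd_prompt(question: str) -> str:
        # Mirrors the surviving prompt += lines in the hunk above.
        prompt = "You are answering questions about the ICD 11 standard. "
        prompt += "Include the most relevant ICD11 code in your answer. "
        prompt += "A customer asks\n"
        prompt += question
        prompt += " [/INST]"
        return prompt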
src/backend/lamini_eval/models/gpt4_model.py CHANGED
@@ -48,7 +48,8 @@ class GPT4Model(LM):
         obj = TruthfulQAEvaluator()
         prompt = obj.get_prompt(question)
         try:
-
+            result = self.__call__(prompt=prompt, output_type={"answer": "str"})
+            response = result['answer']
         except Exception as e:
             print("Error fetching gpt4 response: ", e)
             # select random answer
@@ -59,8 +60,8 @@ class GPT4Model(LM):
         prompt = obj.get_prompt(question)
         try:
             op_type = {"explanation": "str", "answer": "str"}
-
-            response = f"({
+            result = self.__call__(prompt=prompt, output_type=op_type)
+            response = f"({result['answer']})"
         except Exception as e:
             print("Error fetching gpt4 response: ", e)
             # select random answer
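Both hunks move to a two-step pattern: request structured output by passing an output_type schema to self.__call__, then read the answer field out of the returned dict. Assuming __call__ returns a dict keyed by the requested fields (which the diff implies but does not show), the usage looks roughly like this sketch:

    # Hedged sketch: model is a GPT4Model instance; the output_type keys are
    # taken from the diff, and the dict-shaped return value is an assumption.
    op_type = {"explanation": "str", "answer": "str"}
    try:
        result = model(prompt=prompt, output_type=op_type)
        response = f"({result['answer']})"
    except Exception as e:
        print("Error fetching gpt4 response: ", e)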
src/backend/run_eval_suite.py CHANGED
@@ -23,7 +23,6 @@ async def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, bat
                                          batch_size, device, no_cache, limit, write_out=True,
                                          output_base_path='logs')
     lamini_results = await custom_evaluator.evaluate()
-    task_names = [task_names[0]]
 
     print(f"Selected Harness Tasks: {task_names}")
     harness_evaluator = HarnessEvaluator(eval_request.model, eval_request.revision, eval_request.precision,
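Removing task_names = [task_names[0]] means the harness now receives every selected task rather than silently truncating the list to its first element. For illustration (hypothetical task names, not from this repo):

    task_names = ["hellaswag", "truthfulqa_mc"]  # hypothetical selection
    # The removed line would have collapsed this to ["hellaswag"],
    # so only one harness task ever ran. After this change the full
    # list reaches HarnessEvaluator unchanged.
    print(f"Selected Harness Tasks: {task_names}")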