Update llmeval.py
llmeval.py CHANGED (+17 -15)
@@ -8,10 +8,8 @@ client = Groq(api_key=AK)
 de=DatabaseEngine()


-
-
-
-SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT=f'''
+def PROMPT_UPDATER(agenttype):
+    SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT=f'''
 Task:
 Evaluate the biological quality of a Prompt, Context, and Response from an {agenttype} Agent on a 0–10 continuous scale.

@@ -52,7 +50,7 @@ No extra commentary, no markdown, no explanations before or after.
 Think step by step
 '''

-SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT=f'''
+    SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT=f'''
 Task:
 Evaluate how well the {agenttype} Response addresses the specific Prompt by leveraging the provided Context on a 0–10 continuous scale.

@@ -93,7 +91,7 @@ Think step by step
 '''


-SYSTEM_PROMPT_FOR_TRIAD_COHERENCE=f'''
+    SYSTEM_PROMPT_FOR_TRIAD_COHERENCE=f'''
 Task:
 Evaluate the logical and semantic coherence of the Prompt, Context, and Response of {agenttype} as a unified set on a 0–10 continuous scale.

@@ -131,7 +129,7 @@ Think step by step
 '''


-SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY=f'''
+    SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY=f'''
 Task:
 Evaluate how focused, detailed, and context-aware the {agenttype} Response is with respect to the Prompt and Context on a 0–10 continuous scale.

@@ -170,6 +168,9 @@ Think step by step

 '''

+    return SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,SYSTEM_PROMPT_FOR_TRIAD_COHERENCE
+
+
 class LLM_as_Evaluator():

     def __init__(self):
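Because the four system prompts are f-strings, moving them inside PROMPT_UPDATER means {agenttype} is interpolated each time the function is called instead of once at module level. A minimal sketch of the intended behaviour; the second agent type below is hypothetical and only shows that the same templates can serve different agents:

# Sketch: each call binds {agenttype} into all four prompt templates.
bio, relevance, specificity, coherence = PROMPT_UPDATER("observation agent")
bio_other, _, _, _ = PROMPT_UPDATER("planning agent")  # hypothetical agent type

# The same templates now yield different prompt text per agent type.
assert "observation agent" in bio
assert "planning agent" in bio_other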
@@ -198,6 +199,7 @@ class LLM_as_Evaluator():

         data_to_evaluate=de.GetData(promptversion)

+        SYSTEM_FOR_BIO_CONTEXT_ALIGNMENT,SYSTEM_FOR_CONTEXTUAL_RELEVANCE_ALIGNMENT,SYSTEM_PROMPT_FOR_RESPONSE_SPECIFICITY,SYSTEM_PROMPT_FOR_TRIAD_COHERENCE = PROMPT_UPDATER("observation agent")

         evaluation_responses=[]

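The diff stops short of showing how the unpacked prompts are consumed, but the file already builds a Groq client (client = Groq(api_key=AK)), so each system prompt presumably seeds one chat completion per Prompt/Context/Response triad. A minimal sketch under that assumption; the helper name score_triad, the model id, and the user-message layout are illustrative, not part of this commit:

# Sketch only: score one triad against one of the returned system prompts.
# score_triad, the model id, and the message layout are assumptions.
def score_triad(system_prompt, prompt, context, response):
    completion = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user",
             "content": f"Prompt: {prompt}\n\nContext: {context}\n\nResponse: {response}"},
        ],
    )
    return completion.choices[0].message.content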
@@ -263,7 +265,7 @@
         evaluation_responses.append({"unit_coherence":evaluation_response})


-
+        data={


         "promptversion":promptversion,
@@ -271,17 +273,17 @@
         "contextual_relevance_alignment":"",
         "unit_coherence":"",
         "response_specificity":""
-
+        }

-
+        for resp in evaluation_responses:

-
-
-
-
+            data["biological_context_alignment"]=resp["biological_context_alignment"]
+            data["contextual_relevance_alignment"]=resp["contextual_relevance_alignment"]
+            data["unit_coherence"]=resp["unit_coherence"]
+            data["response_specificity"]=resp["response_specificity"]


-
+        de.Update(data=data)

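The visible append stores one metric per entry ({"unit_coherence": ...}), so if the other appends follow the same pattern, reading all four keys from every entry in the new loop would raise KeyError for the keys an entry does not carry. A sketch of a guarded merge, assuming the same key names; this is an alternative shape, not what the commit does:

# Sketch, assuming each evaluation_responses entry holds exactly one metric key.
for resp in evaluation_responses:
    for key, value in resp.items():
        if key in data:        # copy only the metric this entry actually carries
            data[key] = value

de.Update(data=data)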