Timothy-Vinzent committed on
Commit
122c32d
·
verified ·
1 Parent(s): ebb0c31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -10
app.py CHANGED
@@ -107,22 +107,57 @@ def submit_prompt(email, name, system_prompt):
107
  print("llm answer", answer)
108
  except Exception as e:
109
  answer = f"Error during OpenAI API call: {str(e)}"
110
-
111
- # Simple evaluation: check if the expected output is a substring of the answer (case-insensitive).
112
- if expected.lower() in answer.lower():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  score += 1
114
  verdict = "Correct"
115
- print(f"{expected.lower()} DOES NOT MATCH {answer.lower()}")
116
- else:
117
- verdict = "Incorrect"
118
- print(f"{expected.lower()} MATCHES {answer.lower()}")
119
-
120
  responses.append(
121
  f"Question: {question}\n"
122
- f"Answer: {answer}\n"
123
- f"Expected: {expected}\n"
124
  f"Result: {verdict}\n"
125
  )
 
 
126
 
127
  result_details = "\n".join(responses)
128
 
 
107
  print("llm answer", answer)
108
  except Exception as e:
109
  answer = f"Error during OpenAI API call: {str(e)}"
110
+
111
+ # Step 1: Check if the answer is valid JSON
112
+ try:
113
+ parsed_answer = json.loads(answer)
114
+ except json.JSONDecodeError as e:
115
+ verdict = f"Incorrect (Invalid JSON): {str(e)}"
116
+ responses.append(
117
+ f"Question: {question}\n"
118
+ f"Answer: {answer}\n"
119
+ f"Expected: {json.dumps(expected)}\n"
120
+ f"Result: {verdict}\n"
121
+ )
122
+ print(verdict)
123
+ continue
124
+
125
+ # Step 2: Check if all required keys are present
126
+ required_keys = ["document_level", "clause_level"]
127
+ missing_keys = [key for key in required_keys if key not in parsed_answer]
128
+ if missing_keys:
129
+ verdict = f"Incorrect (Missing Keys: {', '.join(missing_keys)})"
130
+ responses.append(
131
+ f"Question: {question}\n"
132
+ f"Answer: {json.dumps(parsed_answer)}\n"
133
+ f"Expected: {json.dumps(expected)}\n"
134
+ f"Result: {verdict}\n"
135
+ )
136
+ print(verdict)
137
+ continue
138
+
139
+ # Step 3: Check if values for each key match
140
+ incorrect_values = []
141
+ for key in required_keys:
142
+ if parsed_answer[key] != expected[key]:
143
+ incorrect_values.append(key)
144
+
145
+ if len(incorrect_values) == 2:
146
+ verdict = "Incorrect (Both values are incorrect)"
147
+ elif len(incorrect_values) == 1:
148
+ verdict = f"Incorrect (Value for key '{incorrect_values[0]}' is incorrect)"
149
+ else:
150
  score += 1
151
  verdict = "Correct"
152
+
 
 
 
 
153
  responses.append(
154
  f"Question: {question}\n"
155
+ f"Answer: {json.dumps(parsed_answer)}\n"
156
+ f"Expected: {json.dumps(expected)}\n"
157
  f"Result: {verdict}\n"
158
  )
159
+ print(verdict)
160
+
161
 
162
  result_details = "\n".join(responses)
163