ml-visoft committed on
Commit 3630362
1 Parent(s): b7aeae6

ChatGPT linked in.

Files changed (3):
  1. eval_code.py +227 -27
  2. main.py +22 -2
  3. requirements.txt +2 -1
eval_code.py CHANGED
@@ -1,12 +1,11 @@
- import copy
  import json
- import time
+ import copy
+ import openai
 
  """
  We will handle here the code evaluation phase.
 
  """
- import json
 
 
  EVAL_ANSWER_NOEVAL = 0
@@ -21,6 +20,122 @@ CODE_AUGMENTATIONS=[
  ("NAME", "Meaningful names in the code."),
  ]
 
+ # GPT prompt, ver 1, OpenAI.
+ # TODO Store prompt version, endpoint, endpoint version, etc. once we can afford to do A/B testing.
+
+ gpt_teacher_prompt = \
+ """
+ You are a helpful teacher that evaluates pieces of C code based on several clean code principles. The principles will
+ be provided below. As output you will provide a list of JSON objects.
+
+ The language is C. The students will give you a complete C code.
+ You will evaluate its cleanliness according to the criteria given below. A C program can infringe multiple criteria.
+
+ Each principle has a mnemonic and an explanation. You will have to check whether the code fits the principles,
+ based on the explanations. A principle matches if most of the code is like in the explanation. If this is the case, issue the mnemonic.
+
+ As **input**, the student will send you a piece of code.
+
+ As an **answer**, you should generate a list of JSON objects. Each JSON object should contain two parameters: "criteria" and "explanation".
+ The "criteria" should be a programming principle or acronym as above, and the "explanation" should provide a brief list for the student
+ of how their code fails.
+
+
+ #################
+
+ Criteria
+
+ NO_COMPILE
+ The code is not standalone compilable C code. There are no ``#include`` directives. There is no main function. The C syntax looks
+ broken. There are wrong string constants. There are illegal operands or keywords. Only flagrant violations should trigger this criterion.
+ This criterion should be checked on a "best effort" basis, that is, don't start a compiler and compile the code. As an explanation you should return
+ a short description of why the code would not compile. Example: "There is no ``main()`` function in the code. Please provide one."
+
+
+ DRY
+ Don't repeat yourself. The code has repetitive blocks that could have been isolated into a function. The code has no functions
+ but some instructions could have been parametrized. The data is stored in flat variables but a struct or an array would have been
+ more natural for that problem. As an explanation you should present a code fragment that is repeated, followed by a short list
+ of function names or programming patterns that should be present in the code.
+
+ SRP
+ Single "object" Responsibility, or in our case, Single Function Responsibility. The code has functions that do more than one thing.
+ For example, reading from a file, then computing something on that data. Allocating, reading, computing and printing in the same function.
+ Or the code is stuffed into main(), without any separation into functions. As an explanation you should return one or two function names
+ and a list of their responsibilities. Tell the student that they should separate the functionalities.
+
+ MC
+ Magic constants. The code has for loops, data allocations, bounds, etc. that are expressed as numeric constants in the place
+ they are used. These constants appear in more than one place. If most of the constants are written directly in the code, then
+ the given piece of code matches the criterion. If most of the constants are declared as symbolic constants, then the code does
+ not match this criterion. The naming of the constants is not important for this criterion; naming and its relevance are judged by
+ other topics. For this criterion it matters only whether the constant is defined or not. As an explanation you will propose
+ some symbolic constants that should be used in the code.
+
+ NAME
+ Nondescriptive variable/function names. The code has meaningless function and variable names. For example: t, x, y, s for
+ vectors, structures or tables that store meaningful data. Function names like ``add`` or ``get`` without specifying what will be
+ added or what will be retrieved. A good function name is ``addStudentInList`` or ``getStudentsSortedByCriteria``. Variable names used
+ in for loops or local to some functions may be less descriptive. If most of the names are bad, then the code matches this criterion.
+ As an explanation you will return a few original-name/suggested-name pairs, where the original-name is the original variable in the code
+ and the suggested-name is something that would be meaningful given what the code does.
+
+ #################
+
+ Example
+
+ ```
+ #include <stdio.h>
+ #include <stdlib.h>
+
+ #define SIZE 10
+
+ void func(int* arr) {
+     arr[0] = 0 * 0; printf("%d ", arr[0]);
+     arr[1] = 1 * 1; printf("%d ", arr[1]);
+     arr[2] = 2 * 2; printf("%d ", arr[2]);
+     arr[3] = 3 * 3; printf("%d ", arr[3]);
+     arr[4] = 4 * 4; printf("%d ", arr[4]);
+     arr[5] = 5 * 5; printf("%d ", arr[5]);
+     arr[6] = 6 * 6; printf("%d ", arr[6]);
+     arr[7] = 7 * 7; printf("%d ", arr[7]);
+     arr[8] = 8 * 8; printf("%d ", arr[8]);
+     arr[9] = 9 * 9; printf("%d ", arr[9]);
+     printf("\n");
+ }
+
+ int main() {
+     int* arr = (int*)malloc(sizeof(int) * SIZE);
+     func(arr);
+     free(arr);
+     return 0;
+ }
+ ```
+
+ Your output:
+
+ [
+   {
+     "criteria": "DRY",
+     "explanation": "The ``arr[3] = 3 * 3; printf(\"%d \", arr[3]);`` code repeats a lot. Consider creating functions like ``computeArray``, ``displayArray``. Consider using ``for`` loops."
+   },
+   {
+     "criteria": "SRP",
+     "explanation": "The function ``func`` handles the array computations and output. You should separate the responsibilities."
+   },
+   {
+     "criteria": "NAME",
+     "explanation": "``func`` should be called ``computeAndPrint``, ``arr`` should be called ``dataArray``."
+   }
+ ]
+ """
+
+
+ def get_the_openai_client(openai_key):
+     if openai_key is None or openai_key == "" or openai_key == "(none)":
+         return None
+     client = openai.Client(api_key=openai_key)
+     return client
 
  def clean_prompt_answer(answer):
      """
 
@@ -37,17 +152,80 @@ def clean_prompt_answer(answer):
          cleaned.append(l)
      return "\n".join(cleaned)
 
+
  def parse_chatgpt_answer(ans_text):
+     """
+     Minimal parsing of the chatGPT answer.
+
+     TODO: Ensure no extra fields, ensure format, etc. (cough pydantic)
+
+     :param ans_text:
+     :return:
+     """
      try:
+         ans_text = clean_prompt_answer(ans_text)
          js_answer = json.loads(ans_text)
      except:
          # for now we dump the error in console
          import traceback
          exception = traceback.format_exc()
          print(exception)
-         return {"error":exception}
+         return {"error": f"ChatGPT answered: {ans_text}.\n{exception}"}
      return js_answer
 
+ def make_user_prompt(code_fragment):
+     """
+     Formats the code fragment for the user prompt.
+
+     :param code_fragment:
+     :return:
+     """
+     user_prompt = f"```\n{code_fragment}\n```"
+     return user_prompt
+
+
+ def call_openai(client, system_prompt, user_prompt, model="gpt-3.5-turbo", temperature=0.1, force_json=False,
+                 get_full_response=False):
+     """
+     Constructs a prompt for chatGPT and sends it.
+
+     Possible models:
+     - gpt-3.5-turbo
+     - gpt-4-turbo
+     - gpt-4o
+     - gpt-4o-mini
+
+     Don't use force_json; it breaks common use cases like a list of JSON objects.
+
+     :param client:
+     :param system_prompt:
+     :param user_prompt:
+     :param model:
+     :param temperature:
+     :return:
+     """
+     messages = [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": user_prompt}
+     ]
+     optional_args = {}
+     if force_json:
+         optional_args["response_format"] = {"type": "json_object"}
+     response = client.chat.completions.create(
+         model=model,
+         messages=messages,
+         temperature=temperature,
+         **optional_args
+     )
+     if get_full_response:
+         return response
+     else:
+         output_content = response.choices[0].message.content
+         return output_content
+
  def eval_code_by_chatgpt(openai_client, ccode):
      """
      Will evaluate a piece of code using our heavily tuned prompt!
@@ -58,30 +236,35 @@ def eval_code_by_chatgpt(openai_client, ccode):
      """
      # time.sleep(3)
      try:
-         return """[
-           {
-             "criteria": "DRY",
-             "explanation": "The memory allocation and initialization for ``p1``, ``p2``, and ``p3`` are repetitive. Consider creating a function like ``allocateAndInitializeMemory``."
-           },
-           {
-             "criteria": "DRY",
-             "explanation": "The second DRY failure, because this is the observed ChatGPT behaviour."
-           },
-           {
-             "criteria": "SRP",
-             "explanation": "The ``main`` function handles memory allocation, initialization, and printing. You should separate these responsibilities into different functions like ``allocateMemory``, ``initializeData``, and ``printData``."
-           },
-           {
-             "criteria": "NAME",
-             "explanation": "``x1`` should be called ``title``, ``y1`` should be called ``author``, ``z1`` should be called ``year``, ``p1`` should be called ``titlePtr``, ``p2`` should be called ``authorPtr``, ``p3`` should be called ``yearPtr``."
-           }
-         ]"""
+         # (the hardcoded mock answer above is kept, commented out, for offline testing)
          assert openai_client is not None
+         user_prompt = make_user_prompt(ccode)
+         chatgpt_answer = call_openai(openai_client, system_prompt=gpt_teacher_prompt, user_prompt=user_prompt,
+                                      model="gpt-4o",
+                                      temperature=0, force_json=False, get_full_response=False)
+         return chatgpt_answer
      except:
          import traceback
          traceback.print_exc()
-         return {"error":"There was an error while parsing the answer. Maybe ChatGPT is overloaded?"}
+         return {"error": "There was an error while getting the ChatGPT answer. Maybe ChatGPT is overloaded?"}
 
 
 
@@ -126,8 +309,25 @@ def eval_the_piece_of_c_code(openai_client, ccode):
      :param ccode:
      :return:
      """
-     chatgpt_ans = eval_code_by_chatgpt(openai_client, ccode)
-     chatgpt_js = parse_chatgpt_answer(chatgpt_ans)
-     enhanced_answer = add_evaluation_fields_on_js_answer(chatgpt_js)
+     enhanced_answer = {"error": "Not processed"}
+     try:
+         chatgpt_ans = eval_code_by_chatgpt(openai_client, ccode)
+     except:
+         import traceback
+         traceback.print_exc()
+         enhanced_answer = {"error": "There was an error while calling chatGPT."}
+         return enhanced_answer
+     if "error" in chatgpt_ans:
+         # an error happened below; forward it to the caller
+         enhanced_answer = chatgpt_ans
+     else:
+         try:
+             chatgpt_js = parse_chatgpt_answer(chatgpt_ans)
+             enhanced_answer = add_evaluation_fields_on_js_answer(chatgpt_js)
+         except:
+             import traceback
+             traceback.print_exc()
+             enhanced_answer = {"error": "There was an error while parsing the answer."}
      return enhanced_answer
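The `TODO` in `parse_chatgpt_answer` points at pydantic. A possible shape validator, as a sketch only (the `Finding` model and `validate_findings` helper are illustrative, not part of this commit):

```
from typing import List
from pydantic import BaseModel

class Finding(BaseModel):
    criteria: str
    explanation: str

def validate_findings(raw_list: list) -> List[Finding]:
    # Raises pydantic.ValidationError on a missing field or a wrong type.
    return [Finding(**item) for item in raw_list]
```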
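End to end, the new flow in this file is: build the user prompt, call the model with `gpt_teacher_prompt` as the system prompt, JSON-parse the reply, then add the evaluation fields. A condensed happy-path sketch, assuming a valid key and network access:

```
import os
import eval_code

client = eval_code.get_the_openai_client(os.environ["OPENAI_KEY"])
result = eval_code.eval_the_piece_of_c_code(openai_client=client, ccode="int main() { return 0; }")
# result: a list of {"criteria": ..., "explanation": ...} objects with the added
# evaluation fields, or an {"error": ...} dict if any step failed.
print(result)
```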
 
main.py CHANGED
 
@@ -27,6 +27,8 @@ OAUTH_CLIENT_SECRET = os.environ.get('OAUTH_CLIENT_SECRET')
  OPENID_PROVIDER_URL = os.environ.get("OPENID_PROVIDER_URL", "https://huggingface.co")
  SPACE_HOST = os.environ.get('SPACE_HOST')
  HF_DATASET_AUTH_TOKEN = os.environ.get('HF_DATASET_AUTH_TOKEN', "(none)")
+ OPENAI_KEY = os.environ.get('OPENAI_KEY', "(none)")
+
  try:
      DEFAULT_ANONYMOUS_USER_LEVEL = int(os.environ.get('DEFAULT_ANONYMOUS_USER_LEVEL', 0))
  except:
 
@@ -113,7 +115,13 @@ HTML_USER_DATA = "login_user_data"
  OAUTH_SECRET_SESSION_NAME = "oauth_secret"
  USER_DATA_SESSION_NAME = "user_data"
 
+
  def get_openid_configuration():
+     """
+     Read the HF OpenID endpoints and configure them.
+
+     :return:
+     """
      config_url = OPENID_PROVIDER_URL + "/.well-known/openid-configuration"
 
      try:
 
@@ -153,12 +161,16 @@ if IS_LOCALHOST:
      print("Localhost detected in SPACE_HOST. App started in debug+live mode!")
      app, rt = fast_app(debug=True, live=True, hdrs=hdrs)
      REFRESH_TIME = 0.1
-     EVAL_TIMEOUT_SECONDS = 2
+     EVAL_TIMEOUT_SECONDS = 5
  else:
      app, rt = fast_app(debug=False, live=False, hdrs=hdrs)
      REFRESH_TIME = 1
      EVAL_TIMEOUT_SECONDS = 15
 
+
+ openai_client = eval_code.get_the_openai_client(OPENAI_KEY)
+
+
  ################# STORAGE
 
  def untyped_save_to_storage(dc, filename):
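A consequence of this wiring: when `OPENAI_KEY` is left at `"(none)"`, `openai_client` is `None`, the `assert` in `eval_code_by_chatgpt` trips, and the UI receives an `{"error": ...}` dict rather than a crash. A sketch of that degraded path:

```
import eval_code

client = eval_code.get_the_openai_client("(none)")   # -> None
result = eval_code.eval_the_piece_of_c_code(openai_client=client, ccode="int main() { return 0; }")
assert "error" in result
```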
 
@@ -479,6 +491,14 @@ def html_render_answer_from_db(session, show_submit_form=True):
      if eval_request_status == EVAL_STATE_QUERY:
          check_if_query_should_timeout(state_obj)
          return html_waiting_for_results(),
+     if eval_request_status == EVAL_STATE_ERROR:
+         # TODO duplicate code! fix it!
+         qe_obj_lst = question_evaluation_table(limit=1, where=f"id == {state_obj.current_qeval}")
+         if len(qe_obj_lst) < 1:
+             print(f"Object id {state_obj.current_qeval} can't be found in question_evaluation_table")
+             return (None,)
+         qe_obj = qe_obj_lst[0]
+         return html_error_results(qe_obj.answer_eval_text),
      print(f"Unknown state of the code evaluation request {state_obj.state}:")
      return html_error_results("Some error occurred."),
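The `TODO duplicate code!` marks a row lookup that also exists in the other state branches; a small helper would remove the duplication (the name `fetch_question_evaluation` is hypothetical, not part of this commit):

```
def fetch_question_evaluation(qeval_id):
    """Return the Question_Evaluation row for qeval_id, or None when the id is stale."""
    qe_obj_lst = question_evaluation_table(limit=1, where=f"id == {qeval_id}")
    if len(qe_obj_lst) < 1:
        print(f"Object id {qeval_id} can't be found in question_evaluation_table")
        return None
    return qe_obj_lst[0]
```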
 
 
@@ -509,7 +529,7 @@ def call_gpt_and_store_result(session_obj_id, code_to_check):
      local_sess_obj = local_sess_obj_lst[0]
 
      # Trigger the lengthy operation
-     enhanced_answer = eval_code.eval_the_piece_of_c_code(openai_client=None, ccode=code_to_check)
+     enhanced_answer = eval_code.eval_the_piece_of_c_code(openai_client=openai_client, ccode=code_to_check)
 
      # we create a new QA entry.
      qe_obj = Question_Evaluation_cls(code_text=code_to_check, answer_eval_text=enhanced_answer, submitted=0)
requirements.txt CHANGED
@@ -1,3 +1,4 @@
  python-fasthtml==0.3.6
  fastsql==1.0.1
- huggingface_hub==0.24.5
+ huggingface_hub==0.24.5
+ openai==1.35.3