rosemariafontana committed on
Commit
d1c6aca
·
verified ·
1 Parent(s): 9fe6dfb

Update process_data.py

Browse files
Files changed (1) hide show
  1. process_data.py +52 -23
process_data.py CHANGED
@@ -145,13 +145,30 @@ def generate_json_pieces(input_data, parameters):
145
  logs_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["logs_data_input"]
146
  soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
147
  yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
 
 
 
 
 
 
 
 
148
 
 
 
 
 
 
 
 
 
 
149
  try:
150
  # Call OpenAI API to generate structured output based on prompt
151
  field_response = client.beta.chat.completions.parse(
152
  model=model_version, # Use GPT model that supports structured output
153
  messages=[
154
- {"role": "system", "content": "Extract the field information."},
155
  {"role": "user", "content": field_data_specification}
156
  ],
157
  response_format=FarmActivitiesLite,
@@ -160,7 +177,7 @@ def generate_json_pieces(input_data, parameters):
160
  plant_response = client.beta.chat.completions.parse(
161
  model=model_version, # Use GPT model that supports structured output
162
  messages=[
163
- {"role": "system", "content": "Extract the planting information."},
164
  {"role": "user", "content": planting_data_specification}
165
  ],
166
  response_format=PlantingLite,
@@ -169,7 +186,7 @@ def generate_json_pieces(input_data, parameters):
169
  log_response = client.beta.chat.completions.parse(
170
  model=model_version, # Use GPT model that supports structured output
171
  messages=[
172
- {"role": "system", "content": "Extract the log information."},
173
  {"role": "user", "content": logs_data_specification}
174
  ],
175
  response_format=Log,
@@ -178,7 +195,7 @@ def generate_json_pieces(input_data, parameters):
178
  soil_response = client.beta.chat.completions.parse(
179
  model=model_version, # Use GPT model that supports structured output
180
  messages=[
181
- {"role": "system", "content": "Extract the soil information."},
182
  {"role": "user", "content": soil_data_specification}
183
  ],
184
  response_format=Soil,
@@ -187,7 +204,7 @@ def generate_json_pieces(input_data, parameters):
187
  yield_response = client.beta.chat.completions.parse(
188
  model=model_version, # Use GPT model that supports structured output
189
  messages=[
190
- {"role": "system", "content": "Extract the yield information."},
191
  {"role": "user", "content": yield_data_specification}
192
  ],
193
  response_format=Yield,
@@ -231,26 +248,32 @@ def pre_processing(input_data, parameters):
231
  input_data["input_text"] = (str) input text
232
  """
233
  if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
234
- pre_processing_prompts = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
235
- one_giant_preprompt = ""
236
- input_data["input_text_pieces"]["pre_processed_pieces"] = {}
237
-
238
- for pre_prompt in pre_processing_prompts:
239
- if pre_prompt:
240
- one_giant_preprompt += pre_prompt
241
 
242
- for text_label, text_body in input_data["input_text_pieces"].items():
 
 
 
243
 
244
- response = client.chat.completions.create(
245
- model=parameters["model_version"],
246
- messages=[
247
- {"role": "system", "content": one_giant_preprompt},
248
- {"role": "user", "content": text_body}
249
- ]
250
- )
251
-
252
- response_text = response.choices[0].message.content
253
- input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
 
 
 
 
 
 
 
 
 
 
254
 
255
 
256
  if parameters["stepwise_json_creation"] == "singlejsoncreation":
@@ -418,13 +441,16 @@ def parse_survey_stack_data(data):
418
  processed_data
419
  processed_data["input_text"] = (str) the raw input text
420
  """
 
421
  processed_data = {}
422
 
423
  farm_management_inputs = data[0]['data']['group_4']
424
 
425
  processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
 
426
 
427
  if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
 
428
  processed_data["input_text_pieces"] = {}
429
  processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
430
  processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
@@ -433,6 +459,7 @@ def parse_survey_stack_data(data):
433
  processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
434
  processed_data["input_text"] = "EMPTY"
435
  elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
 
436
  processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
437
  processed_data["input_text_pieces"] = {}
438
  processed_data["input_text_pieces"]["field_data_input"] = "EMPTY"
@@ -441,5 +468,7 @@ def parse_survey_stack_data(data):
441
  processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
442
  processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
443
 
 
 
444
 
445
  return processed_data
 
145
  logs_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["logs_data_input"]
146
  soil_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["soil_data_input"]
147
  yield_data_input = input_data["input_text_pieces"]["pre_processed_pieces"]["yield_data_input"]
148
+
149
+ field_prompt = "Extract the field information."
150
+ plant_prompt = "Extract the planting information."
151
+ log_prompt = "Extract the log information."
152
+ soil_prompt = "Extract the soil information."
153
+ yield_prompt = "Extract the yield information."
154
+
155
+ # fix this part
156
 
157
+ # figure out what happens when there's
158
+ # chaining, pre-prompts, context, etc ....
159
+
160
+ #if not parameters["chaining"] and input_data["input_context"]:
161
+ # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
162
+ # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
163
+ # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
164
+ # farm_prompt = input_data["input_text_pieces"]["field_data_input"] + farm_prompt
165
+
166
  try:
167
  # Call OpenAI API to generate structured output based on prompt
168
  field_response = client.beta.chat.completions.parse(
169
  model=model_version, # Use GPT model that supports structured output
170
  messages=[
171
+ {"role": "system", "content": field_prompt},
172
  {"role": "user", "content": field_data_specification}
173
  ],
174
  response_format=FarmActivitiesLite,
 
177
  plant_response = client.beta.chat.completions.parse(
178
  model=model_version, # Use GPT model that supports structured output
179
  messages=[
180
+ {"role": "system", "content": plant_prompt},
181
  {"role": "user", "content": planting_data_specification}
182
  ],
183
  response_format=PlantingLite,
 
186
  log_response = client.beta.chat.completions.parse(
187
  model=model_version, # Use GPT model that supports structured output
188
  messages=[
189
+ {"role": "system", "content": log_prompt},
190
  {"role": "user", "content": logs_data_specification}
191
  ],
192
  response_format=Log,
 
195
  soil_response = client.beta.chat.completions.parse(
196
  model=model_version, # Use GPT model that supports structured output
197
  messages=[
198
+ {"role": "system", "content": soil_prompt},
199
  {"role": "user", "content": soil_data_specification}
200
  ],
201
  response_format=Soil,
 
204
  yield_response = client.beta.chat.completions.parse(
205
  model=model_version, # Use GPT model that supports structured output
206
  messages=[
207
+ {"role": "system", "content": yield_prompt},
208
  {"role": "user", "content": yield_data_specification}
209
  ],
210
  response_format=Yield,
 
248
  input_data["input_text"] = (str) input text
249
  """
250
  if parameters["stepwise_json_creation"] == "stepwisejsoncreation":
 
 
 
 
 
 
 
251
 
252
+ if parameters["chaining"]:
253
+ pre_processing_prompts = [parameters["context_pre_prompt"], parameters["summary_pre_prompt"], parameters["conversation_pre_prompt"], parameters["example_pre_prompt"]]
254
+ one_giant_preprompt = ""
255
+ input_data["input_text_pieces"]["pre_processed_pieces"] = {}
256
 
257
+ for pre_prompt in pre_processing_prompts:
258
+ if pre_prompt:
259
+ one_giant_preprompt += pre_prompt
260
+
261
+ for text_label, text_body in input_data["input_text_pieces"].items():
262
+
263
+ response = client.chat.completions.create(
264
+ model=parameters["model_version"],
265
+ messages=[
266
+ {"role": "system", "content": one_giant_preprompt},
267
+ {"role": "user", "content": text_body}
268
+ ]
269
+ )
270
+
271
+ response_text = response.choices[0].message.content
272
+ input_data["input_text_pieces"]["pre_processed_pieces"][text_label] = response_text
273
+ else:
274
+ input_context = f"You are processing farm activity, interactions, and trial data. Here's important context of the data {parameters['combined_prompt']}. With this context in mind, "
275
+ input_data["input_context"] = input_context
276
+ return input_data
277
 
278
 
279
  if parameters["stepwise_json_creation"] == "singlejsoncreation":
 
441
  processed_data
442
  processed_data["input_text"] = (str) the raw input text
443
  """
444
+ print("PROCESSING SURVEY STACK DATA")
445
  processed_data = {}
446
 
447
  farm_management_inputs = data[0]['data']['group_4']
448
 
449
  processed_data["stepwise_json_creation"] = data[0]['data']['stepwisejsoncreation']['value']
450
+ print("STEPWISE?: " + str(processed_data["stepwise_json_creation"]))
451
 
452
  if processed_data["stepwise_json_creation"] == "stepwisejsoncreation":
453
+ print("IN THE STEP")
454
  processed_data["input_text_pieces"] = {}
455
  processed_data["input_text_pieces"]["field_data_input"] = farm_management_inputs.get('field_data_input', {}).get('value', None)
456
  processed_data["input_text_pieces"]["planting_data_input"] = farm_management_inputs.get('planting_data_input', {}).get('value', None)
 
459
  processed_data["input_text_pieces"]["yield_data_input"] = farm_management_inputs.get('yield_data_input', {}).get('value', None)
460
  processed_data["input_text"] = "EMPTY"
461
  elif processed_data["stepwise_json_creation"] == "singlejsoncreation":
462
+ print("IN THE SINGLE")
463
  processed_data["input_text"] = data[0]['data']['onelonginputtext']['value']
464
  processed_data["input_text_pieces"] = {}
465
  processed_data["input_text_pieces"]["field_data_input"] = "EMPTY"
 
468
  processed_data["input_text_pieces"]["soil_data_input"] = "EMPTY"
469
  processed_data["input_text_pieces"]["yield_data_input"] = "EMPTY"
470
 
471
+ print("RETURNING DATA")
472
+ print(processed_data)
473
 
474
  return processed_data