Update script_for_automation.py
Browse files- script_for_automation.py +43 -4
script_for_automation.py
CHANGED
|
@@ -32,12 +32,15 @@ BASEROW_API_KEY = os.getenv("BASEROW_API_KEY")
|
|
| 32 |
from process_data import process_specifications
|
| 33 |
|
| 34 |
def get_baserow_url(table_id):
|
|
|
|
| 35 |
BASEROW_API_BASE = "https://baserow.f11804a1.federatedcomputer.net/api"
|
| 36 |
return f"{BASEROW_API_BASE}/database/rows/table/{table_id}/?user_field_names=true"
|
| 37 |
|
| 38 |
def get_baserow_data():
|
| 39 |
# This is to get the gold standards from baserow
|
| 40 |
# We will also get the input data
|
|
|
|
|
|
|
| 41 |
|
| 42 |
TABLE_ID = "560"
|
| 43 |
|
|
@@ -47,13 +50,17 @@ def get_baserow_data():
|
|
| 47 |
"Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
|
| 48 |
"Content-Type": "application/json"
|
| 49 |
}
|
| 50 |
-
|
|
|
|
| 51 |
try:
|
| 52 |
response = requests.get(BASEROW_URL, headers=headers)
|
|
|
|
| 53 |
response.raise_for_status()
|
| 54 |
rows = response.json()
|
| 55 |
results = rows.get("results", [])
|
| 56 |
|
|
|
|
|
|
|
| 57 |
for row in results:
|
| 58 |
print(f"Row ID: {row.get('id')}, Data: {row}")
|
| 59 |
|
|
@@ -123,12 +130,19 @@ def get_baserow_data():
|
|
| 123 |
}
|
| 124 |
}
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
return gold_standards, input_data
|
| 127 |
|
| 128 |
except requests.exceptions.RequestException as e:
|
| 129 |
print(f"Failed to fetch rows: {e}")
|
| 130 |
|
| 131 |
def get_recipes():
|
|
|
|
|
|
|
| 132 |
TABLE_ID = "578"
|
| 133 |
|
| 134 |
BASEROW_URL = get_baserow_url(TABLE_ID)
|
|
@@ -137,7 +151,8 @@ def get_recipes():
|
|
| 137 |
"Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
|
| 138 |
"Content-Type": "application/json"
|
| 139 |
}
|
| 140 |
-
|
|
|
|
| 141 |
try:
|
| 142 |
response = requests.get(BASEROW_URL, headers=headers)
|
| 143 |
response.raise_for_status()
|
|
@@ -145,6 +160,7 @@ def get_recipes():
|
|
| 145 |
results = rows.get("results", [])
|
| 146 |
|
| 147 |
my_recipes = []
|
|
|
|
| 148 |
for row in results:
|
| 149 |
print(f"Row ID: {row.get('id')}, Data: {row}")
|
| 150 |
recipe_id = row.get("Recipe ID")
|
|
@@ -173,12 +189,16 @@ def get_recipes():
|
|
| 173 |
|
| 174 |
my_recipes.append(recipe_dict)
|
| 175 |
|
|
|
|
|
|
|
|
|
|
| 176 |
return my_recipes
|
| 177 |
|
| 178 |
except requests.exceptions.RequestException as e:
|
| 179 |
print(f"Failed to fetch rows: {e}")
|
| 180 |
|
| 181 |
def fill_out_survey(recipe_dict, input_data):
|
|
|
|
| 182 |
survey_id = "673b4994aef86f0533b3546c"
|
| 183 |
|
| 184 |
base_url = "https://app.surveystack.io/api/submissions"
|
|
@@ -248,6 +268,7 @@ def fill_out_survey(recipe_dict, input_data):
|
|
| 248 |
"Content-Type": "application/json",
|
| 249 |
}
|
| 250 |
|
|
|
|
| 251 |
try:
|
| 252 |
response = requests.post(base_url, headers=headers, data=json.dumps(submission_data))
|
| 253 |
response.raise_for_status()
|
|
@@ -279,6 +300,7 @@ def get_data_ready(recipe_dict, input_data_piece):
|
|
| 279 |
# "treatments_prompt", treatments_prompt
|
| 280 |
# }
|
| 281 |
#
|
|
|
|
| 282 |
processed_data = {}
|
| 283 |
processed_data["input_style"] = 'big-block-input-text'
|
| 284 |
processed_data["input_text"] = input_data_piece
|
|
@@ -300,6 +322,7 @@ def get_data_ready(recipe_dict, input_data_piece):
|
|
| 300 |
processed_data["parameters"]["preprocessingprompt2"] = ""
|
| 301 |
processed_data["parameters"]["preprocessingprompt3"] = ""
|
| 302 |
|
|
|
|
| 303 |
return processed_data
|
| 304 |
|
| 305 |
def generate_markdown_output(df):
|
|
@@ -370,7 +393,8 @@ def generate_markdown_output(df):
|
|
| 370 |
|
| 371 |
def drive_process():
|
| 372 |
# this is to drive the processing process
|
| 373 |
-
|
|
|
|
| 374 |
# Get the data from baserow (gold standards JSON and Input data)
|
| 375 |
gold_standards, input_data = get_baserow_data()
|
| 376 |
|
|
@@ -384,12 +408,17 @@ def drive_process():
|
|
| 384 |
# "greg_summary": liz_carrot_greg_summary_preprocessing
|
| 385 |
# },
|
| 386 |
|
|
|
|
| 387 |
output_rows = []
|
| 388 |
output_folder = "output_results_" +datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 389 |
os.makedirs(output_folder, exist_ok=True)
|
| 390 |
-
|
|
|
|
| 391 |
for recipe_dict in my_recipes:
|
| 392 |
for key, input_chunks in input_data.items():
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
# Get the input data based on the recipe
|
| 395 |
if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
|
|
@@ -399,19 +428,26 @@ def drive_process():
|
|
| 399 |
else:
|
| 400 |
input_data_piece = input_chunks["raw_interview"]
|
| 401 |
|
|
|
|
|
|
|
|
|
|
| 402 |
# Fill out a Surveystack submission
|
| 403 |
fill_out_survey(recipe_dict, input_data)
|
| 404 |
|
| 405 |
# Prepare the data for the structured output setup
|
| 406 |
proc_spec = get_data_ready(recipe_dict, input_data_piece)
|
|
|
|
|
|
|
| 407 |
completed_json = process_specifications(proc_spec)
|
| 408 |
|
| 409 |
|
|
|
|
| 410 |
# Get the gold standard for this input_chunk (liz_carrot, ben_soybean, wally_squash)
|
| 411 |
# Compare the generated JSON to the gold standard
|
| 412 |
gold_standard_json = gold_standards[key]
|
| 413 |
differences = list(diff(gold_standard_json, completed_json))
|
| 414 |
|
|
|
|
| 415 |
# Convert to yaml
|
| 416 |
gold_standard_yaml = yaml.dump(gold_standard_json, default_flow_style=False)
|
| 417 |
comparison_yaml = yaml.dump(completed_json, default_flow_style=False)
|
|
@@ -438,6 +474,8 @@ def drive_process():
|
|
| 438 |
|
| 439 |
df = pd.DataFrame(output_rows)
|
| 440 |
|
|
|
|
|
|
|
| 441 |
markdown_output = generate_markdown_output(df)
|
| 442 |
recipe_folder = os.path.join(output_folder, f"recipe_{recipe_dict['recipe_id']}")
|
| 443 |
os.makedirs(recipe_folder, exist_ok=True)
|
|
@@ -460,6 +498,7 @@ def drive_process():
|
|
| 460 |
with open(differences_file, 'w') as f:
|
| 461 |
json.dump(differences, f, indent=2)
|
| 462 |
|
|
|
|
| 463 |
# Zip the entire output folder
|
| 464 |
zip_filename = f"{output_folder}.zip"
|
| 465 |
shutil.make_archive(output_folder, 'zip', output_folder)
|
|
|
|
| 32 |
from process_data import process_specifications
|
| 33 |
|
| 34 |
def get_baserow_url(table_id):
|
| 35 |
+
print("GETTING BASEROW URL")
|
| 36 |
BASEROW_API_BASE = "https://baserow.f11804a1.federatedcomputer.net/api"
|
| 37 |
return f"{BASEROW_API_BASE}/database/rows/table/{table_id}/?user_field_names=true"
|
| 38 |
|
| 39 |
def get_baserow_data():
|
| 40 |
# This is to get the gold standards from baserow
|
| 41 |
# We will also get the input data
|
| 42 |
+
|
| 43 |
+
print("GETTING BASEROW DATA")
|
| 44 |
|
| 45 |
TABLE_ID = "560"
|
| 46 |
|
|
|
|
| 50 |
"Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
|
| 51 |
"Content-Type": "application/json"
|
| 52 |
}
|
| 53 |
+
|
| 54 |
+
print("STARTING TO TRY RESPONSE REQUEST")
|
| 55 |
try:
|
| 56 |
response = requests.get(BASEROW_URL, headers=headers)
|
| 57 |
+
print("GOT")
|
| 58 |
response.raise_for_status()
|
| 59 |
rows = response.json()
|
| 60 |
results = rows.get("results", [])
|
| 61 |
|
| 62 |
+
print("PARSING ROWS NOW")
|
| 63 |
+
|
| 64 |
for row in results:
|
| 65 |
print(f"Row ID: {row.get('id')}, Data: {row}")
|
| 66 |
|
|
|
|
| 130 |
}
|
| 131 |
}
|
| 132 |
|
| 133 |
+
print("BASEROW DATA DONE GOT")
|
| 134 |
+
print("GOLD STANDARDS HERE")
|
| 135 |
+
print(gold_standards)
|
| 136 |
+
print("INPUT DATA HERE")
|
| 137 |
+
print(input_data)
|
| 138 |
return gold_standards, input_data
|
| 139 |
|
| 140 |
except requests.exceptions.RequestException as e:
|
| 141 |
print(f"Failed to fetch rows: {e}")
|
| 142 |
|
| 143 |
def get_recipes():
|
| 144 |
+
print("GETTING RECIPES FROM BASEROW NOW")
|
| 145 |
+
|
| 146 |
TABLE_ID = "578"
|
| 147 |
|
| 148 |
BASEROW_URL = get_baserow_url(TABLE_ID)
|
|
|
|
| 151 |
"Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
|
| 152 |
"Content-Type": "application/json"
|
| 153 |
}
|
| 154 |
+
|
| 155 |
+
print("TRYING TO GET A RESPONSE")
|
| 156 |
try:
|
| 157 |
response = requests.get(BASEROW_URL, headers=headers)
|
| 158 |
response.raise_for_status()
|
|
|
|
| 160 |
results = rows.get("results", [])
|
| 161 |
|
| 162 |
my_recipes = []
|
| 163 |
+
print("PARSING ROWS")
|
| 164 |
for row in results:
|
| 165 |
print(f"Row ID: {row.get('id')}, Data: {row}")
|
| 166 |
recipe_id = row.get("Recipe ID")
|
|
|
|
| 189 |
|
| 190 |
my_recipes.append(recipe_dict)
|
| 191 |
|
| 192 |
+
print("FINISHED GETTING THE RECIPE DATA")
|
| 193 |
+
print("RECIPES HERE")
|
| 194 |
+
print(my_recipes)
|
| 195 |
return my_recipes
|
| 196 |
|
| 197 |
except requests.exceptions.RequestException as e:
|
| 198 |
print(f"Failed to fetch rows: {e}")
|
| 199 |
|
| 200 |
def fill_out_survey(recipe_dict, input_data):
|
| 201 |
+
print("filling out survey")
|
| 202 |
survey_id = "673b4994aef86f0533b3546c"
|
| 203 |
|
| 204 |
base_url = "https://app.surveystack.io/api/submissions"
|
|
|
|
| 268 |
"Content-Type": "application/json",
|
| 269 |
}
|
| 270 |
|
| 271 |
+
print("GETTING SURVEY RESPONSE")
|
| 272 |
try:
|
| 273 |
response = requests.post(base_url, headers=headers, data=json.dumps(submission_data))
|
| 274 |
response.raise_for_status()
|
|
|
|
| 300 |
# "treatments_prompt", treatments_prompt
|
| 301 |
# }
|
| 302 |
#
|
| 303 |
+
print("GETTING DATA READY")
|
| 304 |
processed_data = {}
|
| 305 |
processed_data["input_style"] = 'big-block-input-text'
|
| 306 |
processed_data["input_text"] = input_data_piece
|
|
|
|
| 322 |
processed_data["parameters"]["preprocessingprompt2"] = ""
|
| 323 |
processed_data["parameters"]["preprocessingprompt3"] = ""
|
| 324 |
|
| 325 |
+
print("DID THAT NOW")
|
| 326 |
return processed_data
|
| 327 |
|
| 328 |
def generate_markdown_output(df):
|
|
|
|
| 393 |
|
| 394 |
def drive_process():
|
| 395 |
# this is to drive the processing process
|
| 396 |
+
print("We are starting to DRIVE PROCESS")
|
| 397 |
+
|
| 398 |
# Get the data from baserow (gold standards JSON and Input data)
|
| 399 |
gold_standards, input_data = get_baserow_data()
|
| 400 |
|
|
|
|
| 408 |
# "greg_summary": liz_carrot_greg_summary_preprocessing
|
| 409 |
# },
|
| 410 |
|
| 411 |
+
print("Making the OUTPUT STUFF")
|
| 412 |
output_rows = []
|
| 413 |
output_folder = "output_results_" +datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 414 |
os.makedirs(output_folder, exist_ok=True)
|
| 415 |
+
|
| 416 |
+
print("GOING THROUGH RECIPES NOW")
|
| 417 |
for recipe_dict in my_recipes:
|
| 418 |
for key, input_chunks in input_data.items():
|
| 419 |
+
print("RECIPE INFO")
|
| 420 |
+
print(key)
|
| 421 |
+
print(recipe_dict["recipe_id"])
|
| 422 |
|
| 423 |
# Get the input data based on the recipe
|
| 424 |
if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
|
|
|
|
| 428 |
else:
|
| 429 |
input_data_piece = input_chunks["raw_interview"]
|
| 430 |
|
| 431 |
+
print("DECIDED INPUT DATA")
|
| 432 |
+
print(input_data_piece)
|
| 433 |
+
|
| 434 |
# Fill out a Surveystack submission
|
| 435 |
fill_out_survey(recipe_dict, input_data)
|
| 436 |
|
| 437 |
# Prepare the data for the structured output setup
|
| 438 |
proc_spec = get_data_ready(recipe_dict, input_data_piece)
|
| 439 |
+
|
| 440 |
+
print("PROCESSING SPECIFICATIONS!!!!!!!!!!!!!!!")
|
| 441 |
completed_json = process_specifications(proc_spec)
|
| 442 |
|
| 443 |
|
| 444 |
+
print("Gold Standard diff and stuff")
|
| 445 |
# Get the gold standard for this input_chunk (liz_carrot, ben_soybean, wally_squash)
|
| 446 |
# Compare the generated JSON to the gold standard
|
| 447 |
gold_standard_json = gold_standards[key]
|
| 448 |
differences = list(diff(gold_standard_json, completed_json))
|
| 449 |
|
| 450 |
+
print("yaml world")
|
| 451 |
# Convert to yaml
|
| 452 |
gold_standard_yaml = yaml.dump(gold_standard_json, default_flow_style=False)
|
| 453 |
comparison_yaml = yaml.dump(completed_json, default_flow_style=False)
|
|
|
|
| 474 |
|
| 475 |
df = pd.DataFrame(output_rows)
|
| 476 |
|
| 477 |
+
print("dataframe done now onto markdown")
|
| 478 |
+
|
| 479 |
markdown_output = generate_markdown_output(df)
|
| 480 |
recipe_folder = os.path.join(output_folder, f"recipe_{recipe_dict['recipe_id']}")
|
| 481 |
os.makedirs(recipe_folder, exist_ok=True)
|
|
|
|
| 498 |
with open(differences_file, 'w') as f:
|
| 499 |
json.dump(differences, f, indent=2)
|
| 500 |
|
| 501 |
+
print("ZIPPING UP WHOLE THING")
|
| 502 |
# Zip the entire output folder
|
| 503 |
zip_filename = f"{output_folder}.zip"
|
| 504 |
shutil.make_archive(output_folder, 'zip', output_folder)
|