Update script_for_automation.py
Browse files- script_for_automation.py +43 -4
script_for_automation.py
CHANGED
|
@@ -32,12 +32,15 @@ BASEROW_API_KEY = os.getenv("BASEROW_API_KEY")
|
|
| 32 |
from process_data import process_specifications
|
| 33 |
|
| 34 |
def get_baserow_url(table_id):
|
|
|
|
| 35 |
BASEROW_API_BASE = "https://baserow.f11804a1.federatedcomputer.net/api"
|
| 36 |
return f"{BASEROW_API_BASE}/database/rows/table/{table_id}/?user_field_names=true"
|
| 37 |
|
| 38 |
def get_baserow_data():
|
| 39 |
# This is to get the gold standards from baserow
|
| 40 |
# We will also get the input data
|
|
|
|
|
|
|
| 41 |
|
| 42 |
TABLE_ID = "560"
|
| 43 |
|
|
@@ -47,13 +50,17 @@ def get_baserow_data():
|
|
| 47 |
"Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
|
| 48 |
"Content-Type": "application/json"
|
| 49 |
}
|
| 50 |
-
|
|
|
|
| 51 |
try:
|
| 52 |
response = requests.get(BASEROW_URL, headers=headers)
|
|
|
|
| 53 |
response.raise_for_status()
|
| 54 |
rows = response.json()
|
| 55 |
results = rows.get("results", [])
|
| 56 |
|
|
|
|
|
|
|
| 57 |
for row in results:
|
| 58 |
print(f"Row ID: {row.get('id')}, Data: {row}")
|
| 59 |
|
|
@@ -123,12 +130,19 @@ def get_baserow_data():
|
|
| 123 |
}
|
| 124 |
}
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
return gold_standards, input_data
|
| 127 |
|
| 128 |
except requests.exceptions.RequestException as e:
|
| 129 |
print(f"Failed to fetch rows: {e}")
|
| 130 |
|
| 131 |
def get_recipes():
|
|
|
|
|
|
|
| 132 |
TABLE_ID = "578"
|
| 133 |
|
| 134 |
BASEROW_URL = get_baserow_url(TABLE_ID)
|
|
@@ -137,7 +151,8 @@ def get_recipes():
|
|
| 137 |
"Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
|
| 138 |
"Content-Type": "application/json"
|
| 139 |
}
|
| 140 |
-
|
|
|
|
| 141 |
try:
|
| 142 |
response = requests.get(BASEROW_URL, headers=headers)
|
| 143 |
response.raise_for_status()
|
|
@@ -145,6 +160,7 @@ def get_recipes():
|
|
| 145 |
results = rows.get("results", [])
|
| 146 |
|
| 147 |
my_recipes = []
|
|
|
|
| 148 |
for row in results:
|
| 149 |
print(f"Row ID: {row.get('id')}, Data: {row}")
|
| 150 |
recipe_id = row.get("Recipe ID")
|
|
@@ -173,12 +189,16 @@ def get_recipes():
|
|
| 173 |
|
| 174 |
my_recipes.append(recipe_dict)
|
| 175 |
|
|
|
|
|
|
|
|
|
|
| 176 |
return my_recipes
|
| 177 |
|
| 178 |
except requests.exceptions.RequestException as e:
|
| 179 |
print(f"Failed to fetch rows: {e}")
|
| 180 |
|
| 181 |
def fill_out_survey(recipe_dict, input_data):
|
|
|
|
| 182 |
survey_id = "673b4994aef86f0533b3546c"
|
| 183 |
|
| 184 |
base_url = "https://app.surveystack.io/api/submissions"
|
|
@@ -248,6 +268,7 @@ def fill_out_survey(recipe_dict, input_data):
|
|
| 248 |
"Content-Type": "application/json",
|
| 249 |
}
|
| 250 |
|
|
|
|
| 251 |
try:
|
| 252 |
response = requests.post(base_url, headers=headers, data=json.dumps(submission_data))
|
| 253 |
response.raise_for_status()
|
|
@@ -279,6 +300,7 @@ def get_data_ready(recipe_dict, input_data_piece):
|
|
| 279 |
# "treatments_prompt", treatments_prompt
|
| 280 |
# }
|
| 281 |
#
|
|
|
|
| 282 |
processed_data = {}
|
| 283 |
processed_data["input_style"] = 'big-block-input-text'
|
| 284 |
processed_data["input_text"] = input_data_piece
|
|
@@ -300,6 +322,7 @@ def get_data_ready(recipe_dict, input_data_piece):
|
|
| 300 |
processed_data["parameters"]["preprocessingprompt2"] = ""
|
| 301 |
processed_data["parameters"]["preprocessingprompt3"] = ""
|
| 302 |
|
|
|
|
| 303 |
return processed_data
|
| 304 |
|
| 305 |
def generate_markdown_output(df):
|
|
@@ -370,7 +393,8 @@ def generate_markdown_output(df):
|
|
| 370 |
|
| 371 |
def drive_process():
|
| 372 |
# this is to drive the processing process
|
| 373 |
-
|
|
|
|
| 374 |
# Get the data from baserow (gold standards JSON and Input data)
|
| 375 |
gold_standards, input_data = get_baserow_data()
|
| 376 |
|
|
@@ -384,12 +408,17 @@ def drive_process():
|
|
| 384 |
# "greg_summary": liz_carrot_greg_summary_preprocessing
|
| 385 |
# },
|
| 386 |
|
|
|
|
| 387 |
output_rows = []
|
| 388 |
output_folder = "output_results_" +datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 389 |
os.makedirs(output_folder, exist_ok=True)
|
| 390 |
-
|
|
|
|
| 391 |
for recipe_dict in my_recipes:
|
| 392 |
for key, input_chunks in input_data.items():
|
|
|
|
|
|
|
|
|
|
| 393 |
|
| 394 |
# Get the input data based on the recipe
|
| 395 |
if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
|
|
@@ -399,19 +428,26 @@ def drive_process():
|
|
| 399 |
else:
|
| 400 |
input_data_piece = input_chunks["raw_interview"]
|
| 401 |
|
|
|
|
|
|
|
|
|
|
| 402 |
# Fill out a Surveystack submission
|
| 403 |
fill_out_survey(recipe_dict, input_data)
|
| 404 |
|
| 405 |
# Prepare the data for the structured output setup
|
| 406 |
proc_spec = get_data_ready(recipe_dict, input_data_piece)
|
|
|
|
|
|
|
| 407 |
completed_json = process_specifications(proc_spec)
|
| 408 |
|
| 409 |
|
|
|
|
| 410 |
# Get the gold standard for this input_chunk (liz_carrot, ben_soybean, wally_squash)
|
| 411 |
# Compare the generated JSON to the gold standard
|
| 412 |
gold_standard_json = gold_standards[key]
|
| 413 |
differences = list(diff(gold_standard_json, completed_json))
|
| 414 |
|
|
|
|
| 415 |
# Convert to yaml
|
| 416 |
gold_standard_yaml = yaml.dump(gold_standard_json, default_flow_style=False)
|
| 417 |
comparison_yaml = yaml.dump(completed_json, default_flow_style=False)
|
|
@@ -438,6 +474,8 @@ def drive_process():
|
|
| 438 |
|
| 439 |
df = pd.DataFrame(output_rows)
|
| 440 |
|
|
|
|
|
|
|
| 441 |
markdown_output = generate_markdown_output(df)
|
| 442 |
recipe_folder = os.path.join(output_folder, f"recipe_{recipe_dict['recipe_id']}")
|
| 443 |
os.makedirs(recipe_folder, exist_ok=True)
|
|
@@ -460,6 +498,7 @@ def drive_process():
|
|
| 460 |
with open(differences_file, 'w') as f:
|
| 461 |
json.dump(differences, f, indent=2)
|
| 462 |
|
|
|
|
| 463 |
# Zip the entire output folder
|
| 464 |
zip_filename = f"{output_folder}.zip"
|
| 465 |
shutil.make_archive(output_folder, 'zip', output_folder)
|
|
|
|
| 32 |
from process_data import process_specifications
|
| 33 |
|
| 34 |
def get_baserow_url(table_id):
|
| 35 |
+
print("GETTING BASEROW URL")
|
| 36 |
BASEROW_API_BASE = "https://baserow.f11804a1.federatedcomputer.net/api"
|
| 37 |
return f"{BASEROW_API_BASE}/database/rows/table/{table_id}/?user_field_names=true"
|
| 38 |
|
| 39 |
def get_baserow_data():
|
| 40 |
# This is to get the gold standards from baserow
|
| 41 |
# We will also get the input data
|
| 42 |
+
|
| 43 |
+
print("GETTING BASEROW DATA")
|
| 44 |
|
| 45 |
TABLE_ID = "560"
|
| 46 |
|
|
|
|
| 50 |
"Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
|
| 51 |
"Content-Type": "application/json"
|
| 52 |
}
|
| 53 |
+
|
| 54 |
+
print("STARTING TO TRY RESPONSE REQUEST")
|
| 55 |
try:
|
| 56 |
response = requests.get(BASEROW_URL, headers=headers)
|
| 57 |
+
print("GOT")
|
| 58 |
response.raise_for_status()
|
| 59 |
rows = response.json()
|
| 60 |
results = rows.get("results", [])
|
| 61 |
|
| 62 |
+
print("PARSING ROWS NOW")
|
| 63 |
+
|
| 64 |
for row in results:
|
| 65 |
print(f"Row ID: {row.get('id')}, Data: {row}")
|
| 66 |
|
|
|
|
| 130 |
}
|
| 131 |
}
|
| 132 |
|
| 133 |
+
print("BASEROW DATA DONE GOT")
|
| 134 |
+
print("GOLD STANDARDS HERE")
|
| 135 |
+
print(gold_standards)
|
| 136 |
+
print("INPUT DATA HERE")
|
| 137 |
+
print(input_data)
|
| 138 |
return gold_standards, input_data
|
| 139 |
|
| 140 |
except requests.exceptions.RequestException as e:
|
| 141 |
print(f"Failed to fetch rows: {e}")
|
| 142 |
|
| 143 |
def get_recipes():
|
| 144 |
+
print("GETTING RECIPES FROM BASEROW NOW")
|
| 145 |
+
|
| 146 |
TABLE_ID = "578"
|
| 147 |
|
| 148 |
BASEROW_URL = get_baserow_url(TABLE_ID)
|
|
|
|
| 151 |
"Authorization": f"Token {os.environ['BASEROW_API_KEY']}",
|
| 152 |
"Content-Type": "application/json"
|
| 153 |
}
|
| 154 |
+
|
| 155 |
+
print("TRYING TO GET A RESPONSE")
|
| 156 |
try:
|
| 157 |
response = requests.get(BASEROW_URL, headers=headers)
|
| 158 |
response.raise_for_status()
|
|
|
|
| 160 |
results = rows.get("results", [])
|
| 161 |
|
| 162 |
my_recipes = []
|
| 163 |
+
print("PARSING ROWS")
|
| 164 |
for row in results:
|
| 165 |
print(f"Row ID: {row.get('id')}, Data: {row}")
|
| 166 |
recipe_id = row.get("Recipe ID")
|
|
|
|
| 189 |
|
| 190 |
my_recipes.append(recipe_dict)
|
| 191 |
|
| 192 |
+
print("FINISHED GETTING THE RECIPE DATA")
|
| 193 |
+
print("RECIPES HERE")
|
| 194 |
+
print(my_recipes)
|
| 195 |
return my_recipes
|
| 196 |
|
| 197 |
except requests.exceptions.RequestException as e:
|
| 198 |
print(f"Failed to fetch rows: {e}")
|
| 199 |
|
| 200 |
def fill_out_survey(recipe_dict, input_data):
|
| 201 |
+
print("filling out survey")
|
| 202 |
survey_id = "673b4994aef86f0533b3546c"
|
| 203 |
|
| 204 |
base_url = "https://app.surveystack.io/api/submissions"
|
|
|
|
| 268 |
"Content-Type": "application/json",
|
| 269 |
}
|
| 270 |
|
| 271 |
+
print("GETTING SURVEY RESPONSE")
|
| 272 |
try:
|
| 273 |
response = requests.post(base_url, headers=headers, data=json.dumps(submission_data))
|
| 274 |
response.raise_for_status()
|
|
|
|
| 300 |
# "treatments_prompt", treatments_prompt
|
| 301 |
# }
|
| 302 |
#
|
| 303 |
+
print("GETTING DATA READY")
|
| 304 |
processed_data = {}
|
| 305 |
processed_data["input_style"] = 'big-block-input-text'
|
| 306 |
processed_data["input_text"] = input_data_piece
|
|
|
|
| 322 |
processed_data["parameters"]["preprocessingprompt2"] = ""
|
| 323 |
processed_data["parameters"]["preprocessingprompt3"] = ""
|
| 324 |
|
| 325 |
+
print("DID THAT NOW")
|
| 326 |
return processed_data
|
| 327 |
|
| 328 |
def generate_markdown_output(df):
|
|
|
|
| 393 |
|
| 394 |
def drive_process():
|
| 395 |
# this is to drive the processing process
|
| 396 |
+
print("We are starting to DRIVE PROCESS")
|
| 397 |
+
|
| 398 |
# Get the data from baserow (gold standards JSON and Input data)
|
| 399 |
gold_standards, input_data = get_baserow_data()
|
| 400 |
|
|
|
|
| 408 |
# "greg_summary": liz_carrot_greg_summary_preprocessing
|
| 409 |
# },
|
| 410 |
|
| 411 |
+
print("Making the OUTPUT STUFF")
|
| 412 |
output_rows = []
|
| 413 |
output_folder = "output_results_" +datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 414 |
os.makedirs(output_folder, exist_ok=True)
|
| 415 |
+
|
| 416 |
+
print("GOING THROUGH RECIPES NOW")
|
| 417 |
for recipe_dict in my_recipes:
|
| 418 |
for key, input_chunks in input_data.items():
|
| 419 |
+
print("RECIPE INFO")
|
| 420 |
+
print(key)
|
| 421 |
+
print(recipe_dict["recipe_id"])
|
| 422 |
|
| 423 |
# Get the input data based on the recipe
|
| 424 |
if recipe_dict["pre_processing_strategy"] == "Otter.ai Summary":
|
|
|
|
| 428 |
else:
|
| 429 |
input_data_piece = input_chunks["raw_interview"]
|
| 430 |
|
| 431 |
+
print("DECIDED INPUT DATA")
|
| 432 |
+
print(input_data_piece)
|
| 433 |
+
|
| 434 |
# Fill out a Surveystack submission
|
| 435 |
fill_out_survey(recipe_dict, input_data)
|
| 436 |
|
| 437 |
# Prepare the data for the structured output setup
|
| 438 |
proc_spec = get_data_ready(recipe_dict, input_data_piece)
|
| 439 |
+
|
| 440 |
+
print("PROCESSING SPECIFICATIONS!!!!!!!!!!!!!!!")
|
| 441 |
completed_json = process_specifications(proc_spec)
|
| 442 |
|
| 443 |
|
| 444 |
+
print("Gold Standard diff and stuff")
|
| 445 |
# Get the gold standard for this input_chunk (liz_carrot, ben_soybean, wally_squash)
|
| 446 |
# Compare the generated JSON to the gold standard
|
| 447 |
gold_standard_json = gold_standards[key]
|
| 448 |
differences = list(diff(gold_standard_json, completed_json))
|
| 449 |
|
| 450 |
+
print("yaml world")
|
| 451 |
# Convert to yaml
|
| 452 |
gold_standard_yaml = yaml.dump(gold_standard_json, default_flow_style=False)
|
| 453 |
comparison_yaml = yaml.dump(completed_json, default_flow_style=False)
|
|
|
|
| 474 |
|
| 475 |
df = pd.DataFrame(output_rows)
|
| 476 |
|
| 477 |
+
print("dataframe done now onto markdown")
|
| 478 |
+
|
| 479 |
markdown_output = generate_markdown_output(df)
|
| 480 |
recipe_folder = os.path.join(output_folder, f"recipe_{recipe_dict['recipe_id']}")
|
| 481 |
os.makedirs(recipe_folder, exist_ok=True)
|
|
|
|
| 498 |
with open(differences_file, 'w') as f:
|
| 499 |
json.dump(differences, f, indent=2)
|
| 500 |
|
| 501 |
+
print("ZIPPING UP WHOLE THING")
|
| 502 |
# Zip the entire output folder
|
| 503 |
zip_filename = f"{output_folder}.zip"
|
| 504 |
shutil.make_archive(output_folder, 'zip', output_folder)
|