Commit 603bc54
Parent(s): 78dccad
update

bigquery_uploader.py  +67 -16  CHANGED
@@ -5,6 +5,7 @@ from google.cloud.exceptions import NotFound
 import uuid
 from datetime import datetime
 
+# --- Configuration: Set your project, dataset, and table details here ---
 PROJECT_ID = "gem-creation"
 DATASET_ID = "aura_mind_glow_data"
 TABLE_ID = "farm_analysis"
@@ -13,10 +14,10 @@ def get_bigquery_client():
     """Returns an authenticated BigQuery client."""
     try:
         client = bigquery.Client(project=PROJECT_ID)
-        print("Successfully authenticated with BigQuery.")
+        print("✅ Successfully authenticated with BigQuery.")
         return client
     except Exception as e:
-        print(f"Error authenticating with BigQuery: {e}")
+        print(f"❌ Error authenticating with BigQuery: {e}")
         return None
 
 def create_dataset_if_not_exists(client):
@@ -24,13 +25,13 @@ def create_dataset_if_not_exists(client):
     dataset_id = f"{PROJECT_ID}.{DATASET_ID}"
     try:
         client.get_dataset(dataset_id)  # Make an API request.
-        print(f"Dataset {dataset_id} already exists.")
+        print(f"ℹ️ Dataset {dataset_id} already exists.")
     except NotFound:
-        print(f"Dataset {dataset_id} not found. Creating dataset...")
+        print(f"💡 Dataset {dataset_id} not found. Creating dataset...")
         dataset = bigquery.Dataset(dataset_id)
-        dataset.location = "US"
+        dataset.location = "US"  # You can change the location if needed
         dataset = client.create_dataset(dataset, timeout=30)  # Make an API request.
-        print(f"Created dataset {client.project}.{dataset.dataset_id}")
+        print(f"✅ Created dataset {client.project}.{dataset.dataset_id}")
 
 
 def create_table_if_not_exists(client):
@@ -38,9 +39,9 @@ def create_table_if_not_exists(client):
     table_id = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"
     try:
         client.get_table(table_id)  # Make an API request.
-        print(f"Table {table_id} already exists.")
+        print(f"ℹ️ Table {table_id} already exists.")
     except NotFound:
-        print(f"Table {table_id} not found. Creating table...")
+        print(f"💡 Table {table_id} not found. Creating table...")
         schema = [
             bigquery.SchemaField("analysis_id", "STRING", mode="REQUIRED"),
             bigquery.SchemaField("timestamp", "TIMESTAMP", mode="REQUIRED"),
@@ -58,13 +59,13 @@ def create_table_if_not_exists(client):
         ]
         table = bigquery.Table(table_id, schema=schema)
         table = client.create_table(table)  # Make an API request.
-        print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}")
+        print(f"✅ Created table {table.project}.{table.dataset_id}.{table.table_id}")
 
 def upload_diagnosis_to_bigquery(diagnosis_data: dict):
-    """Uploads a single diagnosis record to BigQuery."""
+    """Uploads a single diagnosis record (from a dictionary) to BigQuery."""
     client = get_bigquery_client()
     if client is None:
-        print("BigQuery client not available. Cannot upload diagnosis.")
+        print("❌ BigQuery client not available. Cannot upload diagnosis.")
         return "BigQuery client not available."
 
     create_dataset_if_not_exists(client)
@@ -72,7 +73,6 @@ def upload_diagnosis_to_bigquery(diagnosis_data: dict):
 
     table_id = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"
 
-    # Add required fields if not present
     if "analysis_id" not in diagnosis_data:
         diagnosis_data["analysis_id"] = str(uuid.uuid4())
     if "timestamp" not in diagnosis_data:
@@ -81,9 +81,60 @@ def upload_diagnosis_to_bigquery(diagnosis_data: dict):
     rows_to_insert = [diagnosis_data]
 
     errors = client.insert_rows_json(table_id, rows_to_insert)
-    if errors == []:
-        print(f"Diagnosis record {diagnosis_data.get('analysis_id')} uploaded successfully.")
+    if not errors:
+        print(f"✅ Diagnosis record {diagnosis_data.get('analysis_id')} uploaded successfully.")
         return "Diagnosis uploaded successfully."
     else:
-        print(f"Encountered errors while inserting diagnosis record: {errors}")
-        return f"Error uploading diagnosis: {errors}"
+        print(f"❌ Encountered errors while inserting diagnosis record: {errors}")
+        return f"Error uploading diagnosis: {errors}"
+
+
+def upload_csv_to_bigquery(csv_file_path: str):
+    """
+    Uploads the contents of a CSV file to the specified BigQuery table.
+
+    Args:
+        csv_file_path (str): The local path to the CSV file.
+    """
+    client = get_bigquery_client()
+    if client is None:
+        print("❌ BigQuery client not available. Cannot upload CSV.")
+        return
+
+    create_dataset_if_not_exists(client)
+    create_table_if_not_exists(client)
+
+    table_id = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"
+
+    # Configure the load job
+    job_config = bigquery.LoadJobConfig(
+        source_format=bigquery.SourceFormat.CSV,
+        skip_leading_rows=1,  # Skip the header row
+        # We REMOVE autodetect=True. The job will now use the table's existing schema.
+        write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
+    )
+
+    print(f"🚀 Starting CSV upload from '{csv_file_path}' to table '{table_id}'...")
+
+    try:
+        with open(csv_file_path, "rb") as source_file:
+            load_job = client.load_table_from_file(source_file, table_id, job_config=job_config)
+
+        load_job.result()  # Wait for the job to complete
+
+        destination_table = client.get_table(table_id)
+        # To get the number of rows uploaded in this job, we look at the job's output statistics
+        rows_uploaded = load_job.output_rows
+        print(f"✅ Job finished. Loaded {rows_uploaded} new rows. The table '{table_id}' now has a total of {destination_table.num_rows} rows.")
+        return "CSV upload successful."
+    except Exception as e:
+        print(f"❌ An error occurred during the CSV upload: {e}")
+        return f"Error during CSV upload: {e}"
+
+
+if __name__ == "__main__":
+    csv_file_to_upload = "farm_analysis_data.csv"
+
+    print("--- Running BigQuery CSV Uploader Test ---")
+    upload_csv_to_bigquery(csv_file_to_upload)
+    print("--- Test complete ---")
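A quick usage sketch for the single-record path, not part of this commit: the plant_type and diagnosis field names below are hypothetical stand-ins, since the middle of the table schema is elided in this diff.

    from bigquery_uploader import upload_diagnosis_to_bigquery

    # "analysis_id" and "timestamp" can be omitted; the function backfills the
    # id with uuid.uuid4() (the timestamp branch is partly elided in the diff)
    # before calling insert_rows_json.
    record = {
        "plant_type": "maize",       # hypothetical column
        "diagnosis": "leaf blight",  # hypothetical column
    }
    print(upload_diagnosis_to_bigquery(record))  # "Diagnosis uploaded successfully." on success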
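To verify appends independently of the script's own logging, a count query works; a minimal sketch assuming Application Default Credentials for the gem-creation project:

    from google.cloud import bigquery

    client = bigquery.Client(project="gem-creation")
    sql = "SELECT COUNT(*) AS n FROM `gem-creation.aura_mind_glow_data.farm_analysis`"
    rows = client.query(sql).result()  # blocks until the query job finishes
    print(f"farm_analysis holds {next(iter(rows)).n} rows")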
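One note on the autodetect removal: with autodetect gone and no explicit schema on the job config, the load job validates the CSV against the table's existing schema, so the file's column order has to line up with the table's declared columns. When validation fails, load_job.result() raises with a terse message, while the detailed row-level errors sit on the job object. A sketch of how one might surface them (load_table_from_file and the job's errors attribute are real client-library APIs; the standalone framing is an assumption, not part of this commit):

    from google.cloud import bigquery

    client = bigquery.Client(project="gem-creation")
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,
        write_disposition=bigquery.WriteDisposition.WRITE_APPEND,
    )
    with open("farm_analysis_data.csv", "rb") as f:
        load_job = client.load_table_from_file(
            f, "gem-creation.aura_mind_glow_data.farm_analysis", job_config=job_config
        )
    try:
        load_job.result()  # wait for the job; raises on failure
    except Exception:
        # load_job.errors carries the per-record errors BigQuery reported
        for err in load_job.errors or []:
            print(err.get("message"))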