Spaces:
Sleeping
Sleeping
| import gspread | |
| import logging | |
| import numpy as np | |
| import os, sys | |
| import pandas as pd | |
| import requests | |
| import subprocess | |
| from oauth2client.service_account import ServiceAccountCredentials | |
| # Set up logging | |
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
| def read_existing_data(file_path): | |
| """Reads existing data from a CSV file to compare with newly fetched data.""" | |
| if os.path.exists(file_path): | |
| logging.info("Existing data file found. Reading data...") | |
| e_df = pd.read_csv(file_path) | |
| logging.info(f"Data read successfully.") | |
| return e_df | |
| else: | |
| logging.info("No existing data file found.") | |
| return pd.DataFrame() | |
| def download_sheet(sheet_id, range_name): | |
| """Downloads data from Google Sheets and returns a DataFrame.""" | |
| try: | |
| # Define the scope | |
| scope = ['https://www.googleapis.com/auth/spreadsheets'] | |
| # Add credentials to the account | |
| creds = ServiceAccountCredentials.from_json_keyfile_name('omoku-analysis-cred_key.json', scope) | |
| # Authorize the clientsheet | |
| client = gspread.authorize(creds) | |
| # Get the instance of the Spreadsheet | |
| sheet = client.open_by_key(sheet_id) | |
| # Get the sheet by name | |
| worksheet = sheet.get_worksheet(0) | |
| # Get all records of the data | |
| data = worksheet.get_all_records() | |
| # Convert to DataFrame | |
| df = pd.DataFrame(data).astype(str) | |
| df.replace('', np.nan, inplace=True) | |
| df = df.astype({'Power_time': 'float', 'Outages':'float'}) | |
| logging.info(f"New data downloaded successfully.") | |
| return df | |
| except gspread.exceptions.APIError as e: | |
| logging.error(f"API error occurred: {e}") | |
| raise | |
| except requests.exceptions.RequestException as e: | |
| logging.error(f"Request failed: {e}") | |
| raise | |
| def save_to_csv(df, file_path): | |
| if df.empty: | |
| logging.error("No data to save: DataFrame is empty.") | |
| raise ValueError("Empty DataFrame") | |
| try: | |
| df.to_csv(file_path, index=False) | |
| logging.info(f"Data saved successfully.") | |
| except Exception as e: | |
| logging.error(f"Failed to save data: {e}") | |
| raise | |
| def git_commit_push(): | |
| """Sets git configurations, commits, and pushes updated CSV file to GitHub.""" | |
| try: | |
| subprocess.run(['git', 'config', '--global', 'user.name', 'github-actions'], check=True) | |
| subprocess.run(['git', 'config', '--global', 'user.email', 'github-actions@github.com'], check=True) | |
| subprocess.run(['git', 'add', 'omoku_data.csv'], check=True) | |
| subprocess.run(['git', 'commit', '-m', 'Update dataset'], check=True) | |
| subprocess.run(['git', 'push'], check=True) | |
| logging.info(f"Data updated successfully.") | |
| except subprocess.CalledProcessError as e: | |
| if "nothing to commit" in str(e.stderr): | |
| logging.info("No changes to commit.") | |
| else: | |
| logging.error(f"Failed to commit or push changes: {e}") | |
| raise | |
| if __name__ == "__main__": | |
| try: | |
| SHEET_ID = '1dVa6SGm1j-z20NUDUlWSJgQffXzvJZ_a33wT_O5EOUk' | |
| RANGE_NAME = 'data' | |
| FILE_PATH = 'omoku_data.csv' | |
| existing_data = read_existing_data(FILE_PATH) | |
| new_data = download_sheet(SHEET_ID, RANGE_NAME) | |
| if new_data.equals(existing_data): | |
| logging.info("No new entry for new data. Nothing to update.") | |
| sys.exit(0) | |
| else: | |
| save_to_csv(new_data, FILE_PATH) | |
| git_commit_push() | |
| except Exception as e: | |
| logging.critical(f"An unexpected error occurred: {e}") | |
| raise | |