image-eval / utils /data_handling.py
VOIDER's picture
Upload 11 files
f89e218 verified
"""
Utility functions for data handling and export.
"""
import os
import json
import csv
import pandas as pd
from datetime import datetime
def save_json(data, file_path):
    """
    Save data to a JSON file.

    The data is serialized to a string before the target file is opened, so
    a serialization failure (for example a value the json module cannot
    encode) does not truncate or partially overwrite an existing file.

    Args:
        data: data to save (must be JSON-serializable)
        file_path: path to the output file

    Returns:
        bool: True if successful, False otherwise (the error is printed)
    """
    try:
        # Serialize first: opening with 'w' truncates the file immediately,
        # so encoding errors must surface before the file is touched.
        payload = json.dumps(data, indent=2, ensure_ascii=False)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        return True
    except Exception as e:
        print(f"Error saving JSON: {e}")
        return False
def load_json(file_path):
    """
    Read a UTF-8 encoded JSON file and return its decoded content.

    Args:
        file_path: path to the JSON file

    Returns:
        The decoded JSON value, or None if the file could not be opened or
        parsed (the error is printed).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            parsed = json.load(source)
    except Exception as err:
        print(f"Error loading JSON: {err}")
        return None
    else:
        return parsed
def save_csv(data, file_path, headers=None):
    """
    Save data to a CSV file.

    The first element of ``data`` decides how rows are written: if it is a
    dict, rows are written with ``csv.DictWriter``; otherwise they are
    written as plain sequences with ``csv.writer``.

    Args:
        data: list of dictionaries or list of lists
        file_path: path to the output file
        headers: optional list of column headers. For dictionaries, when
            omitted, the headers are the union of the keys of all rows in
            first-seen order, so rows may carry keys the first row lacks
            (missing values are written as empty cells). When given
            explicitly, a row with a key outside ``headers`` is an error.

    Returns:
        bool: True if successful, False otherwise (the error is printed).
        Empty or non-list ``data`` writes nothing and still returns True.
    """
    try:
        if isinstance(data, list) and len(data) > 0:
            if isinstance(data[0], dict):
                # List of dictionaries
                if headers is None:
                    # Ordered union of keys across every row; using only the
                    # first row's keys would make DictWriter reject any later
                    # row that has an additional key.
                    headers = list(
                        dict.fromkeys(key for row in data for key in row)
                    )
                with open(file_path, 'w', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=headers)
                    writer.writeheader()
                    writer.writerows(data)
            else:
                # List of lists
                with open(file_path, 'w', newline='', encoding='utf-8') as f:
                    writer = csv.writer(f)
                    if headers:
                        writer.writerow(headers)
                    writer.writerows(data)
        return True
    except Exception as e:
        print(f"Error saving CSV: {e}")
        return False
def dataframe_to_formats(df, base_path, formats=None):
    """
    Export a pandas DataFrame to multiple formats.

    Each format is exported independently: if one export fails, the error
    is printed and the remaining formats are still attempted.

    Args:
        df: pandas DataFrame
        base_path: base path for output files (without extension)
        formats: list of formats to export to ('csv', 'excel', 'html', 'json').
            Defaults to ['csv', 'excel', 'html']. Unsupported names are
            reported and skipped.

    Returns:
        dict: dictionary with format names as keys and file paths as values,
        containing only the formats that were exported successfully
    """
    if formats is None:
        formats = ['csv', 'excel', 'html']

    # Supported format name -> (file extension, function writing df to a path).
    exporters = {
        'csv': ('.csv', lambda path: df.to_csv(path)),
        'excel': ('.xlsx', lambda path: df.to_excel(path)),
        'html': ('.html', lambda path: df.to_html(path)),
        'json': (
            '.json',
            lambda path: df.to_json(path, orient='records', indent=2),
        ),
    }

    result = {}
    try:
        requested = list(formats)
    except Exception as e:
        # A non-iterable ``formats`` is reported like any other export error.
        print(f"Error exporting DataFrame: {e}")
        return result

    for fmt in requested:
        # One try per format so that a single failure (e.g. a missing Excel
        # engine) does not prevent the other formats from being written.
        try:
            if fmt not in exporters:
                print(f"Unsupported export format: {fmt}")
                continue
            extension, write = exporters[fmt]
            file_path = f"{base_path}{extension}"
            write(file_path)
            result[fmt] = file_path
        except Exception as e:
            print(f"Error exporting DataFrame: {e}")
    return result
def generate_timestamp():
    """
    Build a compact timestamp from the current local time, suitable for
    embedding in file names.

    Returns:
        str: the current time formatted as YYYYMMDD_HHMMSS
    """
    current_moment = datetime.now()
    stamp = current_moment.strftime("%Y%m%d_%H%M%S")
    return stamp
def create_results_filename(prefix="evaluation", extension=""):
    """
    Build a timestamped file name for a results file.

    Args:
        prefix: leading part of the file name
        extension: file extension; a leading dot is added when missing.
            When empty, no extension is appended.

    Returns:
        str: "<prefix>_<timestamp>" followed by the extension, if any
    """
    stem = f"{prefix}_{generate_timestamp()}"

    # No extension requested: the bare stem is the file name.
    if not extension:
        return stem

    suffix = extension if extension.startswith('.') else f".{extension}"
    return f"{stem}{suffix}"