Spaces:
Running
Running
import pandas as pd | |
from pathlib import Path | |
import json | |
from typing import List | |
def convert_json_to_df(json_data: str) -> pd.DataFrame:
    """
    Convert a JSON string into a pandas DataFrame.

    If the decoded document is a JSON object, the value of its first key
    (in document order) that holds a list is used as the record set. Any
    other document -- including one whose top level is already a list --
    is handed to ``pd.DataFrame`` unchanged.

    Args:
        json_data: JSON text to decode.

    Returns:
        A DataFrame built from the extracted list, or from the decoded
        document itself when no wrapped list is found.

    Raises:
        json.JSONDecodeError: If ``json_data`` is not valid JSON.
    """
    data = json.loads(json_data)

    # Only a dict can "wrap" a list; a top-level list has no .values() and
    # is already in the shape pd.DataFrame expects.
    if isinstance(data, dict):
        for value in data.values():
            if isinstance(value, list):
                # Returning directly avoids testing the key for truthiness,
                # so a list stored under an empty-string key is still found.
                return pd.DataFrame(value)

    return pd.DataFrame(data)
def combine_json_data_into_df(json_data_objects: List[str]) -> pd.DataFrame:
    """
    Convert several JSON strings to DataFrames and stack them row-wise.

    Each string is converted with ``convert_json_to_df`` and the results are
    concatenated in input order. Row indices of the individual frames are
    kept as-is, so the combined index may contain repeated labels.

    Args:
        json_data_objects: JSON strings, one per source document.

    Returns:
        The concatenated DataFrame, or an empty DataFrame when
        ``json_data_objects`` is empty.
    """
    frames = [convert_json_to_df(json_object) for json_object in json_data_objects]
    if not frames:
        # pd.concat raises ValueError on an empty sequence; an empty frame is
        # the natural result of combining nothing.
        return pd.DataFrame()
    return pd.concat(frames)
def export_as_csv(df: pd.DataFrame, output_folder: str, output_file_name: str) -> str:
    """
    Save a DataFrame as a CSV file, avoiding overwriting by incrementing filenames.

    The file is written to ``<output_folder>/<output_file_name><N>.csv`` where
    ``N`` is the smallest non-negative integer for which no file exists yet.
    The folder (including missing parents) is created when absent. The row
    index is not written.

    Args:
        df: Frame to export.
        output_folder: Directory that receives the file.
        output_file_name: File name stem; the numeric suffix and ``.csv``
            extension are appended.

    Returns:
        The CSV text that was written to the file.
    """
    output_folder_path = Path(output_folder)
    if not output_folder_path.is_dir():
        print(f"Creating path {output_folder}")
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        output_folder_path.mkdir(parents=True, exist_ok=True)

    # Probe name0.csv, name1.csv, ... until an unused name is found.
    file_index = 0
    full_output_path = output_folder_path / f"{output_file_name}{file_index}.csv"
    while full_output_path.exists():
        file_index += 1
        full_output_path = output_folder_path / f"{output_file_name}{file_index}.csv"

    # Serialize once and write that exact text, so the returned string is
    # guaranteed to match the file and the frame is not rendered twice.
    csv_text = df.to_csv(path_or_buf=None, index=False)
    # newline="" keeps the line terminators pandas produced untouched.
    with open(full_output_path, "w", encoding="utf-8", newline="") as handle:
        handle.write(csv_text)

    print(f"Saved CSV to {full_output_path}")
    return csv_text
def export_as_json(df: pd.DataFrame, output_folder: str, output_file_name: str) -> str:
    """
    Save a DataFrame as a JSON file, avoiding overwriting by incrementing filenames.

    The frame is serialized with ``orient='records'`` (a JSON array with one
    object per row) and written to
    ``<output_folder>/<output_file_name><N>.json`` where ``N`` is the smallest
    non-negative integer for which no file exists yet. The folder (including
    missing parents) is created when absent.

    Args:
        df: Frame to export.
        output_folder: Directory that receives the file.
        output_file_name: File name stem; the numeric suffix and ``.json``
            extension are appended.

    Returns:
        The JSON text that was written to the file.
    """
    output_folder_path = Path(output_folder)
    if not output_folder_path.is_dir():
        print(f"Creating path {output_folder}")
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        output_folder_path.mkdir(parents=True, exist_ok=True)

    # Probe name0.json, name1.json, ... until an unused name is found.
    file_index = 0
    full_output_path = output_folder_path / f"{output_file_name}{file_index}.json"
    while full_output_path.exists():
        file_index += 1
        full_output_path = output_folder_path / f"{output_file_name}{file_index}.json"

    # Serialize once and write that exact text, so the returned string is
    # guaranteed to match the file. ``or ""`` keeps the declared ``str``
    # return type, since to_json is typed as possibly returning None.
    json_text = df.to_json(orient='records') or ""
    with open(full_output_path, "w", encoding="utf-8", newline="") as handle:
        handle.write(json_text)

    print(f"Saved JSON to {full_output_path}")
    return json_text