salverz's picture
Update file structure
4b3616f
import pandas as pd
from pathlib import Path
import json
from typing import List
def convert_json_to_df(json_data: str) -> pd.DataFrame:
"""
Convert a JSON string into a pandas DataFrame.
Automatically extracts the first top-level list if present.
"""
data = json.loads(json_data)
# Try to extract the list of transactions if it's wrapped
list_name = None
for key, value in data.items():
if isinstance(value, list):
list_name = key
break
if list_name:
data = data[list_name]
return pd.DataFrame(data)
def combine_json_data_into_df(json_data_objects: List[str]) -> pd.DataFrame:
json_dfs = list()
for json_object in json_data_objects:
json_dfs.append(convert_json_to_df(json_object))
return pd.concat(json_dfs)
def export_as_csv(df: pd.DataFrame, output_folder: str, output_file_name: str) -> str:
"""
Save a DataFrame as a CSV file, avoiding overwriting by incrementing filenames.
"""
output_folder_path = Path(output_folder)
if not output_folder_path.is_dir():
print(f"Creating path {output_folder}")
output_folder_path.mkdir(parents=True)
file_index = 0
while True:
full_output_path = output_folder_path / f"{output_file_name}{file_index}.csv"
if not full_output_path.exists():
break
file_index += 1
df.to_csv(full_output_path, index=False)
print(f"Saved CSV to {full_output_path}")
return df.to_csv(path_or_buf=None, index=False)
def export_as_json(df: pd.DataFrame, output_folder: str, output_file_name: str) -> str:
"""
Save raw JSON string to a file, avoiding overwriting by incrementing filenames.
"""
output_folder_path = Path(output_folder)
if not output_folder_path.is_dir():
print(f"Creating path {output_folder}")
output_folder_path.mkdir(parents=True)
file_index = 0
while True:
full_output_path = output_folder_path / f"{output_file_name}{file_index}.json"
if not full_output_path.exists():
break
file_index += 1
df.to_json(full_output_path, orient='records')
print(f"Saved JSON to {full_output_path}")
return df.to_json(orient='records') or ""