Spaces:
Running
Running
File size: 2,257 Bytes
3f7e152 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import pandas as pd
from pathlib import Path
import json
from typing import List
def convert_json_to_df(json_data: str) -> pd.DataFrame:
    """
    Convert a JSON string into a pandas DataFrame.

    When the decoded document is a JSON object, the first value (in key
    order) that is a list is used as the row data. Any other document,
    including a top-level JSON array, is handed to ``pd.DataFrame`` as is.

    Args:
        json_data: JSON text to decode.

    Returns:
        A DataFrame built from the extracted list, or from the whole
        decoded document when no wrapped list is found.

    Raises:
        json.JSONDecodeError: If ``json_data`` is not valid JSON.
    """
    data = json.loads(json_data)

    # Only a JSON object can wrap the record list under a key; a top-level
    # array has no ``.values()`` and is already the row data.
    if isinstance(data, dict):
        wrapped_rows = next(
            (value for value in data.values() if isinstance(value, list)),
            None,
        )
        # Compare against None rather than truthiness so that an empty list
        # (or a list stored under an empty-string key) is still extracted.
        if wrapped_rows is not None:
            data = wrapped_rows

    return pd.DataFrame(data)
def combine_json_data_into_df(json_data_objects: List[str]) -> pd.DataFrame:
    """
    Convert several JSON strings to DataFrames and stack them row-wise.

    Each string is converted with ``convert_json_to_df`` and the resulting
    frames are concatenated with ``pd.concat`` using its default options,
    so each frame keeps its own row index in the combined result.

    Args:
        json_data_objects: JSON texts, one per source document.

    Returns:
        The concatenated DataFrame, or an empty DataFrame when
        ``json_data_objects`` is empty.
    """
    frames = [convert_json_to_df(json_text) for json_text in json_data_objects]
    if not frames:
        # pd.concat raises ValueError("No objects to concatenate") on an
        # empty sequence; an empty frame is the natural "nothing" result.
        return pd.DataFrame()
    return pd.concat(frames)
def export_as_csv(df: pd.DataFrame, output_folder: str, output_file_name: str) -> str:
    """
    Save a DataFrame as a CSV file, avoiding overwriting by incrementing filenames.

    The file is written to ``<output_folder>/<output_file_name><N>.csv``
    where ``N`` is the smallest non-negative integer for which no file
    exists yet. The row index is not written.

    Args:
        df: Frame to export.
        output_folder: Target directory; created (with parents) if missing.
        output_file_name: File name stem, without index or extension.

    Returns:
        The CSV text that was written to the file.
    """
    output_folder_path = Path(output_folder)
    if not output_folder_path.is_dir():
        print(f"Creating path {output_folder}")
        # exist_ok guards against the directory appearing between the
        # is_dir() check above and this call.
        output_folder_path.mkdir(parents=True, exist_ok=True)

    # Probe report0.csv, report1.csv, ... until an unused name is found.
    file_index = 0
    full_output_path = output_folder_path / f"{output_file_name}{file_index}.csv"
    while full_output_path.exists():
        file_index += 1
        full_output_path = output_folder_path / f"{output_file_name}{file_index}.csv"

    # Serialize once and reuse the text for both the file and the return
    # value instead of running to_csv twice.
    csv_text = df.to_csv(path_or_buf=None, index=False)
    # newline="" keeps the line terminators pandas produced untouched.
    with full_output_path.open("w", encoding="utf-8", newline="") as csv_file:
        csv_file.write(csv_text)
    print(f"Saved CSV to {full_output_path}")
    return csv_text
def export_as_json(df: pd.DataFrame, output_folder: str, output_file_name: str) -> str:
    """
    Save a DataFrame as a JSON file, avoiding overwriting by incrementing filenames.

    The frame is serialized with ``orient='records'`` (a JSON array with one
    object per row) and written to
    ``<output_folder>/<output_file_name><N>.json`` where ``N`` is the
    smallest non-negative integer for which no file exists yet.

    Args:
        df: Frame to export.
        output_folder: Target directory; created (with parents) if missing.
        output_file_name: File name stem, without index or extension.

    Returns:
        The JSON text that was written to the file.
    """
    output_folder_path = Path(output_folder)
    if not output_folder_path.is_dir():
        print(f"Creating path {output_folder}")
        # exist_ok guards against the directory appearing between the
        # is_dir() check above and this call.
        output_folder_path.mkdir(parents=True, exist_ok=True)

    # Probe dump0.json, dump1.json, ... until an unused name is found.
    file_index = 0
    full_output_path = output_folder_path / f"{output_file_name}{file_index}.json"
    while full_output_path.exists():
        file_index += 1
        full_output_path = output_folder_path / f"{output_file_name}{file_index}.json"

    # Serialize once and reuse the text for both the file and the return
    # value; `or ""` keeps the declared `str` return type even though
    # to_json is typed as possibly returning None.
    json_text = df.to_json(orient='records') or ""
    full_output_path.write_text(json_text, encoding="utf-8")
    print(f"Saved JSON to {full_output_path}")
    return json_text
|