Spaces:
Runtime error
Runtime error
| from datasets import load_dataset | |
# Relative path of the raw conversation dump that the script converts when it
# is run directly. The space inside "Account _Balance.json" is part of the
# file name as written here.
DATA_PATH = "Account_Balance-main/Account Balance Queries/Account _Balance.json"
def dash_line(width: int = 100):
    """Print a horizontal rule made of ``width`` dash characters.

    Args:
        width: Number of ``-`` characters to print. Defaults to 100, so
            existing zero-argument calls produce the same output as before.
    """
    print("-" * width)
def transform_raw_data(file_path: str) -> list:
    """Load a JSON/JSONL conversation dump and split it into role-tagged turns.

    Every record read from the file must provide a ``topic`` field and an
    ``input`` field. The ``input`` text is split on newlines; a line that
    contains ``[|User|]`` becomes a ``"user"`` turn and a line that contains
    ``[|AI|]`` becomes an ``"AI"`` turn, with the marker removed and the
    surrounding whitespace stripped. Lines containing neither marker are
    skipped.

    Args:
        file_path: Path to a ``.json`` or ``.jsonl`` file, passed to
            ``load_dataset("json", data_files=file_path, split="train")``.

    Returns:
        A list with one dict per record, shaped as
        ``{"topic": ..., "conversation": [{"role": ..., "content": ...}, ...]}``.

    Raises:
        ValueError: If ``file_path`` does not end in ``.json`` or ``.jsonl``.
    """
    # Fail fast on unsupported files. Without this guard ``data`` would never
    # be bound and the loop below would die with an opaque UnboundLocalError.
    if not file_path.endswith((".json", ".jsonl")):
        raise ValueError(
            f"Unsupported data file {file_path!r}: expected a .json or .jsonl file"
        )

    dash_line()
    print("Loading Data")
    data = load_dataset("json", data_files=file_path, split="train")

    dash_line()
    print("Transforming Data")
    keyword_AI = "[|AI|]"
    keyword_user = "[|User|]"

    formatted_data = []
    for feature in data:
        conversation = []
        for line in feature["input"].split("\n"):
            # The user marker is checked first, so a line that happens to
            # contain both markers is attributed to the user.
            if keyword_user in line:
                conversation.append(
                    {
                        "role": "user",
                        "content": line.replace(keyword_user, "").strip(),
                    }
                )
            elif keyword_AI in line:
                conversation.append(
                    {
                        "role": "AI",
                        "content": line.replace(keyword_AI, "").strip(),
                    }
                )
        formatted_data.append(
            {"topic": feature["topic"], "conversation": conversation}
        )

    dash_line()
    print("Data transformation completed!")
    dash_line()
    return formatted_data
if __name__ == "__main__":
    # Script entry point: convert the dataset at DATA_PATH. The returned
    # list is not used here; only the progress output is shown.
    transform_raw_data(file_path=DATA_PATH)