Einstein-v6-7B / data /remove_empty_output.py
Weyaxi's picture
adding the exact data used to train this model
2fb8b40 verified
raw
history blame
284 Bytes
import json
import os
DATA_PATH = 'merged_all.json'


def drop_empty_outputs(rows):
    """Return the rows whose "output" field is not the empty string.

    Only the exact empty string is filtered out; other values (including
    None or whitespace-only strings) are kept.

    Raises:
        KeyError: if a row has no "output" key.
    """
    return [row for row in rows if row["output"] != ""]


def main(path=DATA_PATH):
    """Filter the JSON file at *path* in place, dropping empty-output rows.

    Prints the row count before and after filtering.
    """
    # JSON is UTF-8 by specification; do not rely on the locale default.
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    print(f"Normal len: {len(data)}")
    data = drop_empty_outputs(data)
    print(f"After len: {len(data)}")
    # Write to a sibling file first and swap it in afterwards, so a failure
    # while serializing cannot leave the original dataset truncated.
    # A stale "<path>.tmp" may remain if the dump itself fails.
    tmp_path = f"{path}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=1)
    os.replace(tmp_path, path)


if __name__ == "__main__":
    main()