"""De-duplicate a sweep CSV on its 'transcript' column, in place.

Loads the CSV at ``filename``, drops every row whose 'transcript' value
has already appeared earlier in the file, prints how many rows were
read / removed / kept, and writes the result back to the same path.
"""

import pandas as pd

# Input file; it is also the output file (see the final step below).
filename = 'logs/mamba_lc0lvl0_50M/anneal/ckpt_85965050g_30731830229t_pt_vs_lc0_lvl0_sweep.csv'
# Alternative inputs (commented out):
#filename = 'logs/11M/ckpt_1188012b_pt_vs_lc0_sweep.csv'
#filename = 'logs/11M/Round 1/ckpt_2608480_pt_vs_lc0_sweep.csv'

# Load the data and remember how many rows it had before filtering.
df = pd.read_csv(filename)
original_total = len(df)

# Collapse rows that share a 'transcript' value; with pandas' default
# settings the first occurrence of each value is the one retained.
df = df.drop_duplicates(subset='transcript')

# Row accounting after the filter.
remaining = len(df)
removed = original_total - remaining

# Report the counts.
print("Original total rows:", original_total)
print("Removed rows:", removed)
print("Remaining rows:", remaining)

# NOTE: this overwrites the input file -- the de-duplicated rows are
# written back to `filename`, without the DataFrame index column.
df.to_csv(filename, index=False)