"""Script entry point that builds the synthetic dataset artifact."""

import config
from api_wrappers import hf_data_loader
from generation_steps import synthetic_end_to_start, examples, synthetic_start_to_end


def run():
    """Run both synthesis steps over the processed rewriting dataset.

    Loads the dataset as a pandas DataFrame via ``hf_data_loader``, passes it
    through ``synthetic_end_to_start.transform`` and then
    ``synthetic_start_to_end.transform``, printing the settings of each step
    before it runs, and finally writes the resulting DataFrame as CSV to
    ``config.SYNTHETIC_DATASET_ARTIFACT``.

    Returns:
        None. The result is only written to disk.
    """
    df = hf_data_loader.load_processed_rewriting_dataset_as_pandas()

    print(f"NUMBER OF EXAMPLES PER PROMPT = {examples.N_EXAMPLES}")
    print()

    # Step 1: the end -> start step; its settings are echoed for the run log.
    print("End -> start synthesis:")
    print(f"GENERATION_MULTIPLIER = {synthetic_end_to_start.GENERATION_MULTIPLIER}")
    print(f"REL_INSERTIONS_THRESHOLD = {synthetic_end_to_start.REL_INSERTIONS_THRESHOLD}")
    print(f"GENERATION_ATTEMPTS = {synthetic_end_to_start.GENERATION_ATTEMPTS}")
    df = synthetic_end_to_start.transform(df)
    print("Done")

    # Step 2: the start -> end step, applied to the output of step 1.
    print("Start -> end synthesis:")
    print(f"GENERATION_MULTIPLIER = {synthetic_start_to_end.GENERATION_MULTIPLIER}")
    print(f"REL_DELETIONS_THRESHOLD = {synthetic_start_to_end.REL_DELETIONS_THRESHOLD}")
    print(f"GENERATION_ATTEMPTS = {synthetic_start_to_end.GENERATION_ATTEMPTS}")
    df = synthetic_start_to_end.transform(df)
    print("Done")

    # NOTE(review): to_csv is called with defaults, so the DataFrame index is
    # written as the first column - confirm downstream readers expect that.
    df.to_csv(config.SYNTHETIC_DATASET_ARTIFACT)


if __name__ == '__main__':
    run()