import os
import glob
import argparse
import sys

from code_efficiency_calculator import run_model_task


def _read_data_rows(dat_file_path):
    """Return the whitespace-split fields of every sample line in a profile.

    The first line of the file is skipped, and any line containing
    ``"__main__."`` is ignored (presumably profiler function markers rather
    than samples -- confirm against the tool that writes the ``.dat`` files).

    Raises:
        StopIteration: if the file is completely empty (no first line).
        OSError: if the file cannot be opened.
    """
    with open(dat_file_path, 'r') as file:
        next(file)
        return [line.split() for line in file if "__main__." not in line]


def calculate_memory_usage(dat_file_path):
    """Integrate memory over time for one ``.dat`` profile.

    Field 1 of each sample line is read as memory (MB) and field 2 as a
    timestamp (seconds). Consecutive samples are joined with the trapezoid
    rule, so the result is in MB*s. Returns ``0`` when the profile has fewer
    than two samples.
    """
    prev_time = 0
    prev_mem_mb = 0
    mem_time_mb_s = 0
    for parts in _read_data_rows(dat_file_path):
        mem_in_mb = float(parts[1])
        timestamp = float(parts[2])
        # The first sample only seeds the previous point; a non-positive
        # timestamp is treated as "no previous sample yet".
        if prev_time > 0:
            mem_time_mb_s += (prev_mem_mb + mem_in_mb) / 2 * (timestamp - prev_time)
        prev_time = timestamp
        prev_mem_mb = mem_in_mb
    return mem_time_mb_s


def calculate_runtime(dat_file_path):
    """Return the span between the earliest and latest sample timestamps.

    The result is clamped to be non-negative, so a profile with no sample
    lines yields ``0``.
    """
    start_time = float("inf")
    end_time = float("-inf")
    for parts in _read_data_rows(dat_file_path):
        timestamp = float(parts[2])
        start_time = min(start_time, timestamp)
        end_time = max(end_time, timestamp)
    return max(end_time - start_time, 0)


def report_max_memory_usage(dat_file_path):
    """Return the largest memory value (field 1) in the profile, or ``0``."""
    max_memory_usage = 0
    for parts in _read_data_rows(dat_file_path):
        max_memory_usage = max(max_memory_usage, float(parts[1]))
    return max_memory_usage


def _collect_profiles(directory, print_failed_path):
    """Measure every ``*.dat`` file in *directory*.

    Returns a dict mapping the integer problem index (the file name up to its
    first ``.``) to ``(memory_usage, execution_time, max_memory_usage)``.
    A file that cannot be parsed is skipped as a whole, so the three
    measurements of a problem are always present together.

    When *print_failed_path* is true the failing path is printed to stdout;
    otherwise a warning goes to stderr.
    """
    profiles = {}
    for dat_file in glob.glob(os.path.join(directory, "*.dat")):
        try:
            problem_idx = int(os.path.basename(dat_file).split('.')[0])
            profiles[problem_idx] = (
                calculate_memory_usage(dat_file),
                calculate_runtime(dat_file),
                report_max_memory_usage(dat_file),
            )
        except Exception as exc:  # best-effort: one bad profile must not abort the report
            if print_failed_path:
                print(dat_file)
            else:
                print(f"skipping {dat_file}: {exc!r}", file=sys.stderr)
    return profiles


def _ratio_stats(ratios, threshold=5):
    """Return ``(mean, maximum, percentage strictly above threshold)``.

    *ratios* must be non-empty.
    """
    count = len(ratios)
    over = sum(1 for ratio in ratios if ratio > threshold)
    return sum(ratios) / count, max(ratios), over / count * 100


def report_results(task, model, file, total_problems=1000):
    """Run the model's task and summarise its overhead against the canonical solutions.

    Calls ``run_model_task(task, model, file)``, then reads the profiles in
    ``./results/{task}_{model}`` and ``./results/{task}_canonical_solution``.
    Only problems present in both directories, and whose canonical
    measurements are all positive, contribute to the averages.

    Args:
        task: task name used to build the result directory names.
        model: model name used to build the result directory name and to
            label the output row.
        file: passed through to ``run_model_task`` unchanged.
        total_problems: denominator of the final percentage column
            (number of model profiles / total_problems * 100).

    Returns:
        A ``&``-separated row ending in ``\\\\``: the model name, then for
        execution time, max memory and integrated memory usage (in that
        order) the mean absolute value, the mean ratio to canonical, the
        largest ratio and the percentage of problems with ratio > 5,
        followed by the percentage column described above.
        ``None`` (after printing *model*) when no problem could be compared.
    """
    run_model_task(task, model, file)

    canonical = _collect_profiles(
        f"./results/{task}_canonical_solution", print_failed_path=False
    )
    completion = _collect_profiles(
        f"./results/{task}_{model}", print_failed_path=True
    )

    total_execution_time = 0
    total_max_memory_usage = 0
    total_memory_usage = 0
    execution_time_ratios = []
    max_memory_ratios = []
    memory_usage_ratios = []

    for idx, (memory_usage, runtime, peak_memory) in completion.items():
        baseline = canonical.get(idx)
        if baseline is None:
            continue
        base_memory_usage, base_runtime, base_peak_memory = baseline
        # A canonical profile with a single sample has zero runtime and zero
        # integrated memory; dividing by it would raise ZeroDivisionError.
        if base_memory_usage <= 0 or base_runtime <= 0 or base_peak_memory <= 0:
            print(
                f"skipping problem {idx}: non-positive canonical baseline",
                file=sys.stderr,
            )
            continue

        total_execution_time += runtime
        total_max_memory_usage += peak_memory
        total_memory_usage += memory_usage
        execution_time_ratios.append(runtime / base_runtime)
        max_memory_ratios.append(peak_memory / base_peak_memory)
        memory_usage_ratios.append(memory_usage / base_memory_usage)

    if not execution_time_ratios:
        print(model)
        return None

    compared = len(execution_time_ratios)
    normalized_execution_time, max_net, total_500_net = _ratio_stats(execution_time_ratios)
    normalized_max_memory_usage, max_nmu, total_500_nmu = _ratio_stats(max_memory_ratios)
    normalized_memory_usage, max_tmu, total_500_tmu = _ratio_stats(memory_usage_ratios)
    total_execution_time = total_execution_time / compared
    total_max_memory_usage = total_max_memory_usage / compared
    total_memory_usage = total_memory_usage / compared
    # Counts every model profile that parsed, not only those with a baseline.
    pass1 = len(completion) / total_problems * 100

    return f"{model}&{total_execution_time:.2f}&{normalized_execution_time:.2f}&{max_net:.2f}&{total_500_net:.1f}&{total_max_memory_usage:.2f}&{normalized_max_memory_usage:.2f}&{max_nmu:.2f}&{total_500_nmu:.1f}&{total_memory_usage:.2f}&{normalized_memory_usage:.2f}&{max_tmu:.2f}&{total_500_tmu:.1f}&{pass1:.1f}\\\\"


if __name__ == "__main__":
    parse = argparse.ArgumentParser()
    parse.add_argument("--task", type=str, default="EffiBench")
    parse.add_argument("--model", type=str, default="gpt-4")
    parse.add_argument("--file", type=str, default="")
    args = parse.parse_args()

    if not args.file:
        args.file = f"./{args.task}_{args.model}.json"

    # The summary row is the script's only product; emit it instead of
    # discarding the return value.
    result_row = report_results(args.task, args.model, args.file)
    if result_row is not None:
        print(result_row)