"""Collect model cards and embedded YAML configs for the top-scoring models.

Reads a leaderboard CSV, takes the rows with the highest ``Average`` score,
downloads each model's card from the Hugging Face Hub and writes two files:

* ``configurations.txt``  -- appended to; scores plus the full model card.
* ``configurations2.txt`` -- overwritten; scores plus only the text found
  between ``yaml`` and the next triple-backtick fence in that model's card.
"""
import re

import pandas as pd

CSV_PATH = '/tmp/models.csv'
CARDS_PATH = 'configurations.txt'
CONFIGS_PATH = 'configurations2.txt'
TOP_N = 20

# Benchmark columns written after the overall 'Average' score, in order.
SCORE_COLUMNS = ('AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench')

# Everything between the literal text 'yaml' and the next '```' fence.
YAML_BLOCK_RE = re.compile(r'yaml(.*?)```', re.DOTALL)


def extract_yaml_blocks(text):
    """Return every span of *text* found between 'yaml' and the next '```'."""
    return YAML_BLOCK_RE.findall(text)


def format_scores(model_name, row):
    """Return the 'Model Name' / 'Scores' / per-benchmark header lines.

    *row* is any mapping-like object (e.g. a pandas Series) indexable by
    'Average' and by each name in SCORE_COLUMNS.
    """
    lines = [
        f'Model Name: {model_name}\n',
        f'Scores: {row["Average"]}\n',
    ]
    lines.extend(f'{column}: {row[column]}\n' for column in SCORE_COLUMNS)
    return ''.join(lines)


def format_config_entry(model_name, row, card_text):
    """Return the configurations2.txt entry for one model.

    The YAML snippets are taken from *card_text* only, so they always belong
    to *model_name*. Returns an empty string when the card contains no
    snippet; when it contains several, all of them follow a single header.
    """
    blocks = extract_yaml_blocks(card_text)
    if not blocks:
        return ''
    snippets = ''.join('yaml' + block + '```\n' for block in blocks)
    return format_scores(model_name, row) + snippets


def main():
    """Fetch the top model cards and write both output files."""
    # Imported here so the pure helpers above stay importable on machines
    # without huggingface_hub installed or without network access.
    from huggingface_hub import ModelCard

    df = pd.read_csv(CSV_PATH)
    top_models = df.sort_values(by='Average', ascending=False).head(TOP_N)

    config_entries = []
    # Append mode is kept from the original script: earlier runs' cards stay
    # in the file. UTF-8 is explicit because model cards often contain
    # non-ASCII text that the locale default encoding may reject.
    with open(CARDS_PATH, 'a', encoding='utf-8') as cards_file:
        for _, row in top_models.iterrows():
            # The same stripped name is used for the Hub lookup and for both
            # output files.
            model_name = row['Model'].rstrip()
            card_text = str(ModelCard.load(model_name))

            cards_file.write(format_scores(model_name, row))
            cards_file.write(f'Model Card: {card_text}\n')

            # Extract from this card's own text instead of re-reading the
            # accumulated file, so snippets cannot be paired with the wrong
            # model when a card has no snippet or the file holds old runs.
            config_entries.append(
                format_config_entry(model_name, row, card_text)
            )

    # Written only after every card loaded, so a failed download does not
    # leave a truncated configurations2.txt behind.
    with open(CONFIGS_PATH, 'w', encoding='utf-8') as configs_file:
        configs_file.writelines(config_entries)


if __name__ == '__main__':
    main()