import re
from huggingface_hub import ModelCard
import pandas as pd

# Leaderboard CSV to read and the two report files to produce.
MODELS_CSV = '/tmp/models.csv'
FULL_OUTPUT = 'configurations.txt'
YAML_OUTPUT = 'configurations2.txt'

# How many of the best-ranked rows (by 'Average') are processed.
TOP_N = 20

# (label written to the report, CSV column it is read from)
SCORE_FIELDS = (
    ('Scores', 'Average'),
    ('AGIEval', 'AGIEval'),
    ('GPT4All', 'GPT4All'),
    ('TruthfulQA', 'TruthfulQA'),
    ('Bigbench', 'Bigbench'),
)

# Body of a fenced ```yaml ... ``` block. Anchoring on the opening fence keeps
# a stray "yaml" in prose or in a model name from starting a bogus match.
YAML_BLOCK_RE = re.compile(r'```yaml(.*?)```', re.DOTALL)


def extract_yaml_blocks(text):
    """Return the body of every fenced ``yaml`` code block found in *text*.

    Args:
        text: Arbitrary text, e.g. the string form of a model card.

    Returns:
        A list with the text between each opening ```` ```yaml ```` fence and
        the next closing fence, in order of appearance (empty if none).
    """
    return YAML_BLOCK_RE.findall(text)


def format_header(model_name, row):
    """Build the 'Model Name' / score lines shared by both report files.

    Args:
        model_name: Name to print on the 'Model Name' line.
        row: Mapping (a pandas row or a dict) indexed by the CSV column names
            listed in ``SCORE_FIELDS``.

    Returns:
        The header as a single newline-terminated string.
    """
    lines = [f'Model Name: {model_name}\n']
    lines.extend(f'{label}: {row[column]}\n' for label, column in SCORE_FIELDS)
    return ''.join(lines)


def main():
    """Dump the top models' cards and their YAML configuration blocks.

    Reads ``MODELS_CSV``, takes the ``TOP_N`` rows with the highest 'Average',
    loads each model card, appends header + full card to ``FULL_OUTPUT`` and
    finally rewrites ``YAML_OUTPUT`` with one header + YAML block entry per
    block found in each card.
    """
    df = pd.read_csv(MODELS_CSV)
    top_rows = df.sort_values(by='Average', ascending=False).head(TOP_N)

    yaml_entries = []
    # Append mode is kept so earlier dumps in this file are preserved.
    with open(FULL_OUTPUT, 'a', encoding='utf-8') as full_file:
        for _, row in top_rows.iterrows():
            model_name = row['Model'].rstrip()
            card_text = str(ModelCard.load(model_name))
            header = format_header(model_name, row)

            full_file.write(header)
            full_file.write(f'Model Card: {card_text}\n')

            # Extract from this card only, so every block stays attached to
            # the model it came from even when a card has zero or several
            # blocks, and leftovers from previous runs are never picked up.
            for block in extract_yaml_blocks(card_text):
                yaml_entries.append(header + 'yaml' + block + '```\n')

    # Written only after every card loaded, so a failed run does not clobber
    # the previous YAML report.
    with open(YAML_OUTPUT, 'w', encoding='utf-8') as yaml_file:
        yaml_file.writelines(yaml_entries)


if __name__ == '__main__':
    main()