# Alt_LLM_LeaderBoard / get-hf-merge-configs.py
# Uploaded by CultriX -- commit 76d602e (verified): "Create get-hf-merge-configs.py"
import re
from huggingface_hub import ModelCard
import pandas as pd
# Load the CSV data
df = pd.read_csv('/tmp/models.csv')
# Sort the data by the second column (assuming the column name is 'Average')
df_sorted = df.sort_values(by='Average', ascending=False)
# Open the file in append mode
with open('configurations.txt', 'a') as file:
# Get model cards for the top 20 entries
for index, row in df_sorted.head(20).iterrows():
model_name = row['Model'].rstrip()
card = ModelCard.load(model_name)
file.write(f'Model Name: {model_name}\n')
file.write(f'Scores: {row["Average"]}\n') # Assuming 'Average' is the benchmark score
file.write(f'AGIEval: {row["AGIEval"]}\n')
file.write(f'GPT4All: {row["GPT4All"]}\n')
file.write(f'TruthfulQA: {row["TruthfulQA"]}\n')
file.write(f'Bigbench: {row["Bigbench"]}\n')
file.write(f'Model Card: {card}\n')
# Open the second file in read mode
with open('configurations.txt', 'r') as file:
# Read the content
content = file.read()
# Find all text between 'yaml' and '```'
matches = re.findall(r'yaml(.*?)```', content, re.DOTALL)
# Open the file 'configurations2.txt' in write mode
with open('configurations2.txt', 'w') as file:
# Write the matches to the file
for row, match in zip(df_sorted[['Model', 'Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']].head(20).values, matches):
file.write(f'Model Name: {row[0]}\n')
file.write(f'Scores: {row[1]}\n')
file.write(f'AGIEval: {row[2]}\n')
file.write(f'GPT4All: {row[3]}\n')
file.write(f'TruthfulQA: {row[4]}\n')
file.write(f'Bigbench: {row[5]}\n')
file.write('yaml' + match + '```\n')