CultriX committed on
Commit
fc68f79
1 Parent(s): 8a0dd37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -58
app.py CHANGED
@@ -11,6 +11,18 @@ from yall import create_yall
11
  from functools import cache
12
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Function to get model info from Hugging Face API using caching
16
  @cache
@@ -20,11 +32,20 @@ def cached_model_info(api, model):
20
  except (RepositoryNotFoundError, RevisionNotFoundError):
21
  return None
22
 
23
- # Function to get model info from DataFrame and update it with likes and tags
24
- @st.cache
25
- def get_model_info(df):
26
- api = HfApi()
 
 
 
 
 
 
27
 
 
 
 
28
  for index, row in df.iterrows():
29
  model_info = cached_model_info(api, row['Model'].strip())
30
  if model_info:
@@ -35,57 +56,39 @@ def get_model_info(df):
35
  df.loc[index, 'Tags'] = ''
36
  return df
37
 
38
- # Function to convert markdown table to DataFrame and extract Hugging Face URLs
39
- def convert_markdown_table_to_dataframe(md_content):
40
- """
41
- Converts markdown table to Pandas DataFrame, handling special characters and links,
42
- extracts Hugging Face URLs, and adds them to a new column.
43
- """
44
- # Remove leading and trailing | characters
45
- cleaned_content = re.sub(r'\|\s*$', '', re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE), flags=re.MULTILINE)
46
-
47
- # Create DataFrame from cleaned content
48
- df = pd.read_csv(StringIO(cleaned_content), sep="\|", engine='python')
49
-
50
- # Remove the first row after the header
51
- df = df.drop(0, axis=0)
52
-
53
- # Strip whitespace from column names
54
- df.columns = df.columns.str.strip()
55
 
56
- # Extract Hugging Face URLs and add them to a new column
57
- model_link_pattern = r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)'
58
- df['URL'] = df['Model'].apply(lambda x: re.search(model_link_pattern, x).group(2) if re.search(model_link_pattern, x) else None)
 
 
 
 
 
 
 
 
59
 
60
- # Clean Model column to have only the model link text
61
- df['Model'] = df['Model'].apply(lambda x: re.sub(model_link_pattern, r'\1', x))
 
 
 
 
 
 
 
 
62
 
63
- return df
64
- # Function to get model info from Hugging Face API using caching
65
- @cache
66
- def cached_model_info(api, model):
67
- try:
68
- return api.model_info(repo_id=str(model))
69
- except (RepositoryNotFoundError, RevisionNotFoundError):
70
- return None
71
 
72
- # Function to convert markdown table to DataFrame and extract Hugging Face URLs
73
- def convert_markdown_table_to_dataframe(md_content):
74
- cleaned_content = re.sub(r'\|\s*$', '', re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE), flags=re.MULTILINE)
75
- df = pd.read_csv(StringIO(cleaned_content), sep="\|", engine='python')
76
- df = df.drop(0, axis=0)
77
- df.columns = df.columns.str.strip()
78
- model_link_pattern = r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)'
79
- df['URL'] = df['Model'].apply(lambda x: re.search(model_link_pattern, x).group(2) if re.search(model_link_pattern, x) else None)
80
- df['Model'] = df['Model'].apply(lambda x: re.sub(model_link_pattern, r'\1', x))
81
- return df
82
 
83
  # Function to get model info from DataFrame and update it with likes and tags
84
  @st.cache
85
  def get_model_info(df):
86
  api = HfApi()
87
- df['Likes'] = None
88
- df['Tags'] = None
89
  for index, row in df.iterrows():
90
  model_info = cached_model_info(api, row['Model'].strip())
91
  if model_info:
@@ -96,8 +99,13 @@ def get_model_info(df):
96
  df.loc[index, 'Tags'] = ''
97
  return df
98
 
99
- # Define the score columns (global for use in calculations)
100
- score_columns = ['Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']
 
 
 
 
 
101
 
102
  # Function to calculate the highest combined score for a given column
103
  def calculate_highest_combined_score(data, column):
@@ -111,16 +119,6 @@ def calculate_highest_combined_score(data, column):
111
  top_combinations[r] = sorted(top_combinations[r], key=lambda x: x[0], reverse=True)[:3]
112
  return column, top_combinations
113
 
114
- # Function to display the results of the highest combined scores
115
- def display_highest_combined_scores(data):
116
- with st.spinner('Calculating highest combined scores...'):
117
- results = [calculate_highest_combined_score(data, col) for col in score_columns]
118
- for column, top_combinations in results:
119
- st.subheader(f"Top Combinations for {column}")
120
- for r, combinations in top_combinations.items():
121
- st.write(f"**Number of Models: {r}**")
122
- for score, combination in combinations:
123
- st.write(f"Score: {score}, Models: {', '.join(combination)}")
124
 
125
  # Function to create and display charts (existing functions can be reused or modified as needed)
126
 
 
11
  from functools import cache
12
 
13
 
14
+ # Importing necessary libraries
15
+ import streamlit as st
16
+ import pandas as pd
17
+ from io import StringIO
18
+ import plotly.graph_objs as go
19
+ from huggingface_hub import HfApi
20
+ from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
21
+ from itertools import combinations
22
+ import time
23
+ from collections import Counter
24
+ import re
25
+ from functools import cache
26
 
27
  # Function to get model info from Hugging Face API using caching
28
  @cache
 
32
  except (RepositoryNotFoundError, RevisionNotFoundError):
33
  return None
34
 
35
+ # Function to convert markdown table to DataFrame and extract Hugging Face URLs
36
+ def convert_markdown_table_to_dataframe(md_content):
37
+ cleaned_content = re.sub(r'\|\s*$', '', re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE), flags=re.MULTILINE)
38
+ df = pd.read_csv(StringIO(cleaned_content), sep="\|", engine='python')
39
+ df = df.drop(0, axis=0)
40
+ df.columns = df.columns.str.strip()
41
+ model_link_pattern = r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)'
42
+ df['URL'] = df['Model'].apply(lambda x: re.search(model_link_pattern, x).group(2) if re.search(model_link_pattern, x) else None)
43
+ df['Model'] = df['Model'].apply(lambda x: re.sub(model_link_pattern, r'\1', x))
44
+ return df
45
 
46
+ # Function to get and update model info in the DataFrame
47
+ def get_and_update_model_info(df):
48
+ api = HfApi()
49
  for index, row in df.iterrows():
50
  model_info = cached_model_info(api, row['Model'].strip())
51
  if model_info:
 
56
  df.loc[index, 'Tags'] = ''
57
  return df
58
 
59
+ # Define the score columns
60
+ score_columns = ['Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ # Function to calculate the highest combined score for a given column
63
+ def calculate_highest_combined_score(data, column):
64
+ scores = data[column].dropna().tolist() # Ensure to drop NaN values to avoid calculation errors
65
+ models = data['Model'].dropna().tolist()
66
+ top_combinations = {2: [], 3: [], 4: [], 5: [], 6: []}
67
+ for r in range(2, 7):
68
+ for combination in combinations(zip(scores, models), r):
69
+ combined_score = sum(score for score, _ in combination)
70
+ top_combinations[r].append((combined_score, tuple(model for _, model in combination)))
71
+ top_combinations[r] = sorted(top_combinations[r], key=lambda x: x[0], reverse=True)[:3]
72
+ return column, top_combinations
73
 
74
+ # Function to display the results of the highest combined scores
75
+ def display_highest_combined_scores(data):
76
+ for column in score_columns:
77
+ if column in data:
78
+ _, top_combinations = calculate_highest_combined_score(data, column)
79
+ st.subheader(f"Top Combinations for {column}")
80
+ for r, combinations in top_combinations.items():
81
+ st.write(f"**Number of Models: {r}**")
82
+ for score, combination in combinations:
83
+ st.write(f"Score: {score}, Models: {', '.join(combination)}")
84
 
 
 
 
 
 
 
 
 
85
 
 
 
 
 
 
 
 
 
 
 
86
 
87
  # Function to get model info from DataFrame and update it with likes and tags
88
  @st.cache
89
  def get_model_info(df):
90
  api = HfApi()
91
+
 
92
  for index, row in df.iterrows():
93
  model_info = cached_model_info(api, row['Model'].strip())
94
  if model_info:
 
99
  df.loc[index, 'Tags'] = ''
100
  return df
101
 
102
+ # Function to get model info from Hugging Face API using caching
103
+ @cache
104
+ def cached_model_info(api, model):
105
+ try:
106
+ return api.model_info(repo_id=str(model))
107
+ except (RepositoryNotFoundError, RevisionNotFoundError):
108
+ return None
109
 
110
  # Function to calculate the highest combined score for a given column
111
  def calculate_highest_combined_score(data, column):
 
119
  top_combinations[r] = sorted(top_combinations[r], key=lambda x: x[0], reverse=True)[:3]
120
  return column, top_combinations
121
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  # Function to create and display charts (existing functions can be reused or modified as needed)
124