CultriX committed on
Commit
b005e3f
1 Parent(s): 0391a70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -16
app.py CHANGED
@@ -8,6 +8,7 @@ from functools import cache
8
  from io import StringIO
9
  from yall import create_yall
10
 
 
11
  @cache
12
  def cached_model_info(api, model):
13
  try:
@@ -15,41 +16,73 @@ def cached_model_info(api, model):
15
  except (RepositoryNotFoundError, RevisionNotFoundError):
16
  return None
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def convert_markdown_table_to_dataframe(md_content):
    """Parse a pipe-delimited markdown table into a DataFrame.

    The leading and trailing ``|`` of every line are removed, the remainder
    is read as ``|``-separated values, and column names are stripped of
    surrounding whitespace. ``Model`` and ``URL`` columns are created
    (filled with ``None``) when the table does not provide them. For every
    ``Model`` cell containing a markdown link ``[text](target)``, the cell is
    replaced by ``text`` and ``target`` is stored in ``URL``.

    Rows are returned exactly as parsed; the markdown separator row
    (``---|---``), if present, is not removed here.

    Args:
        md_content: Markdown table text.

    Returns:
        pandas.DataFrame with at least the ``Model`` and ``URL`` columns.
    """
    # Strip the outer pipes line by line so they do not create empty columns.
    without_leading = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading, flags=re.MULTILINE)

    # Raw string: "\|" is an invalid escape sequence in a normal literal.
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')
    df.columns = [col.strip() for col in df.columns]

    # Initialize empty columns for URL and Model if not already present.
    for required in ('Model', 'URL'):
        if required not in df.columns:
            df[required] = None

    model_link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')
    for index, cell in df['Model'].items():
        # Empty cells are parsed as NaN and a missing column is filled with
        # None; neither can be searched, so leave those rows untouched.
        if not isinstance(cell, str):
            continue
        match = model_link_pattern.search(cell)
        if match:
            df.at[index, 'Model'] = match.group(1)
            df.at[index, 'URL'] = match.group(2)

    return df
37
 
38
@st.cache_data
def get_model_info(df):
    """Return a copy of ``df`` with ``Likes`` and ``Tags`` filled per model.

    Each row's ``Model`` value is stripped and looked up through
    ``cached_model_info``. When the lookup yields a result, its ``likes``
    and comma-joined ``tags`` are stored; otherwise ``Likes`` is ``-1`` and
    ``Tags`` is the empty string.

    Args:
        df: DataFrame with a string ``Model`` column holding repo ids.

    Returns:
        A new DataFrame; the argument is left unmodified so the cached
        function does not alter the object it was keyed on.
    """
    api = HfApi()
    result = df.copy()

    for index, model_name in result['Model'].items():
        model_info = cached_model_info(api, model_name.strip())
        if not model_info:
            # The helper returns None when the repo or revision is missing.
            result.loc[index, 'Likes'] = -1
            result.loc[index, 'Tags'] = ''
            continue
        result.loc[index, 'Likes'] = model_info.likes
        # NOTE(review): tags appears to be optional on the hub side — guard
        # against None so join() cannot raise.
        result.loc[index, 'Tags'] = ', '.join(model_info.tags or [])
    return result
51
 
52
 
 
 
53
  def calculate_highest_combined_score(data, column):
54
  # Ensure the column exists and has numeric data
55
  if column not in data.columns or not pd.api.types.is_numeric_dtype(data[column]):
 
8
  from io import StringIO
9
  from yall import create_yall
10
 
11
+ # Function to get model info from Hugging Face API using caching
12
  @cache
13
  def cached_model_info(api, model):
14
  try:
 
16
  except (RepositoryNotFoundError, RevisionNotFoundError):
17
  return None
18
 
19
# NOTE(review): a second `get_model_info` is defined further down in this
# module; being later, that definition is the one bound at import time.
@st.cache_data
def get_model_info(df):
    """Return a copy of ``df`` with ``Likes`` and ``Tags`` filled per model.

    Each row's ``Model`` value is stripped and looked up through
    ``cached_model_info``. When the lookup yields a result, its ``likes``
    and comma-joined ``tags`` are stored; otherwise ``Likes`` is ``-1`` and
    ``Tags`` is the empty string.

    Args:
        df: DataFrame with a string ``Model`` column holding repo ids.

    Returns:
        A new DataFrame; the argument is left unmodified so the cached
        function does not alter the object it was keyed on.
    """
    api = HfApi()
    result = df.copy()

    for index, model_name in result['Model'].items():
        model_info = cached_model_info(api, model_name.strip())
        if not model_info:
            # The helper returns None when the repo or revision is missing.
            result.loc[index, 'Likes'] = -1
            result.loc[index, 'Tags'] = ''
            continue
        result.loc[index, 'Likes'] = model_info.likes
        # NOTE(review): tags appears to be optional on the hub side — guard
        # against None so join() cannot raise.
        result.loc[index, 'Tags'] = ', '.join(model_info.tags or [])
    return result
33
+
34
def convert_markdown_table_to_dataframe(md_content):
    """Convert a markdown table to a DataFrame and extract model URLs.

    The leading and trailing ``|`` of every line are removed and the rest is
    parsed as ``|``-separated values. The first parsed row (the markdown
    ``---|---`` separator under the header) is dropped and column names are
    stripped of surrounding whitespace.

    A ``Model`` cell of the form ``[name](url) [label](other)`` (two
    consecutive markdown links) has its first link target stored in a new
    ``URL`` column and the matched text replaced by ``name``. Cells that do
    not match, or that are not strings (empty cells parse as NaN), keep
    their value and get ``None`` as URL.

    Args:
        md_content: Markdown table text containing a ``Model`` column.

    Returns:
        pandas.DataFrame with the table rows plus a ``URL`` column. Row
        labels are not reset after the separator row is dropped.

    Raises:
        KeyError: If the table has no ``Model`` column.
    """
    # Remove leading and trailing | characters so they do not yield empty columns.
    without_leading = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading, flags=re.MULTILINE)

    # Raw string: "\|" is an invalid escape sequence in a normal literal.
    df = pd.read_csv(StringIO(cleaned_content), sep=r"\|", engine='python')

    # Drop the separator row; tolerate a header-only table with no row 0.
    df = df.drop(index=0, errors='ignore')

    # Strip whitespace from column names.
    df.columns = df.columns.str.strip()

    model_link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)')

    def _link_target(cell):
        """Return the first link's target, or None when the cell has no match."""
        if not isinstance(cell, str):
            return None
        match = model_link_pattern.search(cell)
        return match.group(2) if match else None

    def _link_text(cell):
        """Replace the matched link pair with the first link's text."""
        if not isinstance(cell, str):
            return cell
        return model_link_pattern.sub(r'\1', cell)

    # URL must be derived before Model is overwritten with the plain name.
    df['URL'] = df['Model'].apply(_link_target)
    df['Model'] = df['Model'].apply(_link_text)

    return df
60
 
61
@st.cache_data
def get_model_info(df):
    """Return a copy of ``df`` with ``Likes`` and ``Tags`` from the Hub.

    For every row the stripped ``Model`` value is passed as ``repo_id`` to
    ``HfApi.model_info``. On success the model's ``likes`` and comma-joined
    ``tags`` are stored. If the repository or revision is not found,
    ``Likes`` is set to ``-1`` and ``Tags`` to the empty string.

    NOTE(review): an earlier function with the same name is defined above in
    this module; this later definition is the one that stays bound.

    Args:
        df: DataFrame with a string ``Model`` column holding repo ids.

    Returns:
        A new DataFrame with ``Likes`` and ``Tags`` columns. The argument is
        not modified: mutating it would change its hash between calls and
        defeat the ``st.cache_data`` lookup.
    """
    api = HfApi()
    result = df.copy()

    # Initialize new columns for likes and tags.
    result['Likes'] = None
    result['Tags'] = None

    for index, model_name in result['Model'].items():
        repo_id = model_name.strip()
        try:
            # Only the lookup itself can raise the not-found errors.
            model_info = api.model_info(repo_id=repo_id)
        except (RepositoryNotFoundError, RevisionNotFoundError):
            result.loc[index, 'Likes'] = -1
            result.loc[index, 'Tags'] = ''
        else:
            result.loc[index, 'Likes'] = model_info.likes
            # NOTE(review): tags appears to be optional on the hub side —
            # guard against None so join() cannot raise.
            result.loc[index, 'Tags'] = ', '.join(model_info.tags or [])

    return result
82
 
83
 
84
+
85
+
86
  def calculate_highest_combined_score(data, column):
87
  # Ensure the column exists and has numeric data
88
  if column not in data.columns or not pd.api.types.is_numeric_dtype(data[column]):