yonikremer
committed on
Commit
•
da1451c
1
Parent(s):
3a9aacf
parameterized min number of likes and downloads
Browse files - supported_models.py +38 -11
supported_models.py
CHANGED
@@ -59,7 +59,12 @@ def get_page(page_index: int):
|
|
59 |
return None
|
60 |
|
61 |
|
62 |
-
def card_filter(
|
|
|
|
|
|
|
|
|
|
|
63 |
if model_name in BLACKLISTED_MODEL_NAMES:
|
64 |
return False
|
65 |
numeric_contents = get_numeric_contents(model_card)
|
@@ -68,33 +73,55 @@ def card_filter(model_card: Tag, model_name: str) -> bool:
|
|
68 |
# It means that he doesn't have any downloads/likes, so it's not a valid model card.
|
69 |
return False
|
70 |
number_of_downloads = convert_to_int(numeric_contents[0])
|
71 |
-
if number_of_downloads <
|
72 |
return False
|
73 |
number_of_likes = convert_to_int(numeric_contents[1])
|
74 |
-
if number_of_likes <
|
75 |
return False
|
76 |
return True
|
77 |
|
78 |
|
79 |
-
def get_model_names(
|
|
|
|
|
|
|
|
|
80 |
model_cards: List[Tag] = soup.find_all("article", class_="overview-card-wrapper group", recursive=True)
|
81 |
for model_card in model_cards:
|
82 |
model_name = get_model_name(model_card)
|
83 |
-
if card_filter(
|
|
|
|
|
|
|
|
|
|
|
84 |
yield model_name
|
85 |
|
86 |
|
87 |
-
def generate_supported_model_names(
|
|
|
|
|
|
|
88 |
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
89 |
future_to_index = {executor.submit(get_page, index): index for index in range(100)}
|
90 |
for future in as_completed(future_to_index):
|
91 |
soup = future.result()
|
92 |
if soup:
|
93 |
-
yield from get_model_names(
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
|
100 |
if __name__ == "__main__":
|
|
|
59 |
return None
|
60 |
|
61 |
|
62 |
+
def card_filter(
|
63 |
+
model_card: Tag,
|
64 |
+
model_name: str,
|
65 |
+
min_number_of_downloads: int,
|
66 |
+
min_number_of_likes: int,
|
67 |
+
) -> bool:
|
68 |
if model_name in BLACKLISTED_MODEL_NAMES:
|
69 |
return False
|
70 |
numeric_contents = get_numeric_contents(model_card)
|
|
|
73 |
# It means that he doesn't have any downloads/likes, so it's not a valid model card.
|
74 |
return False
|
75 |
number_of_downloads = convert_to_int(numeric_contents[0])
|
76 |
+
if number_of_downloads < min_number_of_downloads:
|
77 |
return False
|
78 |
number_of_likes = convert_to_int(numeric_contents[1])
|
79 |
+
if number_of_likes < min_number_of_likes:
|
80 |
return False
|
81 |
return True
|
82 |
|
83 |
|
84 |
+
def get_model_names(
|
85 |
+
soup: BeautifulSoup,
|
86 |
+
min_number_of_downloads: int,
|
87 |
+
min_number_of_likes: int,
|
88 |
+
) -> Generator[str, None, None]:
|
89 |
model_cards: List[Tag] = soup.find_all("article", class_="overview-card-wrapper group", recursive=True)
|
90 |
for model_card in model_cards:
|
91 |
model_name = get_model_name(model_card)
|
92 |
+
if card_filter(
|
93 |
+
model_card=model_card,
|
94 |
+
model_name=model_name,
|
95 |
+
min_number_of_downloads=min_number_of_downloads,
|
96 |
+
min_number_of_likes=min_number_of_likes
|
97 |
+
):
|
98 |
yield model_name
|
99 |
|
100 |
|
101 |
+
def generate_supported_model_names(
|
102 |
+
min_number_of_downloads: int,
|
103 |
+
min_number_of_likes: int,
|
104 |
+
) -> Generator[str, None, None]:
|
105 |
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
106 |
future_to_index = {executor.submit(get_page, index): index for index in range(100)}
|
107 |
for future in as_completed(future_to_index):
|
108 |
soup = future.result()
|
109 |
if soup:
|
110 |
+
yield from get_model_names(
|
111 |
+
soup=soup,
|
112 |
+
min_number_of_downloads=min_number_of_downloads,
|
113 |
+
min_number_of_likes=min_number_of_likes,
|
114 |
+
)
|
115 |
+
|
116 |
+
|
117 |
+
def get_supported_model_names(
|
118 |
+
min_number_of_downloads: int = MIN_NUMBER_OF_DOWNLOADS,
|
119 |
+
min_number_of_likes: int = MIN_NUMBER_OF_LIKES,
|
120 |
+
) -> Set[str]:
|
121 |
+
return set(generate_supported_model_names(
|
122 |
+
min_number_of_downloads=min_number_of_downloads,
|
123 |
+
min_number_of_likes=min_number_of_likes,
|
124 |
+
))
|
125 |
|
126 |
|
127 |
if __name__ == "__main__":
|