
Enhance model information retrieval: include number of downloads and likes in the fetch_model_information function and update UI elements accordingly.
f6f28fc
import gradio as gr
import pandas as pd
import json
import os
from pathlib import Path
from huggingface_hub import HfApi, hf_hub_download

api = HfApi()

OWNER = "Navid-AI"
DATASET_REPO_ID = f"{OWNER}/requests-dataset"

def load_retrieval_results():
    base_path = Path(__file__).parent
    results_dir = base_path / "results"
    retrieval_dataframe_path = results_dir / "retrieval_results.json"
    if not retrieval_dataframe_path.exists():
        df = pd.DataFrame(columns=["Model", "Revision", "Precision", "Task", "Model Size (in Millions)", "Embedding Dimension", "Max Tokens", "Num Likes", "Web Search Dataset (Overall Score)"])
    else:
        df = pd.read_json(retrieval_dataframe_path)
    return df

def get_model_info(model_id, verbose=False):
    model_info = api.model_info(model_id)
    num_downloads = model_info.downloads
    num_likes = model_info.likes
    license = model_info.card_data["license"]
    # safetensors.total is a parameter count, so dividing by 1e6 gives millions of parameters
    num_parameters = round(model_info.safetensors.total / 1e6)
    supported_precisions = list(model_info.safetensors.parameters.keys())
    if verbose:
        print(f"Model '{model_id}' has {num_downloads} downloads, {num_likes} likes, and is licensed under {license}.")
        print(f"The model has approximately {num_parameters} million parameters.")
        print(f"The model supports the following precisions: {supported_precisions}")
    return num_downloads, num_likes, license, num_parameters, supported_precisions

def fetch_model_information(model_name):
    try:
        num_downloads, num_likes, license, num_parameters, supported_precisions = get_model_info(model_name)
        if len(supported_precisions) == 0:
            supported_precisions = [None]
    except Exception as e:
        # gr.Error must be raised (not just instantiated) for Gradio to surface it in the UI
        raise gr.Error(f"Error: Could not fetch model information. {str(e)}")
    return gr.update(choices=supported_precisions, value=supported_precisions[0]), license, num_parameters, num_downloads, num_likes
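
# Illustrative usage sketch (not part of the original Space): exercises the fetch path
# above outside the UI. The model id is only an example taken from the form placeholder;
# any public Hub model that publishes safetensors metadata should work, and the helper
# name is hypothetical.
def _example_fetch_model_info(model_id="intfloat/multilingual-e5-large-instruct"):
    # get_model_info returns (downloads, likes, license, params in millions, supported precisions)
    downloads, likes, model_license, params_m, precisions = get_model_info(model_id, verbose=True)
    default_precision = precisions[0] if precisions else None
    print(f"{model_id}: {params_m}M params, {downloads} downloads, {likes} likes, default precision {default_precision}")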

def submit_model(model_name, revision, precision, params, license, task):
    # Load existing evaluations
    if task == "Retriever":
        df = load_retrieval_results()
    elif task == "Reranker":
        df = load_retrieval_results()
    else:
        raise gr.Error(f"Error: Task '{task}' is not supported.")
    existing_models_results = df[['Model', 'Revision', 'Precision', 'Task']]
    # Handle 'Missing' precision
    if precision == 'Missing':
        precision = None
    else:
        precision = precision.strip().lower()
    # Load pending and finished requests from the dataset repository
    df_pending = load_requests('pending')
    df_finished = load_requests('finished')
    # Check whether the model has already been evaluated
    model_exists_in_results = ((existing_models_results['Model'] == model_name) &
                               (existing_models_results['Revision'] == revision) &
                               (existing_models_results['Precision'] == precision) &
                               (existing_models_results['Task'] == task)).any()
    if model_exists_in_results:
        message = f"Model '{model_name}' with revision '{revision}' and precision '{precision}' for task '{task}' has already been evaluated."
        gr.Info(message)
        return f"**{message}**"
    # Check whether the model is already in the pending queue
    if not df_pending.empty:
        # Include 'task' in the selection so the comparison below does not raise a KeyError
        existing_models_pending = df_pending[['model_name', 'revision', 'precision', 'task']]
        model_exists_in_pending = ((existing_models_pending['model_name'] == model_name) &
                                   (existing_models_pending['revision'] == revision) &
                                   (existing_models_pending['precision'] == precision) &
                                   (existing_models_pending['task'] == task)).any()
        if model_exists_in_pending:
            message = f"Model '{model_name}' with revision '{revision}' and precision '{precision}' for task '{task}' is already in the evaluation queue."
            gr.Info(message)
            return f"**{message}**"
    # Check whether the model is in the finished requests
    if not df_finished.empty:
        existing_models_finished = df_finished[['model_name', 'revision', 'precision', 'task']]
        model_exists_in_finished = ((existing_models_finished['model_name'] == model_name) &
                                    (existing_models_finished['revision'] == revision) &
                                    (existing_models_finished['precision'] == precision) &
                                    (existing_models_finished['task'] == task)).any()
        if model_exists_in_finished:
            message = f"Model '{model_name}' with revision '{revision}' and precision '{precision}' for task '{task}' has already been evaluated."
            gr.Info(message)
            return f"**{message}**"
    # Check that the model exists on the HuggingFace Hub
    try:
        api.model_info(model_name)
    except Exception as e:
        print(f"Error fetching model info: {e}")
        raise gr.Error(f"Error: Model '{model_name}' not found on HuggingFace Hub.")
    # Proceed with submission
    status = "PENDING"
    # Prepare the submission data
    submission = {
        "model_name": model_name,
        "license": license,
        "revision": revision,
        "precision": precision,
        "status": status,
        "params": params,
        "task": task
    }
    # Serialize the submission to JSON
    submission_json = json.dumps(submission, indent=2)
    # Define the file path in the repository
    org_model = model_name.split('/')
    if len(org_model) != 2:
        return "**Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct'**"
    org, model_id = org_model
    precision_str = precision if precision else 'Missing'
    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}_{task.lower()}.json"
    # Upload the submission to the dataset repository
    try:
        hf_api_token = os.environ.get('HF_TOKEN', None)
        api.upload_file(
            path_or_fileobj=submission_json.encode('utf-8'),
            path_in_repo=file_path_in_repo,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=hf_api_token
        )
    except Exception as e:
        print(f"Error uploading file: {e}")
        raise gr.Error(f"Error: Could not submit model '{model_name}' for evaluation.")
    return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' for task '{task}' has been submitted successfully.**"

def load_requests(status_folder):
    api = HfApi()
    requests_data = []
    folder_path_in_repo = status_folder  # 'pending', 'finished', or 'failed'
    hf_api_token = os.environ.get('HF_TOKEN', None)
    try:
        # List files in the dataset repository
        files_info = api.list_repo_files(
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=hf_api_token
        )
    except Exception as e:
        print(f"Error accessing dataset repository: {e}")
        return pd.DataFrame()  # Return an empty DataFrame if the repository is not found or inaccessible
    # Filter files in the desired folder
    files_in_folder = [f for f in files_info if f.startswith(f"{folder_path_in_repo}/") and f.endswith('.json')]
    for file_path in files_in_folder:
        try:
            # Download the JSON file
            local_file_path = hf_hub_download(
                repo_id=DATASET_REPO_ID,
                filename=file_path,
                repo_type="dataset",
                token=hf_api_token
            )
            # Load JSON data
            with open(local_file_path, 'r') as f:
                request = json.load(f)
            requests_data.append(request)
        except Exception as e:
            print(f"Error loading file {file_path}: {e}")
            continue  # Skip files that can't be loaded
    df = pd.DataFrame(requests_data)
    return df
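
# Quick status summary sketch (assumption: the requests dataset stores one JSON file per
# submission under the pending/, finished/, and failed/ folders, as load_requests expects).
# The helper name is hypothetical and is not wired into the UI.
def _example_queue_summary():
    for status in ("pending", "finished", "failed"):
        df = load_requests(status)
        print(f"{status}: {len(df)} request(s)")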

def submit_gradio_module(task_type):
    var = gr.State(value=task_type)
    with gr.Tab(f"Submit {task_type}") as submitter_tab:
        with gr.Row(equal_height=True):
            model_name_input = gr.Textbox(
                label="Model",
                placeholder="Enter the full model name from HuggingFace Hub (e.g., intfloat/multilingual-e5-large-instruct)",
                scale=4,
            )
            fetch_data_button = gr.Button(value="Auto Fetch Model Info", variant="secondary")
        with gr.Row():
            precision_input = gr.Dropdown(
                choices=["F16", "F32", "BF16", "I8", "U8", "I16"],
                label="Precision",
                value="F16"
            )
            license_input = gr.Textbox(
                label="License",
                placeholder="Enter the license type (Generic one is 'Open' in case no License is provided)",
                value="Open"
            )
            revision_input = gr.Textbox(
                label="Revision",
                placeholder="main",
                value="main"
            )
        with gr.Row():
            params_input = gr.Textbox(
                label="Params (in Millions)",
                interactive=False,
            )
            num_downloads_input = gr.Textbox(
                label="Number of Downloads",
                interactive=False,
            )
            num_likes_input = gr.Textbox(
                label="Number of Likes",
                interactive=False,
            )
        submit_button = gr.Button("Submit Model", variant="primary")
        submission_result = gr.Markdown()
        fetch_outputs = [precision_input, license_input, params_input, num_downloads_input, num_likes_input]
        fetch_data_button.click(
            fetch_model_information,
            inputs=[model_name_input],
            outputs=fetch_outputs
        )
        model_name_input.submit(
            fetch_model_information,
            inputs=[model_name_input],
            outputs=fetch_outputs
        )
        submit_button.click(
            submit_model,
            inputs=[model_name_input, revision_input, precision_input, params_input, license_input, var],
            outputs=submission_result
        )
        # Load pending, finished, and failed requests
        df_pending = load_requests('pending')
        df_finished = load_requests('finished')
        df_failed = load_requests('failed')
        # Display the tables
        gr.Markdown("## Evaluation Status")
        with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=False):
            if not df_pending.empty:
                gr.Dataframe(df_pending)
            else:
                gr.Markdown("No pending evaluations.")
        with gr.Accordion(f"Finished Evaluations ({len(df_finished)})", open=False):
            if not df_finished.empty:
                gr.Dataframe(df_finished)
            else:
                gr.Markdown("No finished evaluations.")
        with gr.Accordion(f"Failed Evaluations ({len(df_failed)})", open=False):
            if not df_failed.empty:
                gr.Dataframe(df_failed)
            else:
                gr.Markdown("No failed evaluations.")
    return submitter_tab
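
# Minimal launch sketch (assumption: this module is the Space's app entry point and the
# submission tabs are meant to be mounted inside a gr.Blocks demo; the page title and
# heading below are illustrative, not taken from the original file). "Retriever" and
# "Reranker" are the task names submit_model recognizes.
if __name__ == "__main__":
    with gr.Blocks(title="Model Submission") as demo:
        gr.Markdown("# Submit a model for evaluation")
        submit_gradio_module("Retriever")
        submit_gradio_module("Reranker")
    demo.launch()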