import os
import unicodedata

from apscheduler.schedulers.background import BackgroundScheduler
import gradio as gr
import pandas as pd

from src.control_panel import create_control_panel, create_control_callback
from src.leaderboard import (
    create_leaderboard_table,
    COLS,
    TYPES,
)
from src.llm_perf import get_llm_perf_df
from src.content import (
    LOGO,
    TITLE,
    ABOUT,
    INTRODUCTION,
    EXAMPLE_CONFIG,
    CITATION_BUTTON,
    CITATION_BUTTON_LABEL,
)

MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB-275W 🖥️", "audace": "RTX4090-24GB-450W 💻"}
# Read from the environment; None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", None)


def restart_space():
    """Ask the hosting API to restart the space identified by ``REPO_ID``.

    NOTE(review): neither ``API`` nor ``REPO_ID`` is defined or imported in
    the visible part of this file. Unless they are provided elsewhere, every
    call raises ``NameError`` and the restart never happens -- confirm and
    define them (presumably a Hugging Face Hub client and this space's id).
    """
    API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)


def is_valid_unicode(char):
    """Return True if ``char`` has a name in the Unicode character database.

    ``unicodedata.name`` raises ``ValueError`` for code points without a
    name. That includes control characters such as ``"\\n"`` and ``"\\t"``,
    so those are reported as invalid as well.

    Args:
        char: A single-character string.

    Returns:
        True when the character has a Unicode name, False otherwise.
    """
    try:
        unicodedata.name(char)
    except ValueError:
        return False  # No name assigned: treated as invalid
    return True  # Named character: treated as valid


def remove_invalid_unicode(input_string):
    """Drop every character of a string that ``is_valid_unicode`` rejects.

    Args:
        input_string: Any value. Only ``str`` instances are filtered.

    Returns:
        The filtered string, or ``input_string`` itself when it is not a
        string (e.g. numbers or missing values inside a DataFrame column).
    """
    if not isinstance(input_string, str):
        return input_string  # Return non-string values as is
    return ''.join(char for char in input_string if is_valid_unicode(char))


def preprocess_dataframe(df):
    """Clean every object-dtype column of ``df`` with ``remove_invalid_unicode``.

    The columns are reassigned on ``df`` itself, so the caller's DataFrame is
    modified; the same object is returned for convenience.

    Args:
        df: DataFrame to clean.

    Returns:
        The same DataFrame, with its object-dtype columns cleaned.
    """
    for column in df.columns:
        # Object dtype is used here as the marker for string columns.
        if df[column].dtype == 'object':
            df[column] = df[column].apply(remove_invalid_unicode)
    return df


# Not referenced again in the visible part of this file; kept so the
# module-level name continues to exist.
leaderboard_df = None

# Apply preprocessing right after fetching the DataFrame and before passing
# it to create_leaderboard_table.
llm_perf_df = get_llm_perf_df()
llm_perf_df = preprocess_dataframe(llm_perf_df)  # Make sure the DataFrame is cleaned up
print(llm_perf_df)
leaderboard_table = create_leaderboard_table(llm_perf_df)


def display(x, y):
    """Return the module-level ``leaderboard_table`` when it is a DataFrame.

    Args:
        x: Ignored. Present because the function is wired to an interface
            with two input components.
        y: Ignored, for the same reason.

    Returns:
        ``leaderboard_table`` if it is a ``pandas.DataFrame``; otherwise
        ``None``, after printing a diagnostic message.
    """
    if isinstance(leaderboard_table, pd.DataFrame):
        return leaderboard_table
    # leaderboard_table is not a DataFrame: report it and return nothing.
    # NOTE(review): the return type of create_leaderboard_table is not
    # visible here -- confirm which branch is actually taken at runtime.
    print("leaderboard_table is not a DataFrame.")
    return None


# Hidden textbox used as the second (dummy) input of the interface.
dummy1 = gr.Textbox(visible=False)
# Markdown shown in the interface. It explains that this space only exposes
# the leaderboard data through the Gradio API and embeds a client script.
# The fenced sample needs real line breaks: Python is whitespace-sensitive
# and a Markdown code fence must start on its own line.
INTRODUCTION_TEXT = """
This is a copied space from LLM Trustworthy Leaderboard. Instead of displaying the results as table this space was modified to simply provides a gradio API interface. Using the following python script below, users can access the full leaderboard data easily.

Python on how to access the data:

```python
# Import dependencies
from gradio_client import Client

# Initialize the Gradio client with the API URL
client = Client("https://rodrigomasini-data-only-llm-perf-leaderboard.hf.space/")

try:
    # Perform the API call
    response = client.predict("","", api_name='/predict')

    # Check if response it's directly accessible
    if len(response) > 0:
        print("Response received!")
        headers = response.get('headers', [])
        data = response.get('data', [])

        print(headers)

        # Remove commenst if you want to download the dataset and save in csv format
        #import csv
        # Specify the path to your CSV file
        #csv_file_path = 'llm-perf-benchmark.csv'

        # Open the CSV file for writing
        #with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        #    writer = csv.writer(file)

            # Write the headers
        #    writer.writerow(headers)

            # Write the data
        #    for row in data:
        #        writer.writerow(row)

        #print(f"Results saved to {csv_file_path}")

    # If the above line prints a string that looks like JSON, you can parse it with json.loads(response)
    # Otherwise, you might need to adjust based on the actual structure of `response`

except Exception as e:
    print(f"An error occurred: {e}")
```
"""

# The Markdown component is passed as the first input so the introduction is
# shown with the interface; `display` receives both inputs and ignores them.
interface = gr.Interface(
    fn=display,
    inputs=[gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text"), dummy1],
    outputs=[leaderboard_table],
)

# Call restart_space every 1800 seconds (30 minutes) from a background thread.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

interface.launch()