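# Hugging Face Hub file-viewer header captured with the source (not program code):
#   author: rodrigomasini | commit message: "Update app.py" | commit: 964afcc (verified)
#   viewer links: raw, history, blame | file size: 4.16 kB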
import os
from apscheduler.schedulers.background import BackgroundScheduler
import gradio as gr
import pandas as pd
from src.control_panel import create_control_panel, create_control_callback
from src.leaderboard import (
create_leaderboard_table,
COLS,
TYPES,
)
from src.llm_perf import get_llm_perf_df
from src.content import (
LOGO,
TITLE,
ABOUT,
INTRODUCTION,
EXAMPLE_CONFIG,
CITATION_BUTTON,
CITATION_BUTTON_LABEL,
)
# Hardware label associated with each machine name.
MACHINE_TO_HARDWARE = {
    "hf-dgx-01": "A100-80GB-275W 🖥️",
    "audace": "RTX4090-24GB-450W 💻",
}

# Token read from the HF_TOKEN environment variable; None when it is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
def restart_space():
    """Restart the Space identified by ``REPO_ID`` through ``API.restart_space``.

    The call is authenticated with ``HF_TOKEN``. Nothing is returned.
    """
    # NOTE(review): neither `API` nor `REPO_ID` is defined or imported in the
    # visible part of this file. Confirm they exist somewhere; otherwise every
    # invocation of this function raises NameError.
    API.restart_space(token=HF_TOKEN, repo_id=REPO_ID)
import unicodedata
def is_valid_unicode(char):
    """Tell whether *char* has a name in the Unicode character database.

    "Valid" here means exactly "``unicodedata.name`` knows a name for it".
    Characters without a name are reported as invalid; that includes
    unassigned code points and lone surrogates, but also control characters
    such as newline and tab.

    Args:
        char: A single-character string.

    Returns:
        True if the character has a Unicode name, False otherwise.

    Raises:
        TypeError: If *char* is not a string of length one (raised by
            ``unicodedata.name`` itself).
    """
    # With a default supplied, unicodedata.name() returns it for unnamed
    # characters instead of raising ValueError.
    return unicodedata.name(char, None) is not None
def remove_invalid_unicode(input_string):
    """Drop every character that ``is_valid_unicode`` rejects.

    Args:
        input_string: The value to clean. Anything that is not a ``str`` is
            passed through untouched, so the function can be applied to
            columns holding mixed values.

    Returns:
        The string with rejected characters removed, or the original object
        when it is not a string.
    """
    # Non-string values are handed back unchanged.
    if not isinstance(input_string, str):
        return input_string
    return ''.join(filter(is_valid_unicode, input_string))
def preprocess_dataframe(df):
    """Clean the object-dtype columns of *df* with ``remove_invalid_unicode``.

    The frame is modified in place: each column whose dtype compares equal to
    ``'object'`` is replaced by its cleaned version. Other columns are left
    alone.

    Args:
        df: The DataFrame to clean.

    Returns:
        The same DataFrame object that was passed in.
    """
    for name in df.columns:
        series = df[name]
        # Only object-dtype columns are treated as text columns.
        if series.dtype != 'object':
            continue
        df[name] = series.apply(remove_invalid_unicode)
    return df
# Load the benchmark results and clean their text columns in one step.
llm_perf_df = preprocess_dataframe(get_llm_perf_df())

# Build the table that the interface returns.
leaderboard_table = create_leaderboard_table(llm_perf_df)

# Invisible, non-interactive Dataframe component used as the interface output.
hidden_leaderboard_table_for_search = gr.components.Dataframe(
    leaderboard_table,
    headers=COLS,
    datatype=TYPES,
    interactive=False,
    visible=False,
    line_breaks=False,
)
def display(x, y):
    """Return the ``COLS`` columns of the cleaned leaderboard table.

    The module-level ``leaderboard_table`` is cleaned in place by
    ``preprocess_dataframe`` on every call, then narrowed to the columns
    listed in ``COLS``.

    Args:
        x: Unused; present because the interface passes one value per input
            component.
        y: Unused; same reason as ``x``.

    Returns:
        ``leaderboard_table[COLS]`` after cleaning.
    """
    # Reuse the shared helper instead of repeating its column loop here.
    return preprocess_dataframe(leaderboard_table)[COLS]
# Invisible textbox used as the second input component of the interface, so
# that `display(x, y)` receives its second argument.
dummy1 = gr.Textbox(visible=False)
# Markdown shown in the interface: a short description of the Space followed by
# a client-side Python example. The example must stay valid Python, since users
# are expected to copy and run it.
INTRODUCTION_TEXT = """
This is a copied space from LLM Trustworthy Leaderboard. Instead of displaying
the results as a table, this space was modified to simply provide a Gradio API interface.
Using the Python script below, users can access the full leaderboard data easily.
Python example on how to access the data:
```python
# Import dependencies
import csv
from gradio_client import Client

# Initialize the Gradio client with the API URL
client = Client("https://rodrigomasini-data-only-llm-perf-leaderboard.hf.space/")

try:
    # Perform the API call
    response = client.predict("","", api_name='/predict')

    # Check if the response is directly accessible
    if len(response) > 0:
        print("Response received!")
        headers = response.get('headers', [])
        data = response.get('data', [])
        print(headers)

        # Remove the comments if you want to download the dataset and save it in CSV format
        # Specify the path to your CSV file
        #csv_file_path = 'llm-perf-benchmark.csv'
        # Open the CSV file for writing
        #with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file:
        #    writer = csv.writer(file)
        #    # Write the headers
        #    writer.writerow(headers)
        #    # Write the data
        #    for row in data:
        #        writer.writerow(row)
        #print(f"Results saved to {csv_file_path}")

    # If the above line prints a string that looks like JSON, you can parse it with json.loads(response)
    # Otherwise, you might need to adjust based on the actual structure of `response`
except Exception as e:
    print(f"An error occurred: {e}")
```
"""
# Expose `display` through a Gradio interface. The Markdown block and the
# hidden textbox are the two input components (one per `display` argument);
# the hidden Dataframe component is the output.
interface = gr.Interface(
    fn=display,
    inputs=[
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text"),
        dummy1,
    ],
    outputs=[hidden_leaderboard_table_for_search],
)

# Call restart_space every 1800 seconds (30 minutes) from a background
# scheduler. It is started before launch() so the job is registered first.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=1800)
scheduler.start()

interface.launch()