Spaces:

mrm8488
/

test_dev_mode

Sleeping

App Files Files Community

mrm8488 committed on May 23

Commit

e706e8c

•

1 Parent(s): 454e13a

Feat: add task synonyms

Browse files

Files changed (1) hide show

app.py +28 -5

app.py CHANGED Viewed

@@ -1,5 +1,19 @@
 import gradio as gr
 # LLM performance data with scores
 performance_data = {
     "Undergraduate level knowledge": [("Claude 3 Opus", 86.8), ("GPT-4", 86.4), ("Gemini 1.0 Ultra", 83.7)],
@@ -15,9 +29,18 @@ performance_data = {
 }
 def recommend_llm(task):
-    recommendations = performance_data.get(task, [])
-    if not recommendations:
         return "No data available"
     recommendations_sorted = sorted(recommendations, key=lambda x: x[1], reverse=True)
     result = f"For {task}, the recommended LLMs are:\n"
     for i, (model, score) in enumerate(recommendations_sorted):
@@ -27,11 +50,11 @@ def recommend_llm(task):
 # Gradio interface
 interface = gr.Interface(
     fn=recommend_llm,
-    inputs=gr.Dropdown(list(performance_data.keys()), label="Select Task"),
     outputs=gr.Textbox(label="LLM Recommendations"),
     title="LLM Recommendation App",
-    description="Select a task to get recommendations for the best LLMs based on performance data."
 )
 # Launch the app
-interface.launch()

 import gradio as gr
+# Synonyms for each task category
+task_synonyms = {
+    "Undergraduate level knowledge": ["undergraduate level knowledge", "MMLU"],
+    "Graduate level reasoning": ["graduate level reasoning", "GPQA", "Diamond"],
+    "Grade school math": ["grade school math", "GSM8K"],
+    "Math problem-solving": ["math problem-solving", "MATH"],
+    "Multilingual math": ["multilingual math", "MGSM"],
+    "Code": ["code", "coding", "programming", "HumanEval"],
+    "Reasoning over text": ["reasoning over text", "DROP", "F1 score"],
+    "Mixed evaluations": ["mixed evaluations", "BIG-Bench-Hard"],
+    "Knowledge Q&A": ["knowledge Q&A", "ARC-Challenge"],
+    "Common Knowledge": ["common knowledge", "HellaSwag"],
+}
 # LLM performance data with scores
 performance_data = {
     "Undergraduate level knowledge": [("Claude 3 Opus", 86.8), ("GPT-4", 86.4), ("Gemini 1.0 Ultra", 83.7)],
 }
 def recommend_llm(task):
+    # Normalize the input task to match against synonyms
+    task_lower = task.lower()
+    main_category = None
+    for key, synonyms in task_synonyms.items():
+        if task_lower in map(str.lower, synonyms):
+            main_category = key
+            break
+    if not main_category:
         return "No data available"
+    recommendations = performance_data.get(main_category, [])
     recommendations_sorted = sorted(recommendations, key=lambda x: x[1], reverse=True)
     result = f"For {task}, the recommended LLMs are:\n"
     for i, (model, score) in enumerate(recommendations_sorted):
 # Gradio interface
 interface = gr.Interface(
     fn=recommend_llm,
+    inputs=gr.Textbox(label="Enter Task"),
     outputs=gr.Textbox(label="LLM Recommendations"),
     title="LLM Recommendation App",
+    description="Enter a task to get recommendations for the best LLMs based on performance data. For example, you can enter 'coding', 'undergraduate level knowledge', etc."
 )
 # Launch the app
+interface.launch()