mrm8488 committed on
Commit
454e13a
1 Parent(s): e0eb092

Update app

Browse files
Files changed (2) hide show
  1. Untitled-1.ipynb +0 -34
  2. app.py +34 -4
Untitled-1.ipynb DELETED
@@ -1,34 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "10 ** 10"
10
- ]
11
- },
12
- {
13
- "cell_type": "markdown",
14
- "metadata": {},
15
- "source": [
16
- "1+1"
17
- ]
18
- },
19
- {
20
- "cell_type": "markdown",
21
- "metadata": {},
22
- "source": [
23
- "# Testing dev mode"
24
- ]
25
- }
26
- ],
27
- "metadata": {
28
- "language_info": {
29
- "name": "python"
30
- }
31
- },
32
- "nbformat": 4,
33
- "nbformat_minor": 2
34
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,7 +1,37 @@
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
# Benchmark scores per task category.
# Maps a task name to a list of (model name, score) pairs. The pairs do not
# need to be pre-sorted: recommend_llm orders them by score before display.
performance_data = {
    "Undergraduate level knowledge": [
        ("Claude 3 Opus", 86.8),
        ("GPT-4", 86.4),
        ("Gemini 1.0 Ultra", 83.7),
    ],
    "Graduate level reasoning": [
        ("Claude 3 Opus", 50.4),
        ("Claude 3 Sonnet", 40.4),
        ("GPT-4", 35.7),
    ],
    "Grade school math": [
        ("Claude 3 Opus", 95.0),
        ("Gemini 1.0 Ultra", 94.4),
        ("GPT-4", 92.0),
    ],
    "Math problem-solving": [
        ("Claude 3 Opus", 60.1),
        ("Gemini 1.0 Ultra", 53.2),
        ("GPT-4", 52.9),
    ],
    "Multilingual math": [
        ("Claude 3 Opus", 90.7),
        ("Claude 3 Sonnet", 83.5),
        ("Gemini 1.0 Ultra", 79.0),
    ],
    "Code": [
        ("Claude 3 Opus", 84.9),
        ("Gemini 1.0 Ultra", 74.4),
        ("Claude 3 Haiku", 75.9),
    ],
    "Reasoning over text": [
        ("Claude 3 Opus", 83.1),
        ("Gemini 1.0 Ultra", 82.4),
        ("GPT-4", 80.9),
    ],
    "Mixed evaluations": [
        ("Claude 3 Opus", 86.8),
        ("Gemini 1.0 Ultra", 83.6),
        ("GPT-4", 83.1),
    ],
    "Knowledge Q&A": [
        ("Claude 3 Opus", 96.4),
        ("GPT-4", 96.3),
        ("Claude 3 Sonnet", 93.2),
    ],
    "Common Knowledge": [
        ("Claude 3 Opus", 95.4),
        ("GPT-4", 95.3),
        ("Gemini 1.0 Ultra", 87.8),
    ],
}
16
 
17
def recommend_llm(task):
    """Return a ranked, human-readable list of the LLMs recorded for ``task``.

    Looks ``task`` up in the module-level ``performance_data`` mapping and
    orders its ``(model, score)`` pairs from highest to lowest score.

    Args:
        task: Task name used as a key into ``performance_data``.

    Returns:
        A header line followed by one numbered line per model, each line
        terminated by a newline; or ``"No data available"`` when the task is
        not in the mapping or has no entries.
    """
    entries = performance_data.get(task, [])
    if not entries:
        return "No data available"

    # Highest score first; sorted() is stable, so ties keep their listed order.
    ranked = sorted(entries, key=lambda entry: entry[1], reverse=True)

    lines = [f"For {task}, the recommended LLMs are:\n"]
    lines.extend(
        f"{rank}. {model} with a score of {score}%\n"
        for rank, (model, score) in enumerate(ranked, start=1)
    )
    return "".join(lines)
26
+
27
# Gradio interface: a dropdown of the known task names feeds recommend_llm,
# and the resulting text is shown in a textbox.
interface = gr.Interface(
    title="LLM Recommendation App",
    description="Select a task to get recommendations for the best LLMs based on performance data.",
    fn=recommend_llm,
    inputs=gr.Dropdown(list(performance_data), label="Select Task"),
    outputs=gr.Textbox(label="LLM Recommendations"),
)

# Launch the app
interface.launch()