Omartificial-Intelligence-Space committed on
Commit
61dd04e
1 Parent(s): 6f8d49a

update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -35
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import spaces
3
  import torch
4
  import pandas as pd
5
- import matplotlib.pyplot as plt
6
  from datasets import load_dataset
7
  from sentence_transformers import SentenceTransformer
8
  from sentence_transformers.evaluation import InformationRetrievalEvaluator, SequentialEvaluator
@@ -13,7 +13,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
13
  zero = torch.Tensor([0]).to(device)
14
  print(f"Device being used: {zero.device}")
15
 
16
- @spaces.GPU(duration=120)
17
  def evaluate_model(model_id):
18
  model = SentenceTransformer(model_id, device=device)
19
  matryoshka_dimensions = [768, 512, 256, 128, 64]
@@ -21,7 +21,7 @@ def evaluate_model(model_id):
21
  # Prepare datasets
22
  datasets_info = [
23
  {
24
- "name": "Arabic Financial Dataset (Financial Evaluation)",
25
  "dataset_id": "Omartificial-Intelligence-Space/Arabic-finanical-rag-embedding-dataset",
26
  "split": "train",
27
  "size": 7000,
@@ -29,7 +29,7 @@ def evaluate_model(model_id):
29
  "sample_size": 500
30
  },
31
  {
32
- "name": "MLQA Arabic (Long Context Evaluation)",
33
  "dataset_id": "google/xtreme",
34
  "subset": "MLQA.ar.ar",
35
  "split": "validation",
@@ -38,7 +38,7 @@ def evaluate_model(model_id):
38
  "sample_size": 500
39
  },
40
  {
41
- "name": "ARCD (Short Context Evaluation)",
42
  "dataset_id": "hsseinmz/arcd",
43
  "split": "train",
44
  "size": None,
@@ -105,24 +105,32 @@ def evaluate_model(model_id):
105
  })
106
  scores.append(score)
107
 
108
- # Store scores by dataset for bar chart creation
109
  scores_by_dataset[dataset_info["name"]] = scores
110
 
111
  # Convert results to DataFrame for display
112
  result_df = pd.DataFrame(evaluation_results)
113
 
114
- # Generate bar charts for each dataset
115
  charts = []
116
- colors = ['#FF5733', '#33FF57', '#3357FF', '#FF33C4', '#F3FF33'] # Creative color palette
 
117
  for dataset_name, scores in scores_by_dataset.items():
118
- fig, ax = plt.subplots()
119
- ax.bar([str(dim) for dim in matryoshka_dimensions], scores, color=colors)
120
- ax.set_title(f"{dataset_name} Evaluation Scores", fontsize=16, color='darkblue')
121
- ax.set_xlabel("Embedding Dimension", fontsize=12)
122
- ax.set_ylabel("NDCG@10 Score", fontsize=12)
123
- ax.spines['top'].set_visible(False)
124
- ax.spines['right'].set_visible(False)
125
- plt.tight_layout()
 
 
 
 
 
 
 
126
  charts.append(fig)
127
 
128
  return result_df, charts[0], charts[1], charts[2]
@@ -134,32 +142,23 @@ def display_results(model_name):
134
 
135
  demo = gr.Interface(
136
  fn=display_results,
137
- inputs=gr.Textbox(label="Enter Your Embedding Model ID", placeholder="e.g., Omartificial-Intelligence-Space/GATE-AraBert-v1"),
138
  outputs=[
139
  gr.Dataframe(label="Evaluation Results"),
140
- gr.Plot(label="Arabic Financial Dataset (Financial Evaluation)"),
141
- gr.Plot(label="MLQA Arabic (Long Context Evaluation)"),
142
- gr.Plot(label="ARCD (Short Context Evaluation)")
143
  ],
144
- title="Evaluation of Arabic Matryoshka Embedding Models on Retrieval Tasks ",
145
  description=(
146
- "Evaluate your Sentence Transformer model's performance on **context and question retrieval** for Arabic datasets for enhancing Arabic RAG.\n"
147
- "- **ARCD** evaluates short context retrieval performance.\n"
148
- "- **MLQA Arabic** evaluates long context retrieval performance.\n"
149
- "- **Arabic Financial Dataset** focuses on financial context retrieval.\n\n"
150
- "**Evaluation Metric:**\n"
151
- "The evaluation uses **NDCG@10** (Normalized Discounted Cumulative Gain), which measures how well the retrieved documents (contexts) match the query relevance.\n"
152
- "Higher scores indicate better performance. Embedding dimensions are reduced from 768 to 64, evaluating how well the model performs with fewer dimensions."
153
  ),
154
  theme="default",
155
  live=False,
156
  css="footer {visibility: hidden;}"
157
  )
158
 
159
- demo.launch(share=True)
160
-
161
-
162
- demo.launch(share=True)
163
-
164
- # Add the footer
165
- print("\nCreated by Omar Najar | Omartificial Intelligence Space")
 
2
  import spaces
3
  import torch
4
  import pandas as pd
5
+ import plotly.graph_objects as go
6
  from datasets import load_dataset
7
  from sentence_transformers import SentenceTransformer
8
  from sentence_transformers.evaluation import InformationRetrievalEvaluator, SequentialEvaluator
 
13
  zero = torch.Tensor([0]).to(device)
14
  print(f"Device being used: {zero.device}")
15
 
16
+ @spaces.GPU
17
  def evaluate_model(model_id):
18
  model = SentenceTransformer(model_id, device=device)
19
  matryoshka_dimensions = [768, 512, 256, 128, 64]
 
21
  # Prepare datasets
22
  datasets_info = [
23
  {
24
+ "name": "Financial",
25
  "dataset_id": "Omartificial-Intelligence-Space/Arabic-finanical-rag-embedding-dataset",
26
  "split": "train",
27
  "size": 7000,
 
29
  "sample_size": 500
30
  },
31
  {
32
+ "name": "MLQA",
33
  "dataset_id": "google/xtreme",
34
  "subset": "MLQA.ar.ar",
35
  "split": "validation",
 
38
  "sample_size": 500
39
  },
40
  {
41
+ "name": "ARCD",
42
  "dataset_id": "hsseinmz/arcd",
43
  "split": "train",
44
  "size": None,
 
105
  })
106
  scores.append(score)
107
 
108
+ # Store scores by dataset for plot creation
109
  scores_by_dataset[dataset_info["name"]] = scores
110
 
111
  # Convert results to DataFrame for display
112
  result_df = pd.DataFrame(evaluation_results)
113
 
114
+ # Generate bar charts for each dataset using Plotly
115
  charts = []
116
+ color_scale = ['#003f5c', '#2f4b7c', '#665191', '#a05195', '#d45087']
117
+
118
  for dataset_name, scores in scores_by_dataset.items():
119
+ fig = go.Figure()
120
+ fig.add_trace(go.Bar(
121
+ x=[str(dim) for dim in matryoshka_dimensions],
122
+ y=scores,
123
+ marker_color=color_scale,
124
+ text=[f"{score:.3f}" if score else "N/A" for score in scores],
125
+ textposition='auto'
126
+ ))
127
+
128
+ fig.update_layout(
129
+ title=f"{dataset_name} Evaluation",
130
+ xaxis_title="Embedding Dimension",
131
+ yaxis_title="NDCG@10 Score",
132
+ template="plotly_white"
133
+ )
134
  charts.append(fig)
135
 
136
  return result_df, charts[0], charts[1], charts[2]
 
142
 
143
  demo = gr.Interface(
144
  fn=display_results,
145
+ inputs=gr.Textbox(label="Enter a Hugging Face Model ID", placeholder="e.g., sentence-transformers/all-MiniLM-L6-v2"),
146
  outputs=[
147
  gr.Dataframe(label="Evaluation Results"),
148
+ gr.Plot(label="Financial Dataset"),
149
+ gr.Plot(label="MLQA Dataset"),
150
+ gr.Plot(label="ARCD Dataset")
151
  ],
152
+ title="Arabic Embedding Evaluation",
153
  description=(
154
+ "Evaluate your Sentence Transformer model on **Arabic retrieval tasks** using Matryoshka embeddings. "
155
+ "Compare performance across financial, long-context, and short-context datasets.\n\n"
156
+ "The evaluation uses **NDCG@10** to measure how well the model retrieves relevant contexts. "
157
+ "Embedding dimensions are reduced from 768 to 64."
 
 
 
158
  ),
159
  theme="default",
160
  live=False,
161
  css="footer {visibility: hidden;}"
162
  )
163
 
164
+ demo.launch(share=True)