navidved committed on
Commit 9fc679b
1 Parent(s): 5a85062

Update app.py

Files changed (1)
  1. app.py +80 -36
app.py CHANGED
@@ -2,16 +2,17 @@ import gradio as gr
 import pandas as pd
 import json
 from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS
-from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
+from init import is_model_on_hub, load_all_info_from_dataset_hub
 from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
 from datetime import datetime, timezone
+import torch

 LAST_UPDATED = "OCT 2nd 2024"

 column_names = {
-    "Model": "Model",
-    "WER": "WER",
-    "CER": "CER",
+    "MODEL": "Model",
+    "WER": "WER ⬇️",
+    "CER": "CER ⬇️",
 }

 # Load evaluation results
@@ -31,13 +32,13 @@ def formatter(x):
     return round(x, 2)

 for col in original_df.columns:
-    if col == "Model":
+    if col == "model":
         original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
     else:
         original_df[col] = original_df[col].apply(formatter)

 original_df.rename(columns=column_names, inplace=True)
-original_df.sort_values(by='WER', inplace=True)
+original_df.sort_values(by='WER ⬇️', inplace=True)

 COLS = [c.name for c in fields(AutoEvalColumn)]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
@@ -49,39 +50,82 @@ def request_model(model_text):
     if not base_model_on_hub:
         return styled_error(f"Base model '{model_text}' {error_msg}")

-    # Construct the output dictionary
-    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    eval_entry = {
-        "date": current_time,
-        "model": model_text,
-        "dataset": "vargha/common_voice_fa"
-    }
+    # Check if the model has already been evaluated
+    if model_text in original_df['Model'].apply(lambda x: x.split('href="')[1].split('"')[0].replace('https://huggingface.co/', '')).values:
+        return styled_error(f"The model '{model_text}' is already in the leaderboard.")

-    # Prepare file path
-    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)
-
-    filename = model_text.replace("/", "@")
-    if filename in requested_models:
-        return styled_error(f"A request for this model '{model_text}' was already made.")
     try:
-        filename_ext = filename + ".txt"
-        out_filepath = DIR_OUTPUT_REQUESTS / filename_ext
-
-        # Write the results to a text file
-        with open(out_filepath, "w") as f:
-            f.write(json.dumps(eval_entry))
-
-        upload_file(filename, out_filepath)
-
-        # Include file in the list of uploaded files
-        requested_models.append(filename)
-
-        # Remove the local file
-        out_filepath.unlink()
+        # Run the evaluation code
+        from transformers import pipeline
+        from transformers.utils import is_flash_attn_2_available
+        from datasets import load_dataset
+        from tqdm import tqdm
+        from transformers.pipelines.pt_utils import KeyDataset
+        from evaluate import load
+
+        # Load a subset of the Common Voice test dataset for evaluation
+        common_voice_test = load_dataset(
+            "mozilla-foundation/common_voice_17_0", "fa", split="test"
+        ).shuffle(seed=42).select(range(len(load_dataset(
+            "mozilla-foundation/common_voice_17_0", "fa", split="test")) // 15))
+
+        # Initialize the pipeline with the requested model
+        pipe = pipeline(
+            "automatic-speech-recognition",
+            model=model_text,
+            torch_dtype=torch.float32,
+            device=0 if torch.cuda.is_available() else -1,  # Use GPU if available
+        )
+
+        all_predictions = []
+
+        # Run inference
+        for prediction in tqdm(
+            pipe(
+                KeyDataset(common_voice_test, "audio"),
+                max_new_tokens=128,
+                chunk_length_s=30,
+                generate_kwargs={"task": "transcribe"},
+                batch_size=32,
+            ),
+            total=len(common_voice_test),
+        ):
+            all_predictions.append(prediction["text"])
+
+        wer_metric = load("wer")
+        cer_metric = load("cer")
+
+        wer_result = 100 * wer_metric.compute(
+            references=common_voice_test["sentence"], predictions=all_predictions
+        )
+
+        cer_result = 100 * cer_metric.compute(
+            references=common_voice_test["sentence"], predictions=all_predictions
+        )
+
+        # Update the results CSV
+        new_row = {'model': model_text, 'wer': wer_result, 'cer': cer_result}
+        df_results = pd.read_csv(csv_results)
+        df_results = df_results.append(new_row, ignore_index=True)
+        df_results.to_csv(csv_results, index=False)
+
+        # Update the leaderboard DataFrame
+        global original_df
+        original_df = df_results.copy()
+        original_df['Model'] = original_df['model'].apply(lambda x: make_clickable_model(x))
+        original_df['WER ⬇️'] = original_df['wer'].apply(lambda x: round(x, 2))
+        original_df['CER ⬇️'] = original_df['cer'].apply(lambda x: round(x, 2))
+        original_df = original_df[['Model', 'WER ⬇️', 'CER ⬇️']]
+        original_df.sort_values(by='WER ⬇️', inplace=True)
+
+        # Update the leaderboard table in the UI
+        leaderboard_table.update(value=original_df)
+
+        # Return success message
+        return styled_message("🤗 Your model has been evaluated and added to the leaderboard!")

-        return styled_message("🤗 Your request has been submitted and will be evaluated soon!</p>")
     except Exception as e:
-        return styled_error(f"Error submitting request: {e}")
+        return styled_error(f"Error during evaluation: {e}")

 with gr.Blocks() as demo:
     gr.HTML(BANNER, elem_id="banner")
@@ -89,7 +133,7 @@ with gr.Blocks() as demo:

     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
-            leaderboard_table = gr.components.Dataframe(
+            leaderboard_table = gr.Dataframe(
                 value=original_df,
                 datatype=TYPES,
                 elem_id="leaderboard-table",