shanchen committed on
Commit
59e18af
•
1 Parent(s): 1ab4d20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -17,7 +17,7 @@ explanation_data = {
17
  "Adjusted Robustness Score"
18
  ],
19
  "Description": [
20
- "A custom MC task where the model is asked to match a brand name to its generic counterpart and vice versa. This task is designed to test the model's ability to understand drug name synonyms.",
21
  "G2B Refers to the 'Generic' to 'Brand' name swap. This is model accuracy on MedMCQA task where generic drug names are substituted with brand names.",
22
  "Model accuracy on MedMCQA task with original data. (Only includes questions that overlap with the g2b dataset)",
23
  "Difference in MedMCQA accuracy for swapped and non-swapped datasets, highlighting the impact of G2B drug name substitution on performance.",
@@ -55,7 +55,7 @@ df.rename(columns={
55
  }, inplace=True)
56
 
57
  # Sort DataFrame by DrugMatchQA descending
58
- df = df.sort_values(by='average_g2b', ascending=False)
59
 
60
  #Create adjusted robustness score that accounts for g2b accuracy and difference in accuracy
61
 
 
17
  "Adjusted Robustness Score"
18
  ],
19
  "Description": [
20
+ "A custom MC task where the model is asked to match a brand name to its generic counterpart and vice versa. This task is designed to test the model's ability to understand drug name synonyms. Gemini results are missing due to their safety filters",
21
  "G2B Refers to the 'Generic' to 'Brand' name swap. This is model accuracy on MedMCQA task where generic drug names are substituted with brand names.",
22
  "Model accuracy on MedMCQA task with original data. (Only includes questions that overlap with the g2b dataset)",
23
  "Difference in MedMCQA accuracy for swapped and non-swapped datasets, highlighting the impact of G2B drug name substitution on performance.",
 
55
  }, inplace=True)
56
 
57
  # Sort DataFrame by DrugMatchQA descending
58
+ df = df.sort_values(by='Average G2B Accuracy', ascending=False)
59
 
60
  #Create adjusted robustness score that accounts for g2b accuracy and difference in accuracy
61