Spaces: Sleeping
Update evaluate.py
Browse files
- evaluate.py +1 -1
evaluate.py
CHANGED
|
@@ -581,6 +581,7 @@ def run_comprehensive_evaluation(
|
|
| 581 |
- **Behaviour F1 (avg)**: {bf1_mean:.2f}%
|
| 582 |
- **Emotion F1 (avg)**: {ef1_mean:.2f}%
|
| 583 |
- **Topic F1 (avg)**: {tf1_mean:.2f}%
|
|
|
|
| 584 |
"""
|
| 585 |
# END of summary_text
|
| 586 |
|
|
@@ -607,7 +608,6 @@ def run_comprehensive_evaluation(
|
|
| 607 |
recall_at_5_mean = to_f(df["recall_at_5"]).mean() * 100
|
| 608 |
|
| 609 |
rag_summary = f"""
|
| 610 |
-
- **Context F1 (avg)**: {cf1_mean:.2f}%
|
| 611 |
- **RAG: Context Precision**: {context_precision_mean * 100:.1f}%
|
| 612 |
- **RAG: Context Recall**: {context_recall_mean * 100:.1f}%
|
| 613 |
- **RAG: Recall@5**: {recall_at_5_mean:.1f}%
|
|
|
|
| 581 |
- **Behaviour F1 (avg)**: {bf1_mean:.2f}%
|
| 582 |
- **Emotion F1 (avg)**: {ef1_mean:.2f}%
|
| 583 |
- **Topic F1 (avg)**: {tf1_mean:.2f}%
|
| 584 |
+
- **Context F1 (avg)**: {cf1_mean:.2f}%
|
| 585 |
"""
|
| 586 |
# END of summary_text
|
| 587 |
|
|
|
|
| 608 |
recall_at_5_mean = to_f(df["recall_at_5"]).mean() * 100
|
| 609 |
|
| 610 |
rag_summary = f"""
|
|
|
|
| 611 |
- **RAG: Context Precision**: {context_precision_mean * 100:.1f}%
|
| 612 |
- **RAG: Context Recall**: {context_recall_mean * 100:.1f}%
|
| 613 |
- **RAG: Recall@5**: {recall_at_5_mean:.1f}%
|