Spaces:
Running
Running
mehran
committed on
Commit
·
532ac38
1
Parent(s):
a274034
update gpt-oss results
Browse files- leaderboard/__pycache__/leaderboard.cpython-310.pyc +0 -0
- leaderboard/boards_data/MMLU.jsonl +1 -1
- leaderboard/boards_data/all.jsonl +2 -2
- leaderboard/boards_data/extractive-qa_PQuAD.jsonl +1 -1
- leaderboard/boards_data/ifeval.jsonl +1 -1
- leaderboard/boards_data/keyword-extraction_SynKeywords.jsonl +1 -1
- leaderboard/boards_data/mt_bench.jsonl +1 -1
- leaderboard/boards_data/ner_arman.jsonl +1 -1
- leaderboard/boards_data/nli_farstail.jsonl +1 -1
- leaderboard/boards_data/paraphrase-detection_FarsiParaphraseDetection.jsonl +1 -1
- leaderboard/boards_data/paraphrase-detection_parsinlu.jsonl +1 -1
- leaderboard/boards_data/persian_csr.jsonl +2 -2
- leaderboard/boards_data/persian_nlg.jsonl +1 -1
- leaderboard/boards_data/persian_nlu.jsonl +1 -1
- leaderboard/boards_data/question-generation_PersianQA.jsonl +1 -1
- leaderboard/boards_data/sentiment-analysis_deepsentipers.jsonl +1 -1
- leaderboard/boards_data/sts_FarSICK.jsonl +1 -1
- leaderboard/boards_data/sts_SynPerSTS.jsonl +1 -1
- leaderboard/boards_data/summarization_PnSummary.jsonl +1 -1
- leaderboard/boards_data/summarization_SamSUM-fa.jsonl +1 -1
- leaderboard/boards_data/tone-classification_SynTone.jsonl +1 -1
- leaderboard/boards_data/topic-classification_sid.jsonl +1 -1
- leaderboard/boards_data/translation-ar2fa_ar2fa.jsonl +1 -1
- leaderboard/boards_data/translation-en2fa_en2fa.jsonl +1 -1
- leaderboard/boards_data/translation-fa2ar_fa2ar.jsonl +1 -1
- leaderboard/boards_data/translation-fa2en_fa2en.jsonl +1 -1
- leaderboard/leaderboard.py +2 -2
- leaderboard/leaderboard_config.yaml +4 -3
leaderboard/__pycache__/leaderboard.cpython-310.pyc
CHANGED
|
Binary files a/leaderboard/__pycache__/leaderboard.cpython-310.pyc and b/leaderboard/__pycache__/leaderboard.cpython-310.pyc differ
|
|
|
leaderboard/boards_data/MMLU.jsonl
CHANGED
|
@@ -8,7 +8,7 @@
|
|
| 8 |
{"Model Name":"gpt-4o","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6884607359,"cinema_acc":0.75,"emergency_number_acc":0.7,"foods_acc":0.78,"games_acc":0.6,"herbal_drugs_acc":0.7,"places_acc":0.8380952381,"poetry_acc":0.9,"politicians_acc":0.95,"popular_people_acc":0.8615384615,"Government_law_acc":0.9347826087,"proverbs_acc":0.8,"religous_acc":0.9333333333,"social_manners_acc":0.8426966292,"souvenirs_acc":0.66,"sports_acc":0.5555555556,"GPK_acc":0.8015952144,"SPK_acc":0.720121842,"UPK_acc":0.6512254587}
|
| 9 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6864436252,"cinema_acc":0.5375,"emergency_number_acc":0.5,"foods_acc":0.72,"games_acc":0.5,"herbal_drugs_acc":0.7,"places_acc":0.8095238095,"poetry_acc":0.425,"politicians_acc":0.6,"popular_people_acc":0.7128205128,"Government_law_acc":0.9347826087,"proverbs_acc":0.69,"religous_acc":0.8222222222,"social_manners_acc":0.7415730337,"souvenirs_acc":0.68,"sports_acc":0.5873015873,"GPK_acc":0.6949152542,"SPK_acc":0.6867944813,"UPK_acc":0.6851020146}
|
| 10 |
{"Model Name":"deepseek-reasoner","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.6810513107,"cinema_acc":0.5125,"emergency_number_acc":0.5,"foods_acc":0.63,"games_acc":0.55,"herbal_drugs_acc":0.65,"places_acc":0.8666666667,"poetry_acc":0.55,"politicians_acc":0.8,"popular_people_acc":0.7435897436,"Government_law_acc":0.9347826087,"proverbs_acc":0.81,"religous_acc":0.9111111111,"social_manners_acc":0.8764044944,"souvenirs_acc":0.72,"sports_acc":0.5079365079,"GPK_acc":0.7288135593,"SPK_acc":0.7400865177,"UPK_acc":0.6328756576}
|
| 11 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"
|
| 12 |
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6466578563,"cinema_acc":0.7125,"emergency_number_acc":0.6,"foods_acc":0.72,"games_acc":0.5,"herbal_drugs_acc":0.7,"places_acc":0.8666666667,"poetry_acc":0.8,"politicians_acc":0.8,"popular_people_acc":0.7743589744,"Government_law_acc":0.9347826087,"proverbs_acc":0.77,"religous_acc":0.9111111111,"social_manners_acc":0.8539325843,"souvenirs_acc":0.68,"sports_acc":0.5873015873,"GPK_acc":0.7686939182,"SPK_acc":0.6764020785,"UPK_acc":0.6096496856}
|
| 13 |
{"Model Name":"gpt-4.1-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6128538638,"cinema_acc":0.525,"emergency_number_acc":0.7,"foods_acc":0.73,"games_acc":0.55,"herbal_drugs_acc":0.625,"places_acc":0.8380952381,"poetry_acc":0.575,"politicians_acc":0.6,"popular_people_acc":0.7076923077,"Government_law_acc":0.847826087,"proverbs_acc":0.71,"religous_acc":0.6666666667,"social_manners_acc":0.8202247191,"souvenirs_acc":0.68,"sports_acc":0.4920634921,"GPK_acc":0.6949152542,"SPK_acc":0.6265902168,"UPK_acc":0.5924547671}
|
| 14 |
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"β","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.5980651448,"cinema_acc":0.6,"emergency_number_acc":0.5,"foods_acc":0.67,"games_acc":0.65,"herbal_drugs_acc":0.675,"places_acc":0.8476190476,"poetry_acc":0.775,"politicians_acc":0.95,"popular_people_acc":0.8092783505,"Government_law_acc":0.8913043478,"proverbs_acc":0.78,"religous_acc":0.8666666667,"social_manners_acc":0.8988764045,"souvenirs_acc":0.68,"sports_acc":0.5396825397,"GPK_acc":0.7604790419,"SPK_acc":0.6417428725,"UPK_acc":0.5458980614}
|
|
|
|
| 8 |
{"Model Name":"gpt-4o","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6884607359,"cinema_acc":0.75,"emergency_number_acc":0.7,"foods_acc":0.78,"games_acc":0.6,"herbal_drugs_acc":0.7,"places_acc":0.8380952381,"poetry_acc":0.9,"politicians_acc":0.95,"popular_people_acc":0.8615384615,"Government_law_acc":0.9347826087,"proverbs_acc":0.8,"religous_acc":0.9333333333,"social_manners_acc":0.8426966292,"souvenirs_acc":0.66,"sports_acc":0.5555555556,"GPK_acc":0.8015952144,"SPK_acc":0.720121842,"UPK_acc":0.6512254587}
|
| 9 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6864436252,"cinema_acc":0.5375,"emergency_number_acc":0.5,"foods_acc":0.72,"games_acc":0.5,"herbal_drugs_acc":0.7,"places_acc":0.8095238095,"poetry_acc":0.425,"politicians_acc":0.6,"popular_people_acc":0.7128205128,"Government_law_acc":0.9347826087,"proverbs_acc":0.69,"religous_acc":0.8222222222,"social_manners_acc":0.7415730337,"souvenirs_acc":0.68,"sports_acc":0.5873015873,"GPK_acc":0.6949152542,"SPK_acc":0.6867944813,"UPK_acc":0.6851020146}
|
| 10 |
{"Model Name":"deepseek-reasoner","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.6810513107,"cinema_acc":0.5125,"emergency_number_acc":0.5,"foods_acc":0.63,"games_acc":0.55,"herbal_drugs_acc":0.65,"places_acc":0.8666666667,"poetry_acc":0.55,"politicians_acc":0.8,"popular_people_acc":0.7435897436,"Government_law_acc":0.9347826087,"proverbs_acc":0.81,"religous_acc":0.9111111111,"social_manners_acc":0.8764044944,"souvenirs_acc":0.72,"sports_acc":0.5079365079,"GPK_acc":0.7288135593,"SPK_acc":0.7400865177,"UPK_acc":0.6328756576}
|
| 11 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"120000000000","source_type":"Open-Source","acc":0.6789316269,"cinema_acc":0.4375,"emergency_number_acc":0.6,"foods_acc":0.72,"games_acc":0.45,"herbal_drugs_acc":0.675,"places_acc":0.7619047619,"poetry_acc":0.45,"politicians_acc":0.6,"popular_people_acc":0.6717948718,"Government_law_acc":0.8260869565,"proverbs_acc":0.57,"religous_acc":0.8222222222,"social_manners_acc":0.7191011236,"souvenirs_acc":0.52,"sports_acc":0.5714285714,"GPK_acc":0.6460618146,"SPK_acc":0.6767604372,"UPK_acc":0.6847170538}
|
| 12 |
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6466578563,"cinema_acc":0.7125,"emergency_number_acc":0.6,"foods_acc":0.72,"games_acc":0.5,"herbal_drugs_acc":0.7,"places_acc":0.8666666667,"poetry_acc":0.8,"politicians_acc":0.8,"popular_people_acc":0.7743589744,"Government_law_acc":0.9347826087,"proverbs_acc":0.77,"religous_acc":0.9111111111,"social_manners_acc":0.8539325843,"souvenirs_acc":0.68,"sports_acc":0.5873015873,"GPK_acc":0.7686939182,"SPK_acc":0.6764020785,"UPK_acc":0.6096496856}
|
| 13 |
{"Model Name":"gpt-4.1-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6128538638,"cinema_acc":0.525,"emergency_number_acc":0.7,"foods_acc":0.73,"games_acc":0.55,"herbal_drugs_acc":0.625,"places_acc":0.8380952381,"poetry_acc":0.575,"politicians_acc":0.6,"popular_people_acc":0.7076923077,"Government_law_acc":0.847826087,"proverbs_acc":0.71,"religous_acc":0.6666666667,"social_manners_acc":0.8202247191,"souvenirs_acc":0.68,"sports_acc":0.4920634921,"GPK_acc":0.6949152542,"SPK_acc":0.6265902168,"UPK_acc":0.5924547671}
|
| 14 |
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"β","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.5980651448,"cinema_acc":0.6,"emergency_number_acc":0.5,"foods_acc":0.67,"games_acc":0.65,"herbal_drugs_acc":0.675,"places_acc":0.8476190476,"poetry_acc":0.775,"politicians_acc":0.95,"popular_people_acc":0.8092783505,"Government_law_acc":0.8913043478,"proverbs_acc":0.78,"religous_acc":0.8666666667,"social_manners_acc":0.8988764045,"souvenirs_acc":0.68,"sports_acc":0.5396825397,"GPK_acc":0.7604790419,"SPK_acc":0.6417428725,"UPK_acc":0.5458980614}
|
leaderboard/boards_data/all.jsonl
CHANGED
|
@@ -8,14 +8,14 @@
|
|
| 8 |
{"Model Name":"gpt-4o","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6877,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.8371666667,"PerMMLU":0.6884607359,"PerCoR":0.8665,"Persian NLU":0.7146808531,"Persian NLG":0.18964968}
|
| 9 |
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.664,"Persian IFEval":0.8414096916,"Persian MT-Bench":0.8244166667,"PerMMLU":0.6466578563,"PerCoR":0.8143,"Persian NLU":0.6914202844,"Persian NLG":0.1659339021}
|
| 10 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6637,"Persian IFEval":0.8972099853,"Persian MT-Bench":0.82825,"PerMMLU":0.6864436252,"PerCoR":0.731,"Persian NLU":0.6749652797,"Persian NLG":0.1643361642}
|
| 11 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"
|
| 12 |
{"Model Name":"gpt-4.1-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6556,"Persian IFEval":0.8340675477,"Persian MT-Bench":0.8418333333,"PerMMLU":0.6128538638,"PerCoR":0.7712,"Persian NLU":0.6833497104,"Persian NLG":0.1901206806}
|
| 13 |
{"Model Name":"deepseek-reasoner","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6549,"Persian IFEval":0.8370044053,"Persian MT-Bench":0.86175,"PerMMLU":0.6810513107,"PerCoR":0.825165033,"Persian NLU":0.6361186163,"Persian NLG":0.0880621978}
|
| 14 |
{"Model Name":"deepseek-chat","thinking_method":"β","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6458,"Persian IFEval":0.8311306902,"Persian MT-Bench":0.8600833333,"PerMMLU":0.5908047576,"PerCoR":0.8241,"Persian NLU":0.6752949557,"Persian NLG":0.0934094344}
|
| 15 |
{"Model Name":"gemma-3-27b-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","Average":0.6247,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.796,"PerMMLU":0.5633303193,"PerCoR":0.7628,"Persian NLU":0.6898261633,"Persian NLG":0.1067134448}
|
| 16 |
{"Model Name":"gpt-4o-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6246,"Persian IFEval":0.8017621145,"Persian MT-Bench":0.7891666667,"PerMMLU":0.56986854,"PerCoR":0.7598,"Persian NLU":0.6459120734,"Persian NLG":0.1810678527}
|
| 17 |
{"Model Name":"Qwen3-32B","thinking_method":"β","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","Average":0.6224,"Persian IFEval":0.803030303,"Persian MT-Bench":0.7632996633,"PerMMLU":0.5635086255,"PerCoR":0.7654,"Persian NLU":0.6714091535,"Persian NLG":0.1679338638}
|
| 18 |
-
{"Model Name":"gpt-oss:20b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","Average":0.
|
| 19 |
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"β","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","Average":0.613,"Persian IFEval":0.7125925926,"Persian MT-Bench":0.7172558923,"PerMMLU":0.5714086374,"PerCoR":0.7956,"Persian NLU":0.6800109206,"Persian NLG":0.2010896964}
|
| 20 |
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"β","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","Average":0.6098,"Persian IFEval":0.8438880707,"Persian MT-Bench":0.8219166667,"PerMMLU":0.5980651448,"PerCoR":0.798859772,"Persian NLU":0.4824528512,"Persian NLG":0.1137933652}
|
| 21 |
{"Model Name":"gemma-3-12b-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","Average":0.6008,"Persian IFEval":0.8149779736,"Persian MT-Bench":0.75125,"PerMMLU":0.5105376643,"PerCoR":0.7094,"Persian NLU":0.699116864,"Persian NLG":0.1196804312}
|
|
|
|
| 8 |
{"Model Name":"gpt-4o","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6877,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.8371666667,"PerMMLU":0.6884607359,"PerCoR":0.8665,"Persian NLU":0.7146808531,"Persian NLG":0.18964968}
|
| 9 |
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.664,"Persian IFEval":0.8414096916,"Persian MT-Bench":0.8244166667,"PerMMLU":0.6466578563,"PerCoR":0.8143,"Persian NLU":0.6914202844,"Persian NLG":0.1659339021}
|
| 10 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6637,"Persian IFEval":0.8972099853,"Persian MT-Bench":0.82825,"PerMMLU":0.6864436252,"PerCoR":0.731,"Persian NLU":0.6749652797,"Persian NLG":0.1643361642}
|
| 11 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"120000000000","source_type":"Open-Source","Average":0.6633,"Persian IFEval":0.8865979381,"Persian MT-Bench":0.8433333333,"PerMMLU":0.6789316269,"PerCoR":0.7714,"Persian NLU":0.6458443785,"Persian NLG":0.1538910531}
|
| 12 |
{"Model Name":"gpt-4.1-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6556,"Persian IFEval":0.8340675477,"Persian MT-Bench":0.8418333333,"PerMMLU":0.6128538638,"PerCoR":0.7712,"Persian NLU":0.6833497104,"Persian NLG":0.1901206806}
|
| 13 |
{"Model Name":"deepseek-reasoner","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6549,"Persian IFEval":0.8370044053,"Persian MT-Bench":0.86175,"PerMMLU":0.6810513107,"PerCoR":0.825165033,"Persian NLU":0.6361186163,"Persian NLG":0.0880621978}
|
| 14 |
{"Model Name":"deepseek-chat","thinking_method":"β","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6458,"Persian IFEval":0.8311306902,"Persian MT-Bench":0.8600833333,"PerMMLU":0.5908047576,"PerCoR":0.8241,"Persian NLU":0.6752949557,"Persian NLG":0.0934094344}
|
| 15 |
{"Model Name":"gemma-3-27b-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","Average":0.6247,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.796,"PerMMLU":0.5633303193,"PerCoR":0.7628,"Persian NLU":0.6898261633,"Persian NLG":0.1067134448}
|
| 16 |
{"Model Name":"gpt-4o-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6246,"Persian IFEval":0.8017621145,"Persian MT-Bench":0.7891666667,"PerMMLU":0.56986854,"PerCoR":0.7598,"Persian NLU":0.6459120734,"Persian NLG":0.1810678527}
|
| 17 |
{"Model Name":"Qwen3-32B","thinking_method":"β","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","Average":0.6224,"Persian IFEval":0.803030303,"Persian MT-Bench":0.7632996633,"PerMMLU":0.5635086255,"PerCoR":0.7654,"Persian NLU":0.6714091535,"Persian NLG":0.1679338638}
|
| 18 |
+
{"Model Name":"gpt-oss:20b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","Average":0.6196,"Persian IFEval":0.8585657371,"Persian MT-Bench":0.8046099291,"PerMMLU":0.5958127565,"PerCoR":0.6965089527,"Persian NLU":0.628506628,"Persian NLG":0.1334687319}
|
| 19 |
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"β","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","Average":0.613,"Persian IFEval":0.7125925926,"Persian MT-Bench":0.7172558923,"PerMMLU":0.5714086374,"PerCoR":0.7956,"Persian NLU":0.6800109206,"Persian NLG":0.2010896964}
|
| 20 |
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"β","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","Average":0.6098,"Persian IFEval":0.8438880707,"Persian MT-Bench":0.8219166667,"PerMMLU":0.5980651448,"PerCoR":0.798859772,"Persian NLU":0.4824528512,"Persian NLG":0.1137933652}
|
| 21 |
{"Model Name":"gemma-3-12b-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","Average":0.6008,"Persian IFEval":0.8149779736,"Persian MT-Bench":0.75125,"PerMMLU":0.5105376643,"PerCoR":0.7094,"Persian NLU":0.699116864,"Persian NLG":0.1196804312}
|
leaderboard/boards_data/extractive-qa_PQuAD.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":51.6587677725,"extractive-qa_PQuAD_f1":0.7997294818,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":42.9383886256,"extractive-qa_PQuAD_f1":0.7674489336,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":41.6113744076,"extractive-qa_PQuAD_f1":0.7625286761,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":16.2085308057,"extractive-qa_PQuAD_f1":0.5540542726,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":51.6587677725,"extractive-qa_PQuAD_f1":0.7997294818,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":42.9383886256,"extractive-qa_PQuAD_f1":0.7674489336,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":41.6113744076,"extractive-qa_PQuAD_f1":0.7625286761,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":38.5781990521,"extractive-qa_PQuAD_f1":0.7377983931,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":16.2085308057,"extractive-qa_PQuAD_f1":0.5540542726,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":null}
|
leaderboard/boards_data/ifeval.jsonl
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.875273523,"strict_instruction_accuracy":0.9104258443,"loose_prompt_accuracy":0.8927789934,"loose_instruction_accuracy":0.9251101322,"strict_combination_category":0.7230769231,"strict_detectable_content_category":1.0,"strict_detectable_format_category":0.925170068,"strict_keywords_category":0.924137931,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.9024390244,"strict_punctuation_category":0.9508196721,"strict_startend_category":0.9206349206,"loose_combination_category":0.7384615385,"loose_detectable_content_category":1.0,"loose_detectable_format_category":0.925170068,"loose_keywords_category":0.9517241379,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.9105691057,"loose_punctuation_category":1.0,"loose_startend_category":0.9365079365}
|
| 3 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8577680525,"strict_instruction_accuracy":0.8972099853,"loose_prompt_accuracy":0.8840262582,"loose_instruction_accuracy":0.9177679883,"strict_combination_category":0.7076923077,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8843537415,"strict_keywords_category":0.924137931,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.8943089431,"strict_punctuation_category":0.9508196721,"strict_startend_category":0.9365079365,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.9448275862,"loose_language_category":1.0,"loose_length_constraints_category":0.9430894309,"loose_punctuation_category":1.0,"loose_startend_category":0.9365079365}
|
| 4 |
{"Model Name":"gemini-2.5-pro","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8464912281,"strict_instruction_accuracy":0.8911764706,"loose_prompt_accuracy":0.8815789474,"loose_instruction_accuracy":0.9191176471,"strict_combination_category":0.8461538462,"strict_detectable_content_category":0.9777777778,"strict_detectable_format_category":0.8707482993,"strict_keywords_category":0.875862069,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.8211382114,"strict_punctuation_category":1.0,"strict_startend_category":0.9523809524,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9777777778,"loose_detectable_format_category":0.8707482993,"loose_keywords_category":0.9172413793,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.9268292683,"loose_punctuation_category":1.0,"loose_startend_category":0.9523809524}
|
| 5 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"
|
| 6 |
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8336980306,"strict_instruction_accuracy":0.8810572687,"loose_prompt_accuracy":0.8774617068,"loose_instruction_accuracy":0.9148311307,"strict_combination_category":0.8307692308,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8965517241,"strict_language_category":1.0,"strict_length_constraints_category":0.756097561,"strict_punctuation_category":0.9508196721,"strict_startend_category":0.9523809524,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.9448275862,"loose_language_category":1.0,"loose_length_constraints_category":0.8536585366,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9682539683}
|
| 7 |
{"Model Name":"gemini-2.5-flash","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.836689038,"strict_instruction_accuracy":0.8796992481,"loose_prompt_accuracy":0.8680089485,"loose_instruction_accuracy":0.9067669173,"strict_combination_category":0.8,"strict_detectable_content_category":0.9772727273,"strict_detectable_format_category":0.8689655172,"strict_keywords_category":0.8623188406,"strict_language_category":0.935483871,"strict_length_constraints_category":0.8166666667,"strict_punctuation_category":0.9666666667,"strict_startend_category":0.9677419355,"loose_combination_category":0.8307692308,"loose_detectable_content_category":0.9772727273,"loose_detectable_format_category":0.875862069,"loose_keywords_category":0.9130434783,"loose_language_category":0.935483871,"loose_length_constraints_category":0.8666666667,"loose_punctuation_category":1.0,"loose_startend_category":0.9677419355}
|
| 8 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8140043764,"strict_instruction_accuracy":0.8634361233,"loose_prompt_accuracy":0.8512035011,"loose_instruction_accuracy":0.8942731278,"strict_combination_category":0.7846153846,"strict_detectable_content_category":0.9347826087,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.8482758621,"strict_language_category":1.0,"strict_length_constraints_category":0.7804878049,"strict_punctuation_category":0.868852459,"strict_startend_category":0.9523809524,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9347826087,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.9103448276,"loose_language_category":1.0,"loose_length_constraints_category":0.837398374,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9523809524}
|
|
|
|
| 2 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.875273523,"strict_instruction_accuracy":0.9104258443,"loose_prompt_accuracy":0.8927789934,"loose_instruction_accuracy":0.9251101322,"strict_combination_category":0.7230769231,"strict_detectable_content_category":1.0,"strict_detectable_format_category":0.925170068,"strict_keywords_category":0.924137931,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.9024390244,"strict_punctuation_category":0.9508196721,"strict_startend_category":0.9206349206,"loose_combination_category":0.7384615385,"loose_detectable_content_category":1.0,"loose_detectable_format_category":0.925170068,"loose_keywords_category":0.9517241379,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.9105691057,"loose_punctuation_category":1.0,"loose_startend_category":0.9365079365}
|
| 3 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8577680525,"strict_instruction_accuracy":0.8972099853,"loose_prompt_accuracy":0.8840262582,"loose_instruction_accuracy":0.9177679883,"strict_combination_category":0.7076923077,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8843537415,"strict_keywords_category":0.924137931,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.8943089431,"strict_punctuation_category":0.9508196721,"strict_startend_category":0.9365079365,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.9448275862,"loose_language_category":1.0,"loose_length_constraints_category":0.9430894309,"loose_punctuation_category":1.0,"loose_startend_category":0.9365079365}
|
| 4 |
{"Model Name":"gemini-2.5-pro","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8464912281,"strict_instruction_accuracy":0.8911764706,"loose_prompt_accuracy":0.8815789474,"loose_instruction_accuracy":0.9191176471,"strict_combination_category":0.8461538462,"strict_detectable_content_category":0.9777777778,"strict_detectable_format_category":0.8707482993,"strict_keywords_category":0.875862069,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.8211382114,"strict_punctuation_category":1.0,"strict_startend_category":0.9523809524,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9777777778,"loose_detectable_format_category":0.8707482993,"loose_keywords_category":0.9172413793,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.9268292683,"loose_punctuation_category":1.0,"loose_startend_category":0.9523809524}
|
| 5 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"120000000000","source_type":"Open-Source","strict_prompt_accuracy":0.8373626374,"strict_instruction_accuracy":0.8865979381,"loose_prompt_accuracy":0.8813186813,"loose_instruction_accuracy":0.9189985272,"strict_combination_category":0.8153846154,"strict_detectable_content_category":0.9347826087,"strict_detectable_format_category":0.9589041096,"strict_keywords_category":0.8680555556,"strict_language_category":1.0,"strict_length_constraints_category":0.7967479675,"strict_punctuation_category":0.868852459,"strict_startend_category":0.9365079365,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9347826087,"loose_detectable_format_category":0.9657534247,"loose_keywords_category":0.9236111111,"loose_language_category":1.0,"loose_length_constraints_category":0.8617886179,"loose_punctuation_category":0.9016393443,"loose_startend_category":0.9523809524}
|
| 6 |
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8336980306,"strict_instruction_accuracy":0.8810572687,"loose_prompt_accuracy":0.8774617068,"loose_instruction_accuracy":0.9148311307,"strict_combination_category":0.8307692308,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8965517241,"strict_language_category":1.0,"strict_length_constraints_category":0.756097561,"strict_punctuation_category":0.9508196721,"strict_startend_category":0.9523809524,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.9448275862,"loose_language_category":1.0,"loose_length_constraints_category":0.8536585366,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9682539683}
|
| 7 |
{"Model Name":"gemini-2.5-flash","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.836689038,"strict_instruction_accuracy":0.8796992481,"loose_prompt_accuracy":0.8680089485,"loose_instruction_accuracy":0.9067669173,"strict_combination_category":0.8,"strict_detectable_content_category":0.9772727273,"strict_detectable_format_category":0.8689655172,"strict_keywords_category":0.8623188406,"strict_language_category":0.935483871,"strict_length_constraints_category":0.8166666667,"strict_punctuation_category":0.9666666667,"strict_startend_category":0.9677419355,"loose_combination_category":0.8307692308,"loose_detectable_content_category":0.9772727273,"loose_detectable_format_category":0.875862069,"loose_keywords_category":0.9130434783,"loose_language_category":0.935483871,"loose_length_constraints_category":0.8666666667,"loose_punctuation_category":1.0,"loose_startend_category":0.9677419355}
|
| 8 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8140043764,"strict_instruction_accuracy":0.8634361233,"loose_prompt_accuracy":0.8512035011,"loose_instruction_accuracy":0.8942731278,"strict_combination_category":0.7846153846,"strict_detectable_content_category":0.9347826087,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.8482758621,"strict_language_category":1.0,"strict_length_constraints_category":0.7804878049,"strict_punctuation_category":0.868852459,"strict_startend_category":0.9523809524,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9347826087,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.9103448276,"loose_language_category":1.0,"loose_length_constraints_category":0.837398374,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9523809524}
|
leaderboard/boards_data/keyword-extraction_SynKeywords.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1547160783,"keyword-extraction_SynKeywords_precision_mean":0.1275089966,"keyword-extraction_SynKeywords_recall_mean":0.2111413043,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1186662307,"keyword-extraction_SynKeywords_precision_mean":0.1013265485,"keyword-extraction_SynKeywords_recall_mean":0.1581521739,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2645652392,"keyword-extraction_SynKeywords_precision_mean":0.2349391249,"keyword-extraction_SynKeywords_recall_mean":0.3166666667,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1217550899,"keyword-extraction_SynKeywords_precision_mean":0.1020894964,"keyword-extraction_SynKeywords_recall_mean":0.1608695652,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":null,"keyword-extraction_SynKeywords_precision_mean":null,"keyword-extraction_SynKeywords_recall_mean":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1547160783,"keyword-extraction_SynKeywords_precision_mean":0.1275089966,"keyword-extraction_SynKeywords_recall_mean":0.2111413043,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1186662307,"keyword-extraction_SynKeywords_precision_mean":0.1013265485,"keyword-extraction_SynKeywords_recall_mean":0.1581521739,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1543389439,"keyword-extraction_SynKeywords_precision_mean":0.1301371778,"keyword-extraction_SynKeywords_recall_mean":0.2038949275,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2645652392,"keyword-extraction_SynKeywords_precision_mean":0.2349391249,"keyword-extraction_SynKeywords_recall_mean":0.3166666667,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1217550899,"keyword-extraction_SynKeywords_precision_mean":0.1020894964,"keyword-extraction_SynKeywords_recall_mean":0.1608695652,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":null,"keyword-extraction_SynKeywords_precision_mean":null,"keyword-extraction_SynKeywords_recall_mean":null,"nlu_score":null}
|
leaderboard/boards_data/mt_bench.jsonl
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.5632653061,"score_mean":8.6775,"writing_score_w_mean":7.7037037037,"writing_score_mean":7.675,"roleplay_score_w_mean":8.3928571429,"roleplay_score_mean":8.4166666667,"reasoning_score_w_mean":9.0333333333,"reasoning_score_mean":9.1166666667,"math_score_w_mean":9.652173913,"math_score_mean":9.6,"coding_score_w_mean":9.2,"coding_score_mean":9.2,"extraction_score_w_mean":9.15,"extraction_score_mean":9.15,"stem_score_w_mean":9.0,"stem_score_mean":9.0,"humanities_score_w_mean":9.2,"humanities_score_mean":9.2,"persian_general_knowledge_score_w_mean":5.5925925926,"persian_general_knowledge_score_mean":6.05,"chatbot_rag_score_w_mean":9.3333333333,"chatbot_rag_score_mean":9.3666666667}
|
| 7 |
{"Model Name":"deepseek-reasoner","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5795918367,"score_mean":8.6175,"writing_score_w_mean":8.6296296296,"writing_score_mean":8.65,"roleplay_score_w_mean":8.1785714286,"roleplay_score_mean":8.225,"reasoning_score_w_mean":8.9,"reasoning_score_mean":8.7416666667,"math_score_w_mean":9.3043478261,"math_score_mean":9.2,"coding_score_w_mean":8.75,"coding_score_mean":8.75,"extraction_score_w_mean":8.5,"extraction_score_mean":8.5,"stem_score_w_mean":8.55,"stem_score_mean":8.55,"humanities_score_w_mean":9.15,"humanities_score_mean":9.15,"persian_general_knowledge_score_w_mean":6.8148148148,"persian_general_knowledge_score_mean":7.2416666667,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1666666667}
|
| 8 |
{"Model Name":"deepseek-chat","thinking_method":"β","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5102040816,"score_mean":8.6008333333,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.4916666667,"roleplay_score_w_mean":8.9285714286,"roleplay_score_mean":8.9666666667,"reasoning_score_w_mean":8.3666666667,"reasoning_score_mean":8.3083333333,"math_score_w_mean":9.1304347826,"math_score_mean":9.0,"coding_score_w_mean":9.35,"coding_score_mean":9.35,"extraction_score_w_mean":8.65,"extraction_score_mean":8.65,"stem_score_w_mean":9.05,"stem_score_mean":9.05,"humanities_score_w_mean":9.25,"humanities_score_mean":9.25,"persian_general_knowledge_score_w_mean":5.0740740741,"persian_general_knowledge_score_mean":5.4916666667,"chatbot_rag_score_w_mean":9.4333333333,"chatbot_rag_score_mean":9.45}
|
| 9 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"
|
| 10 |
{"Model Name":"gpt-4.1-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3183673469,"score_mean":8.4183333333,"writing_score_w_mean":8.1111111111,"writing_score_mean":8.125,"roleplay_score_w_mean":8.0714285714,"roleplay_score_mean":8.0333333333,"reasoning_score_w_mean":8.1333333333,"reasoning_score_mean":8.0833333333,"math_score_w_mean":9.4347826087,"math_score_mean":9.35,"coding_score_w_mean":8.85,"coding_score_mean":8.85,"extraction_score_w_mean":8.6,"extraction_score_mean":8.6,"stem_score_w_mean":8.9,"stem_score_mean":8.9,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":5.2222222222,"persian_general_knowledge_score_mean":5.8083333333,"chatbot_rag_score_w_mean":9.4666666667,"chatbot_rag_score_mean":9.4333333333}
|
| 11 |
{"Model Name":"gemini-2.0-flash","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.4522821577,"score_mean":8.3897306397,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.475,"roleplay_score_w_mean":8.6785714286,"roleplay_score_mean":8.65,"reasoning_score_w_mean":8.4,"reasoning_score_mean":8.3333333333,"math_score_w_mean":9.0434782609,"math_score_mean":8.9,"coding_score_w_mean":7.05,"coding_score_mean":7.05,"extraction_score_w_mean":7.6,"extraction_score_mean":7.6,"stem_score_w_mean":8.4,"stem_score_mean":8.4,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":8.4074074074,"persian_general_knowledge_score_mean":8.4166666667,"chatbot_rag_score_w_mean":9.1538461538,"chatbot_rag_score_mean":9.1481481481}
|
| 12 |
{"Model Name":"gpt-4o","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3551020408,"score_mean":8.3716666667,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.175,"roleplay_score_w_mean":7.5,"roleplay_score_mean":7.45,"reasoning_score_w_mean":8.4666666667,"reasoning_score_mean":8.4833333333,"math_score_w_mean":8.7391304348,"math_score_mean":8.8416666667,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.25,"extraction_score_mean":8.25,"stem_score_w_mean":8.6,"stem_score_mean":8.6,"humanities_score_w_mean":9.05,"humanities_score_mean":9.05,"persian_general_knowledge_score_w_mean":7.8888888889,"persian_general_knowledge_score_mean":7.9,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
|
|
|
|
| 6 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.5632653061,"score_mean":8.6775,"writing_score_w_mean":7.7037037037,"writing_score_mean":7.675,"roleplay_score_w_mean":8.3928571429,"roleplay_score_mean":8.4166666667,"reasoning_score_w_mean":9.0333333333,"reasoning_score_mean":9.1166666667,"math_score_w_mean":9.652173913,"math_score_mean":9.6,"coding_score_w_mean":9.2,"coding_score_mean":9.2,"extraction_score_w_mean":9.15,"extraction_score_mean":9.15,"stem_score_w_mean":9.0,"stem_score_mean":9.0,"humanities_score_w_mean":9.2,"humanities_score_mean":9.2,"persian_general_knowledge_score_w_mean":5.5925925926,"persian_general_knowledge_score_mean":6.05,"chatbot_rag_score_w_mean":9.3333333333,"chatbot_rag_score_mean":9.3666666667}
|
| 7 |
{"Model Name":"deepseek-reasoner","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5795918367,"score_mean":8.6175,"writing_score_w_mean":8.6296296296,"writing_score_mean":8.65,"roleplay_score_w_mean":8.1785714286,"roleplay_score_mean":8.225,"reasoning_score_w_mean":8.9,"reasoning_score_mean":8.7416666667,"math_score_w_mean":9.3043478261,"math_score_mean":9.2,"coding_score_w_mean":8.75,"coding_score_mean":8.75,"extraction_score_w_mean":8.5,"extraction_score_mean":8.5,"stem_score_w_mean":8.55,"stem_score_mean":8.55,"humanities_score_w_mean":9.15,"humanities_score_mean":9.15,"persian_general_knowledge_score_w_mean":6.8148148148,"persian_general_knowledge_score_mean":7.2416666667,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1666666667}
|
| 8 |
{"Model Name":"deepseek-chat","thinking_method":"β","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5102040816,"score_mean":8.6008333333,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.4916666667,"roleplay_score_w_mean":8.9285714286,"roleplay_score_mean":8.9666666667,"reasoning_score_w_mean":8.3666666667,"reasoning_score_mean":8.3083333333,"math_score_w_mean":9.1304347826,"math_score_mean":9.0,"coding_score_w_mean":9.35,"coding_score_mean":9.35,"extraction_score_w_mean":8.65,"extraction_score_mean":8.65,"stem_score_w_mean":9.05,"stem_score_mean":9.05,"humanities_score_w_mean":9.25,"humanities_score_mean":9.25,"persian_general_knowledge_score_w_mean":5.0740740741,"persian_general_knowledge_score_mean":5.4916666667,"chatbot_rag_score_w_mean":9.4333333333,"chatbot_rag_score_mean":9.45}
|
| 9 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"120000000000","source_type":"Open-Source","score_w_mean":8.3959183673,"score_mean":8.4333333333,"writing_score_w_mean":8.2222222222,"writing_score_mean":8.1333333333,"roleplay_score_w_mean":7.7142857143,"roleplay_score_mean":7.4916666667,"reasoning_score_w_mean":9.0333333333,"reasoning_score_mean":8.9583333333,"math_score_w_mean":9.3913043478,"math_score_mean":9.3,"coding_score_w_mean":9.45,"coding_score_mean":9.45,"extraction_score_w_mean":8.5,"extraction_score_mean":8.5,"stem_score_w_mean":8.9,"stem_score_mean":8.9,"humanities_score_w_mean":9.1,"humanities_score_mean":9.1,"persian_general_knowledge_score_w_mean":4.5555555556,"persian_general_knowledge_score_mean":4.85,"chatbot_rag_score_w_mean":9.6666666667,"chatbot_rag_score_mean":9.65}
|
| 10 |
{"Model Name":"gpt-4.1-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3183673469,"score_mean":8.4183333333,"writing_score_w_mean":8.1111111111,"writing_score_mean":8.125,"roleplay_score_w_mean":8.0714285714,"roleplay_score_mean":8.0333333333,"reasoning_score_w_mean":8.1333333333,"reasoning_score_mean":8.0833333333,"math_score_w_mean":9.4347826087,"math_score_mean":9.35,"coding_score_w_mean":8.85,"coding_score_mean":8.85,"extraction_score_w_mean":8.6,"extraction_score_mean":8.6,"stem_score_w_mean":8.9,"stem_score_mean":8.9,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":5.2222222222,"persian_general_knowledge_score_mean":5.8083333333,"chatbot_rag_score_w_mean":9.4666666667,"chatbot_rag_score_mean":9.4333333333}
|
| 11 |
{"Model Name":"gemini-2.0-flash","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.4522821577,"score_mean":8.3897306397,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.475,"roleplay_score_w_mean":8.6785714286,"roleplay_score_mean":8.65,"reasoning_score_w_mean":8.4,"reasoning_score_mean":8.3333333333,"math_score_w_mean":9.0434782609,"math_score_mean":8.9,"coding_score_w_mean":7.05,"coding_score_mean":7.05,"extraction_score_w_mean":7.6,"extraction_score_mean":7.6,"stem_score_w_mean":8.4,"stem_score_mean":8.4,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":8.4074074074,"persian_general_knowledge_score_mean":8.4166666667,"chatbot_rag_score_w_mean":9.1538461538,"chatbot_rag_score_mean":9.1481481481}
|
| 12 |
{"Model Name":"gpt-4o","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3551020408,"score_mean":8.3716666667,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.175,"roleplay_score_w_mean":7.5,"roleplay_score_mean":7.45,"reasoning_score_w_mean":8.4666666667,"reasoning_score_mean":8.4833333333,"math_score_w_mean":8.7391304348,"math_score_mean":8.8416666667,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.25,"extraction_score_mean":8.25,"stem_score_w_mean":8.6,"stem_score_mean":8.6,"humanities_score_w_mean":9.05,"humanities_score_mean":9.05,"persian_general_knowledge_score_w_mean":7.8888888889,"persian_general_knowledge_score_mean":7.9,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
|
leaderboard/boards_data/ner_arman.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0031613599,"ner_arman_precision_mean":0.0024235201,"ner_arman_recall_mean":0.0047675805,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5087496797,"ner_arman_precision_mean":0.4749602702,"ner_arman_recall_mean":0.5891338896,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5238146835,"ner_arman_precision_mean":0.511958681,"ner_arman_recall_mean":0.5638855781,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","ner_arman_f1_mean":0.5030535945,"ner_arman_precision_mean":0.4617288155,"ner_arman_recall_mean":0.5952522845,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3097820535,"ner_arman_precision_mean":0.2833333333,"ner_arman_recall_mean":0.3710568137,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":null,"ner_arman_precision_mean":null,"ner_arman_recall_mean":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0031613599,"ner_arman_precision_mean":0.0024235201,"ner_arman_recall_mean":0.0047675805,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5087496797,"ner_arman_precision_mean":0.4749602702,"ner_arman_recall_mean":0.5891338896,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5238146835,"ner_arman_precision_mean":0.511958681,"ner_arman_recall_mean":0.5638855781,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","ner_arman_f1_mean":0.090559262,"ner_arman_precision_mean":0.0812673818,"ner_arman_recall_mean":0.1104290822,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","ner_arman_f1_mean":0.5030535945,"ner_arman_precision_mean":0.4617288155,"ner_arman_recall_mean":0.5952522845,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3097820535,"ner_arman_precision_mean":0.2833333333,"ner_arman_recall_mean":0.3710568137,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":null,"ner_arman_precision_mean":null,"ner_arman_recall_mean":null,"nlu_score":null}
|
leaderboard/boards_data/nli_farstail.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6898976982,"nli_farstail_precision_modified":0.752223151,"nli_farstail_recall_modified":0.691698665,"nli_farstail_fscore_modified":0.6834607357,"nli_farstail_acc":0.6898976982,"nli_farstail_precision":0.752223151,"nli_farstail_recall":0.691698665,"nli_farstail_fscore":0.6834607357,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8542199488,"nli_farstail_precision_modified":0.8634293173,"nli_farstail_recall_modified":0.8539318442,"nli_farstail_fscore_modified":0.8536753404,"nli_farstail_acc":0.8553137004,"nli_farstail_precision":0.8645348606,"nli_farstail_recall":0.8550252268,"nli_farstail_fscore":0.8547683946,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7653452685,"nli_farstail_precision_modified":0.7847248212,"nli_farstail_recall_modified":0.7659002614,"nli_farstail_fscore_modified":0.7629523234,"nli_farstail_acc":0.7653452685,"nli_farstail_precision":0.7847248212,"nli_farstail_recall":0.7659002614,"nli_farstail_fscore":0.7629523234,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7078005115,"nli_farstail_precision_modified":0.7418983007,"nli_farstail_recall_modified":0.70995102,"nli_farstail_fscore_modified":0.6987179454,"nli_farstail_acc":0.7091607944,"nli_farstail_precision":0.7433241143,"nli_farstail_recall":0.7113154358,"nli_farstail_fscore":0.700060773,"nli_farstail_valid_output_ratio":0.9980818414,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8069053708,"nli_farstail_precision_modified":0.8274480721,"nli_farstail_recall_modified":0.8078020735,"nli_farstail_fscore_modified":0.8055860349,"nli_farstail_acc":0.8069053708,"nli_farstail_precision":0.8274480721,"nli_farstail_recall":0.8078020735,"nli_farstail_fscore":0.8055860349,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":null,"nli_farstail_precision_modified":null,"nli_farstail_recall_modified":null,"nli_farstail_fscore_modified":null,"nli_farstail_acc":null,"nli_farstail_precision":null,"nli_farstail_recall":null,"nli_farstail_fscore":null,"nli_farstail_valid_output_ratio":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6898976982,"nli_farstail_precision_modified":0.752223151,"nli_farstail_recall_modified":0.691698665,"nli_farstail_fscore_modified":0.6834607357,"nli_farstail_acc":0.6898976982,"nli_farstail_precision":0.752223151,"nli_farstail_recall":0.691698665,"nli_farstail_fscore":0.6834607357,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8542199488,"nli_farstail_precision_modified":0.8634293173,"nli_farstail_recall_modified":0.8539318442,"nli_farstail_fscore_modified":0.8536753404,"nli_farstail_acc":0.8553137004,"nli_farstail_precision":0.8645348606,"nli_farstail_recall":0.8550252268,"nli_farstail_fscore":0.8547683946,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7653452685,"nli_farstail_precision_modified":0.7847248212,"nli_farstail_recall_modified":0.7659002614,"nli_farstail_fscore_modified":0.7629523234,"nli_farstail_acc":0.7653452685,"nli_farstail_precision":0.7847248212,"nli_farstail_recall":0.7659002614,"nli_farstail_fscore":0.7629523234,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7685421995,"nli_farstail_precision_modified":0.7858737557,"nli_farstail_recall_modified":0.7699859437,"nli_farstail_fscore_modified":0.7654192549,"nli_farstail_acc":0.7685421995,"nli_farstail_precision":0.7858737557,"nli_farstail_recall":0.7699859437,"nli_farstail_fscore":0.7654192549,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7078005115,"nli_farstail_precision_modified":0.7418983007,"nli_farstail_recall_modified":0.70995102,"nli_farstail_fscore_modified":0.6987179454,"nli_farstail_acc":0.7091607944,"nli_farstail_precision":0.7433241143,"nli_farstail_recall":0.7113154358,"nli_farstail_fscore":0.700060773,"nli_farstail_valid_output_ratio":0.9980818414,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8069053708,"nli_farstail_precision_modified":0.8274480721,"nli_farstail_recall_modified":0.8078020735,"nli_farstail_fscore_modified":0.8055860349,"nli_farstail_acc":0.8069053708,"nli_farstail_precision":0.8274480721,"nli_farstail_recall":0.8078020735,"nli_farstail_fscore":0.8055860349,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":null,"nli_farstail_precision_modified":null,"nli_farstail_recall_modified":null,"nli_farstail_fscore_modified":null,"nli_farstail_acc":null,"nli_farstail_precision":null,"nli_farstail_recall":null,"nli_farstail_fscore":null,"nli_farstail_valid_output_ratio":null,"nlu_score":null}
|
leaderboard/boards_data/paraphrase-detection_FarsiParaphraseDetection.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8378033206,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8876229723,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8118030537,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8221408221,"paraphrase-detection_FarsiParaphraseDetection_acc":0.841025641,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8910369069,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8149253731,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8253029022,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9961685824,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":null,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":null,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":null,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":null,"paraphrase-detection_FarsiParaphraseDetection_acc":null,"paraphrase-detection_FarsiParaphraseDetection_precision":null,"paraphrase-detection_FarsiParaphraseDetection_recall":null,"paraphrase-detection_FarsiParaphraseDetection_fscore":null,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8378033206,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8876229723,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8118030537,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8221408221,"paraphrase-detection_FarsiParaphraseDetection_acc":0.841025641,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8910369069,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8149253731,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8253029022,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9961685824,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.840357599,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8909249564,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8134328358,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8241066053,"paraphrase-detection_FarsiParaphraseDetection_acc":0.840357599,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8909249564,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8134328358,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8241066053,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":null,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":null,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":null,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":null,"paraphrase-detection_FarsiParaphraseDetection_acc":null,"paraphrase-detection_FarsiParaphraseDetection_precision":null,"paraphrase-detection_FarsiParaphraseDetection_recall":null,"paraphrase-detection_FarsiParaphraseDetection_fscore":null,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":null,"nlu_score":null}
|
leaderboard/boards_data/paraphrase-detection_parsinlu.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.872,"paraphrase-detection_parsinlu_precision_modified":0.872593554,"paraphrase-detection_parsinlu_recall_modified":0.8654426765,"paraphrase-detection_parsinlu_fscore_modified":0.8682824025,"paraphrase-detection_parsinlu_acc":0.872,"paraphrase-detection_parsinlu_precision":0.872593554,"paraphrase-detection_parsinlu_recall":0.8654426765,"paraphrase-detection_parsinlu_fscore":0.8682824025,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.812,"paraphrase-detection_parsinlu_precision_modified":0.8560421519,"paraphrase-detection_parsinlu_recall_modified":0.7853480321,"paraphrase-detection_parsinlu_fscore_modified":0.7936411239,"paraphrase-detection_parsinlu_acc":0.8152610442,"paraphrase-detection_parsinlu_precision":0.8594800722,"paraphrase-detection_parsinlu_recall":0.7885020403,"paraphrase-detection_parsinlu_fscore":0.7968284376,"paraphrase-detection_parsinlu_valid_output_ratio":0.996,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.78,"paraphrase-detection_parsinlu_precision_modified":0.8453499806,"paraphrase-detection_parsinlu_recall_modified":0.7464708282,"paraphrase-detection_parsinlu_fscore_modified":0.7504718351,"paraphrase-detection_parsinlu_acc":0.78,"paraphrase-detection_parsinlu_precision":0.8453499806,"paraphrase-detection_parsinlu_recall":0.7464708282,"paraphrase-detection_parsinlu_fscore":0.7504718351,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.866,"paraphrase-detection_parsinlu_precision_modified":0.8771198269,"paraphrase-detection_parsinlu_recall_modified":0.852753978,"paraphrase-detection_parsinlu_fscore_modified":0.8595688134,"paraphrase-detection_parsinlu_acc":0.866,"paraphrase-detection_parsinlu_precision":0.8771198269,"paraphrase-detection_parsinlu_recall":0.852753978,"paraphrase-detection_parsinlu_fscore":0.8595688134,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.861267166,"paraphrase-detection_parsinlu_recall_modified":0.802243982,"paraphrase-detection_parsinlu_fscore_modified":0.8109673691,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.861267166,"paraphrase-detection_parsinlu_recall":0.802243982,"paraphrase-detection_parsinlu_fscore":0.8109673691,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":null,"paraphrase-detection_parsinlu_precision_modified":null,"paraphrase-detection_parsinlu_recall_modified":null,"paraphrase-detection_parsinlu_fscore_modified":null,"paraphrase-detection_parsinlu_acc":null,"paraphrase-detection_parsinlu_precision":null,"paraphrase-detection_parsinlu_recall":null,"paraphrase-detection_parsinlu_fscore":null,"paraphrase-detection_parsinlu_valid_output_ratio":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.872,"paraphrase-detection_parsinlu_precision_modified":0.872593554,"paraphrase-detection_parsinlu_recall_modified":0.8654426765,"paraphrase-detection_parsinlu_fscore_modified":0.8682824025,"paraphrase-detection_parsinlu_acc":0.872,"paraphrase-detection_parsinlu_precision":0.872593554,"paraphrase-detection_parsinlu_recall":0.8654426765,"paraphrase-detection_parsinlu_fscore":0.8682824025,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.812,"paraphrase-detection_parsinlu_precision_modified":0.8560421519,"paraphrase-detection_parsinlu_recall_modified":0.7853480321,"paraphrase-detection_parsinlu_fscore_modified":0.7936411239,"paraphrase-detection_parsinlu_acc":0.8152610442,"paraphrase-detection_parsinlu_precision":0.8594800722,"paraphrase-detection_parsinlu_recall":0.7885020403,"paraphrase-detection_parsinlu_fscore":0.7968284376,"paraphrase-detection_parsinlu_valid_output_ratio":0.996,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.78,"paraphrase-detection_parsinlu_precision_modified":0.8453499806,"paraphrase-detection_parsinlu_recall_modified":0.7464708282,"paraphrase-detection_parsinlu_fscore_modified":0.7504718351,"paraphrase-detection_parsinlu_acc":0.78,"paraphrase-detection_parsinlu_precision":0.8453499806,"paraphrase-detection_parsinlu_recall":0.7464708282,"paraphrase-detection_parsinlu_fscore":0.7504718351,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.792,"paraphrase-detection_parsinlu_precision_modified":0.8370839341,"paraphrase-detection_parsinlu_recall_modified":0.7632802938,"paraphrase-detection_parsinlu_fscore_modified":0.7695689166,"paraphrase-detection_parsinlu_acc":0.792,"paraphrase-detection_parsinlu_precision":0.8370839341,"paraphrase-detection_parsinlu_recall":0.7632802938,"paraphrase-detection_parsinlu_fscore":0.7695689166,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.866,"paraphrase-detection_parsinlu_precision_modified":0.8771198269,"paraphrase-detection_parsinlu_recall_modified":0.852753978,"paraphrase-detection_parsinlu_fscore_modified":0.8595688134,"paraphrase-detection_parsinlu_acc":0.866,"paraphrase-detection_parsinlu_precision":0.8771198269,"paraphrase-detection_parsinlu_recall":0.852753978,"paraphrase-detection_parsinlu_fscore":0.8595688134,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.861267166,"paraphrase-detection_parsinlu_recall_modified":0.802243982,"paraphrase-detection_parsinlu_fscore_modified":0.8109673691,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.861267166,"paraphrase-detection_parsinlu_recall":0.802243982,"paraphrase-detection_parsinlu_fscore":0.8109673691,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":null,"paraphrase-detection_parsinlu_precision_modified":null,"paraphrase-detection_parsinlu_recall_modified":null,"paraphrase-detection_parsinlu_fscore_modified":null,"paraphrase-detection_parsinlu_acc":null,"paraphrase-detection_parsinlu_precision":null,"paraphrase-detection_parsinlu_recall":null,"paraphrase-detection_parsinlu_fscore":null,"paraphrase-detection_parsinlu_valid_output_ratio":null,"nlu_score":null}
|
leaderboard/boards_data/persian_csr.jsonl
CHANGED
|
@@ -13,16 +13,16 @@
|
|
| 13 |
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"β","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.798859772,"acc_strict":0.7983596719,"donyaeeqtesad_acc":0.7673716012,"isna_acc":0.76953125,"ninisite_article_acc":0.7696476965,"virgool_4_acc":0.8398791541,"khabaronline_acc":0.78,"digiato_acc":0.7870563674,"doctoreto_acc":0.82,"sarzamindownload_acc":0.7712418301,"hamgardi_acc":0.7610619469,"bigbangpage_acc":0.8789808917,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.8119402985,"virgool_2_acc":0.8226299694,"virgool_1_acc":0.8037974684,"hamshahrionline_acc":0.8557377049,"tabnak_acc":0.8298755187,"alibaba_acc":0.8562091503,"digikala_mag_acc":0.8152610442,"yjc_acc":0.7471264368,"beytoote_acc":0.8005540166,"asriran_acc":0.7922705314,"ecoiran_acc":0.7333333333,"hawzah_acc":0.8342696629,"zoomit_acc":0.8427230047,"wikipedia_acc":0.9095238095,"namnak_acc":0.7738419619,"khodro45_acc":0.8088235294,"fidibo_acc":0.845814978,"newmiind_acc":0.7222222222,"taaghche_acc":0.8397435897,"motamem_acc":0.8947368421,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.7338709677,"tasnim_acc":0.7730769231,"magerta_acc":0.6974789916,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.6578947368,"parsiday_acc":0.7166666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
| 14 |
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"β","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","acc":0.7956,"acc_strict":0.1123,"donyaeeqtesad_acc":0.7764350453,"isna_acc":0.76171875,"ninisite_article_acc":0.772972973,"virgool_4_acc":0.8549848943,"khabaronline_acc":0.78,"digiato_acc":0.7954070981,"doctoreto_acc":0.755,"sarzamindownload_acc":0.7385620915,"hamgardi_acc":0.7492625369,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8421052632,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8287461774,"virgool_1_acc":0.8259493671,"hamshahrionline_acc":0.862295082,"tabnak_acc":0.8257261411,"alibaba_acc":0.8366013072,"digikala_mag_acc":0.8076152305,"yjc_acc":0.7356321839,"beytoote_acc":0.7922437673,"asriran_acc":0.7874396135,"ecoiran_acc":0.7142857143,"hawzah_acc":0.845505618,"zoomit_acc":0.8403755869,"wikipedia_acc":0.9047619048,"namnak_acc":0.7874659401,"khodro45_acc":0.7941176471,"fidibo_acc":0.8414096916,"newmiind_acc":0.7465277778,"taaghche_acc":0.8076923077,"motamem_acc":0.8947368421,"varzesh3_acc":0.7959866221,"mehrnews_acc":0.7419354839,"tasnim_acc":0.7346153846,"magerta_acc":0.6848739496,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8787878788,"voolak_acc":0.7906976744,"farsroid_acc":0.6578947368,"parsiday_acc":0.7583333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
|
| 15 |
{"Model Name":"llama4:scout","thinking_method":"β","model_url":"https_google.com","parameters_count":"109000000000","source_type":"Open-Source","acc":0.7814,"acc_strict":0.1472,"donyaeeqtesad_acc":0.7945619335,"isna_acc":0.7421875,"ninisite_article_acc":0.7486486486,"virgool_4_acc":0.8157099698,"khabaronline_acc":0.756,"digiato_acc":0.7933194154,"doctoreto_acc":0.8,"sarzamindownload_acc":0.7189542484,"hamgardi_acc":0.7197640118,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8421052632,"virgool_3_acc":0.7850746269,"virgool_2_acc":0.8073394495,"virgool_1_acc":0.7784810127,"hamshahrionline_acc":0.8557377049,"tabnak_acc":0.8132780083,"alibaba_acc":0.8006535948,"digikala_mag_acc":0.7715430862,"yjc_acc":0.7471264368,"beytoote_acc":0.7700831025,"asriran_acc":0.7777777778,"ecoiran_acc":0.7523809524,"hawzah_acc":0.7949438202,"zoomit_acc":0.8192488263,"wikipedia_acc":0.8428571429,"namnak_acc":0.7820163488,"khodro45_acc":0.8088235294,"fidibo_acc":0.7929515419,"newmiind_acc":0.6979166667,"taaghche_acc":0.8269230769,"motamem_acc":0.8842105263,"varzesh3_acc":0.8160535117,"mehrnews_acc":0.7258064516,"tasnim_acc":0.7769230769,"magerta_acc":0.7100840336,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.7631578947,"parsiday_acc":0.6916666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
|
|
|
| 16 |
{"Model Name":"gpt-4.1-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7712,"acc_strict":0.7712,"donyaeeqtesad_acc":0.7703927492,"isna_acc":0.75390625,"ninisite_article_acc":0.7648648649,"virgool_4_acc":0.8247734139,"khabaronline_acc":0.76,"digiato_acc":0.7745302714,"doctoreto_acc":0.785,"sarzamindownload_acc":0.7581699346,"hamgardi_acc":0.6784660767,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8223684211,"virgool_3_acc":0.7910447761,"virgool_2_acc":0.7920489297,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8295081967,"tabnak_acc":0.7634854772,"alibaba_acc":0.7973856209,"digikala_mag_acc":0.8056112224,"yjc_acc":0.724137931,"beytoote_acc":0.7783933518,"asriran_acc":0.7777777778,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7724719101,"zoomit_acc":0.8098591549,"wikipedia_acc":0.8761904762,"namnak_acc":0.7547683924,"khodro45_acc":0.7941176471,"fidibo_acc":0.7841409692,"newmiind_acc":0.6875,"taaghche_acc":0.8269230769,"motamem_acc":0.8631578947,"varzesh3_acc":0.7926421405,"mehrnews_acc":0.7056451613,"tasnim_acc":0.7076923077,"magerta_acc":0.6890756303,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8181818182,"voolak_acc":0.6279069767,"farsroid_acc":0.6578947368,"parsiday_acc":0.7083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.5}
|
| 17 |
{"Model Name":"Qwen3-32B","thinking_method":"β","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","acc":0.7654,"acc_strict":0.7653,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.75,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.7824773414,"khabaronline_acc":0.724,"digiato_acc":0.8037578288,"doctoreto_acc":0.8,"sarzamindownload_acc":0.7450980392,"hamgardi_acc":0.6991150442,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.8157894737,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8073394495,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8,"tabnak_acc":0.7634854772,"alibaba_acc":0.8039215686,"digikala_mag_acc":0.7875751503,"yjc_acc":0.6896551724,"beytoote_acc":0.7783933518,"asriran_acc":0.7632850242,"ecoiran_acc":0.6793650794,"hawzah_acc":0.7724719101,"zoomit_acc":0.8215962441,"wikipedia_acc":0.8523809524,"namnak_acc":0.7520435967,"khodro45_acc":0.8088235294,"fidibo_acc":0.7665198238,"newmiind_acc":0.6909722222,"taaghche_acc":0.7564102564,"motamem_acc":0.8736842105,"varzesh3_acc":0.762541806,"mehrnews_acc":0.689516129,"tasnim_acc":0.7192307692,"magerta_acc":0.7268907563,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8484848485,"voolak_acc":0.6744186047,"farsroid_acc":0.6578947368,"parsiday_acc":0.675,"soft98_acc":0.7,"ninisite_discussion_acc":0.7}
|
| 18 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","acc":0.7643,"acc_strict":0.7643,"donyaeeqtesad_acc":0.7039274924,"isna_acc":0.73828125,"ninisite_article_acc":0.7567567568,"virgool_4_acc":0.8096676737,"khabaronline_acc":0.688,"digiato_acc":0.7703549061,"doctoreto_acc":0.84,"sarzamindownload_acc":0.7712418301,"hamgardi_acc":0.7256637168,"bigbangpage_acc":0.7898089172,"wiki_ahlolbait_acc":0.7697368421,"virgool_3_acc":0.8149253731,"virgool_2_acc":0.8195718654,"virgool_1_acc":0.7879746835,"hamshahrionline_acc":0.7573770492,"tabnak_acc":0.7136929461,"alibaba_acc":0.7712418301,"digikala_mag_acc":0.8016032064,"yjc_acc":0.7011494253,"beytoote_acc":0.7950138504,"asriran_acc":0.7246376812,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7556179775,"zoomit_acc":0.7957746479,"wikipedia_acc":0.9,"namnak_acc":0.7547683924,"khodro45_acc":0.8014705882,"fidibo_acc":0.7885462555,"newmiind_acc":0.7465277778,"taaghche_acc":0.7820512821,"motamem_acc":0.8631578947,"varzesh3_acc":0.7257525084,"mehrnews_acc":0.6814516129,"tasnim_acc":0.7269230769,"magerta_acc":0.7731092437,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.7674418605,"farsroid_acc":0.6842105263,"parsiday_acc":0.6916666667,"soft98_acc":0.9,"ninisite_discussion_acc":0.3}
|
| 19 |
{"Model Name":"gemma-3-27b-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","acc":0.7628,"acc_strict":0.7628,"donyaeeqtesad_acc":0.6978851964,"isna_acc":0.7265625,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.8187311178,"khabaronline_acc":0.74,"digiato_acc":0.7661795407,"doctoreto_acc":0.78,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7109144543,"bigbangpage_acc":0.821656051,"wiki_ahlolbait_acc":0.8026315789,"virgool_3_acc":0.7940298507,"virgool_2_acc":0.755351682,"virgool_1_acc":0.7784810127,"hamshahrionline_acc":0.8229508197,"tabnak_acc":0.8174273859,"alibaba_acc":0.7843137255,"digikala_mag_acc":0.7975951904,"yjc_acc":0.7126436782,"beytoote_acc":0.7534626039,"asriran_acc":0.7391304348,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7752808989,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8428571429,"namnak_acc":0.7493188011,"khodro45_acc":0.7867647059,"fidibo_acc":0.8237885463,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8315789474,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.6975806452,"tasnim_acc":0.7307692308,"magerta_acc":0.6722689076,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":0.8461538462,"wikishia_acc":0.9393939394,"voolak_acc":0.6976744186,"farsroid_acc":0.6315789474,"parsiday_acc":0.7,"soft98_acc":0.8,"ninisite_discussion_acc":0.7}
|
| 20 |
{"Model Name":"gpt-4o-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7598,"acc_strict":0.7598,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.71484375,"ninisite_article_acc":0.7351351351,"virgool_4_acc":0.8006042296,"khabaronline_acc":0.736,"digiato_acc":0.7599164927,"doctoreto_acc":0.775,"sarzamindownload_acc":0.6535947712,"hamgardi_acc":0.7197640118,"bigbangpage_acc":0.7961783439,"wiki_ahlolbait_acc":0.8289473684,"virgool_3_acc":0.7492537313,"virgool_2_acc":0.7828746177,"virgool_1_acc":0.8006329114,"hamshahrionline_acc":0.8131147541,"tabnak_acc":0.7427385892,"alibaba_acc":0.7810457516,"digikala_mag_acc":0.7615230461,"yjc_acc":0.7643678161,"beytoote_acc":0.7783933518,"asriran_acc":0.7536231884,"ecoiran_acc":0.6952380952,"hawzah_acc":0.7668539326,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8761904762,"namnak_acc":0.7765667575,"khodro45_acc":0.7573529412,"fidibo_acc":0.7621145374,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8736842105,"varzesh3_acc":0.8060200669,"mehrnews_acc":0.6733870968,"tasnim_acc":0.75,"magerta_acc":0.6764705882,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.7631578947,"parsiday_acc":0.7083333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 21 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","acc":0.7364,"acc_strict":0.7364,"donyaeeqtesad_acc":0.752265861,"isna_acc":0.67578125,"ninisite_article_acc":0.7054054054,"virgool_4_acc":0.746223565,"khabaronline_acc":0.724,"digiato_acc":0.7223382046,"doctoreto_acc":0.7,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7020648968,"bigbangpage_acc":0.8089171975,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7339449541,"virgool_1_acc":0.7246835443,"hamshahrionline_acc":0.8262295082,"tabnak_acc":0.7178423237,"alibaba_acc":0.7712418301,"digikala_mag_acc":0.7715430862,"yjc_acc":0.7183908046,"beytoote_acc":0.7479224377,"asriran_acc":0.768115942,"ecoiran_acc":0.6698412698,"hawzah_acc":0.7415730337,"zoomit_acc":0.79342723,"wikipedia_acc":0.819047619,"namnak_acc":0.7220708447,"khodro45_acc":0.75,"fidibo_acc":0.7665198238,"newmiind_acc":0.6631944444,"taaghche_acc":0.7820512821,"motamem_acc":0.8631578947,"varzesh3_acc":0.7525083612,"mehrnews_acc":0.6653225806,"tasnim_acc":0.75,"magerta_acc":0.6134453782,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.5,"parsiday_acc":0.7083333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 22 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.731,"acc_strict":0.7309,"donyaeeqtesad_acc":0.664652568,"isna_acc":0.69921875,"ninisite_article_acc":0.7540540541,"virgool_4_acc":0.7764350453,"khabaronline_acc":0.712,"digiato_acc":0.7265135699,"doctoreto_acc":0.78,"sarzamindownload_acc":0.7124183007,"hamgardi_acc":0.6814159292,"bigbangpage_acc":0.7770700637,"wiki_ahlolbait_acc":0.8157894737,"virgool_3_acc":0.7701492537,"virgool_2_acc":0.752293578,"virgool_1_acc":0.7784810127,"hamshahrionline_acc":0.7442622951,"tabnak_acc":0.7302904564,"alibaba_acc":0.7679738562,"digikala_mag_acc":0.7855711423,"yjc_acc":0.6781609195,"beytoote_acc":0.7423822715,"asriran_acc":0.6811594203,"ecoiran_acc":0.6857142857,"hawzah_acc":0.6882022472,"zoomit_acc":0.7417840376,"wikipedia_acc":0.8333333333,"namnak_acc":0.7166212534,"khodro45_acc":0.7720588235,"fidibo_acc":0.7400881057,"newmiind_acc":0.6736111111,"taaghche_acc":0.7179487179,"motamem_acc":0.8210526316,"varzesh3_acc":0.6956521739,"mehrnews_acc":0.689516129,"tasnim_acc":0.7269230769,"magerta_acc":0.6974789916,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.5263157895,"parsiday_acc":0.675,"soft98_acc":0.7,"ninisite_discussion_acc":0.5}
|
| 23 |
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7160432086,"acc_strict":0.7160432086,"donyaeeqtesad_acc":0.6888217523,"isna_acc":0.69140625,"ninisite_article_acc":0.7,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.712,"digiato_acc":0.7181628392,"doctoreto_acc":0.745,"sarzamindownload_acc":0.6013071895,"hamgardi_acc":0.6755162242,"bigbangpage_acc":0.7770700637,"wiki_ahlolbait_acc":0.8684210526,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7064220183,"virgool_1_acc":0.7056962025,"hamshahrionline_acc":0.7803278689,"tabnak_acc":0.6887966805,"alibaba_acc":0.7483660131,"digikala_mag_acc":0.7434869739,"yjc_acc":0.6724137931,"beytoote_acc":0.728531856,"asriran_acc":0.7487922705,"ecoiran_acc":0.6761904762,"hawzah_acc":0.7584269663,"zoomit_acc":0.7558685446,"wikipedia_acc":0.780952381,"namnak_acc":0.7002724796,"khodro45_acc":0.7279411765,"fidibo_acc":0.7665198238,"newmiind_acc":0.6202090592,"taaghche_acc":0.7628205128,"motamem_acc":0.8404255319,"varzesh3_acc":0.7324414716,"mehrnews_acc":0.6169354839,"tasnim_acc":0.6923076923,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.7878787879,"voolak_acc":0.6511627907,"farsroid_acc":0.7105263158,"parsiday_acc":0.575,"soft98_acc":0.9,"ninisite_discussion_acc":0.3}
|
| 24 |
{"Model Name":"gemma-3-12b-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","acc":0.7094,"acc_strict":0.7094,"donyaeeqtesad_acc":0.6586102719,"isna_acc":0.65625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7643504532,"khabaronline_acc":0.704,"digiato_acc":0.7369519833,"doctoreto_acc":0.76,"sarzamindownload_acc":0.6797385621,"hamgardi_acc":0.6666666667,"bigbangpage_acc":0.7515923567,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7223880597,"virgool_2_acc":0.7584097859,"virgool_1_acc":0.7183544304,"hamshahrionline_acc":0.7213114754,"tabnak_acc":0.7219917012,"alibaba_acc":0.6830065359,"digikala_mag_acc":0.7354709419,"yjc_acc":0.6206896552,"beytoote_acc":0.7146814404,"asriran_acc":0.7198067633,"ecoiran_acc":0.6603174603,"hawzah_acc":0.702247191,"zoomit_acc":0.7323943662,"wikipedia_acc":0.7714285714,"namnak_acc":0.7329700272,"khodro45_acc":0.7352941176,"fidibo_acc":0.718061674,"newmiind_acc":0.6493055556,"taaghche_acc":0.7564102564,"motamem_acc":0.8210526316,"varzesh3_acc":0.7157190635,"mehrnews_acc":0.6088709677,"tasnim_acc":0.6576923077,"magerta_acc":0.6302521008,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6976744186,"farsroid_acc":0.7368421053,"parsiday_acc":0.6583333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.8}
|
| 25 |
-
{"Model Name":"gpt-oss:20b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","acc":0.
|
| 26 |
{"Model Name":"Qwen3-14B","thinking_method":"β","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","acc":0.6958,"acc_strict":0.6958,"donyaeeqtesad_acc":0.6495468278,"isna_acc":0.62890625,"ninisite_article_acc":0.6972972973,"virgool_4_acc":0.7069486405,"khabaronline_acc":0.652,"digiato_acc":0.7202505219,"doctoreto_acc":0.77,"sarzamindownload_acc":0.614379085,"hamgardi_acc":0.6430678466,"bigbangpage_acc":0.7579617834,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7373134328,"virgool_2_acc":0.7155963303,"virgool_1_acc":0.7278481013,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6970954357,"alibaba_acc":0.7254901961,"digikala_mag_acc":0.7074148297,"yjc_acc":0.6379310345,"beytoote_acc":0.6842105263,"asriran_acc":0.6859903382,"ecoiran_acc":0.653968254,"hawzah_acc":0.7078651685,"zoomit_acc":0.7676056338,"wikipedia_acc":0.8142857143,"namnak_acc":0.6621253406,"khodro45_acc":0.7647058824,"fidibo_acc":0.731277533,"newmiind_acc":0.6597222222,"taaghche_acc":0.6987179487,"motamem_acc":0.8105263158,"varzesh3_acc":0.6220735786,"mehrnews_acc":0.625,"tasnim_acc":0.6692307692,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6279069767,"farsroid_acc":0.6052631579,"parsiday_acc":0.5666666667,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 27 |
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"β","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","acc":0.6894,"acc_strict":0.6894,"donyaeeqtesad_acc":0.670694864,"isna_acc":0.63671875,"ninisite_article_acc":0.6945945946,"virgool_4_acc":0.7039274924,"khabaronline_acc":0.664,"digiato_acc":0.6826722338,"doctoreto_acc":0.755,"sarzamindownload_acc":0.6339869281,"hamgardi_acc":0.6342182891,"bigbangpage_acc":0.7452229299,"wiki_ahlolbait_acc":0.7697368421,"virgool_3_acc":0.7014925373,"virgool_2_acc":0.7125382263,"virgool_1_acc":0.7341772152,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6307053942,"alibaba_acc":0.7647058824,"digikala_mag_acc":0.7174348697,"yjc_acc":0.5804597701,"beytoote_acc":0.6814404432,"asriran_acc":0.6811594203,"ecoiran_acc":0.6158730159,"hawzah_acc":0.6994382022,"zoomit_acc":0.7441314554,"wikipedia_acc":0.8333333333,"namnak_acc":0.659400545,"khodro45_acc":0.7058823529,"fidibo_acc":0.7268722467,"newmiind_acc":0.6527777778,"taaghche_acc":0.7051282051,"motamem_acc":0.8526315789,"varzesh3_acc":0.6789297659,"mehrnews_acc":0.5887096774,"tasnim_acc":0.6692307692,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.4347826087,"vipofilm_acc":0.6923076923,"wikishia_acc":0.7575757576,"voolak_acc":0.6511627907,"farsroid_acc":0.6842105263,"parsiday_acc":0.55,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 28 |
{"Model Name":"Qwen3-30B-A3B","thinking_method":"β","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","acc":0.688,"acc_strict":0.688,"donyaeeqtesad_acc":0.6465256798,"isna_acc":0.6640625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.668,"digiato_acc":0.6764091858,"doctoreto_acc":0.765,"sarzamindownload_acc":0.7058823529,"hamgardi_acc":0.6519174041,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.7368421053,"virgool_3_acc":0.7134328358,"virgool_2_acc":0.7003058104,"virgool_1_acc":0.7025316456,"hamshahrionline_acc":0.6819672131,"tabnak_acc":0.7012448133,"alibaba_acc":0.7189542484,"digikala_mag_acc":0.6753507014,"yjc_acc":0.632183908,"beytoote_acc":0.6703601108,"asriran_acc":0.652173913,"ecoiran_acc":0.6126984127,"hawzah_acc":0.7387640449,"zoomit_acc":0.7300469484,"wikipedia_acc":0.7904761905,"namnak_acc":0.6920980926,"khodro45_acc":0.7279411765,"fidibo_acc":0.6872246696,"newmiind_acc":0.6631944444,"taaghche_acc":0.6858974359,"motamem_acc":0.8,"varzesh3_acc":0.6120401338,"mehrnews_acc":0.6129032258,"tasnim_acc":0.65,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.4782608696,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.6578947368,"parsiday_acc":0.6,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
|
|
|
| 13 |
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"β","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.798859772,"acc_strict":0.7983596719,"donyaeeqtesad_acc":0.7673716012,"isna_acc":0.76953125,"ninisite_article_acc":0.7696476965,"virgool_4_acc":0.8398791541,"khabaronline_acc":0.78,"digiato_acc":0.7870563674,"doctoreto_acc":0.82,"sarzamindownload_acc":0.7712418301,"hamgardi_acc":0.7610619469,"bigbangpage_acc":0.8789808917,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.8119402985,"virgool_2_acc":0.8226299694,"virgool_1_acc":0.8037974684,"hamshahrionline_acc":0.8557377049,"tabnak_acc":0.8298755187,"alibaba_acc":0.8562091503,"digikala_mag_acc":0.8152610442,"yjc_acc":0.7471264368,"beytoote_acc":0.8005540166,"asriran_acc":0.7922705314,"ecoiran_acc":0.7333333333,"hawzah_acc":0.8342696629,"zoomit_acc":0.8427230047,"wikipedia_acc":0.9095238095,"namnak_acc":0.7738419619,"khodro45_acc":0.8088235294,"fidibo_acc":0.845814978,"newmiind_acc":0.7222222222,"taaghche_acc":0.8397435897,"motamem_acc":0.8947368421,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.7338709677,"tasnim_acc":0.7730769231,"magerta_acc":0.6974789916,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.6578947368,"parsiday_acc":0.7166666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
| 14 |
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"β","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","acc":0.7956,"acc_strict":0.1123,"donyaeeqtesad_acc":0.7764350453,"isna_acc":0.76171875,"ninisite_article_acc":0.772972973,"virgool_4_acc":0.8549848943,"khabaronline_acc":0.78,"digiato_acc":0.7954070981,"doctoreto_acc":0.755,"sarzamindownload_acc":0.7385620915,"hamgardi_acc":0.7492625369,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8421052632,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8287461774,"virgool_1_acc":0.8259493671,"hamshahrionline_acc":0.862295082,"tabnak_acc":0.8257261411,"alibaba_acc":0.8366013072,"digikala_mag_acc":0.8076152305,"yjc_acc":0.7356321839,"beytoote_acc":0.7922437673,"asriran_acc":0.7874396135,"ecoiran_acc":0.7142857143,"hawzah_acc":0.845505618,"zoomit_acc":0.8403755869,"wikipedia_acc":0.9047619048,"namnak_acc":0.7874659401,"khodro45_acc":0.7941176471,"fidibo_acc":0.8414096916,"newmiind_acc":0.7465277778,"taaghche_acc":0.8076923077,"motamem_acc":0.8947368421,"varzesh3_acc":0.7959866221,"mehrnews_acc":0.7419354839,"tasnim_acc":0.7346153846,"magerta_acc":0.6848739496,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8787878788,"voolak_acc":0.7906976744,"farsroid_acc":0.6578947368,"parsiday_acc":0.7583333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
|
| 15 |
{"Model Name":"llama4:scout","thinking_method":"β","model_url":"https_google.com","parameters_count":"109000000000","source_type":"Open-Source","acc":0.7814,"acc_strict":0.1472,"donyaeeqtesad_acc":0.7945619335,"isna_acc":0.7421875,"ninisite_article_acc":0.7486486486,"virgool_4_acc":0.8157099698,"khabaronline_acc":0.756,"digiato_acc":0.7933194154,"doctoreto_acc":0.8,"sarzamindownload_acc":0.7189542484,"hamgardi_acc":0.7197640118,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8421052632,"virgool_3_acc":0.7850746269,"virgool_2_acc":0.8073394495,"virgool_1_acc":0.7784810127,"hamshahrionline_acc":0.8557377049,"tabnak_acc":0.8132780083,"alibaba_acc":0.8006535948,"digikala_mag_acc":0.7715430862,"yjc_acc":0.7471264368,"beytoote_acc":0.7700831025,"asriran_acc":0.7777777778,"ecoiran_acc":0.7523809524,"hawzah_acc":0.7949438202,"zoomit_acc":0.8192488263,"wikipedia_acc":0.8428571429,"namnak_acc":0.7820163488,"khodro45_acc":0.8088235294,"fidibo_acc":0.7929515419,"newmiind_acc":0.6979166667,"taaghche_acc":0.8269230769,"motamem_acc":0.8842105263,"varzesh3_acc":0.8160535117,"mehrnews_acc":0.7258064516,"tasnim_acc":0.7769230769,"magerta_acc":0.7100840336,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.7631578947,"parsiday_acc":0.6916666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
| 16 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"120000000000","source_type":"Open-Source","acc":0.7714,"acc_strict":0.7714,"donyaeeqtesad_acc":0.7432024169,"isna_acc":0.765625,"ninisite_article_acc":0.7783783784,"virgool_4_acc":0.8157099698,"khabaronline_acc":0.732,"digiato_acc":0.7849686848,"doctoreto_acc":0.79,"sarzamindownload_acc":0.8235294118,"hamgardi_acc":0.7256637168,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.7763157895,"virgool_3_acc":0.7880597015,"virgool_2_acc":0.7859327217,"virgool_1_acc":0.7848101266,"hamshahrionline_acc":0.7868852459,"tabnak_acc":0.7427385892,"alibaba_acc":0.7679738562,"digikala_mag_acc":0.8236472946,"yjc_acc":0.7183908046,"beytoote_acc":0.8033240997,"asriran_acc":0.7246376812,"ecoiran_acc":0.6984126984,"hawzah_acc":0.7556179775,"zoomit_acc":0.8122065728,"wikipedia_acc":0.8952380952,"namnak_acc":0.7465940054,"khodro45_acc":0.8382352941,"fidibo_acc":0.7709251101,"newmiind_acc":0.7465277778,"taaghche_acc":0.7884615385,"motamem_acc":0.8210526316,"varzesh3_acc":0.7391304348,"mehrnews_acc":0.7016129032,"tasnim_acc":0.7461538462,"magerta_acc":0.768907563,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.7209302326,"farsroid_acc":0.7105263158,"parsiday_acc":0.6916666667,"soft98_acc":0.7,"ninisite_discussion_acc":0.6}
|
| 17 |
{"Model Name":"gpt-4.1-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7712,"acc_strict":0.7712,"donyaeeqtesad_acc":0.7703927492,"isna_acc":0.75390625,"ninisite_article_acc":0.7648648649,"virgool_4_acc":0.8247734139,"khabaronline_acc":0.76,"digiato_acc":0.7745302714,"doctoreto_acc":0.785,"sarzamindownload_acc":0.7581699346,"hamgardi_acc":0.6784660767,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8223684211,"virgool_3_acc":0.7910447761,"virgool_2_acc":0.7920489297,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8295081967,"tabnak_acc":0.7634854772,"alibaba_acc":0.7973856209,"digikala_mag_acc":0.8056112224,"yjc_acc":0.724137931,"beytoote_acc":0.7783933518,"asriran_acc":0.7777777778,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7724719101,"zoomit_acc":0.8098591549,"wikipedia_acc":0.8761904762,"namnak_acc":0.7547683924,"khodro45_acc":0.7941176471,"fidibo_acc":0.7841409692,"newmiind_acc":0.6875,"taaghche_acc":0.8269230769,"motamem_acc":0.8631578947,"varzesh3_acc":0.7926421405,"mehrnews_acc":0.7056451613,"tasnim_acc":0.7076923077,"magerta_acc":0.6890756303,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8181818182,"voolak_acc":0.6279069767,"farsroid_acc":0.6578947368,"parsiday_acc":0.7083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.5}
|
| 18 |
{"Model Name":"Qwen3-32B","thinking_method":"β","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","acc":0.7654,"acc_strict":0.7653,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.75,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.7824773414,"khabaronline_acc":0.724,"digiato_acc":0.8037578288,"doctoreto_acc":0.8,"sarzamindownload_acc":0.7450980392,"hamgardi_acc":0.6991150442,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.8157894737,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8073394495,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8,"tabnak_acc":0.7634854772,"alibaba_acc":0.8039215686,"digikala_mag_acc":0.7875751503,"yjc_acc":0.6896551724,"beytoote_acc":0.7783933518,"asriran_acc":0.7632850242,"ecoiran_acc":0.6793650794,"hawzah_acc":0.7724719101,"zoomit_acc":0.8215962441,"wikipedia_acc":0.8523809524,"namnak_acc":0.7520435967,"khodro45_acc":0.8088235294,"fidibo_acc":0.7665198238,"newmiind_acc":0.6909722222,"taaghche_acc":0.7564102564,"motamem_acc":0.8736842105,"varzesh3_acc":0.762541806,"mehrnews_acc":0.689516129,"tasnim_acc":0.7192307692,"magerta_acc":0.7268907563,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8484848485,"voolak_acc":0.6744186047,"farsroid_acc":0.6578947368,"parsiday_acc":0.675,"soft98_acc":0.7,"ninisite_discussion_acc":0.7}
|
|
|
|
| 19 |
{"Model Name":"gemma-3-27b-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","acc":0.7628,"acc_strict":0.7628,"donyaeeqtesad_acc":0.6978851964,"isna_acc":0.7265625,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.8187311178,"khabaronline_acc":0.74,"digiato_acc":0.7661795407,"doctoreto_acc":0.78,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7109144543,"bigbangpage_acc":0.821656051,"wiki_ahlolbait_acc":0.8026315789,"virgool_3_acc":0.7940298507,"virgool_2_acc":0.755351682,"virgool_1_acc":0.7784810127,"hamshahrionline_acc":0.8229508197,"tabnak_acc":0.8174273859,"alibaba_acc":0.7843137255,"digikala_mag_acc":0.7975951904,"yjc_acc":0.7126436782,"beytoote_acc":0.7534626039,"asriran_acc":0.7391304348,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7752808989,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8428571429,"namnak_acc":0.7493188011,"khodro45_acc":0.7867647059,"fidibo_acc":0.8237885463,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8315789474,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.6975806452,"tasnim_acc":0.7307692308,"magerta_acc":0.6722689076,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":0.8461538462,"wikishia_acc":0.9393939394,"voolak_acc":0.6976744186,"farsroid_acc":0.6315789474,"parsiday_acc":0.7,"soft98_acc":0.8,"ninisite_discussion_acc":0.7}
|
| 20 |
{"Model Name":"gpt-4o-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7598,"acc_strict":0.7598,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.71484375,"ninisite_article_acc":0.7351351351,"virgool_4_acc":0.8006042296,"khabaronline_acc":0.736,"digiato_acc":0.7599164927,"doctoreto_acc":0.775,"sarzamindownload_acc":0.6535947712,"hamgardi_acc":0.7197640118,"bigbangpage_acc":0.7961783439,"wiki_ahlolbait_acc":0.8289473684,"virgool_3_acc":0.7492537313,"virgool_2_acc":0.7828746177,"virgool_1_acc":0.8006329114,"hamshahrionline_acc":0.8131147541,"tabnak_acc":0.7427385892,"alibaba_acc":0.7810457516,"digikala_mag_acc":0.7615230461,"yjc_acc":0.7643678161,"beytoote_acc":0.7783933518,"asriran_acc":0.7536231884,"ecoiran_acc":0.6952380952,"hawzah_acc":0.7668539326,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8761904762,"namnak_acc":0.7765667575,"khodro45_acc":0.7573529412,"fidibo_acc":0.7621145374,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8736842105,"varzesh3_acc":0.8060200669,"mehrnews_acc":0.6733870968,"tasnim_acc":0.75,"magerta_acc":0.6764705882,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.7631578947,"parsiday_acc":0.7083333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 21 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","acc":0.7364,"acc_strict":0.7364,"donyaeeqtesad_acc":0.752265861,"isna_acc":0.67578125,"ninisite_article_acc":0.7054054054,"virgool_4_acc":0.746223565,"khabaronline_acc":0.724,"digiato_acc":0.7223382046,"doctoreto_acc":0.7,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7020648968,"bigbangpage_acc":0.8089171975,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7339449541,"virgool_1_acc":0.7246835443,"hamshahrionline_acc":0.8262295082,"tabnak_acc":0.7178423237,"alibaba_acc":0.7712418301,"digikala_mag_acc":0.7715430862,"yjc_acc":0.7183908046,"beytoote_acc":0.7479224377,"asriran_acc":0.768115942,"ecoiran_acc":0.6698412698,"hawzah_acc":0.7415730337,"zoomit_acc":0.79342723,"wikipedia_acc":0.819047619,"namnak_acc":0.7220708447,"khodro45_acc":0.75,"fidibo_acc":0.7665198238,"newmiind_acc":0.6631944444,"taaghche_acc":0.7820512821,"motamem_acc":0.8631578947,"varzesh3_acc":0.7525083612,"mehrnews_acc":0.6653225806,"tasnim_acc":0.75,"magerta_acc":0.6134453782,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.5,"parsiday_acc":0.7083333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 22 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.731,"acc_strict":0.7309,"donyaeeqtesad_acc":0.664652568,"isna_acc":0.69921875,"ninisite_article_acc":0.7540540541,"virgool_4_acc":0.7764350453,"khabaronline_acc":0.712,"digiato_acc":0.7265135699,"doctoreto_acc":0.78,"sarzamindownload_acc":0.7124183007,"hamgardi_acc":0.6814159292,"bigbangpage_acc":0.7770700637,"wiki_ahlolbait_acc":0.8157894737,"virgool_3_acc":0.7701492537,"virgool_2_acc":0.752293578,"virgool_1_acc":0.7784810127,"hamshahrionline_acc":0.7442622951,"tabnak_acc":0.7302904564,"alibaba_acc":0.7679738562,"digikala_mag_acc":0.7855711423,"yjc_acc":0.6781609195,"beytoote_acc":0.7423822715,"asriran_acc":0.6811594203,"ecoiran_acc":0.6857142857,"hawzah_acc":0.6882022472,"zoomit_acc":0.7417840376,"wikipedia_acc":0.8333333333,"namnak_acc":0.7166212534,"khodro45_acc":0.7720588235,"fidibo_acc":0.7400881057,"newmiind_acc":0.6736111111,"taaghche_acc":0.7179487179,"motamem_acc":0.8210526316,"varzesh3_acc":0.6956521739,"mehrnews_acc":0.689516129,"tasnim_acc":0.7269230769,"magerta_acc":0.6974789916,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.5263157895,"parsiday_acc":0.675,"soft98_acc":0.7,"ninisite_discussion_acc":0.5}
|
| 23 |
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7160432086,"acc_strict":0.7160432086,"donyaeeqtesad_acc":0.6888217523,"isna_acc":0.69140625,"ninisite_article_acc":0.7,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.712,"digiato_acc":0.7181628392,"doctoreto_acc":0.745,"sarzamindownload_acc":0.6013071895,"hamgardi_acc":0.6755162242,"bigbangpage_acc":0.7770700637,"wiki_ahlolbait_acc":0.8684210526,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7064220183,"virgool_1_acc":0.7056962025,"hamshahrionline_acc":0.7803278689,"tabnak_acc":0.6887966805,"alibaba_acc":0.7483660131,"digikala_mag_acc":0.7434869739,"yjc_acc":0.6724137931,"beytoote_acc":0.728531856,"asriran_acc":0.7487922705,"ecoiran_acc":0.6761904762,"hawzah_acc":0.7584269663,"zoomit_acc":0.7558685446,"wikipedia_acc":0.780952381,"namnak_acc":0.7002724796,"khodro45_acc":0.7279411765,"fidibo_acc":0.7665198238,"newmiind_acc":0.6202090592,"taaghche_acc":0.7628205128,"motamem_acc":0.8404255319,"varzesh3_acc":0.7324414716,"mehrnews_acc":0.6169354839,"tasnim_acc":0.6923076923,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.7878787879,"voolak_acc":0.6511627907,"farsroid_acc":0.7105263158,"parsiday_acc":0.575,"soft98_acc":0.9,"ninisite_discussion_acc":0.3}
|
| 24 |
{"Model Name":"gemma-3-12b-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","acc":0.7094,"acc_strict":0.7094,"donyaeeqtesad_acc":0.6586102719,"isna_acc":0.65625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7643504532,"khabaronline_acc":0.704,"digiato_acc":0.7369519833,"doctoreto_acc":0.76,"sarzamindownload_acc":0.6797385621,"hamgardi_acc":0.6666666667,"bigbangpage_acc":0.7515923567,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7223880597,"virgool_2_acc":0.7584097859,"virgool_1_acc":0.7183544304,"hamshahrionline_acc":0.7213114754,"tabnak_acc":0.7219917012,"alibaba_acc":0.6830065359,"digikala_mag_acc":0.7354709419,"yjc_acc":0.6206896552,"beytoote_acc":0.7146814404,"asriran_acc":0.7198067633,"ecoiran_acc":0.6603174603,"hawzah_acc":0.702247191,"zoomit_acc":0.7323943662,"wikipedia_acc":0.7714285714,"namnak_acc":0.7329700272,"khodro45_acc":0.7352941176,"fidibo_acc":0.718061674,"newmiind_acc":0.6493055556,"taaghche_acc":0.7564102564,"motamem_acc":0.8210526316,"varzesh3_acc":0.7157190635,"mehrnews_acc":0.6088709677,"tasnim_acc":0.6576923077,"magerta_acc":0.6302521008,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6976744186,"farsroid_acc":0.7368421053,"parsiday_acc":0.6583333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.8}
|
| 25 |
+
{"Model Name":"gpt-oss:20b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","acc":0.6965089527,"acc_strict":0.6963088927,"donyaeeqtesad_acc":0.6858006042,"isna_acc":0.61328125,"ninisite_article_acc":0.7297297297,"virgool_4_acc":0.7824773414,"khabaronline_acc":0.656,"digiato_acc":0.7118997912,"doctoreto_acc":0.765,"sarzamindownload_acc":0.7973856209,"hamgardi_acc":0.6991150442,"bigbangpage_acc":0.7515923567,"wiki_ahlolbait_acc":0.6776315789,"virgool_3_acc":0.7164179104,"virgool_2_acc":0.7217125382,"virgool_1_acc":0.7523809524,"hamshahrionline_acc":0.6950819672,"tabnak_acc":0.6597510373,"alibaba_acc":0.7516339869,"digikala_mag_acc":0.7208835341,"yjc_acc":0.7068965517,"beytoote_acc":0.7008310249,"asriran_acc":0.6570048309,"ecoiran_acc":0.5904761905,"hawzah_acc":0.6235955056,"zoomit_acc":0.7159624413,"wikipedia_acc":0.8,"namnak_acc":0.6648501362,"khodro45_acc":0.7647058824,"fidibo_acc":0.7136563877,"newmiind_acc":0.6666666667,"taaghche_acc":0.6987179487,"motamem_acc":0.7684210526,"varzesh3_acc":0.6622073579,"mehrnews_acc":0.6275303644,"tasnim_acc":0.5730769231,"magerta_acc":0.6890756303,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.5384615385,"wikishia_acc":0.8787878788,"voolak_acc":0.6976744186,"farsroid_acc":0.7105263158,"parsiday_acc":0.6666666667,"soft98_acc":0.7,"ninisite_discussion_acc":0.4}
|
| 26 |
{"Model Name":"Qwen3-14B","thinking_method":"β","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","acc":0.6958,"acc_strict":0.6958,"donyaeeqtesad_acc":0.6495468278,"isna_acc":0.62890625,"ninisite_article_acc":0.6972972973,"virgool_4_acc":0.7069486405,"khabaronline_acc":0.652,"digiato_acc":0.7202505219,"doctoreto_acc":0.77,"sarzamindownload_acc":0.614379085,"hamgardi_acc":0.6430678466,"bigbangpage_acc":0.7579617834,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7373134328,"virgool_2_acc":0.7155963303,"virgool_1_acc":0.7278481013,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6970954357,"alibaba_acc":0.7254901961,"digikala_mag_acc":0.7074148297,"yjc_acc":0.6379310345,"beytoote_acc":0.6842105263,"asriran_acc":0.6859903382,"ecoiran_acc":0.653968254,"hawzah_acc":0.7078651685,"zoomit_acc":0.7676056338,"wikipedia_acc":0.8142857143,"namnak_acc":0.6621253406,"khodro45_acc":0.7647058824,"fidibo_acc":0.731277533,"newmiind_acc":0.6597222222,"taaghche_acc":0.6987179487,"motamem_acc":0.8105263158,"varzesh3_acc":0.6220735786,"mehrnews_acc":0.625,"tasnim_acc":0.6692307692,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6279069767,"farsroid_acc":0.6052631579,"parsiday_acc":0.5666666667,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 27 |
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"β","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","acc":0.6894,"acc_strict":0.6894,"donyaeeqtesad_acc":0.670694864,"isna_acc":0.63671875,"ninisite_article_acc":0.6945945946,"virgool_4_acc":0.7039274924,"khabaronline_acc":0.664,"digiato_acc":0.6826722338,"doctoreto_acc":0.755,"sarzamindownload_acc":0.6339869281,"hamgardi_acc":0.6342182891,"bigbangpage_acc":0.7452229299,"wiki_ahlolbait_acc":0.7697368421,"virgool_3_acc":0.7014925373,"virgool_2_acc":0.7125382263,"virgool_1_acc":0.7341772152,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6307053942,"alibaba_acc":0.7647058824,"digikala_mag_acc":0.7174348697,"yjc_acc":0.5804597701,"beytoote_acc":0.6814404432,"asriran_acc":0.6811594203,"ecoiran_acc":0.6158730159,"hawzah_acc":0.6994382022,"zoomit_acc":0.7441314554,"wikipedia_acc":0.8333333333,"namnak_acc":0.659400545,"khodro45_acc":0.7058823529,"fidibo_acc":0.7268722467,"newmiind_acc":0.6527777778,"taaghche_acc":0.7051282051,"motamem_acc":0.8526315789,"varzesh3_acc":0.6789297659,"mehrnews_acc":0.5887096774,"tasnim_acc":0.6692307692,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.4347826087,"vipofilm_acc":0.6923076923,"wikishia_acc":0.7575757576,"voolak_acc":0.6511627907,"farsroid_acc":0.6842105263,"parsiday_acc":0.55,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 28 |
{"Model Name":"Qwen3-30B-A3B","thinking_method":"β","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","acc":0.688,"acc_strict":0.688,"donyaeeqtesad_acc":0.6465256798,"isna_acc":0.6640625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.668,"digiato_acc":0.6764091858,"doctoreto_acc":0.765,"sarzamindownload_acc":0.7058823529,"hamgardi_acc":0.6519174041,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.7368421053,"virgool_3_acc":0.7134328358,"virgool_2_acc":0.7003058104,"virgool_1_acc":0.7025316456,"hamshahrionline_acc":0.6819672131,"tabnak_acc":0.7012448133,"alibaba_acc":0.7189542484,"digikala_mag_acc":0.6753507014,"yjc_acc":0.632183908,"beytoote_acc":0.6703601108,"asriran_acc":0.652173913,"ecoiran_acc":0.6126984127,"hawzah_acc":0.7387640449,"zoomit_acc":0.7300469484,"wikipedia_acc":0.7904761905,"namnak_acc":0.6920980926,"khodro45_acc":0.7279411765,"fidibo_acc":0.6872246696,"newmiind_acc":0.6631944444,"taaghche_acc":0.6858974359,"motamem_acc":0.8,"varzesh3_acc":0.6120401338,"mehrnews_acc":0.6129032258,"tasnim_acc":0.65,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.4782608696,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.6578947368,"parsiday_acc":0.6,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
leaderboard/boards_data/persian_nlg.jsonl
CHANGED
|
@@ -17,7 +17,7 @@
|
|
| 17 |
{"Model Name":"Qwen3-14B","thinking_method":"β","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1987198912,"question-generation_PersianQA_rougeL_recall":0.3431437262,"question-generation_PersianQA_rougeL_f1_score":0.2419384398},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1535253787,"translation-en2fa_en2fa_epoque_bleu":0.3553678809,"translation-en2fa_en2fa_mizan_bleu":0.1285441922,"translation-en2fa_en2fa_quran_bleu":0.0857809616,"translation-en2fa_en2fa_sahife_bleu":0.0787025343,"translation-en2fa_en2fa_nahj_bleu":0.0404850935,"translation-en2fa_en2fa_tep_bleu":0.0586129062},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1469468837,"summarization_SamSUM-fa_rougeL_recall":0.3743807014,"summarization_SamSUM-fa_rougeL_f1_score":0.2022859929},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2145488085,"translation-fa2en_fa2en_tep_bleu":0.1307272464,"translation-fa2en_fa2en_mizan_bleu":0.1697754862,"translation-fa2en_fa2en_quran_bleu":0.1552415558,"translation-fa2en_fa2en_epoque_bleu":0.4513682579,"translation-fa2en_fa2en_nahj_bleu":0.0842673472,"translation-fa2en_fa2en_sahife_bleu":0.0853787118},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0910450298,"translation-ar2fa_ar2fa_sahife_bleu":0.0862679894,"translation-ar2fa_ar2fa_nahj_bleu":0.0558129824,"translation-ar2fa_ar2fa_quran_bleu":0.1292925153},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1123870374,"summarization_PnSummary_rougeL_recall":0.4032007327,"summarization_PnSummary_rougeL_f1_score":0.17115848},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0494411806,"translation-fa2ar_fa2ar_nahj_bleu":0.0369805868,"translation-fa2ar_fa2ar_sahife_bleu":0.0567654991,"translation-fa2ar_fa2ar_quran_bleu":0.0545774559},"nlg_score":0.16056333}
|
| 18 |
{"Model Name":"llama4:scout","thinking_method":"β","model_url":"https_google.com","parameters_count":"109000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1572445395,"question-generation_PersianQA_rougeL_recall":0.2651515671,"question-generation_PersianQA_rougeL_f1_score":0.1889377754},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1694667296,"translation-en2fa_en2fa_epoque_bleu":0.3980975238,"translation-en2fa_en2fa_mizan_bleu":0.1400810731,"translation-en2fa_en2fa_quran_bleu":0.1173019123,"translation-en2fa_en2fa_sahife_bleu":0.0758825134,"translation-en2fa_en2fa_nahj_bleu":0.0380397952,"translation-en2fa_en2fa_tep_bleu":0.05711699},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1539866165,"summarization_SamSUM-fa_rougeL_recall":0.341409574,"summarization_SamSUM-fa_rougeL_f1_score":0.2007085976},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1909462413,"translation-fa2en_fa2en_tep_bleu":0.1038996524,"translation-fa2en_fa2en_mizan_bleu":0.1513900262,"translation-fa2en_fa2en_quran_bleu":0.129609905,"translation-fa2en_fa2en_epoque_bleu":0.4266734606,"translation-fa2en_fa2en_nahj_bleu":0.0619630431,"translation-fa2en_fa2en_sahife_bleu":0.0584029483},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0997661237,"translation-ar2fa_ar2fa_sahife_bleu":0.0880416079,"translation-ar2fa_ar2fa_nahj_bleu":0.0425251453,"translation-ar2fa_ar2fa_quran_bleu":0.165869569},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1377053282,"summarization_PnSummary_rougeL_recall":0.380073051,"summarization_PnSummary_rougeL_f1_score":0.1928750247},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0548753777,"translation-fa2ar_fa2ar_nahj_bleu":0.0318327001,"translation-fa2ar_fa2ar_sahife_bleu":0.0567893259,"translation-fa2ar_fa2ar_quran_bleu":0.076004107},"nlg_score":0.1567965528}
|
| 19 |
{"Model Name":"Qwen3-8B","thinking_method":"β","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1965366702,"question-generation_PersianQA_rougeL_recall":0.340760284,"question-generation_PersianQA_rougeL_f1_score":0.2388923895},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1550276898,"translation-en2fa_en2fa_epoque_bleu":0.3721582216,"translation-en2fa_en2fa_mizan_bleu":0.1231599039,"translation-en2fa_en2fa_quran_bleu":0.0882213453,"translation-en2fa_en2fa_sahife_bleu":0.0725213197,"translation-en2fa_en2fa_nahj_bleu":0.0424186358,"translation-en2fa_en2fa_tep_bleu":0.0528718634},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1463365551,"summarization_SamSUM-fa_rougeL_recall":0.3856017289,"summarization_SamSUM-fa_rougeL_f1_score":0.2024070197},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2024225184,"translation-fa2en_fa2en_tep_bleu":0.1163127945,"translation-fa2en_fa2en_mizan_bleu":0.1649009947,"translation-fa2en_fa2en_quran_bleu":0.1513328968,"translation-fa2en_fa2en_epoque_bleu":0.4171232399,"translation-fa2en_fa2en_nahj_bleu":0.0857999462,"translation-fa2en_fa2en_sahife_bleu":0.0929479364},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0821020713,"translation-ar2fa_ar2fa_sahife_bleu":0.0730469461,"translation-ar2fa_ar2fa_nahj_bleu":0.0579031327,"translation-ar2fa_ar2fa_quran_bleu":0.1141461882},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.109255822,"summarization_PnSummary_rougeL_recall":0.3979273385,"summarization_PnSummary_rougeL_f1_score":0.1669061111},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0423318046,"translation-fa2ar_fa2ar_nahj_bleu":0.0329089717,"translation-fa2ar_fa2ar_sahife_bleu":0.0445101244,"translation-fa2ar_fa2ar_quran_bleu":0.0495763178},"nlg_score":0.1557270864}
|
| 20 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"
|
| 21 |
{"Model Name":"Qwen3-4B","thinking_method":"β","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1744197112,"question-generation_PersianQA_rougeL_recall":0.2697024508,"question-generation_PersianQA_rougeL_f1_score":0.2017710943},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1217211215,"translation-en2fa_en2fa_epoque_bleu":0.2916268514,"translation-en2fa_en2fa_mizan_bleu":0.091925603,"translation-en2fa_en2fa_quran_bleu":0.065498518,"translation-en2fa_en2fa_sahife_bleu":0.0612237455,"translation-en2fa_en2fa_nahj_bleu":0.0385824628,"translation-en2fa_en2fa_tep_bleu":0.0453883692},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1429609514,"summarization_SamSUM-fa_rougeL_recall":0.397717388,"summarization_SamSUM-fa_rougeL_f1_score":0.2013136641},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1840809218,"translation-fa2en_fa2en_tep_bleu":0.1011436783,"translation-fa2en_fa2en_mizan_bleu":0.149157222,"translation-fa2en_fa2en_quran_bleu":0.1377761662,"translation-fa2en_fa2en_epoque_bleu":0.3802946233,"translation-fa2en_fa2en_nahj_bleu":0.0851756367,"translation-fa2en_fa2en_sahife_bleu":0.0857201524},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0636385541,"translation-ar2fa_ar2fa_sahife_bleu":0.0557180428,"translation-ar2fa_ar2fa_nahj_bleu":0.0539968488,"translation-ar2fa_ar2fa_quran_bleu":0.0807186853},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1067208324,"summarization_PnSummary_rougeL_recall":0.4109136551,"summarization_PnSummary_rougeL_f1_score":0.1648475797},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0351351131,"translation-fa2ar_fa2ar_nahj_bleu":0.0313503027,"translation-fa2ar_fa2ar_sahife_bleu":0.042075565,"translation-fa2ar_fa2ar_quran_bleu":0.0319794715},"nlg_score":0.1389297212}
|
| 22 |
{"Model Name":"gemini-2.5-flash","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.3121385499,"question-generation_PersianQA_rougeL_recall":0.4162991047,"question-generation_PersianQA_rougeL_f1_score":0.3445136596},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0650794176,"translation-en2fa_en2fa_epoque_bleu":0.1194375779,"translation-en2fa_en2fa_mizan_bleu":0.0640649978,"translation-en2fa_en2fa_quran_bleu":0.0926514743,"translation-en2fa_en2fa_sahife_bleu":0.0392464347,"translation-en2fa_en2fa_nahj_bleu":0.022322883,"translation-en2fa_en2fa_tep_bleu":0.0184227674},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1525978605,"summarization_SamSUM-fa_rougeL_recall":0.3945587249,"summarization_SamSUM-fa_rougeL_f1_score":0.209852471},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0559484689,"translation-fa2en_fa2en_tep_bleu":0.0190401646,"translation-fa2en_fa2en_mizan_bleu":0.0369126121,"translation-fa2en_fa2en_quran_bleu":0.0401048971,"translation-fa2en_fa2en_epoque_bleu":0.1381975553,"translation-fa2en_fa2en_nahj_bleu":0.0232788817,"translation-fa2en_fa2en_sahife_bleu":0.017477039},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0772188097,"translation-ar2fa_ar2fa_sahife_bleu":0.0610321929,"translation-ar2fa_ar2fa_nahj_bleu":0.0273061824,"translation-ar2fa_ar2fa_quran_bleu":0.1408224224},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1070140366,"summarization_PnSummary_rougeL_recall":0.4357356292,"summarization_PnSummary_rougeL_f1_score":0.1672508999},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0382543341,"translation-fa2ar_fa2ar_nahj_bleu":0.032191006,"translation-fa2ar_fa2ar_sahife_bleu":0.028980881,"translation-fa2ar_fa2ar_quran_bleu":0.0535911152},"nlg_score":0.1368740087}
|
| 23 |
{"Model Name":"gpt-oss:20b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1877254615,"question-generation_PersianQA_rougeL_recall":0.3036923298,"question-generation_PersianQA_rougeL_f1_score":0.2215402117},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1482164359,"translation-en2fa_en2fa_epoque_bleu":0.3332313032,"translation-en2fa_en2fa_mizan_bleu":0.1348649993,"translation-en2fa_en2fa_quran_bleu":0.0798910499,"translation-en2fa_en2fa_sahife_bleu":0.0724923326,"translation-en2fa_en2fa_nahj_bleu":0.0425031053,"translation-en2fa_en2fa_tep_bleu":0.0570157331},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1076131004,"summarization_SamSUM-fa_rougeL_recall":0.354952604,"summarization_SamSUM-fa_rougeL_f1_score":0.1578241504},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1811060704,"translation-fa2en_fa2en_tep_bleu":0.1180786789,"translation-fa2en_fa2en_mizan_bleu":0.1503794353,"translation-fa2en_fa2en_quran_bleu":0.1042682142,"translation-fa2en_fa2en_epoque_bleu":0.3794274854,"translation-fa2en_fa2en_nahj_bleu":0.0641545233,"translation-fa2en_fa2en_sahife_bleu":0.0772362522},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0704966071,"translation-ar2fa_ar2fa_sahife_bleu":0.0675578984,"translation-ar2fa_ar2fa_nahj_bleu":0.042116411,"translation-ar2fa_ar2fa_quran_bleu":0.1003965021},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0796516779,"summarization_PnSummary_rougeL_recall":0.3573917363,"summarization_PnSummary_rougeL_f1_score":0.1263677591},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0287298887,"translation-fa2ar_fa2ar_nahj_bleu":0.025061663,"translation-fa2ar_fa2ar_sahife_bleu":0.0355711393,"translation-fa2ar_fa2ar_quran_bleu":0.0255568639},"nlg_score":0.1334687319}
|
|
|
|
| 17 |
{"Model Name":"Qwen3-14B","thinking_method":"β","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1987198912,"question-generation_PersianQA_rougeL_recall":0.3431437262,"question-generation_PersianQA_rougeL_f1_score":0.2419384398},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1535253787,"translation-en2fa_en2fa_epoque_bleu":0.3553678809,"translation-en2fa_en2fa_mizan_bleu":0.1285441922,"translation-en2fa_en2fa_quran_bleu":0.0857809616,"translation-en2fa_en2fa_sahife_bleu":0.0787025343,"translation-en2fa_en2fa_nahj_bleu":0.0404850935,"translation-en2fa_en2fa_tep_bleu":0.0586129062},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1469468837,"summarization_SamSUM-fa_rougeL_recall":0.3743807014,"summarization_SamSUM-fa_rougeL_f1_score":0.2022859929},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2145488085,"translation-fa2en_fa2en_tep_bleu":0.1307272464,"translation-fa2en_fa2en_mizan_bleu":0.1697754862,"translation-fa2en_fa2en_quran_bleu":0.1552415558,"translation-fa2en_fa2en_epoque_bleu":0.4513682579,"translation-fa2en_fa2en_nahj_bleu":0.0842673472,"translation-fa2en_fa2en_sahife_bleu":0.0853787118},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0910450298,"translation-ar2fa_ar2fa_sahife_bleu":0.0862679894,"translation-ar2fa_ar2fa_nahj_bleu":0.0558129824,"translation-ar2fa_ar2fa_quran_bleu":0.1292925153},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1123870374,"summarization_PnSummary_rougeL_recall":0.4032007327,"summarization_PnSummary_rougeL_f1_score":0.17115848},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0494411806,"translation-fa2ar_fa2ar_nahj_bleu":0.0369805868,"translation-fa2ar_fa2ar_sahife_bleu":0.0567654991,"translation-fa2ar_fa2ar_quran_bleu":0.0545774559},"nlg_score":0.16056333}
|
| 18 |
{"Model Name":"llama4:scout","thinking_method":"β","model_url":"https_google.com","parameters_count":"109000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1572445395,"question-generation_PersianQA_rougeL_recall":0.2651515671,"question-generation_PersianQA_rougeL_f1_score":0.1889377754},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1694667296,"translation-en2fa_en2fa_epoque_bleu":0.3980975238,"translation-en2fa_en2fa_mizan_bleu":0.1400810731,"translation-en2fa_en2fa_quran_bleu":0.1173019123,"translation-en2fa_en2fa_sahife_bleu":0.0758825134,"translation-en2fa_en2fa_nahj_bleu":0.0380397952,"translation-en2fa_en2fa_tep_bleu":0.05711699},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1539866165,"summarization_SamSUM-fa_rougeL_recall":0.341409574,"summarization_SamSUM-fa_rougeL_f1_score":0.2007085976},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1909462413,"translation-fa2en_fa2en_tep_bleu":0.1038996524,"translation-fa2en_fa2en_mizan_bleu":0.1513900262,"translation-fa2en_fa2en_quran_bleu":0.129609905,"translation-fa2en_fa2en_epoque_bleu":0.4266734606,"translation-fa2en_fa2en_nahj_bleu":0.0619630431,"translation-fa2en_fa2en_sahife_bleu":0.0584029483},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0997661237,"translation-ar2fa_ar2fa_sahife_bleu":0.0880416079,"translation-ar2fa_ar2fa_nahj_bleu":0.0425251453,"translation-ar2fa_ar2fa_quran_bleu":0.165869569},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1377053282,"summarization_PnSummary_rougeL_recall":0.380073051,"summarization_PnSummary_rougeL_f1_score":0.1928750247},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0548753777,"translation-fa2ar_fa2ar_nahj_bleu":0.0318327001,"translation-fa2ar_fa2ar_sahife_bleu":0.0567893259,"translation-fa2ar_fa2ar_quran_bleu":0.076004107},"nlg_score":0.1567965528}
|
| 19 |
{"Model Name":"Qwen3-8B","thinking_method":"β","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1965366702,"question-generation_PersianQA_rougeL_recall":0.340760284,"question-generation_PersianQA_rougeL_f1_score":0.2388923895},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1550276898,"translation-en2fa_en2fa_epoque_bleu":0.3721582216,"translation-en2fa_en2fa_mizan_bleu":0.1231599039,"translation-en2fa_en2fa_quran_bleu":0.0882213453,"translation-en2fa_en2fa_sahife_bleu":0.0725213197,"translation-en2fa_en2fa_nahj_bleu":0.0424186358,"translation-en2fa_en2fa_tep_bleu":0.0528718634},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1463365551,"summarization_SamSUM-fa_rougeL_recall":0.3856017289,"summarization_SamSUM-fa_rougeL_f1_score":0.2024070197},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2024225184,"translation-fa2en_fa2en_tep_bleu":0.1163127945,"translation-fa2en_fa2en_mizan_bleu":0.1649009947,"translation-fa2en_fa2en_quran_bleu":0.1513328968,"translation-fa2en_fa2en_epoque_bleu":0.4171232399,"translation-fa2en_fa2en_nahj_bleu":0.0857999462,"translation-fa2en_fa2en_sahife_bleu":0.0929479364},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0821020713,"translation-ar2fa_ar2fa_sahife_bleu":0.0730469461,"translation-ar2fa_ar2fa_nahj_bleu":0.0579031327,"translation-ar2fa_ar2fa_quran_bleu":0.1141461882},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.109255822,"summarization_PnSummary_rougeL_recall":0.3979273385,"summarization_PnSummary_rougeL_f1_score":0.1669061111},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0423318046,"translation-fa2ar_fa2ar_nahj_bleu":0.0329089717,"translation-fa2ar_fa2ar_sahife_bleu":0.0445101244,"translation-fa2ar_fa2ar_quran_bleu":0.0495763178},"nlg_score":0.1557270864}
|
| 20 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"120000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.2140114675,"question-generation_PersianQA_rougeL_recall":0.3382796762,"question-generation_PersianQA_rougeL_f1_score":0.2507426631},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1732020657,"translation-en2fa_en2fa_epoque_bleu":0.3837758669,"translation-en2fa_en2fa_mizan_bleu":0.1680666593,"translation-en2fa_en2fa_quran_bleu":0.1005711034,"translation-en2fa_en2fa_sahife_bleu":0.0761731989,"translation-en2fa_en2fa_nahj_bleu":0.0436218334,"translation-en2fa_en2fa_tep_bleu":0.0641477759},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1197038513,"summarization_SamSUM-fa_rougeL_recall":0.3691802463,"summarization_SamSUM-fa_rougeL_f1_score":0.1737939492},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.2010422229,"translation-fa2en_fa2en_tep_bleu":0.1296290178,"translation-fa2en_fa2en_mizan_bleu":0.1687085372,"translation-fa2en_fa2en_quran_bleu":0.1258778791,"translation-fa2en_fa2en_epoque_bleu":0.4180918256,"translation-fa2en_fa2en_nahj_bleu":0.0766886466,"translation-fa2en_fa2en_sahife_bleu":0.07624077},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.1064381612,"translation-ar2fa_ar2fa_sahife_bleu":0.089727948,"translation-ar2fa_ar2fa_nahj_bleu":0.0552602241,"translation-ar2fa_ar2fa_quran_bleu":0.1717674145},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0838678728,"summarization_PnSummary_rougeL_recall":0.3842899041,"summarization_PnSummary_rougeL_f1_score":0.1338531153},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0381651941,"translation-fa2ar_fa2ar_nahj_bleu":0.0246058927,"translation-fa2ar_fa2ar_sahife_bleu":0.0402564081,"translation-fa2ar_fa2ar_quran_bleu":0.0496332815},"nlg_score":0.1538910531}
|
| 21 |
{"Model Name":"Qwen3-4B","thinking_method":"β","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1744197112,"question-generation_PersianQA_rougeL_recall":0.2697024508,"question-generation_PersianQA_rougeL_f1_score":0.2017710943},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1217211215,"translation-en2fa_en2fa_epoque_bleu":0.2916268514,"translation-en2fa_en2fa_mizan_bleu":0.091925603,"translation-en2fa_en2fa_quran_bleu":0.065498518,"translation-en2fa_en2fa_sahife_bleu":0.0612237455,"translation-en2fa_en2fa_nahj_bleu":0.0385824628,"translation-en2fa_en2fa_tep_bleu":0.0453883692},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1429609514,"summarization_SamSUM-fa_rougeL_recall":0.397717388,"summarization_SamSUM-fa_rougeL_f1_score":0.2013136641},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1840809218,"translation-fa2en_fa2en_tep_bleu":0.1011436783,"translation-fa2en_fa2en_mizan_bleu":0.149157222,"translation-fa2en_fa2en_quran_bleu":0.1377761662,"translation-fa2en_fa2en_epoque_bleu":0.3802946233,"translation-fa2en_fa2en_nahj_bleu":0.0851756367,"translation-fa2en_fa2en_sahife_bleu":0.0857201524},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0636385541,"translation-ar2fa_ar2fa_sahife_bleu":0.0557180428,"translation-ar2fa_ar2fa_nahj_bleu":0.0539968488,"translation-ar2fa_ar2fa_quran_bleu":0.0807186853},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1067208324,"summarization_PnSummary_rougeL_recall":0.4109136551,"summarization_PnSummary_rougeL_f1_score":0.1648475797},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0351351131,"translation-fa2ar_fa2ar_nahj_bleu":0.0313503027,"translation-fa2ar_fa2ar_sahife_bleu":0.042075565,"translation-fa2ar_fa2ar_quran_bleu":0.0319794715},"nlg_score":0.1389297212}
|
| 22 |
{"Model Name":"gemini-2.5-flash","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.3121385499,"question-generation_PersianQA_rougeL_recall":0.4162991047,"question-generation_PersianQA_rougeL_f1_score":0.3445136596},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.0650794176,"translation-en2fa_en2fa_epoque_bleu":0.1194375779,"translation-en2fa_en2fa_mizan_bleu":0.0640649978,"translation-en2fa_en2fa_quran_bleu":0.0926514743,"translation-en2fa_en2fa_sahife_bleu":0.0392464347,"translation-en2fa_en2fa_nahj_bleu":0.022322883,"translation-en2fa_en2fa_tep_bleu":0.0184227674},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1525978605,"summarization_SamSUM-fa_rougeL_recall":0.3945587249,"summarization_SamSUM-fa_rougeL_f1_score":0.209852471},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.0559484689,"translation-fa2en_fa2en_tep_bleu":0.0190401646,"translation-fa2en_fa2en_mizan_bleu":0.0369126121,"translation-fa2en_fa2en_quran_bleu":0.0401048971,"translation-fa2en_fa2en_epoque_bleu":0.1381975553,"translation-fa2en_fa2en_nahj_bleu":0.0232788817,"translation-fa2en_fa2en_sahife_bleu":0.017477039},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0772188097,"translation-ar2fa_ar2fa_sahife_bleu":0.0610321929,"translation-ar2fa_ar2fa_nahj_bleu":0.0273061824,"translation-ar2fa_ar2fa_quran_bleu":0.1408224224},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.1070140366,"summarization_PnSummary_rougeL_recall":0.4357356292,"summarization_PnSummary_rougeL_f1_score":0.1672508999},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0382543341,"translation-fa2ar_fa2ar_nahj_bleu":0.032191006,"translation-fa2ar_fa2ar_sahife_bleu":0.028980881,"translation-fa2ar_fa2ar_quran_bleu":0.0535911152},"nlg_score":0.1368740087}
|
| 23 |
{"Model Name":"gpt-oss:20b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","question-generation_PersianQA":{"question-generation_PersianQA_rougeL_precision":0.1877254615,"question-generation_PersianQA_rougeL_recall":0.3036923298,"question-generation_PersianQA_rougeL_f1_score":0.2215402117},"translation-en2fa_en2fa":{"translation-en2fa_en2fa_bleu":0.1482164359,"translation-en2fa_en2fa_epoque_bleu":0.3332313032,"translation-en2fa_en2fa_mizan_bleu":0.1348649993,"translation-en2fa_en2fa_quran_bleu":0.0798910499,"translation-en2fa_en2fa_sahife_bleu":0.0724923326,"translation-en2fa_en2fa_nahj_bleu":0.0425031053,"translation-en2fa_en2fa_tep_bleu":0.0570157331},"summarization_SamSUM-fa":{"summarization_SamSUM-fa_rougeL_precision":0.1076131004,"summarization_SamSUM-fa_rougeL_recall":0.354952604,"summarization_SamSUM-fa_rougeL_f1_score":0.1578241504},"translation-fa2en_fa2en":{"translation-fa2en_fa2en_bleu":0.1811060704,"translation-fa2en_fa2en_tep_bleu":0.1180786789,"translation-fa2en_fa2en_mizan_bleu":0.1503794353,"translation-fa2en_fa2en_quran_bleu":0.1042682142,"translation-fa2en_fa2en_epoque_bleu":0.3794274854,"translation-fa2en_fa2en_nahj_bleu":0.0641545233,"translation-fa2en_fa2en_sahife_bleu":0.0772362522},"translation-ar2fa_ar2fa":{"translation-ar2fa_ar2fa_bleu":0.0704966071,"translation-ar2fa_ar2fa_sahife_bleu":0.0675578984,"translation-ar2fa_ar2fa_nahj_bleu":0.042116411,"translation-ar2fa_ar2fa_quran_bleu":0.1003965021},"summarization_PnSummary":{"summarization_PnSummary_rougeL_precision":0.0796516779,"summarization_PnSummary_rougeL_recall":0.3573917363,"summarization_PnSummary_rougeL_f1_score":0.1263677591},"translation-fa2ar_fa2ar":{"translation-fa2ar_fa2ar_bleu":0.0287298887,"translation-fa2ar_fa2ar_nahj_bleu":0.025061663,"translation-fa2ar_fa2ar_sahife_bleu":0.0355711393,"translation-fa2ar_fa2ar_quran_bleu":0.0255568639},"nlg_score":0.1334687319}
|
leaderboard/boards_data/persian_nlu.jsonl
CHANGED
|
@@ -17,7 +17,7 @@
|
|
| 17 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7518878101,"sentiment-analysis_deepsentipers_precision_modified":0.7333601788,"sentiment-analysis_deepsentipers_recall_modified":0.7850018857,"sentiment-analysis_deepsentipers_fscore_modified":0.7214827861,"sentiment-analysis_deepsentipers_acc":0.752293578,"sentiment-analysis_deepsentipers_precision":0.7337559479,"sentiment-analysis_deepsentipers_recall":0.785425524,"sentiment-analysis_deepsentipers_fscore":0.7218721454,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9587180795,"sts_SynPerSTS_corrcoef":0.9587180795,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.5030535945,"ner_arman_precision_mean":0.4617288155,"ner_arman_recall_mean":0.5952522845},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.2645652392,"keyword-extraction_SynKeywords_precision_mean":0.2349391249,"keyword-extraction_SynKeywords_recall_mean":0.3166666667},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.5776397516,"tone-classification_SynTone_precision_modified":0.4784671979,"tone-classification_SynTone_recall_modified":0.4996211806,"tone-classification_SynTone_fscore_modified":0.4487276462,"tone-classification_SynTone_acc":0.5849056604,"tone-classification_SynTone_precision":0.4844856532,"tone-classification_SynTone_recall":0.5059057238,"tone-classification_SynTone_fscore":0.4543720191,"tone-classification_SynTone_valid_output_ratio":0.9875776398},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8602460091,"sts_FarSICK_corrcoef":0.8602460091,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-dete
ction_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.7078005115,"nli_farstail_precision_modified":0.7418983007,"nli_farstail_recall_modified":0.70995102,"nli_farstail_fscore_modified":0.6987179454,"nli_farstail_acc":0.7091607944,"nli_farstail_precision":0.7433241143,"nli_farstail_recall":0.7113154358,"nli_farstail_fscore":0.700060773,"nli_farstail_valid_output_ratio":0.9980818414},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.866,"paraphrase-detection_parsinlu_precision_modified":0.8771198269,"paraphrase-detection_parsinlu_recall_modified":0.852753978,"paraphrase-detection_parsinlu_fscore_modified":0.8595688134,"paraphrase-detection_parsinlu_acc":0.866,"paraphrase-detection_parsinlu_precision":0.8771198269,"paraphrase-detection_parsinlu_recall":0.852753978,"paraphrase-detection_parsinlu_fscore":0.8595688134,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.458,"topic-classification_sid_precision_modified":0.3928192753,"topic-classification_sid_recall_modified":0.4072039595,"topic-classification_sid_fscore_modified":0.3544902396,"topic-classification_sid_acc":0.6378830084,"topic-classification_sid_precision":0.5471020547,"topic-classification_sid_recall":0.5671364338,"topic-classification_sid_fscore":0.4937189966,"topic-class
ification_sid_valid_output_ratio":0.718},"nlu_score":0.6552152029}
|
| 18 |
{"Model Name":"Qwen3-14B","thinking_method":"β","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7459546926,"sentiment-analysis_deepsentipers_precision_modified":0.696002467,"sentiment-analysis_deepsentipers_recall_modified":0.7725731976,"sentiment-analysis_deepsentipers_fscore_modified":0.7160207999,"sentiment-analysis_deepsentipers_acc":0.7459546926,"sentiment-analysis_deepsentipers_precision":0.696002467,"sentiment-analysis_deepsentipers_recall":0.7725731976,"sentiment-analysis_deepsentipers_fscore":0.7160207999,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9603899338,"sts_SynPerSTS_corrcoef":0.9603899338,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.492138652,"ner_arman_precision_mean":0.4553833929,"ner_arman_recall_mean":0.5783671037},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.233766167,"keyword-extraction_SynKeywords_precision_mean":0.1893302534,"keyword-extraction_SynKeywords_recall_mean":0.3297101449},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.4968944099,"tone-classification_SynTone_precision_modified":0.5363835928,"tone-classification_SynTone_recall_modified":0.5772956136,"tone-classification_SynTone_fscore_modified":0.4755414981,"tone-classification_SynTone_acc":0.4968944099,"tone-classification_SynTone_precision":0.5363835928,"tone-classification_SynTone_recall":0.5772956136,"tone-classification_SynTone_fscore":0.4755414981,"tone-classification_SynTone_valid_output_ratio":1.0},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8509006434,"sts_FarSICK_corrcoef":0.8509006434,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8390804598,"paraphrase-detection_FarsiParaphraseDete
ction_precision_modified":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.7007672634,"nli_farstail_precision_modified":0.7596784307,"nli_farstail_recall_modified":0.7039816989,"nli_farstail_fscore_modified":0.6834876952,"nli_farstail_acc":0.7007672634,"nli_farstail_precision":0.7596784307,"nli_farstail_recall":0.7039816989,"nli_farstail_fscore":0.6834876952,"nli_farstail_valid_output_ratio":1.0},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.854,"paraphrase-detection_parsinlu_precision_modified":0.8742015099,"paraphrase-detection_parsinlu_recall_modified":0.8365157079,"paraphrase-detection_parsinlu_fscore_modified":0.8449177639,"paraphrase-detection_parsinlu_acc":0.854,"paraphrase-detection_parsinlu_precision":0.8742015099,"paraphrase-detection_parsinlu_recall":0.8365157079,"paraphrase-detection_parsinlu_fscore":0.8449177639,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":11.9431279621,"extractive-qa_PQuAD_f1":0.5054306037},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.636,"topic-classification_sid_precision_modified":0.6248180645,"topic-classification_sid_recall_modified":0.5200071748,"topic-classification_sid_fscore_modified":0.5212205085,"topic-classification_sid_acc":0.636,"topic-classification_sid_precision":0.6248180645,"topic-classification_sid_recall":0.5200071748,"topic-classification_sid_fscore":0.5212205085,"topic-classification_sid_val
id_output_ratio":1.0},"nlu_score":0.6460328733}
|
| 19 |
{"Model Name":"gpt-4o-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7713052859,"sentiment-analysis_deepsentipers_precision_modified":0.7288724929,"sentiment-analysis_deepsentipers_recall_modified":0.803675275,"sentiment-analysis_deepsentipers_fscore_modified":0.753174206,"sentiment-analysis_deepsentipers_acc":0.7713052859,"sentiment-analysis_deepsentipers_precision":0.7288724929,"sentiment-analysis_deepsentipers_recall":0.803675275,"sentiment-analysis_deepsentipers_fscore":0.753174206,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9590342543,"sts_SynPerSTS_corrcoef":0.9590342543,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.0374396958,"ner_arman_precision_mean":0.0342669845,"ner_arman_recall_mean":0.0448549861},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.2783400189,"keyword-extraction_SynKeywords_precision_mean":0.2250927598,"keyword-extraction_SynKeywords_recall_mean":0.3842391304},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7478203083,"tone-classification_SynTone_recall_modified":0.6482356204,"tone-classification_SynTone_fscore_modified":0.6776329308,"tone-classification_SynTone_acc":0.850931677,"tone-classification_SynTone_precision":0.7478203083,"tone-classification_SynTone_recall":0.6482356204,"tone-classification_SynTone_fscore":0.6776329308,"tone-classification_SynTone_valid_output_ratio":1.0},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8671704383,"sts_FarSICK_corrcoef":0.8671704383,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection
_precision_modified":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.7647058824,"nli_farstail_precision_modified":0.7814499507,"nli_farstail_recall_modified":0.7670439826,"nli_farstail_fscore_modified":0.7573199649,"nli_farstail_acc":0.7656850192,"nli_farstail_precision":0.7824505269,"nli_farstail_recall":0.7680261132,"nli_farstail_fscore":0.7582896447,"nli_farstail_valid_output_ratio":0.9987212276},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8576776974,"paraphrase-detection_parsinlu_recall_modified":0.819624643,"paraphrase-detection_parsinlu_fscore_modified":0.8275649186,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8576776974,"paraphrase-detection_parsinlu_recall":0.819624643,"paraphrase-detection_parsinlu_fscore":0.8275649186,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":7.2037914692,"extractive-qa_PQuAD_f1":0.4722142546},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.6679390306,"topic-classification_sid_recall_modified":0.6259469635,"topic-classification_sid_fscore_modified":0.6265189311,"topic-classification_sid_acc":0.7054108216,"topic-classification_sid_precision":0.6692775858,"topic-classification_sid_recall":0.6272013662,"topic-classification_sid_fscore":0.62777448,"topic-classification_s
id_valid_output_ratio":0.998},"nlu_score":0.6459120734}
|
| 20 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"
|
| 21 |
{"Model Name":"deepseek-reasoner","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.6278317152,"sentiment-analysis_deepsentipers_precision_modified":0.5954545705,"sentiment-analysis_deepsentipers_recall_modified":0.6239967818,"sentiment-analysis_deepsentipers_fscore_modified":0.6073033689,"sentiment-analysis_deepsentipers_acc":0.7288666249,"sentiment-analysis_deepsentipers_precision":0.691279132,"sentiment-analysis_deepsentipers_recall":0.7244145482,"sentiment-analysis_deepsentipers_fscore":0.7050347188,"sentiment-analysis_deepsentipers_valid_output_ratio":0.8613807983},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9581074422,"sts_SynPerSTS_corrcoef":0.9581074422,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.247080201,"ner_arman_precision_mean":0.2176003178,"ner_arman_recall_mean":0.3168653159},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.3267802104,"keyword-extraction_SynKeywords_precision_mean":0.2985915051,"keyword-extraction_SynKeywords_recall_mean":0.3825181159},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.4347826087,"tone-classification_SynTone_precision_modified":0.3863322077,"tone-classification_SynTone_recall_modified":0.383431452,"tone-classification_SynTone_fscore_modified":0.3837887153,"tone-classification_SynTone_acc":0.7865168539,"tone-classification_SynTone_precision":0.6988706228,"tone-classification_SynTone_recall":0.6936231884,"tone-classification_SynTone_fscore":0.6942694738,"tone-classification_SynTone_valid_output_ratio":0.5527950311},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8547994421,"sts_FarSICK_corrcoef":0.8547994421,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9131545338,"paraphrase
-detection_FarsiParaphraseDetection_precision_modified":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.6854219949,"nli_farstail_precision_modified":0.7452254514,"nli_farstail_recall_modified":0.6884495258,"nli_farstail_fscore_modified":0.6690112082,"nli_farstail_acc":0.6858605246,"nli_farstail_precision":0.7457022432,"nli_farstail_recall":0.6888899926,"nli_farstail_fscore":0.6694392384,"nli_farstail_valid_output_ratio":0.9993606138},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.8267131595,"paraphrase-detection_parsinlu_recall_modified":0.8165238678,"paraphrase-detection_parsinlu_fscore_modified":0.8200389709,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.8267131595,"paraphrase-detection_parsinlu_recall":0.8165238678,"paraphrase-detection_parsinlu_fscore":0.8200389709,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":36.7772511848,"extractive-qa_PQuAD_f1":0.7059801524},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.608,"topic-classification_sid_precision_modified":0.5971774069,"topic-classification_sid_recall_modified":0.5095088497,"topic-classification_sid_fscore_modified":0.5160494942,"topic-classification_sid_acc":0.6333333333,"topic-classification_sid_precision":0.6220597988,"topic-classification_sid_recall":0.5307383851,"topic-classification_sid_fsco
re":0.5375515565,"topic-classification_sid_valid_output_ratio":0.96},"nlu_score":0.6361186163}
|
| 22 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.738403452,"sentiment-analysis_deepsentipers_precision_modified":0.706763853,"sentiment-analysis_deepsentipers_recall_modified":0.7658510846,"sentiment-analysis_deepsentipers_fscore_modified":0.726373242,"sentiment-analysis_deepsentipers_acc":0.738403452,"sentiment-analysis_deepsentipers_precision":0.706763853,"sentiment-analysis_deepsentipers_recall":0.7658510846,"sentiment-analysis_deepsentipers_fscore":0.726373242,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9445586775,"sts_SynPerSTS_corrcoef":0.9445586775,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.0031613599,"ner_arman_precision_mean":0.0024235201,"ner_arman_recall_mean":0.0047675805},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.6770186335,"tone-classification_SynTone_precision_modified":0.6041666667,"tone-classification_SynTone_recall_modified":0.5499765318,"tone-classification_SynTone_fscore_modified":0.5393404488,"tone-classification_SynTone_acc":0.6770186335,"tone-classification_SynTone_precision":0.6041666667,"tone-classification_SynTone_recall":0.5499765318,"tone-classification_SynTone_fscore":0.5393404488,"tone-classification_SynTone_valid_output_ratio":1.0},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParap
hraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.6898976982,"nli_farstail_precision_modified":0.752223151,"nli_farstail_recall_modified":0.691698665,"nli_farstail_fscore_modified":0.6834607357,"nli_farstail_acc":0.6898976982,"nli_farstail_precision":0.752223151,"nli_farstail_recall":0.691698665,"nli_farstail_fscore":0.6834607357,"nli_farstail_valid_output_ratio":1.0},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.872,"paraphrase-detection_parsinlu_precision_modified":0.872593554,"paraphrase-detection_parsinlu_recall_modified":0.8654426765,"paraphrase-detection_parsinlu_fscore_modified":0.8682824025,"paraphrase-detection_parsinlu_acc":0.872,"paraphrase-detection_parsinlu_precision":0.872593554,"paraphrase-detection_parsinlu_recall":0.8654426765,"paraphrase-detection_parsinlu_fscore":0.8682824025,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":51.6587677725,"extractive-qa_PQuAD_f1":0.7997294818},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.682,"topic-classification_sid_precision_modified":0.5730049986,"topic-classification_sid_recall_modified":0.5717337985,"topic-classification_sid_fscore_modified":0.5606248332,"topic-classification_sid_acc":0.7239915074,"topic-classification_sid_precision":0.6082855612,"topic-classification_sid_recall":0.6069360918,"topic-classification_sid_fscore":0.595143135,"topic-classification_
sid_valid_output_ratio":0.942},"nlu_score":0.6297634971}
|
| 23 |
{"Model Name":"gpt-oss:20b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7421790723,"sentiment-analysis_deepsentipers_precision_modified":0.705605232,"sentiment-analysis_deepsentipers_recall_modified":0.7565637786,"sentiment-analysis_deepsentipers_fscore_modified":0.7108099837,"sentiment-analysis_deepsentipers_acc":0.7674288901,"sentiment-analysis_deepsentipers_precision":0.7296107642,"sentiment-analysis_deepsentipers_recall":0.7823029813,"sentiment-analysis_deepsentipers_fscore":0.7349925877,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9670981661},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9496068485,"sts_SynPerSTS_corrcoef":0.9496068485,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.1239012808,"ner_arman_precision_mean":0.1171036949,"ner_arman_recall_mean":0.1388160509},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.1297217026,"keyword-extraction_SynKeywords_precision_mean":0.1052290945,"keyword-extraction_SynKeywords_recall_mean":0.1816123188},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.801242236,"tone-classification_SynTone_precision_modified":0.6325567597,"tone-classification_SynTone_recall_modified":0.5106323908,"tone-classification_SynTone_fscore_modified":0.5211970678,"tone-classification_SynTone_acc":0.8164556962,"tone-classification_SynTone_precision":0.6445673311,"tone-classification_SynTone_recall":0.5203279425,"tone-classification_SynTone_fscore":0.5310932146,"tone-classification_SynTone_valid_output_ratio":0.9813664596},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8438423833,"sts_FarSICK_corrcoef":0.8438423833,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8135376756,"paraphrase-detec
tion_FarsiParaphraseDetection_precision_modified":0.8710840658,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7844142715,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7937868553,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8240620957,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8823529412,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7945619335,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8040557668,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9872286079},"nli_farstail":{"nli_farstail_acc_modified":0.7384910486,"nli_farstail_precision_modified":0.7662350641,"nli_farstail_recall_modified":0.7395626513,"nli_farstail_fscore_modified":0.7354972179,"nli_farstail_acc":0.7399103139,"nli_farstail_precision":0.7677076491,"nli_farstail_recall":0.7409839761,"nli_farstail_fscore":0.7369107296,"nli_farstail_valid_output_ratio":0.9980818414},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.796,"paraphrase-detection_parsinlu_precision_modified":0.8275307297,"paraphrase-detection_parsinlu_recall_modified":0.7707807282,"paraphrase-detection_parsinlu_fscore_modified":0.7792394123,"paraphrase-detection_parsinlu_acc":0.8139059305,"paraphrase-detection_parsinlu_precision":0.8461459404,"paraphrase-detection_parsinlu_recall":0.788119354,"paraphrase-detection_parsinlu_fscore":0.7967683152,"paraphrase-detection_parsinlu_valid_output_ratio":0.978},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":42.0853080569,"extractive-qa_PQuAD_f1":0.747356805},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.674,"topic-classification_sid_precision_modified":0.583557125,"topic-classification_sid_recall_modified":0.5945763405,"topic-classification_sid_fscore_modified":0.5786133505,"topic-classification_sid_acc":0.6962809917,"topic-classification_sid_precision":0.6028482696,"topic-classification_sid_recall":0.6142317567,"topic-classification
_sid_fscore":0.5977410646,"topic-classification_sid_valid_output_ratio":0.968},"nlu_score":0.628506628}
|
|
|
|
| 17 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7518878101,"sentiment-analysis_deepsentipers_precision_modified":0.7333601788,"sentiment-analysis_deepsentipers_recall_modified":0.7850018857,"sentiment-analysis_deepsentipers_fscore_modified":0.7214827861,"sentiment-analysis_deepsentipers_acc":0.752293578,"sentiment-analysis_deepsentipers_precision":0.7337559479,"sentiment-analysis_deepsentipers_recall":0.785425524,"sentiment-analysis_deepsentipers_fscore":0.7218721454,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9587180795,"sts_SynPerSTS_corrcoef":0.9587180795,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.5030535945,"ner_arman_precision_mean":0.4617288155,"ner_arman_recall_mean":0.5952522845},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.2645652392,"keyword-extraction_SynKeywords_precision_mean":0.2349391249,"keyword-extraction_SynKeywords_recall_mean":0.3166666667},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.5776397516,"tone-classification_SynTone_precision_modified":0.4784671979,"tone-classification_SynTone_recall_modified":0.4996211806,"tone-classification_SynTone_fscore_modified":0.4487276462,"tone-classification_SynTone_acc":0.5849056604,"tone-classification_SynTone_precision":0.4844856532,"tone-classification_SynTone_recall":0.5059057238,"tone-classification_SynTone_fscore":0.4543720191,"tone-classification_SynTone_valid_output_ratio":0.9875776398},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8602460091,"sts_FarSICK_corrcoef":0.8602460091,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-dete
ction_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.7078005115,"nli_farstail_precision_modified":0.7418983007,"nli_farstail_recall_modified":0.70995102,"nli_farstail_fscore_modified":0.6987179454,"nli_farstail_acc":0.7091607944,"nli_farstail_precision":0.7433241143,"nli_farstail_recall":0.7113154358,"nli_farstail_fscore":0.700060773,"nli_farstail_valid_output_ratio":0.9980818414},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.866,"paraphrase-detection_parsinlu_precision_modified":0.8771198269,"paraphrase-detection_parsinlu_recall_modified":0.852753978,"paraphrase-detection_parsinlu_fscore_modified":0.8595688134,"paraphrase-detection_parsinlu_acc":0.866,"paraphrase-detection_parsinlu_precision":0.8771198269,"paraphrase-detection_parsinlu_recall":0.852753978,"paraphrase-detection_parsinlu_fscore":0.8595688134,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.458,"topic-classification_sid_precision_modified":0.3928192753,"topic-classification_sid_recall_modified":0.4072039595,"topic-classification_sid_fscore_modified":0.3544902396,"topic-classification_sid_acc":0.6378830084,"topic-classification_sid_precision":0.5471020547,"topic-classification_sid_recall":0.5671364338,"topic-classification_sid_fscore":0.4937189966,"topic-class
ification_sid_valid_output_ratio":0.718},"nlu_score":0.6552152029}
|
| 18 |
{"Model Name":"Qwen3-14B","thinking_method":"β","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7459546926,"sentiment-analysis_deepsentipers_precision_modified":0.696002467,"sentiment-analysis_deepsentipers_recall_modified":0.7725731976,"sentiment-analysis_deepsentipers_fscore_modified":0.7160207999,"sentiment-analysis_deepsentipers_acc":0.7459546926,"sentiment-analysis_deepsentipers_precision":0.696002467,"sentiment-analysis_deepsentipers_recall":0.7725731976,"sentiment-analysis_deepsentipers_fscore":0.7160207999,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9603899338,"sts_SynPerSTS_corrcoef":0.9603899338,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.492138652,"ner_arman_precision_mean":0.4553833929,"ner_arman_recall_mean":0.5783671037},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.233766167,"keyword-extraction_SynKeywords_precision_mean":0.1893302534,"keyword-extraction_SynKeywords_recall_mean":0.3297101449},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.4968944099,"tone-classification_SynTone_precision_modified":0.5363835928,"tone-classification_SynTone_recall_modified":0.5772956136,"tone-classification_SynTone_fscore_modified":0.4755414981,"tone-classification_SynTone_acc":0.4968944099,"tone-classification_SynTone_precision":0.5363835928,"tone-classification_SynTone_recall":0.5772956136,"tone-classification_SynTone_fscore":0.4755414981,"tone-classification_SynTone_valid_output_ratio":1.0},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8509006434,"sts_FarSICK_corrcoef":0.8509006434,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8390804598,"paraphrase-detection_FarsiParaphraseDete
ction_precision_modified":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.7007672634,"nli_farstail_precision_modified":0.7596784307,"nli_farstail_recall_modified":0.7039816989,"nli_farstail_fscore_modified":0.6834876952,"nli_farstail_acc":0.7007672634,"nli_farstail_precision":0.7596784307,"nli_farstail_recall":0.7039816989,"nli_farstail_fscore":0.6834876952,"nli_farstail_valid_output_ratio":1.0},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.854,"paraphrase-detection_parsinlu_precision_modified":0.8742015099,"paraphrase-detection_parsinlu_recall_modified":0.8365157079,"paraphrase-detection_parsinlu_fscore_modified":0.8449177639,"paraphrase-detection_parsinlu_acc":0.854,"paraphrase-detection_parsinlu_precision":0.8742015099,"paraphrase-detection_parsinlu_recall":0.8365157079,"paraphrase-detection_parsinlu_fscore":0.8449177639,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":11.9431279621,"extractive-qa_PQuAD_f1":0.5054306037},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.636,"topic-classification_sid_precision_modified":0.6248180645,"topic-classification_sid_recall_modified":0.5200071748,"topic-classification_sid_fscore_modified":0.5212205085,"topic-classification_sid_acc":0.636,"topic-classification_sid_precision":0.6248180645,"topic-classification_sid_recall":0.5200071748,"topic-classification_sid_fscore":0.5212205085,"topic-classification_sid_val
id_output_ratio":1.0},"nlu_score":0.6460328733}
|
| 19 |
{"Model Name":"gpt-4o-mini","thinking_method":"β","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7713052859,"sentiment-analysis_deepsentipers_precision_modified":0.7288724929,"sentiment-analysis_deepsentipers_recall_modified":0.803675275,"sentiment-analysis_deepsentipers_fscore_modified":0.753174206,"sentiment-analysis_deepsentipers_acc":0.7713052859,"sentiment-analysis_deepsentipers_precision":0.7288724929,"sentiment-analysis_deepsentipers_recall":0.803675275,"sentiment-analysis_deepsentipers_fscore":0.753174206,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9590342543,"sts_SynPerSTS_corrcoef":0.9590342543,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.0374396958,"ner_arman_precision_mean":0.0342669845,"ner_arman_recall_mean":0.0448549861},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.2783400189,"keyword-extraction_SynKeywords_precision_mean":0.2250927598,"keyword-extraction_SynKeywords_recall_mean":0.3842391304},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7478203083,"tone-classification_SynTone_recall_modified":0.6482356204,"tone-classification_SynTone_fscore_modified":0.6776329308,"tone-classification_SynTone_acc":0.850931677,"tone-classification_SynTone_precision":0.7478203083,"tone-classification_SynTone_recall":0.6482356204,"tone-classification_SynTone_fscore":0.6776329308,"tone-classification_SynTone_valid_output_ratio":1.0},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8671704383,"sts_FarSICK_corrcoef":0.8671704383,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection
_precision_modified":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.7647058824,"nli_farstail_precision_modified":0.7814499507,"nli_farstail_recall_modified":0.7670439826,"nli_farstail_fscore_modified":0.7573199649,"nli_farstail_acc":0.7656850192,"nli_farstail_precision":0.7824505269,"nli_farstail_recall":0.7680261132,"nli_farstail_fscore":0.7582896447,"nli_farstail_valid_output_ratio":0.9987212276},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8576776974,"paraphrase-detection_parsinlu_recall_modified":0.819624643,"paraphrase-detection_parsinlu_fscore_modified":0.8275649186,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8576776974,"paraphrase-detection_parsinlu_recall":0.819624643,"paraphrase-detection_parsinlu_fscore":0.8275649186,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":7.2037914692,"extractive-qa_PQuAD_f1":0.4722142546},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.6679390306,"topic-classification_sid_recall_modified":0.6259469635,"topic-classification_sid_fscore_modified":0.6265189311,"topic-classification_sid_acc":0.7054108216,"topic-classification_sid_precision":0.6692775858,"topic-classification_sid_recall":0.6272013662,"topic-classification_sid_fscore":0.62777448,"topic-classification_s
id_valid_output_ratio":0.998},"nlu_score":0.6459120734}
|
| 20 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"120000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7842502697,"sentiment-analysis_deepsentipers_precision_modified":0.7475186413,"sentiment-analysis_deepsentipers_recall_modified":0.8040239865,"sentiment-analysis_deepsentipers_fscore_modified":0.7603028067,"sentiment-analysis_deepsentipers_acc":0.7842502697,"sentiment-analysis_deepsentipers_precision":0.7475186413,"sentiment-analysis_deepsentipers_recall":0.8040239865,"sentiment-analysis_deepsentipers_fscore":0.7603028067,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9492493767,"sts_SynPerSTS_corrcoef":0.9492493767,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.090559262,"ner_arman_precision_mean":0.0812673818,"ner_arman_recall_mean":0.1104290822},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.1543389439,"keyword-extraction_SynKeywords_precision_mean":0.1301371778,"keyword-extraction_SynKeywords_recall_mean":0.2038949275},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.8447204969,"tone-classification_SynTone_precision_modified":0.7777640248,"tone-classification_SynTone_recall_modified":0.5874594641,"tone-classification_SynTone_fscore_modified":0.5990203378,"tone-classification_SynTone_acc":0.8447204969,"tone-classification_SynTone_precision":0.7777640248,"tone-classification_SynTone_recall":0.5874594641,"tone-classification_SynTone_fscore":0.5990203378,"tone-classification_SynTone_valid_output_ratio":1.0},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8412365875,"sts_FarSICK_corrcoef":0.8412365875,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.840357599,"paraphrase-detection_FarsiParaph
raseDetection_precision_modified":0.8909249564,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8134328358,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8241066053,"paraphrase-detection_FarsiParaphraseDetection_acc":0.840357599,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8909249564,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8134328358,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8241066053,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.7685421995,"nli_farstail_precision_modified":0.7858737557,"nli_farstail_recall_modified":0.7699859437,"nli_farstail_fscore_modified":0.7654192549,"nli_farstail_acc":0.7685421995,"nli_farstail_precision":0.7858737557,"nli_farstail_recall":0.7699859437,"nli_farstail_fscore":0.7654192549,"nli_farstail_valid_output_ratio":1.0},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.792,"paraphrase-detection_parsinlu_precision_modified":0.8370839341,"paraphrase-detection_parsinlu_recall_modified":0.7632802938,"paraphrase-detection_parsinlu_fscore_modified":0.7695689166,"paraphrase-detection_parsinlu_acc":0.792,"paraphrase-detection_parsinlu_precision":0.8370839341,"paraphrase-detection_parsinlu_recall":0.7632802938,"paraphrase-detection_parsinlu_fscore":0.7695689166,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":38.5781990521,"extractive-qa_PQuAD_f1":0.7377983931},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.722,"topic-classification_sid_precision_modified":0.6456808245,"topic-classification_sid_recall_modified":0.6300099701,"topic-classification_sid_fscore_modified":0.6126876785,"topic-classification_sid_acc":0.722,"topic-classification_sid_precision":0.6456808245,"topic-classification_sid_recall":0.6300099701,"topic-classification_sid_fscore":0.6126876785,"topic-classification_
sid_valid_output_ratio":1.0},"nlu_score":0.6458443785}
|
| 21 |
{"Model Name":"deepseek-reasoner","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.6278317152,"sentiment-analysis_deepsentipers_precision_modified":0.5954545705,"sentiment-analysis_deepsentipers_recall_modified":0.6239967818,"sentiment-analysis_deepsentipers_fscore_modified":0.6073033689,"sentiment-analysis_deepsentipers_acc":0.7288666249,"sentiment-analysis_deepsentipers_precision":0.691279132,"sentiment-analysis_deepsentipers_recall":0.7244145482,"sentiment-analysis_deepsentipers_fscore":0.7050347188,"sentiment-analysis_deepsentipers_valid_output_ratio":0.8613807983},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9581074422,"sts_SynPerSTS_corrcoef":0.9581074422,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.247080201,"ner_arman_precision_mean":0.2176003178,"ner_arman_recall_mean":0.3168653159},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.3267802104,"keyword-extraction_SynKeywords_precision_mean":0.2985915051,"keyword-extraction_SynKeywords_recall_mean":0.3825181159},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.4347826087,"tone-classification_SynTone_precision_modified":0.3863322077,"tone-classification_SynTone_recall_modified":0.383431452,"tone-classification_SynTone_fscore_modified":0.3837887153,"tone-classification_SynTone_acc":0.7865168539,"tone-classification_SynTone_precision":0.6988706228,"tone-classification_SynTone_recall":0.6936231884,"tone-classification_SynTone_fscore":0.6942694738,"tone-classification_SynTone_valid_output_ratio":0.5527950311},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8547994421,"sts_FarSICK_corrcoef":0.8547994421,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9131545338,"paraphrase
-detection_FarsiParaphraseDetection_precision_modified":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.6854219949,"nli_farstail_precision_modified":0.7452254514,"nli_farstail_recall_modified":0.6884495258,"nli_farstail_fscore_modified":0.6690112082,"nli_farstail_acc":0.6858605246,"nli_farstail_precision":0.7457022432,"nli_farstail_recall":0.6888899926,"nli_farstail_fscore":0.6694392384,"nli_farstail_valid_output_ratio":0.9993606138},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.8267131595,"paraphrase-detection_parsinlu_recall_modified":0.8165238678,"paraphrase-detection_parsinlu_fscore_modified":0.8200389709,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.8267131595,"paraphrase-detection_parsinlu_recall":0.8165238678,"paraphrase-detection_parsinlu_fscore":0.8200389709,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":36.7772511848,"extractive-qa_PQuAD_f1":0.7059801524},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.608,"topic-classification_sid_precision_modified":0.5971774069,"topic-classification_sid_recall_modified":0.5095088497,"topic-classification_sid_fscore_modified":0.5160494942,"topic-classification_sid_acc":0.6333333333,"topic-classification_sid_precision":0.6220597988,"topic-classification_sid_recall":0.5307383851,"topic-classification_sid_fsco
re":0.5375515565,"topic-classification_sid_valid_output_ratio":0.96},"nlu_score":0.6361186163}
|
| 22 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.738403452,"sentiment-analysis_deepsentipers_precision_modified":0.706763853,"sentiment-analysis_deepsentipers_recall_modified":0.7658510846,"sentiment-analysis_deepsentipers_fscore_modified":0.726373242,"sentiment-analysis_deepsentipers_acc":0.738403452,"sentiment-analysis_deepsentipers_precision":0.706763853,"sentiment-analysis_deepsentipers_recall":0.7658510846,"sentiment-analysis_deepsentipers_fscore":0.726373242,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9445586775,"sts_SynPerSTS_corrcoef":0.9445586775,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.0031613599,"ner_arman_precision_mean":0.0024235201,"ner_arman_recall_mean":0.0047675805},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.6770186335,"tone-classification_SynTone_precision_modified":0.6041666667,"tone-classification_SynTone_recall_modified":0.5499765318,"tone-classification_SynTone_fscore_modified":0.5393404488,"tone-classification_SynTone_acc":0.6770186335,"tone-classification_SynTone_precision":0.6041666667,"tone-classification_SynTone_recall":0.5499765318,"tone-classification_SynTone_fscore":0.5393404488,"tone-classification_SynTone_valid_output_ratio":1.0},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParap
hraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0},"nli_farstail":{"nli_farstail_acc_modified":0.6898976982,"nli_farstail_precision_modified":0.752223151,"nli_farstail_recall_modified":0.691698665,"nli_farstail_fscore_modified":0.6834607357,"nli_farstail_acc":0.6898976982,"nli_farstail_precision":0.752223151,"nli_farstail_recall":0.691698665,"nli_farstail_fscore":0.6834607357,"nli_farstail_valid_output_ratio":1.0},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.872,"paraphrase-detection_parsinlu_precision_modified":0.872593554,"paraphrase-detection_parsinlu_recall_modified":0.8654426765,"paraphrase-detection_parsinlu_fscore_modified":0.8682824025,"paraphrase-detection_parsinlu_acc":0.872,"paraphrase-detection_parsinlu_precision":0.872593554,"paraphrase-detection_parsinlu_recall":0.8654426765,"paraphrase-detection_parsinlu_fscore":0.8682824025,"paraphrase-detection_parsinlu_valid_output_ratio":1.0},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":51.6587677725,"extractive-qa_PQuAD_f1":0.7997294818},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.682,"topic-classification_sid_precision_modified":0.5730049986,"topic-classification_sid_recall_modified":0.5717337985,"topic-classification_sid_fscore_modified":0.5606248332,"topic-classification_sid_acc":0.7239915074,"topic-classification_sid_precision":0.6082855612,"topic-classification_sid_recall":0.6069360918,"topic-classification_sid_fscore":0.595143135,"topic-classification_
sid_valid_output_ratio":0.942},"nlu_score":0.6297634971}
|
| 23 |
{"Model Name":"gpt-oss:20b","thinking_method":"βοΈ","model_url":"https_google.com","parameters_count":"20000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers":{"sentiment-analysis_deepsentipers_acc_modified":0.7421790723,"sentiment-analysis_deepsentipers_precision_modified":0.705605232,"sentiment-analysis_deepsentipers_recall_modified":0.7565637786,"sentiment-analysis_deepsentipers_fscore_modified":0.7108099837,"sentiment-analysis_deepsentipers_acc":0.7674288901,"sentiment-analysis_deepsentipers_precision":0.7296107642,"sentiment-analysis_deepsentipers_recall":0.7823029813,"sentiment-analysis_deepsentipers_fscore":0.7349925877,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9670981661},"sts_SynPerSTS":{"sts_SynPerSTS_corrcoef_modified":0.9496068485,"sts_SynPerSTS_corrcoef":0.9496068485,"sts_SynPerSTS_valid_output_ratio":1.0},"ner_arman":{"ner_arman_f1_mean":0.1239012808,"ner_arman_precision_mean":0.1171036949,"ner_arman_recall_mean":0.1388160509},"keyword-extraction_SynKeywords":{"keyword-extraction_SynKeywords_f1_mean":0.1297217026,"keyword-extraction_SynKeywords_precision_mean":0.1052290945,"keyword-extraction_SynKeywords_recall_mean":0.1816123188},"tone-classification_SynTone":{"tone-classification_SynTone_acc_modified":0.801242236,"tone-classification_SynTone_precision_modified":0.6325567597,"tone-classification_SynTone_recall_modified":0.5106323908,"tone-classification_SynTone_fscore_modified":0.5211970678,"tone-classification_SynTone_acc":0.8164556962,"tone-classification_SynTone_precision":0.6445673311,"tone-classification_SynTone_recall":0.5203279425,"tone-classification_SynTone_fscore":0.5310932146,"tone-classification_SynTone_valid_output_ratio":0.9813664596},"sts_FarSICK":{"sts_FarSICK_corrcoef_modified":0.8438423833,"sts_FarSICK_corrcoef":0.8438423833,"sts_FarSICK_valid_output_ratio":1.0},"paraphrase-detection_FarsiParaphraseDetection":{"paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8135376756,"paraphrase-detec
tion_FarsiParaphraseDetection_precision_modified":0.8710840658,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7844142715,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7937868553,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8240620957,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8823529412,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7945619335,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8040557668,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9872286079},"nli_farstail":{"nli_farstail_acc_modified":0.7384910486,"nli_farstail_precision_modified":0.7662350641,"nli_farstail_recall_modified":0.7395626513,"nli_farstail_fscore_modified":0.7354972179,"nli_farstail_acc":0.7399103139,"nli_farstail_precision":0.7677076491,"nli_farstail_recall":0.7409839761,"nli_farstail_fscore":0.7369107296,"nli_farstail_valid_output_ratio":0.9980818414},"paraphrase-detection_parsinlu":{"paraphrase-detection_parsinlu_acc_modified":0.796,"paraphrase-detection_parsinlu_precision_modified":0.8275307297,"paraphrase-detection_parsinlu_recall_modified":0.7707807282,"paraphrase-detection_parsinlu_fscore_modified":0.7792394123,"paraphrase-detection_parsinlu_acc":0.8139059305,"paraphrase-detection_parsinlu_precision":0.8461459404,"paraphrase-detection_parsinlu_recall":0.788119354,"paraphrase-detection_parsinlu_fscore":0.7967683152,"paraphrase-detection_parsinlu_valid_output_ratio":0.978},"extractive-qa_PQuAD":{"extractive-qa_PQuAD_exact_match":42.0853080569,"extractive-qa_PQuAD_f1":0.747356805},"topic-classification_sid":{"topic-classification_sid_acc_modified":0.674,"topic-classification_sid_precision_modified":0.583557125,"topic-classification_sid_recall_modified":0.5945763405,"topic-classification_sid_fscore_modified":0.5786133505,"topic-classification_sid_acc":0.6962809917,"topic-classification_sid_precision":0.6028482696,"topic-classification_sid_recall":0.6142317567,"topic-classification
_sid_fscore":0.5977410646,"topic-classification_sid_valid_output_ratio":0.968},"nlu_score":0.628506628}
|
leaderboard/boards_data/question-generation_PersianQA.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2149535143,"question-generation_PersianQA_rougeL_recall":0.3019561885,"question-generation_PersianQA_rougeL_f1_score":0.2405115465,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2602516122,"question-generation_PersianQA_rougeL_recall":0.3803807526,"question-generation_PersianQA_rougeL_f1_score":0.2967852302,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2221178335,"question-generation_PersianQA_rougeL_recall":0.335306645,"question-generation_PersianQA_rougeL_f1_score":0.2552875817,"nlg_score":0.1643361642}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1365997259,"question-generation_PersianQA_rougeL_recall":0.3257934111,"question-generation_PersianQA_rougeL_f1_score":0.1803398036,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2576021626,"question-generation_PersianQA_rougeL_recall":0.3924501003,"question-generation_PersianQA_rougeL_f1_score":0.2985826349,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":null,"question-generation_PersianQA_rougeL_recall":null,"question-generation_PersianQA_rougeL_f1_score":null,"nlg_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2149535143,"question-generation_PersianQA_rougeL_recall":0.3019561885,"question-generation_PersianQA_rougeL_f1_score":0.2405115465,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2602516122,"question-generation_PersianQA_rougeL_recall":0.3803807526,"question-generation_PersianQA_rougeL_f1_score":0.2967852302,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2221178335,"question-generation_PersianQA_rougeL_recall":0.335306645,"question-generation_PersianQA_rougeL_f1_score":0.2552875817,"nlg_score":0.1643361642}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2140114675,"question-generation_PersianQA_rougeL_recall":0.3382796762,"question-generation_PersianQA_rougeL_f1_score":0.2507426631,"nlg_score":0.1538910531}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1365997259,"question-generation_PersianQA_rougeL_recall":0.3257934111,"question-generation_PersianQA_rougeL_f1_score":0.1803398036,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2576021626,"question-generation_PersianQA_rougeL_recall":0.3924501003,"question-generation_PersianQA_rougeL_f1_score":0.2985826349,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":null,"question-generation_PersianQA_rougeL_recall":null,"question-generation_PersianQA_rougeL_f1_score":null,"nlg_score":null}
|
leaderboard/boards_data/sentiment-analysis_deepsentipers.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.738403452,"sentiment-analysis_deepsentipers_precision_modified":0.706763853,"sentiment-analysis_deepsentipers_recall_modified":0.7658510846,"sentiment-analysis_deepsentipers_fscore_modified":0.726373242,"sentiment-analysis_deepsentipers_acc":0.738403452,"sentiment-analysis_deepsentipers_precision":0.706763853,"sentiment-analysis_deepsentipers_recall":0.7658510846,"sentiment-analysis_deepsentipers_fscore":0.726373242,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.806364617,"sentiment-analysis_deepsentipers_precision_modified":0.7705568258,"sentiment-analysis_deepsentipers_recall_modified":0.8234753765,"sentiment-analysis_deepsentipers_fscore_modified":0.7802386366,"sentiment-analysis_deepsentipers_acc":0.806364617,"sentiment-analysis_deepsentipers_precision":0.7705568258,"sentiment-analysis_deepsentipers_recall":0.8234753765,"sentiment-analysis_deepsentipers_fscore":0.7802386366,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7885652643,"sentiment-analysis_deepsentipers_precision_modified":0.757057239,"sentiment-analysis_deepsentipers_recall_modified":0.8134053732,"sentiment-analysis_deepsentipers_fscore_modified":0.7618040556,"sentiment-analysis_deepsentipers_acc":0.7911255411,"sentiment-analysis_deepsentipers_precision":0.7595152171,"sentiment-analysis_deepsentipers_recall":0.8160462998,"sentiment-analysis_deepsentipers_fscore":0.7642774453,"sentiment-analysis_deepsentipers_valid_output_ratio":0.996763754,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7518878101,"sentiment-analysis_deepsentipers_precision_modified":0.7333601788,"sentiment-analysis_deepsentipers_recall_modified":0.7850018857,"sentiment-analysis_deepsentipers_fscore_modified":0.7214827861,"sentiment-analysis_deepsentipers_acc":0.752293578,"sentiment-analysis_deepsentipers_precision":0.7337559479,"sentiment-analysis_deepsentipers_recall":0.785425524,"sentiment-analysis_deepsentipers_fscore":0.7218721454,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7661826532,"sentiment-analysis_deepsentipers_recall_modified":0.8089861144,"sentiment-analysis_deepsentipers_fscore_modified":0.7830417049,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7661826532,"sentiment-analysis_deepsentipers_recall":0.8089861144,"sentiment-analysis_deepsentipers_fscore":0.7830417049,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":null,"sentiment-analysis_deepsentipers_precision_modified":null,"sentiment-analysis_deepsentipers_recall_modified":null,"sentiment-analysis_deepsentipers_fscore_modified":null,"sentiment-analysis_deepsentipers_acc":null,"sentiment-analysis_deepsentipers_precision":null,"sentiment-analysis_deepsentipers_recall":null,"sentiment-analysis_deepsentipers_fscore":null,"sentiment-analysis_deepsentipers_valid_output_ratio":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.738403452,"sentiment-analysis_deepsentipers_precision_modified":0.706763853,"sentiment-analysis_deepsentipers_recall_modified":0.7658510846,"sentiment-analysis_deepsentipers_fscore_modified":0.726373242,"sentiment-analysis_deepsentipers_acc":0.738403452,"sentiment-analysis_deepsentipers_precision":0.706763853,"sentiment-analysis_deepsentipers_recall":0.7658510846,"sentiment-analysis_deepsentipers_fscore":0.726373242,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.806364617,"sentiment-analysis_deepsentipers_precision_modified":0.7705568258,"sentiment-analysis_deepsentipers_recall_modified":0.8234753765,"sentiment-analysis_deepsentipers_fscore_modified":0.7802386366,"sentiment-analysis_deepsentipers_acc":0.806364617,"sentiment-analysis_deepsentipers_precision":0.7705568258,"sentiment-analysis_deepsentipers_recall":0.8234753765,"sentiment-analysis_deepsentipers_fscore":0.7802386366,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7885652643,"sentiment-analysis_deepsentipers_precision_modified":0.757057239,"sentiment-analysis_deepsentipers_recall_modified":0.8134053732,"sentiment-analysis_deepsentipers_fscore_modified":0.7618040556,"sentiment-analysis_deepsentipers_acc":0.7911255411,"sentiment-analysis_deepsentipers_precision":0.7595152171,"sentiment-analysis_deepsentipers_recall":0.8160462998,"sentiment-analysis_deepsentipers_fscore":0.7642774453,"sentiment-analysis_deepsentipers_valid_output_ratio":0.996763754,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7842502697,"sentiment-analysis_deepsentipers_precision_modified":0.7475186413,"sentiment-analysis_deepsentipers_recall_modified":0.8040239865,"sentiment-analysis_deepsentipers_fscore_modified":0.7603028067,"sentiment-analysis_deepsentipers_acc":0.7842502697,"sentiment-analysis_deepsentipers_precision":0.7475186413,"sentiment-analysis_deepsentipers_recall":0.8040239865,"sentiment-analysis_deepsentipers_fscore":0.7603028067,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7518878101,"sentiment-analysis_deepsentipers_precision_modified":0.7333601788,"sentiment-analysis_deepsentipers_recall_modified":0.7850018857,"sentiment-analysis_deepsentipers_fscore_modified":0.7214827861,"sentiment-analysis_deepsentipers_acc":0.752293578,"sentiment-analysis_deepsentipers_precision":0.7337559479,"sentiment-analysis_deepsentipers_recall":0.785425524,"sentiment-analysis_deepsentipers_fscore":0.7218721454,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7661826532,"sentiment-analysis_deepsentipers_recall_modified":0.8089861144,"sentiment-analysis_deepsentipers_fscore_modified":0.7830417049,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7661826532,"sentiment-analysis_deepsentipers_recall":0.8089861144,"sentiment-analysis_deepsentipers_fscore":0.7830417049,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":null,"sentiment-analysis_deepsentipers_precision_modified":null,"sentiment-analysis_deepsentipers_recall_modified":null,"sentiment-analysis_deepsentipers_fscore_modified":null,"sentiment-analysis_deepsentipers_acc":null,"sentiment-analysis_deepsentipers_precision":null,"sentiment-analysis_deepsentipers_recall":null,"sentiment-analysis_deepsentipers_fscore":null,"sentiment-analysis_deepsentipers_valid_output_ratio":null,"nlu_score":null}
|
leaderboard/boards_data/sts_FarSICK.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8521163575,"sts_FarSICK_corrcoef":0.8521163575,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8404353896,"sts_FarSICK_corrcoef":0.8404353896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8602460091,"sts_FarSICK_corrcoef":0.8602460091,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8624442565,"sts_FarSICK_corrcoef":0.8624442565,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":null,"sts_FarSICK_corrcoef":null,"sts_FarSICK_valid_output_ratio":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8521163575,"sts_FarSICK_corrcoef":0.8521163575,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8404353896,"sts_FarSICK_corrcoef":0.8404353896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8412365875,"sts_FarSICK_corrcoef":0.8412365875,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8602460091,"sts_FarSICK_corrcoef":0.8602460091,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8624442565,"sts_FarSICK_corrcoef":0.8624442565,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":null,"sts_FarSICK_corrcoef":null,"sts_FarSICK_valid_output_ratio":null,"nlu_score":null}
|
leaderboard/boards_data/sts_SynPerSTS.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9445586775,"sts_SynPerSTS_corrcoef":0.9445586775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9405179912,"sts_SynPerSTS_corrcoef":0.9405179912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9417676956,"sts_SynPerSTS_corrcoef":0.9417676956,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9587180795,"sts_SynPerSTS_corrcoef":0.9587180795,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9620104912,"sts_SynPerSTS_corrcoef":0.9620104912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":null,"sts_SynPerSTS_corrcoef":null,"sts_SynPerSTS_valid_output_ratio":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9445586775,"sts_SynPerSTS_corrcoef":0.9445586775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9405179912,"sts_SynPerSTS_corrcoef":0.9405179912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9417676956,"sts_SynPerSTS_corrcoef":0.9417676956,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9492493767,"sts_SynPerSTS_corrcoef":0.9492493767,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9587180795,"sts_SynPerSTS_corrcoef":0.9587180795,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9620104912,"sts_SynPerSTS_corrcoef":0.9620104912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":null,"sts_SynPerSTS_corrcoef":null,"sts_SynPerSTS_valid_output_ratio":null,"nlu_score":null}
|
leaderboard/boards_data/summarization_PnSummary.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1227039295,"summarization_PnSummary_rougeL_recall":0.4315497639,"summarization_PnSummary_rougeL_f1_score":0.1856517383,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0917305447,"summarization_PnSummary_rougeL_recall":0.3893845098,"summarization_PnSummary_rougeL_f1_score":0.1447284086,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0830986853,"summarization_PnSummary_rougeL_recall":0.3565850313,"summarization_PnSummary_rougeL_f1_score":0.1308633101,"nlg_score":0.1643361642}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1072486168,"summarization_PnSummary_rougeL_recall":0.3966587345,"summarization_PnSummary_rougeL_f1_score":0.1627029568,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1234743619,"summarization_PnSummary_rougeL_recall":0.376111826,"summarization_PnSummary_rougeL_f1_score":0.1808600563,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":null,"summarization_PnSummary_rougeL_recall":null,"summarization_PnSummary_rougeL_f1_score":null,"nlg_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1227039295,"summarization_PnSummary_rougeL_recall":0.4315497639,"summarization_PnSummary_rougeL_f1_score":0.1856517383,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0917305447,"summarization_PnSummary_rougeL_recall":0.3893845098,"summarization_PnSummary_rougeL_f1_score":0.1447284086,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0830986853,"summarization_PnSummary_rougeL_recall":0.3565850313,"summarization_PnSummary_rougeL_f1_score":0.1308633101,"nlg_score":0.1643361642}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0838678728,"summarization_PnSummary_rougeL_recall":0.3842899041,"summarization_PnSummary_rougeL_f1_score":0.1338531153,"nlg_score":0.1538910531}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1072486168,"summarization_PnSummary_rougeL_recall":0.3966587345,"summarization_PnSummary_rougeL_f1_score":0.1627029568,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1234743619,"summarization_PnSummary_rougeL_recall":0.376111826,"summarization_PnSummary_rougeL_f1_score":0.1808600563,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":null,"summarization_PnSummary_rougeL_recall":null,"summarization_PnSummary_rougeL_f1_score":null,"nlg_score":null}
|
leaderboard/boards_data/summarization_SamSUM-fa.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1591262985,"summarization_SamSUM-fa_rougeL_recall":0.4163090512,"summarization_SamSUM-fa_rougeL_f1_score":0.2208876443,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1553547312,"summarization_SamSUM-fa_rougeL_recall":0.3357735524,"summarization_SamSUM-fa_rougeL_f1_score":0.2045988783,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1387162197,"summarization_SamSUM-fa_rougeL_recall":0.3472256524,"summarization_SamSUM-fa_rougeL_f1_score":0.1899415698,"nlg_score":0.1643361642}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1468362777,"summarization_SamSUM-fa_rougeL_recall":0.3858089513,"summarization_SamSUM-fa_rougeL_f1_score":0.2041300257,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1681357159,"summarization_SamSUM-fa_rougeL_recall":0.3567938895,"summarization_SamSUM-fa_rougeL_f1_score":0.2189693454,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":null,"summarization_SamSUM-fa_rougeL_recall":null,"summarization_SamSUM-fa_rougeL_f1_score":null,"nlg_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1591262985,"summarization_SamSUM-fa_rougeL_recall":0.4163090512,"summarization_SamSUM-fa_rougeL_f1_score":0.2208876443,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1553547312,"summarization_SamSUM-fa_rougeL_recall":0.3357735524,"summarization_SamSUM-fa_rougeL_f1_score":0.2045988783,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1387162197,"summarization_SamSUM-fa_rougeL_recall":0.3472256524,"summarization_SamSUM-fa_rougeL_f1_score":0.1899415698,"nlg_score":0.1643361642}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1197038513,"summarization_SamSUM-fa_rougeL_recall":0.3691802463,"summarization_SamSUM-fa_rougeL_f1_score":0.1737939492,"nlg_score":0.1538910531}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1468362777,"summarization_SamSUM-fa_rougeL_recall":0.3858089513,"summarization_SamSUM-fa_rougeL_f1_score":0.2041300257,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1681357159,"summarization_SamSUM-fa_rougeL_recall":0.3567938895,"summarization_SamSUM-fa_rougeL_f1_score":0.2189693454,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":null,"summarization_SamSUM-fa_rougeL_recall":null,"summarization_SamSUM-fa_rougeL_f1_score":null,"nlg_score":null}
|
leaderboard/boards_data/tone-classification_SynTone.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6770186335,"tone-classification_SynTone_precision_modified":0.6041666667,"tone-classification_SynTone_recall_modified":0.5499765318,"tone-classification_SynTone_fscore_modified":0.5393404488,"tone-classification_SynTone_acc":0.6770186335,"tone-classification_SynTone_precision":0.6041666667,"tone-classification_SynTone_recall":0.5499765318,"tone-classification_SynTone_fscore":0.5393404488,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8944099379,"tone-classification_SynTone_precision_modified":0.8288135593,"tone-classification_SynTone_recall_modified":0.6730414747,"tone-classification_SynTone_fscore_modified":0.7238560859,"tone-classification_SynTone_acc":0.8944099379,"tone-classification_SynTone_precision":0.8288135593,"tone-classification_SynTone_recall":0.6730414747,"tone-classification_SynTone_fscore":0.7238560859,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8447204969,"tone-classification_SynTone_precision_modified":0.6820625483,"tone-classification_SynTone_recall_modified":0.5681634349,"tone-classification_SynTone_fscore_modified":0.579448271,"tone-classification_SynTone_acc":0.85,"tone-classification_SynTone_precision":0.6863254393,"tone-classification_SynTone_recall":0.5717144564,"tone-classification_SynTone_fscore":0.5830698227,"tone-classification_SynTone_valid_output_ratio":0.9937888199,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5776397516,"tone-classification_SynTone_precision_modified":0.4784671979,"tone-classification_SynTone_recall_modified":0.4996211806,"tone-classification_SynTone_fscore_modified":0.4487276462,"tone-classification_SynTone_acc":0.5849056604,"tone-classification_SynTone_precision":0.4844856532,"tone-classification_SynTone_recall":0.5059057238,"tone-classification_SynTone_fscore":0.4543720191,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.8396595026,"tone-classification_SynTone_recall_modified":0.7058371736,"tone-classification_SynTone_fscore_modified":0.748745873,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.8396595026,"tone-classification_SynTone_recall":0.7058371736,"tone-classification_SynTone_fscore":0.748745873,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":null,"tone-classification_SynTone_precision_modified":null,"tone-classification_SynTone_recall_modified":null,"tone-classification_SynTone_fscore_modified":null,"tone-classification_SynTone_acc":null,"tone-classification_SynTone_precision":null,"tone-classification_SynTone_recall":null,"tone-classification_SynTone_fscore":null,"tone-classification_SynTone_valid_output_ratio":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6770186335,"tone-classification_SynTone_precision_modified":0.6041666667,"tone-classification_SynTone_recall_modified":0.5499765318,"tone-classification_SynTone_fscore_modified":0.5393404488,"tone-classification_SynTone_acc":0.6770186335,"tone-classification_SynTone_precision":0.6041666667,"tone-classification_SynTone_recall":0.5499765318,"tone-classification_SynTone_fscore":0.5393404488,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8944099379,"tone-classification_SynTone_precision_modified":0.8288135593,"tone-classification_SynTone_recall_modified":0.6730414747,"tone-classification_SynTone_fscore_modified":0.7238560859,"tone-classification_SynTone_acc":0.8944099379,"tone-classification_SynTone_precision":0.8288135593,"tone-classification_SynTone_recall":0.6730414747,"tone-classification_SynTone_fscore":0.7238560859,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8447204969,"tone-classification_SynTone_precision_modified":0.6820625483,"tone-classification_SynTone_recall_modified":0.5681634349,"tone-classification_SynTone_fscore_modified":0.579448271,"tone-classification_SynTone_acc":0.85,"tone-classification_SynTone_precision":0.6863254393,"tone-classification_SynTone_recall":0.5717144564,"tone-classification_SynTone_fscore":0.5830698227,"tone-classification_SynTone_valid_output_ratio":0.9937888199,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8447204969,"tone-classification_SynTone_precision_modified":0.7777640248,"tone-classification_SynTone_recall_modified":0.5874594641,"tone-classification_SynTone_fscore_modified":0.5990203378,"tone-classification_SynTone_acc":0.8447204969,"tone-classification_SynTone_precision":0.7777640248,"tone-classification_SynTone_recall":0.5874594641,"tone-classification_SynTone_fscore":0.5990203378,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5776397516,"tone-classification_SynTone_precision_modified":0.4784671979,"tone-classification_SynTone_recall_modified":0.4996211806,"tone-classification_SynTone_fscore_modified":0.4487276462,"tone-classification_SynTone_acc":0.5849056604,"tone-classification_SynTone_precision":0.4844856532,"tone-classification_SynTone_recall":0.5059057238,"tone-classification_SynTone_fscore":0.4543720191,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.8396595026,"tone-classification_SynTone_recall_modified":0.7058371736,"tone-classification_SynTone_fscore_modified":0.748745873,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.8396595026,"tone-classification_SynTone_recall":0.7058371736,"tone-classification_SynTone_fscore":0.748745873,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":null,"tone-classification_SynTone_precision_modified":null,"tone-classification_SynTone_recall_modified":null,"tone-classification_SynTone_fscore_modified":null,"tone-classification_SynTone_acc":null,"tone-classification_SynTone_precision":null,"tone-classification_SynTone_recall":null,"tone-classification_SynTone_fscore":null,"tone-classification_SynTone_valid_output_ratio":null,"nlu_score":null}
|
leaderboard/boards_data/topic-classification_sid.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.682,"topic-classification_sid_precision_modified":0.5730049986,"topic-classification_sid_recall_modified":0.5717337985,"topic-classification_sid_fscore_modified":0.5606248332,"topic-classification_sid_acc":0.7239915074,"topic-classification_sid_precision":0.6082855612,"topic-classification_sid_recall":0.6069360918,"topic-classification_sid_fscore":0.595143135,"topic-classification_sid_valid_output_ratio":0.942,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.77,"topic-classification_sid_precision_modified":0.6850986976,"topic-classification_sid_recall_modified":0.6830888647,"topic-classification_sid_fscore_modified":0.6616877857,"topic-classification_sid_acc":0.77,"topic-classification_sid_precision":0.6850986976,"topic-classification_sid_recall":0.6830888647,"topic-classification_sid_fscore":0.6616877857,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.5896102708,"topic-classification_sid_recall_modified":0.5884196886,"topic-classification_sid_fscore_modified":0.5823719558,"topic-classification_sid_acc":0.704,"topic-classification_sid_precision":0.5896102708,"topic-classification_sid_recall":0.5884196886,"topic-classification_sid_fscore":0.5823719558,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.458,"topic-classification_sid_precision_modified":0.3928192753,"topic-classification_sid_recall_modified":0.4072039595,"topic-classification_sid_fscore_modified":0.3544902396,"topic-classification_sid_acc":0.6378830084,"topic-classification_sid_precision":0.5471020547,"topic-classification_sid_recall":0.5671364338,"topic-classification_sid_fscore":0.4937189966,"topic-classification_sid_valid_output_ratio":0.718,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.748,"topic-classification_sid_precision_modified":0.6428566774,"topic-classification_sid_recall_modified":0.6633522535,"topic-classification_sid_fscore_modified":0.628605048,"topic-classification_sid_acc":0.748,"topic-classification_sid_precision":0.6428566774,"topic-classification_sid_recall":0.6633522535,"topic-classification_sid_fscore":0.628605048,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":null,"topic-classification_sid_precision_modified":null,"topic-classification_sid_recall_modified":null,"topic-classification_sid_fscore_modified":null,"topic-classification_sid_acc":null,"topic-classification_sid_precision":null,"topic-classification_sid_recall":null,"topic-classification_sid_fscore":null,"topic-classification_sid_valid_output_ratio":null,"nlu_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.682,"topic-classification_sid_precision_modified":0.5730049986,"topic-classification_sid_recall_modified":0.5717337985,"topic-classification_sid_fscore_modified":0.5606248332,"topic-classification_sid_acc":0.7239915074,"topic-classification_sid_precision":0.6082855612,"topic-classification_sid_recall":0.6069360918,"topic-classification_sid_fscore":0.595143135,"topic-classification_sid_valid_output_ratio":0.942,"nlu_score":0.6297634971}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.77,"topic-classification_sid_precision_modified":0.6850986976,"topic-classification_sid_recall_modified":0.6830888647,"topic-classification_sid_fscore_modified":0.6616877857,"topic-classification_sid_acc":0.77,"topic-classification_sid_precision":0.6850986976,"topic-classification_sid_recall":0.6830888647,"topic-classification_sid_fscore":0.6616877857,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7144353486}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.5896102708,"topic-classification_sid_recall_modified":0.5884196886,"topic-classification_sid_fscore_modified":0.5823719558,"topic-classification_sid_acc":0.704,"topic-classification_sid_precision":0.5896102708,"topic-classification_sid_recall":0.5884196886,"topic-classification_sid_fscore":0.5823719558,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6749652797}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.722,"topic-classification_sid_precision_modified":0.6456808245,"topic-classification_sid_recall_modified":0.6300099701,"topic-classification_sid_fscore_modified":0.6126876785,"topic-classification_sid_acc":0.722,"topic-classification_sid_precision":0.6456808245,"topic-classification_sid_recall":0.6300099701,"topic-classification_sid_fscore":0.6126876785,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6458443785}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.458,"topic-classification_sid_precision_modified":0.3928192753,"topic-classification_sid_recall_modified":0.4072039595,"topic-classification_sid_fscore_modified":0.3544902396,"topic-classification_sid_acc":0.6378830084,"topic-classification_sid_precision":0.5471020547,"topic-classification_sid_recall":0.5671364338,"topic-classification_sid_fscore":0.4937189966,"topic-classification_sid_valid_output_ratio":0.718,"nlu_score":0.6552152029}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.748,"topic-classification_sid_precision_modified":0.6428566774,"topic-classification_sid_recall_modified":0.6633522535,"topic-classification_sid_fscore_modified":0.628605048,"topic-classification_sid_acc":0.748,"topic-classification_sid_precision":0.6428566774,"topic-classification_sid_recall":0.6633522535,"topic-classification_sid_fscore":0.628605048,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":null,"topic-classification_sid_precision_modified":null,"topic-classification_sid_recall_modified":null,"topic-classification_sid_fscore_modified":null,"topic-classification_sid_acc":null,"topic-classification_sid_precision":null,"topic-classification_sid_recall":null,"topic-classification_sid_fscore":null,"topic-classification_sid_valid_output_ratio":null,"nlu_score":null}
|
leaderboard/boards_data/translation-ar2fa_ar2fa.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1414109272,"translation-ar2fa_ar2fa_sahife_bleu":0.136408042,"translation-ar2fa_ar2fa_nahj_bleu":0.0653197648,"translation-ar2fa_ar2fa_quran_bleu":0.2187004167,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1334075162,"translation-ar2fa_ar2fa_sahife_bleu":0.1143867102,"translation-ar2fa_ar2fa_nahj_bleu":0.063272709,"translation-ar2fa_ar2fa_quran_bleu":0.2190563892,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1183647194,"translation-ar2fa_ar2fa_sahife_bleu":0.0905358622,"translation-ar2fa_ar2fa_nahj_bleu":0.0522591914,"translation-ar2fa_ar2fa_quran_bleu":0.2089938281,"nlg_score":0.1643361642}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0226935201,"translation-ar2fa_ar2fa_sahife_bleu":0.0196359142,"translation-ar2fa_ar2fa_nahj_bleu":0.010693835,"translation-ar2fa_ar2fa_quran_bleu":0.0371508269,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1534130086,"translation-ar2fa_ar2fa_sahife_bleu":0.1250461134,"translation-ar2fa_ar2fa_nahj_bleu":0.0624466634,"translation-ar2fa_ar2fa_quran_bleu":0.2681979318,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":null,"translation-ar2fa_ar2fa_sahife_bleu":null,"translation-ar2fa_ar2fa_nahj_bleu":null,"translation-ar2fa_ar2fa_quran_bleu":null,"nlg_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1414109272,"translation-ar2fa_ar2fa_sahife_bleu":0.136408042,"translation-ar2fa_ar2fa_nahj_bleu":0.0653197648,"translation-ar2fa_ar2fa_quran_bleu":0.2187004167,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1334075162,"translation-ar2fa_ar2fa_sahife_bleu":0.1143867102,"translation-ar2fa_ar2fa_nahj_bleu":0.063272709,"translation-ar2fa_ar2fa_quran_bleu":0.2190563892,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1183647194,"translation-ar2fa_ar2fa_sahife_bleu":0.0905358622,"translation-ar2fa_ar2fa_nahj_bleu":0.0522591914,"translation-ar2fa_ar2fa_quran_bleu":0.2089938281,"nlg_score":0.1643361642}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1064381612,"translation-ar2fa_ar2fa_sahife_bleu":0.089727948,"translation-ar2fa_ar2fa_nahj_bleu":0.0552602241,"translation-ar2fa_ar2fa_quran_bleu":0.1717674145,"nlg_score":0.1538910531}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0226935201,"translation-ar2fa_ar2fa_sahife_bleu":0.0196359142,"translation-ar2fa_ar2fa_nahj_bleu":0.010693835,"translation-ar2fa_ar2fa_quran_bleu":0.0371508269,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1534130086,"translation-ar2fa_ar2fa_sahife_bleu":0.1250461134,"translation-ar2fa_ar2fa_nahj_bleu":0.0624466634,"translation-ar2fa_ar2fa_quran_bleu":0.2681979318,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":null,"translation-ar2fa_ar2fa_sahife_bleu":null,"translation-ar2fa_ar2fa_nahj_bleu":null,"translation-ar2fa_ar2fa_quran_bleu":null,"nlg_score":null}
|
leaderboard/boards_data/translation-en2fa_en2fa.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2018158808,"translation-en2fa_en2fa_epoque_bleu":0.4332944681,"translation-en2fa_en2fa_mizan_bleu":0.1925182751,"translation-en2fa_en2fa_quran_bleu":0.1530925462,"translation-en2fa_en2fa_sahife_bleu":0.1026499453,"translation-en2fa_en2fa_nahj_bleu":0.051968827,"translation-en2fa_en2fa_tep_bleu":0.0708487287,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1757431213,"translation-en2fa_en2fa_epoque_bleu":0.3745398253,"translation-en2fa_en2fa_mizan_bleu":0.1640890656,"translation-en2fa_en2fa_quran_bleu":0.1377843747,"translation-en2fa_en2fa_sahife_bleu":0.0895949257,"translation-en2fa_en2fa_nahj_bleu":0.0437585905,"translation-en2fa_en2fa_tep_bleu":0.0679088622,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1700971031,"translation-en2fa_en2fa_epoque_bleu":0.3619925896,"translation-en2fa_en2fa_mizan_bleu":0.1638764762,"translation-en2fa_en2fa_quran_bleu":0.1169026899,"translation-en2fa_en2fa_sahife_bleu":0.0785107337,"translation-en2fa_en2fa_nahj_bleu":0.0463016599,"translation-en2fa_en2fa_tep_bleu":0.0704424388,"nlg_score":0.1643361642}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0600629844,"translation-en2fa_en2fa_epoque_bleu":0.1538216141,"translation-en2fa_en2fa_mizan_bleu":0.0548001026,"translation-en2fa_en2fa_quran_bleu":0.0217436833,"translation-en2fa_en2fa_sahife_bleu":0.0205645274,"translation-en2fa_en2fa_nahj_bleu":0.0172427415,"translation-en2fa_en2fa_tep_bleu":0.0093260061,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1974288311,"translation-en2fa_en2fa_epoque_bleu":0.4102902123,"translation-en2fa_en2fa_mizan_bleu":0.1898606624,"translation-en2fa_en2fa_quran_bleu":0.1638084791,"translation-en2fa_en2fa_sahife_bleu":0.1095493859,"translation-en2fa_en2fa_nahj_bleu":0.0487097316,"translation-en2fa_en2fa_tep_bleu":0.0737497745,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":null,"translation-en2fa_en2fa_epoque_bleu":null,"translation-en2fa_en2fa_mizan_bleu":null,"translation-en2fa_en2fa_quran_bleu":null,"translation-en2fa_en2fa_sahife_bleu":null,"translation-en2fa_en2fa_nahj_bleu":null,"translation-en2fa_en2fa_tep_bleu":null,"nlg_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2018158808,"translation-en2fa_en2fa_epoque_bleu":0.4332944681,"translation-en2fa_en2fa_mizan_bleu":0.1925182751,"translation-en2fa_en2fa_quran_bleu":0.1530925462,"translation-en2fa_en2fa_sahife_bleu":0.1026499453,"translation-en2fa_en2fa_nahj_bleu":0.051968827,"translation-en2fa_en2fa_tep_bleu":0.0708487287,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1757431213,"translation-en2fa_en2fa_epoque_bleu":0.3745398253,"translation-en2fa_en2fa_mizan_bleu":0.1640890656,"translation-en2fa_en2fa_quran_bleu":0.1377843747,"translation-en2fa_en2fa_sahife_bleu":0.0895949257,"translation-en2fa_en2fa_nahj_bleu":0.0437585905,"translation-en2fa_en2fa_tep_bleu":0.0679088622,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1700971031,"translation-en2fa_en2fa_epoque_bleu":0.3619925896,"translation-en2fa_en2fa_mizan_bleu":0.1638764762,"translation-en2fa_en2fa_quran_bleu":0.1169026899,"translation-en2fa_en2fa_sahife_bleu":0.0785107337,"translation-en2fa_en2fa_nahj_bleu":0.0463016599,"translation-en2fa_en2fa_tep_bleu":0.0704424388,"nlg_score":0.1643361642}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1732020657,"translation-en2fa_en2fa_epoque_bleu":0.3837758669,"translation-en2fa_en2fa_mizan_bleu":0.1680666593,"translation-en2fa_en2fa_quran_bleu":0.1005711034,"translation-en2fa_en2fa_sahife_bleu":0.0761731989,"translation-en2fa_en2fa_nahj_bleu":0.0436218334,"translation-en2fa_en2fa_tep_bleu":0.0641477759,"nlg_score":0.1538910531}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0600629844,"translation-en2fa_en2fa_epoque_bleu":0.1538216141,"translation-en2fa_en2fa_mizan_bleu":0.0548001026,"translation-en2fa_en2fa_quran_bleu":0.0217436833,"translation-en2fa_en2fa_sahife_bleu":0.0205645274,"translation-en2fa_en2fa_nahj_bleu":0.0172427415,"translation-en2fa_en2fa_tep_bleu":0.0093260061,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1974288311,"translation-en2fa_en2fa_epoque_bleu":0.4102902123,"translation-en2fa_en2fa_mizan_bleu":0.1898606624,"translation-en2fa_en2fa_quran_bleu":0.1638084791,"translation-en2fa_en2fa_sahife_bleu":0.1095493859,"translation-en2fa_en2fa_nahj_bleu":0.0487097316,"translation-en2fa_en2fa_tep_bleu":0.0737497745,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":null,"translation-en2fa_en2fa_epoque_bleu":null,"translation-en2fa_en2fa_mizan_bleu":null,"translation-en2fa_en2fa_quran_bleu":null,"translation-en2fa_en2fa_sahife_bleu":null,"translation-en2fa_en2fa_nahj_bleu":null,"translation-en2fa_en2fa_tep_bleu":null,"nlg_score":null}
|
leaderboard/boards_data/translation-fa2ar_fa2ar.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0922998074,"translation-fa2ar_fa2ar_nahj_bleu":0.0511154919,"translation-fa2ar_fa2ar_sahife_bleu":0.0589808221,"translation-fa2ar_fa2ar_quran_bleu":0.1668031083,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0720575119,"translation-fa2ar_fa2ar_nahj_bleu":0.037394526,"translation-fa2ar_fa2ar_sahife_bleu":0.0636064419,"translation-fa2ar_fa2ar_quran_bleu":0.1151715676,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0564204856,"translation-fa2ar_fa2ar_nahj_bleu":0.0345882932,"translation-fa2ar_fa2ar_sahife_bleu":0.0554604649,"translation-fa2ar_fa2ar_quran_bleu":0.0792126988,"nlg_score":0.1643361642}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0086214593,"translation-fa2ar_fa2ar_nahj_bleu":0.006894051,"translation-fa2ar_fa2ar_sahife_bleu":0.009695506,"translation-fa2ar_fa2ar_quran_bleu":0.0092748209,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0827618418,"translation-fa2ar_fa2ar_nahj_bleu":0.038434531,"translation-fa2ar_fa2ar_sahife_bleu":0.0781455938,"translation-fa2ar_fa2ar_quran_bleu":0.1317054007,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":null,"translation-fa2ar_fa2ar_nahj_bleu":null,"translation-fa2ar_fa2ar_sahife_bleu":null,"translation-fa2ar_fa2ar_quran_bleu":null,"nlg_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0922998074,"translation-fa2ar_fa2ar_nahj_bleu":0.0511154919,"translation-fa2ar_fa2ar_sahife_bleu":0.0589808221,"translation-fa2ar_fa2ar_quran_bleu":0.1668031083,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0720575119,"translation-fa2ar_fa2ar_nahj_bleu":0.037394526,"translation-fa2ar_fa2ar_sahife_bleu":0.0636064419,"translation-fa2ar_fa2ar_quran_bleu":0.1151715676,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0564204856,"translation-fa2ar_fa2ar_nahj_bleu":0.0345882932,"translation-fa2ar_fa2ar_sahife_bleu":0.0554604649,"translation-fa2ar_fa2ar_quran_bleu":0.0792126988,"nlg_score":0.1643361642}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0381651941,"translation-fa2ar_fa2ar_nahj_bleu":0.0246058927,"translation-fa2ar_fa2ar_sahife_bleu":0.0402564081,"translation-fa2ar_fa2ar_quran_bleu":0.0496332815,"nlg_score":0.1538910531}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0086214593,"translation-fa2ar_fa2ar_nahj_bleu":0.006894051,"translation-fa2ar_fa2ar_sahife_bleu":0.009695506,"translation-fa2ar_fa2ar_quran_bleu":0.0092748209,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0827618418,"translation-fa2ar_fa2ar_nahj_bleu":0.038434531,"translation-fa2ar_fa2ar_sahife_bleu":0.0781455938,"translation-fa2ar_fa2ar_quran_bleu":0.1317054007,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":null,"translation-fa2ar_fa2ar_nahj_bleu":null,"translation-fa2ar_fa2ar_sahife_bleu":null,"translation-fa2ar_fa2ar_quran_bleu":null,"nlg_score":null}
|
leaderboard/boards_data/translation-fa2en_fa2en.jsonl
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2337569687,"translation-fa2en_fa2en_tep_bleu":0.1386371644,"translation-fa2en_fa2en_mizan_bleu":0.2129637469,"translation-fa2en_fa2en_quran_bleu":0.1702102457,"translation-fa2en_fa2en_epoque_bleu":0.478211182,"translation-fa2en_fa2en_nahj_bleu":0.083013513,"translation-fa2en_fa2en_sahife_bleu":0.072000292,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2435498156,"translation-fa2en_fa2en_tep_bleu":0.1656898075,"translation-fa2en_fa2en_mizan_bleu":0.2055420364,"translation-fa2en_fa2en_quran_bleu":0.1726910304,"translation-fa2en_fa2en_epoque_bleu":0.4912890145,"translation-fa2en_fa2en_nahj_bleu":0.0882784037,"translation-fa2en_fa2en_sahife_bleu":0.0952319793,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2293783795,"translation-fa2en_fa2en_tep_bleu":0.1348246647,"translation-fa2en_fa2en_mizan_bleu":0.1880942935,"translation-fa2en_fa2en_quran_bleu":0.1642751236,"translation-fa2en_fa2en_epoque_bleu":0.4821448205,"translation-fa2en_fa2en_nahj_bleu":0.0857659109,"translation-fa2en_fa2en_sahife_bleu":0.0914041173,"nlg_score":0.1643361642}
|
| 7 |
-
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0196181945,"translation-fa2en_fa2en_tep_bleu":0.0101636027,"translation-fa2en_fa2en_mizan_bleu":0.0153753718,"translation-fa2en_fa2en_quran_bleu":0.0231110679,"translation-fa2en_fa2en_epoque_bleu":0.0359429205,"translation-fa2en_fa2en_nahj_bleu":0.0119451943,"translation-fa2en_fa2en_sahife_bleu":0.0117936527,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2307102128,"translation-fa2en_fa2en_tep_bleu":0.1527807458,"translation-fa2en_fa2en_mizan_bleu":0.1927067243,"translation-fa2en_fa2en_quran_bleu":0.1628198329,"translation-fa2en_fa2en_epoque_bleu":0.4676472481,"translation-fa2en_fa2en_nahj_bleu":0.0810494281,"translation-fa2en_fa2en_sahife_bleu":0.1009417344,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":null,"translation-fa2en_fa2en_tep_bleu":null,"translation-fa2en_fa2en_mizan_bleu":null,"translation-fa2en_fa2en_quran_bleu":null,"translation-fa2en_fa2en_epoque_bleu":null,"translation-fa2en_fa2en_nahj_bleu":null,"translation-fa2en_fa2en_sahife_bleu":null,"nlg_score":null}
|
|
|
|
| 4 |
{"Model Name":"c4ai-command-r-plus","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2337569687,"translation-fa2en_fa2en_tep_bleu":0.1386371644,"translation-fa2en_fa2en_mizan_bleu":0.2129637469,"translation-fa2en_fa2en_quran_bleu":0.1702102457,"translation-fa2en_fa2en_epoque_bleu":0.478211182,"translation-fa2en_fa2en_nahj_bleu":0.083013513,"translation-fa2en_fa2en_sahife_bleu":0.072000292,"nlg_score":0.1880477876}
|
| 5 |
{"Model Name":"gpt-5-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2435498156,"translation-fa2en_fa2en_tep_bleu":0.1656898075,"translation-fa2en_fa2en_mizan_bleu":0.2055420364,"translation-fa2en_fa2en_quran_bleu":0.1726910304,"translation-fa2en_fa2en_epoque_bleu":0.4912890145,"translation-fa2en_fa2en_nahj_bleu":0.0882784037,"translation-fa2en_fa2en_sahife_bleu":0.0952319793,"nlg_score":0.181552926}
|
| 6 |
{"Model Name":"gpt-5-nano","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2293783795,"translation-fa2en_fa2en_tep_bleu":0.1348246647,"translation-fa2en_fa2en_mizan_bleu":0.1880942935,"translation-fa2en_fa2en_quran_bleu":0.1642751236,"translation-fa2en_fa2en_epoque_bleu":0.4821448205,"translation-fa2en_fa2en_nahj_bleu":0.0857659109,"translation-fa2en_fa2en_sahife_bleu":0.0914041173,"nlg_score":0.1643361642}
|
| 7 |
+
{"Model Name":"gpt-oss:120b","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"120000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2010422229,"translation-fa2en_fa2en_tep_bleu":0.1296290178,"translation-fa2en_fa2en_mizan_bleu":0.1687085372,"translation-fa2en_fa2en_quran_bleu":0.1258778791,"translation-fa2en_fa2en_epoque_bleu":0.4180918256,"translation-fa2en_fa2en_nahj_bleu":0.0766886466,"translation-fa2en_fa2en_sahife_bleu":0.07624077,"nlg_score":0.1538910531}
|
| 8 |
{"Model Name":"gemma-3n-E4B-it","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0196181945,"translation-fa2en_fa2en_tep_bleu":0.0101636027,"translation-fa2en_fa2en_mizan_bleu":0.0153753718,"translation-fa2en_fa2en_quran_bleu":0.0231110679,"translation-fa2en_fa2en_epoque_bleu":0.0359429205,"translation-fa2en_fa2en_nahj_bleu":0.0119451943,"translation-fa2en_fa2en_sahife_bleu":0.0117936527,"nlg_score":0.0940241349}
|
| 9 |
{"Model Name":"gpt-4.1","thinking_method":"β","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2307102128,"translation-fa2en_fa2en_tep_bleu":0.1527807458,"translation-fa2en_fa2en_mizan_bleu":0.1927067243,"translation-fa2en_fa2en_quran_bleu":0.1628198329,"translation-fa2en_fa2en_epoque_bleu":0.4676472481,"translation-fa2en_fa2en_nahj_bleu":0.0810494281,"translation-fa2en_fa2en_sahife_bleu":0.1009417344,"nlg_score":0.194675133}
|
| 10 |
{"Model Name":"o4-mini","thinking_method":"βοΈ","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":null,"translation-fa2en_fa2en_tep_bleu":null,"translation-fa2en_fa2en_mizan_bleu":null,"translation-fa2en_fa2en_quran_bleu":null,"translation-fa2en_fa2en_epoque_bleu":null,"translation-fa2en_fa2en_nahj_bleu":null,"translation-fa2en_fa2en_sahife_bleu":null,"nlg_score":null}
|
leaderboard/leaderboard.py
CHANGED
|
@@ -26,14 +26,14 @@ class ColumnConfig:
|
|
| 26 |
default_task_tab_names = {
|
| 27 |
"all": "Overall", "mt_bench": "MT-Bench", "ifeval": "IFEval",
|
| 28 |
"MMLU": "MMLU", "persian_csr": "PerCoR",
|
| 29 |
-
"
|
| 30 |
}
|
| 31 |
default_column_names = {
|
| 32 |
"Model Name": "Model", "model_url": "URL",
|
| 33 |
"parameters_count": "βοΈ Params", "source_type": "Source",
|
| 34 |
"Average": "Average", "Rank": "π Rank", "score_mean": "score_mean (main)",
|
| 35 |
"strict_instruction_accuracy": "strict_instruction_accuracy (main)", "acc": "accuracy (main)",
|
| 36 |
-
"
|
| 37 |
}
|
| 38 |
|
| 39 |
if self.config_path and self.config_path.exists():
|
|
|
|
| 26 |
default_task_tab_names = {
|
| 27 |
"all": "Overall", "mt_bench": "MT-Bench", "ifeval": "IFEval",
|
| 28 |
"MMLU": "MMLU", "persian_csr": "PerCoR",
|
| 29 |
+
"persian_nlu": "Persian NLU", "persian_nlg": "Persian NLG",
|
| 30 |
}
|
| 31 |
default_column_names = {
|
| 32 |
"Model Name": "Model", "model_url": "URL",
|
| 33 |
"parameters_count": "βοΈ Params", "source_type": "Source",
|
| 34 |
"Average": "Average", "Rank": "π Rank", "score_mean": "score_mean (main)",
|
| 35 |
"strict_instruction_accuracy": "strict_instruction_accuracy (main)", "acc": "accuracy (main)",
|
| 36 |
+
"nlu_score": "nlu_score (main)", "nlg_score": "nlg_score (main)",
|
| 37 |
}
|
| 38 |
|
| 39 |
if self.config_path and self.config_path.exists():
|
leaderboard/leaderboard_config.yaml
CHANGED
|
@@ -14,8 +14,9 @@ column_names:
|
|
| 14 |
"score_mean": "score_mean (main)"
|
| 15 |
"strict_instruction_accuracy": "strict_instruction_accuracy (main)"
|
| 16 |
"acc": "accuracy (main)"
|
| 17 |
-
"nlg_score": "nlg_score (main)"
|
| 18 |
"nlu_score": "nlu_score (main)"
|
|
|
|
|
|
|
| 19 |
|
| 20 |
# Common score columns (these are examples, use your actual metric names from .jsonl)
|
| 21 |
# "Average": "Overall AVG" # For the 'all' table summary
|
|
@@ -37,8 +38,8 @@ task_display_names:
|
|
| 37 |
ifeval: "Persian IFEval"
|
| 38 |
MMLU: "PerMMLU"
|
| 39 |
persian_csr: "PerCoR"
|
| 40 |
-
persian_nlg: "Persian NLG" # Overview tab
|
| 41 |
persian_nlu: "Persian NLU"
|
|
|
|
| 42 |
question-generation_PersianQA: "PersianQA (QG)"
|
| 43 |
translation-en2fa_en2fa: "Translation (en2fa)"
|
| 44 |
translation-fa2en_fa2en: "Translation (fa2en)"
|
|
@@ -333,8 +334,8 @@ global_settings:
|
|
| 333 |
- "ifeval"
|
| 334 |
- "MMLU"
|
| 335 |
- "persian_csr"
|
| 336 |
-
- "persian_nlg"
|
| 337 |
- "persian_nlu"
|
|
|
|
| 338 |
|
| 339 |
numeric_score_columns_for_bolding: # List of ORIGINAL column names
|
| 340 |
# For the "Overall Benchmark" tab (all.jsonl)
|
|
|
|
| 14 |
"score_mean": "score_mean (main)"
|
| 15 |
"strict_instruction_accuracy": "strict_instruction_accuracy (main)"
|
| 16 |
"acc": "accuracy (main)"
|
|
|
|
| 17 |
"nlu_score": "nlu_score (main)"
|
| 18 |
+
"nlg_score": "nlg_score (main)"
|
| 19 |
+
|
| 20 |
|
| 21 |
# Common score columns (these are examples, use your actual metric names from .jsonl)
|
| 22 |
# "Average": "Overall AVG" # For the 'all' table summary
|
|
|
|
| 38 |
ifeval: "Persian IFEval"
|
| 39 |
MMLU: "PerMMLU"
|
| 40 |
persian_csr: "PerCoR"
|
|
|
|
| 41 |
persian_nlu: "Persian NLU"
|
| 42 |
+
persian_nlg: "Persian NLG"
|
| 43 |
question-generation_PersianQA: "PersianQA (QG)"
|
| 44 |
translation-en2fa_en2fa: "Translation (en2fa)"
|
| 45 |
translation-fa2en_fa2en: "Translation (fa2en)"
|
|
|
|
| 334 |
- "ifeval"
|
| 335 |
- "MMLU"
|
| 336 |
- "persian_csr"
|
|
|
|
| 337 |
- "persian_nlu"
|
| 338 |
+
- "persian_nlg"
|
| 339 |
|
| 340 |
numeric_score_columns_for_bolding: # List of ORIGINAL column names
|
| 341 |
# For the "Overall Benchmark" tab (all.jsonl)
|