Files changed (2)
  1. app.py +14 -2
  2. leaderboard_general.csv +2 -1
app.py CHANGED
@@ -30,11 +30,23 @@ with gr.Blocks() as demo:
     discord_link = 'https://discord.com/invite/nfgaTG3H'
     gr.Markdown('''
     The models below were tested with [lm_evaluation_harness](https://github.com/EleutherAI/lm-evaluation-harness) on Italian-specific tasks introduced in this [PR](https://github.com/EleutherAI/lm-evaluation-harness/pull/1358).
-    The entire project, the models, and the datasets are strictly open source, and all results can be reproduced by running commands like this one:
-    * lm_eval --model hf --model_args pretrained=HUGGINGFACE_MODEL_ID --tasks xcopa_it,hellaswag_it,lambada_openai_mt_it,belebele_ita_Latn,m_mmlu_it --device cuda:0 --batch_size 8
+    The entire project, the models, and the datasets are strictly open source, and all results can be reproduced by running commands like this one:
+    ```
+    lm_eval --model hf --model_args pretrained=HUGGINGFACE_MODEL_ID --tasks xcopa_it,hellaswag_it,lambada_openai_mt_it,belebele_ita_Latn,arc_it --device cuda:0 --batch_size 8
+    ```
     ''')
     gr.DataFrame(get_data_classifica, every=3600)
     gr.Markdown(f"Main contributor: @giux78")
+    gr.Markdown('''
+    ### Reference scores on Italian tasks from the Mixtral paper
+
+    | Model | Arc-c | HellaS | MMLU | AVG |
+    | --- | --- | --- | --- | --- |
+    | Mixtral 8x7B | 52.8 | 75.1 | 70.9 | 66.27 |
+    | LLaMA 2 70B | 49.4 | 70.9 | 65.1 | 61.80 |
+    | LLaMA 1 34B | 42.9 | 65.4 | 49.0 | 52.43 |
+    | Mistral 7B | 41.49 | 61.22 | 52.53 | 51.75 |
+    ''')


     with gr.Tab('RAG Leaderboard'):
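In the reference table added above, AVG is simply the arithmetic mean of the three task scores, e.g. (52.8 + 75.1 + 70.9) / 3 ≈ 66.27 for Mixtral 8x7B. The reproduction command in this hunk also has a Python-API equivalent in recent versions of lm-evaluation-harness; a minimal sketch, assuming lm_eval >= 0.4 (where `simple_evaluate` is exposed) and with HUGGINGFACE_MODEL_ID left as the same placeholder used in the CLI command:

```
# Sketch of the diff's CLI call through the harness's Python API.
# HUGGINGFACE_MODEL_ID is a placeholder, exactly as in the command above.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=HUGGINGFACE_MODEL_ID",
    tasks=["xcopa_it", "hellaswag_it", "lambada_openai_mt_it",
           "belebele_ita_Latn", "arc_it"],
    device="cuda:0",
    batch_size=8,
)
print(results["results"])  # per-task metrics such as acc / acc_norm
```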
leaderboard_general.csv CHANGED
@@ -9,4 +9,5 @@ galatolo/cerbero-7B,,0.5137,0.4867,0.5089,0.5089,0.4722,0.6135,23.4551,0.4964,0.
 mii-llm/maestrale-chat-v0.3-alpha,,0.5164,0.4774,0.5911,0.5911,0.5046,0.66,38.2427,0.4378,0.692,0.1343,0.4568
 giux78/zefiro-7b-dpo-qlora-ITA-v0.7,0.508,0.5203,0.4717,0.4778,0.4778,0.4914,0.6428,23.6041,0.5174,0.684,0.1805,0.4611
 mii-llm/maestrale-chat-v0.3-beta,,0.5129,,0.5644,0.5644,0.5067,0.6581,53.0646,0.4207,0.72,0.1463,0.4559
-swap-uniba/LLaMAntino-2-7b-hf-ITA,,0.3696,,0.2433,0.2433,0.4113,0.5428,33.6146,0.4696,0.678,0.139,0.3456
+swap-uniba/LLaMAntino-2-7b-hf-ITA,,0.3696,,0.2433,0.2433,0.4113,0.5428,33.6146,0.4696,0.678,0.139,0.3456
+mistralai/Mistral-7B-v0.1,,0.5253,,0.41,0.41,0.4486,0.6122,30.2635,0.4894,0.658,0.1061,0.4149
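The diff does not include `get_data_classifica`, the callable that `gr.DataFrame(..., every=3600)` re-runs every hour to refresh the leaderboard. Given the CSV rows above, a minimal sketch of what such a loader could look like, where everything beyond the filename is an assumption (in particular the "avg" sort column, since the CSV header line sits outside this hunk):

```
# Hypothetical loader polled hourly by gr.DataFrame(get_data_classifica, every=3600).
# The real implementation is not part of this diff; "avg" is an assumed column name.
import pandas as pd

def get_data_classifica() -> pd.DataFrame:
    df = pd.read_csv("leaderboard_general.csv")
    # Keep the board ranked by average score, best model first.
    return df.sort_values("avg", ascending=False)
```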