Upload news summarization eval results for 69d59c104adb81dd2de75563
Browse files
eval/news_summarization/results/jobs/69d59c104adb81dd2de75563/results.json
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"summary": {
|
| 3 |
+
"provider": "hf-transformers",
|
| 4 |
+
"model": "google/gemma-3-12b-it",
|
| 5 |
+
"dataset": "bbc2024_qwen_reference",
|
| 6 |
+
"prompt_style": "simple",
|
| 7 |
+
"cases": 10,
|
| 8 |
+
"average_token_f1": 0.4922,
|
| 9 |
+
"average_rouge_l_f1_approx": 0.3793,
|
| 10 |
+
"average_word_count": 55.2,
|
| 11 |
+
"median_latency_ms": 34305.84,
|
| 12 |
+
"average_rouge1_f1": 0.5295,
|
| 13 |
+
"average_rouge2_f1": 0.2673,
|
| 14 |
+
"average_rougeL_f1": 0.3921,
|
| 15 |
+
"average_bertscore_f1": 0.9167
|
| 16 |
+
},
|
| 17 |
+
"rows": [
|
| 18 |
+
{
|
| 19 |
+
"case_id": "news-001",
|
| 20 |
+
"article_chars": 4019,
|
| 21 |
+
"reference_summary": "West Ham captain Dagny Brynjarsdottir has been praised for a documentary that showcases her pregnancy journey as an elite footballer, with the Iceland midfielder's film 'Omarsson' highlighting the challenges and joys of balancing motherhood and sport. The documentary, released by West Ham, is being seen as a milestone for female athletes, with Brynjarsdottir's story inspiring others and demonstrating the support now available for players starting families.",
|
| 22 |
+
"prediction": "West Ham captain Dagny Brynjarsdottir is featured in a new documentary, \"Omarsson,\" showcasing her pregnancy journey and the support she received from her club while continuing her career as an elite footballer. The film highlights the changing landscape of professional sports, with increased support for athletes balancing motherhood, inspiring others like Katrina Gorry and Emma Harries.",
|
| 23 |
+
"source_url": null,
|
| 24 |
+
"latency_ms": 35545.39,
|
| 25 |
+
"scores": {
|
| 26 |
+
"token_f1": 0.5082,
|
| 27 |
+
"rouge_l_f1_approx": 0.377,
|
| 28 |
+
"rouge1_f1": 0.5484,
|
| 29 |
+
"rouge2_f1": 0.2295,
|
| 30 |
+
"rougeL_f1": 0.4194,
|
| 31 |
+
"word_count": 56,
|
| 32 |
+
"bertscore_f1": 0.9108
|
| 33 |
+
},
|
| 34 |
+
"provider_metadata": {
|
| 35 |
+
"model": "google/gemma-3-12b-it"
|
| 36 |
+
}
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"case_id": "news-002",
|
| 40 |
+
"article_chars": 2779,
|
| 41 |
+
"reference_summary": "US authorities have charged Takeshi Ebisawa, alleged to be a senior member of the Japanese Yakuza crime syndicate, with conspiring to traffic nuclear materials, attempting to sell uranium and plutonium to what he believed were Iranian agents for a nuclear bomb. Ebisawa and a Thai co-defendant, Somphop Singhasiri, face life imprisonment if convicted, with Ebisawa also accused of seeking to acquire military-grade weapons for a Myanmar rebel group. The nuclear materials, sourced from Myanmar, were seized in Thailand and confirmed to contain uranium and weapons-grade plutonium by a US laboratory.",
|
| 42 |
+
"prediction": "A senior Yakuza figure, Takeshi Ebisawa, has been charged in the US with conspiring to traffic nuclear materials to Iran, allegedly offering uranium and plutonium to an undercover agent. He also faces charges related to weapons trafficking and money laundering, alongside a Thai co-defendant, with authorities seizing nuclear samples and uncovering plans to acquire military-grade weapons.",
|
| 43 |
+
"source_url": null,
|
| 44 |
+
"latency_ms": 34398.38,
|
| 45 |
+
"scores": {
|
| 46 |
+
"token_f1": 0.4795,
|
| 47 |
+
"rouge_l_f1_approx": 0.3288,
|
| 48 |
+
"rouge1_f1": 0.543,
|
| 49 |
+
"rouge2_f1": 0.2282,
|
| 50 |
+
"rougeL_f1": 0.3444,
|
| 51 |
+
"word_count": 56,
|
| 52 |
+
"bertscore_f1": 0.9077
|
| 53 |
+
},
|
| 54 |
+
"provider_metadata": {
|
| 55 |
+
"model": "google/gemma-3-12b-it"
|
| 56 |
+
}
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"case_id": "news-003",
|
| 60 |
+
"article_chars": 6612,
|
| 61 |
+
"reference_summary": "Heavy fighting has erupted around al-Amal Hospital in Khan Younis, Gaza, as Israeli ground forces encircle the city, with reports of troops advancing and intense clashes near medical facilities. At least 195 Palestinians and 24 Israeli soldiers have been killed in the recent escalation, with the Israeli military targeting Hamas leaders believed to be hiding in tunnels. The World Health Organization expresses concern over attacks on hospitals, while residents face difficulties fleeing the area.",
|
| 62 |
+
"prediction": "Intense fighting continues in Khan Younis, southern Gaza, as Israeli forces encircle the city and advance into Hamas strongholds, leading to civilian displacement and concerns over the safety of hospitals. Amidst rising casualties on both sides, negotiations for a ceasefire are ongoing, but a deal remains elusive as Hamas seeks a permanent cessation of hostilities.",
|
| 63 |
+
"source_url": null,
|
| 64 |
+
"latency_ms": 33228.81,
|
| 65 |
+
"scores": {
|
| 66 |
+
"token_f1": 0.3256,
|
| 67 |
+
"rouge_l_f1_approx": 0.2481,
|
| 68 |
+
"rouge1_f1": 0.3538,
|
| 69 |
+
"rouge2_f1": 0.125,
|
| 70 |
+
"rougeL_f1": 0.2615,
|
| 71 |
+
"word_count": 55,
|
| 72 |
+
"bertscore_f1": 0.8949
|
| 73 |
+
},
|
| 74 |
+
"provider_metadata": {
|
| 75 |
+
"model": "google/gemma-3-12b-it"
|
| 76 |
+
}
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"case_id": "news-004",
|
| 80 |
+
"article_chars": 3452,
|
| 81 |
+
"reference_summary": "Simon Rimmer's vegetarian restaurant, Greens in Manchester, has closed after 33 years due to a rise in rent and operating costs, making the business unviable. The TV chef expressed his heartbreak over the closure on social media, thanking staff, customers, and suppliers.",
|
| 82 |
+
"prediction": "The vegetarian restaurant Greens, co-owned by TV chef Simon Rimmer, has closed after 33 years due to rising rent and operating costs. Rimmer announced the immediate closure on social media, expressing heartbreak and thanking staff, customers, and suppliers, while noting that its sister restaurant in Sale remains open.",
|
| 83 |
+
"source_url": null,
|
| 84 |
+
"latency_ms": 30203.54,
|
| 85 |
+
"scores": {
|
| 86 |
+
"token_f1": 0.6667,
|
| 87 |
+
"rouge_l_f1_approx": 0.5333,
|
| 88 |
+
"rouge1_f1": 0.7174,
|
| 89 |
+
"rouge2_f1": 0.4444,
|
| 90 |
+
"rougeL_f1": 0.5435,
|
| 91 |
+
"word_count": 48,
|
| 92 |
+
"bertscore_f1": 0.9438
|
| 93 |
+
},
|
| 94 |
+
"provider_metadata": {
|
| 95 |
+
"model": "google/gemma-3-12b-it"
|
| 96 |
+
}
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"case_id": "news-005",
|
| 100 |
+
"article_chars": 3581,
|
| 101 |
+
"reference_summary": "NASA's Artemis III mission, which aims to return astronauts to the Moon, has been delayed until September 2026 due to the need for further technology development, including a landing craft and updated spacesuits. The precursor Artemis II mission, originally planned for the end of 2022, is now scheduled for September 2025. SpaceX's Starship vehicle is set to be the human landing system for Artemis III, requiring extensive testing and demonstrations before it can be used for a lunar landing.",
|
| 102 |
+
"prediction": "NASA has announced delays for the Artemis program, pushing the return of humans to the Moon to September 2026 at the earliest and a fly-around-the-Moon mission to September 2025, citing the need for further technology development and safety considerations. The delays are primarily due to challenges with the lunar landing craft being developed by SpaceX, as well as concerns about the Orion capsule's life-support system and heat shield.",
|
| 103 |
+
"source_url": null,
|
| 104 |
+
"latency_ms": 43632.87,
|
| 105 |
+
"scores": {
|
| 106 |
+
"token_f1": 0.449,
|
| 107 |
+
"rouge_l_f1_approx": 0.2857,
|
| 108 |
+
"rouge1_f1": 0.5065,
|
| 109 |
+
"rouge2_f1": 0.1711,
|
| 110 |
+
"rougeL_f1": 0.2857,
|
| 111 |
+
"word_count": 68,
|
| 112 |
+
"bertscore_f1": 0.8979
|
| 113 |
+
},
|
| 114 |
+
"provider_metadata": {
|
| 115 |
+
"model": "google/gemma-3-12b-it"
|
| 116 |
+
}
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"case_id": "news-006",
|
| 120 |
+
"article_chars": 5882,
|
| 121 |
+
"reference_summary": "Multiple news agencies, including PA, Getty Images, AFP, Reuters, and Associated Press, have withdrawn a photo of the Princess of Wales and her children due to concerns it may have been digitally manipulated. The image, taken by Prince William for Mother's Day and the first official photo of the princess since her surgery, was initially posted by Kensington Palace on social media. The agencies are seeking clarification from the palace, and in the absence of it, have removed the photo from their services.",
|
| 122 |
+
"prediction": "Major news agencies have retracted a Mother's Day photo of the Princess of Wales and her children due to concerns about digital manipulation and inconsistencies within the image. The photo, released by Kensington Palace, was intended to reassure the public about her recovery following abdominal surgery but instead fueled further speculation.",
|
| 123 |
+
"source_url": null,
|
| 124 |
+
"latency_ms": 27817.1,
|
| 125 |
+
"scores": {
|
| 126 |
+
"token_f1": 0.4478,
|
| 127 |
+
"rouge_l_f1_approx": 0.3582,
|
| 128 |
+
"rouge1_f1": 0.4853,
|
| 129 |
+
"rouge2_f1": 0.2836,
|
| 130 |
+
"rougeL_f1": 0.3824,
|
| 131 |
+
"word_count": 51,
|
| 132 |
+
"bertscore_f1": 0.9142
|
| 133 |
+
},
|
| 134 |
+
"provider_metadata": {
|
| 135 |
+
"model": "google/gemma-3-12b-it"
|
| 136 |
+
}
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"case_id": "news-007",
|
| 140 |
+
"article_chars": 4085,
|
| 141 |
+
"reference_summary": "The death of Dragon Ball creator Akira Toriyama has been mourned by fans worldwide, with memories shared of a 2018 diplomatic incident between Japan and Mexico over unauthorized public screenings of the series. Despite copyright issues, a free screening of Dragon Ball Super's episode #130 was eventually allowed in Ciudad Juarez, attracting thousands of fans.",
|
| 142 |
+
"prediction": "Following the death of Dragon Ball creator Akira Toriyama, fans worldwide are celebrating his legacy, recalling how the series sparked a diplomatic incident in Mexico six years ago over unauthorized public screenings. Despite initial copyright concerns and intervention from the Japanese Embassy, a screening in Ciudad Juarez was ultimately approved, drawing an estimated 15,000 attendees.",
|
| 143 |
+
"source_url": null,
|
| 144 |
+
"latency_ms": 34305.84,
|
| 145 |
+
"scores": {
|
| 146 |
+
"token_f1": 0.5273,
|
| 147 |
+
"rouge_l_f1_approx": 0.4545,
|
| 148 |
+
"rouge1_f1": 0.5179,
|
| 149 |
+
"rouge2_f1": 0.2727,
|
| 150 |
+
"rougeL_f1": 0.4464,
|
| 151 |
+
"word_count": 55,
|
| 152 |
+
"bertscore_f1": 0.9387
|
| 153 |
+
},
|
| 154 |
+
"provider_metadata": {
|
| 155 |
+
"model": "google/gemma-3-12b-it"
|
| 156 |
+
}
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"case_id": "news-008",
|
| 160 |
+
"article_chars": 3631,
|
| 161 |
+
"reference_summary": "North Korea fired over 200 artillery shells off its west coast towards South Korea's Yeonpyeong island, prompting the South to hold live fire drills and order civilians to seek shelter; no damage was reported, but the South called it a provocative act, while the North claimed it was a response to military actions by the South. This tension follows North Korea's suspension of a military deal with the South and its recent satellite launch, which led to heightened tensions along the border.",
|
| 162 |
+
"prediction": "North Korea fired over 200 artillery shells towards South Korea's Yeonpyeong island, prompting South Korea to conduct live fire drills and issue shelter warnings to nearby residents. This exchange of fire, following North Korea's withdrawal from a military agreement and heightened tensions, raises concerns about peace and stability on the Korean peninsula.",
|
| 163 |
+
"source_url": null,
|
| 164 |
+
"latency_ms": 33145.14,
|
| 165 |
+
"scores": {
|
| 166 |
+
"token_f1": 0.4627,
|
| 167 |
+
"rouge_l_f1_approx": 0.4478,
|
| 168 |
+
"rouge1_f1": 0.4928,
|
| 169 |
+
"rouge2_f1": 0.2941,
|
| 170 |
+
"rougeL_f1": 0.4783,
|
| 171 |
+
"word_count": 52,
|
| 172 |
+
"bertscore_f1": 0.9288
|
| 173 |
+
},
|
| 174 |
+
"provider_metadata": {
|
| 175 |
+
"model": "google/gemma-3-12b-it"
|
| 176 |
+
}
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"case_id": "news-009",
|
| 180 |
+
"article_chars": 4821,
|
| 181 |
+
"reference_summary": "Trainer Willie Mullins achieved a historic 100th Cheltenham Festival victory as his son Patrick won the Champion Bumper on Jasmin De Vaux, becoming the first person to reach a century of wins at the event. Mullins, with six victories this year, celebrated the milestone with a hat doff, and his success is a testament to his dedication, team, and innovative approach, outpacing second-placed Nicky Henderson's 73 wins.",
|
| 182 |
+
"prediction": "Willie Mullins achieved a historic 100th victory at the Cheltenham Festival, solidifying his dominance in the racing world and extending his lead over rivals like Nicky Henderson. The milestone win, secured by his son Patrick in the Champion Bumper, caps a remarkable career marked by innovation, a strong team, and resilience in the face of setbacks.",
|
| 183 |
+
"source_url": null,
|
| 184 |
+
"latency_ms": 34445.15,
|
| 185 |
+
"scores": {
|
| 186 |
+
"token_f1": 0.4715,
|
| 187 |
+
"rouge_l_f1_approx": 0.2927,
|
| 188 |
+
"rouge1_f1": 0.512,
|
| 189 |
+
"rouge2_f1": 0.2276,
|
| 190 |
+
"rougeL_f1": 0.288,
|
| 191 |
+
"word_count": 56,
|
| 192 |
+
"bertscore_f1": 0.9019
|
| 193 |
+
},
|
| 194 |
+
"provider_metadata": {
|
| 195 |
+
"model": "google/gemma-3-12b-it"
|
| 196 |
+
}
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"case_id": "news-010",
|
| 200 |
+
"article_chars": 2448,
|
| 201 |
+
"reference_summary": "Cambridge University Library has acquired an archive of posters and memorabilia from the Poems On The Underground project, which since 1986 has featured literary works on London's Tube network, including a letter from Philip Larkin who never saw his poetry displayed. The archive, now open to the public, also contains a note from Seamus Heaney praising the project's impact on poetry's presence in public spaces.",
|
| 202 |
+
"prediction": "Hundreds of posters and memorabilia from the \"Poems On The Underground\" public art project have been donated to Cambridge University Library, including a letter from Philip Larkin who never saw his work displayed. The archive, which also features correspondence from Seamus Heaney and details readings and publications, is now publicly accessible for research and study.",
|
| 203 |
+
"source_url": null,
|
| 204 |
+
"latency_ms": 31064.33,
|
| 205 |
+
"scores": {
|
| 206 |
+
"token_f1": 0.5833,
|
| 207 |
+
"rouge_l_f1_approx": 0.4667,
|
| 208 |
+
"rouge1_f1": 0.6179,
|
| 209 |
+
"rouge2_f1": 0.3967,
|
| 210 |
+
"rougeL_f1": 0.4715,
|
| 211 |
+
"word_count": 55,
|
| 212 |
+
"bertscore_f1": 0.9284
|
| 213 |
+
},
|
| 214 |
+
"provider_metadata": {
|
| 215 |
+
"model": "google/gemma-3-12b-it"
|
| 216 |
+
}
|
| 217 |
+
}
|
| 218 |
+
]
|
| 219 |
+
}
|