diff --git a/Gen_llm_eval_output.py b/Gen_llm_eval_output.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c75d62b2c324c2cd142ff728af991ef9cdab188
--- /dev/null
+++ b/Gen_llm_eval_output.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+
+#python Gen_llm_eval_output.py   --p1 csv_files/llm_scores_p1.xlsx   --p2 csv_files/llm_scores_p2.xlsx   --p3 csv_files/llm_scores_p3.xlsx   --output-dir csv_files/outputs
+import argparse
+import os
+import re
+import math
+import pandas as pd
+import numpy as np
+
+REQUIRED_COLS = ["model", "task", "language", "configuration", "prompts", "f1"]
+
+def read_scores(path: str) -> pd.DataFrame:
+    """Load one prompt-score workbook and return its validated score rows.
+
+    Column headers are stripped and lower-cased, a ``prompt`` column is
+    accepted as an alias for ``prompts``, and only ``REQUIRED_COLS`` are kept.
+    Rows whose ``f1`` cannot be parsed as a number are dropped.
+
+    Args:
+        path: Path of the Excel file to read.
+
+    Returns:
+        A DataFrame with exactly the ``REQUIRED_COLS`` columns and numeric ``f1``.
+
+    Raises:
+        ValueError: If a required column is missing after header normalization.
+    """
+    df = pd.read_excel(path)
+    # Headers are not guaranteed to be strings (a numeric header cell is read
+    # as a number), so stringify before calling str methods on them.
+    df.columns = [str(c).strip().lower() for c in df.columns]
+    if "prompts" not in df.columns and "prompt" in df.columns:
+        df["prompts"] = df["prompt"]
+    missing = [c for c in REQUIRED_COLS if c not in df.columns]
+    if missing:
+        raise ValueError(f"{path} is missing required columns: {missing}")
+    # Keep only the required columns; unparsable f1 cells become NaN and are dropped.
+    df = df[REQUIRED_COLS].copy()
+    df["f1"] = pd.to_numeric(df["f1"], errors="coerce")
+    return df.dropna(subset=["f1"])
+
+def sanitize_filename(s: str) -> str:
+    """Return ``s`` trimmed, with each run of unsafe characters replaced by ``_``.
+
+    Characters kept as-is are ASCII letters, digits, ``.``, ``_``, ``-`` and ``+``.
+    """
+    trimmed = str(s).strip()
+    unsafe_run = r"[^0-9A-Za-z._\-+]+"
+    return re.sub(unsafe_run, "_", trimmed)
+
+def format_float(x):
+    """Format ``x`` with four decimals; ``None`` and non-finite floats give ``"nan"``."""
+    if x is None:
+        return "nan"
+    if isinstance(x, float) and not math.isfinite(x):
+        return "nan"
+    return f"{x:.4f}"
+
+def prompt_order_key(label: str):
+    """Build a sort key that orders labels by their first embedded integer.
+
+    Labels containing digits map to ``(0, <number>)``; labels without digits
+    map to ``(1, <label text>)`` and therefore sort after all numbered ones.
+    """
+    text = str(label)
+    digits = re.search(r"(\d+)", text)
+    if digits is None:
+        return (1, text)
+    return (0, int(digits.group(1)))
+
+def render_group_table(g: pd.DataFrame, model: str, language: str, configuration: str) -> str:
+    """Render one (model, language, configuration) group as a results table.
+
+    The output is a header line followed by a pipe-delimited table. Each task
+    contributes one row holding the mean ``f1`` over its prompts, followed by
+    one row per prompt. The Stderr column is always written as 0.
+
+    Args:
+        g: Rows of the group; must provide ``task``, ``prompts`` and ``f1``.
+        model: Model identifier. The first ``__`` is rewritten to ``/`` (so
+            ``org__name`` becomes ``org/name``); identifiers without ``__`` are
+            used unchanged.
+        language: Language of the group. Not used in the rendered text.
+        configuration: Shot setting. A value of the form ``<n>shot`` is written
+            as ``<n>``; anything else is written as given.
+
+    Returns:
+        The rendered table as a string ending with a newline.
+    """
+    # "0shot" -> "0", "10shot" -> "10"; other values pass through untouched.
+    shot = re.fullmatch(r"(\d+)shot", str(configuration))
+    if shot:
+        configuration = shot.group(1)
+
+    # Indexing model.split("__")[1] raised IndexError for ids without "__" and
+    # dropped everything after a second "__"; partition handles both cases.
+    org, sep, name = str(model).partition("__")
+    if sep:
+        model = f"{org}/{name}"
+
+    header = f"hf (pretrained={model} ), num_fewshot: {configuration}, batch_size: 1"
+    lines = [
+        "|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|",
+        "|-------|-------|------|------|------|----|------|---|------|",
+    ]
+
+    # For each task: one row with the mean over prompts, then the prompt rows.
+    for task, df_task in g.groupby("task", sort=False):
+        f1s = df_task["f1"].to_numpy(dtype=float)
+        task_mean = float(np.mean(f1s)) if f1s.size else float("nan")
+        # str() keeps non-string task labels from crashing on .upper().
+        lines.append(f"| - {str(task).upper()}        |       |      |      |f1    |   | {format_float(task_mean)} |   |0 |")
+
+        # Prompt rows are ordered by the number found in the prompt label.
+        ordered = sorted(
+            zip(df_task["prompts"], df_task["f1"]),
+            key=lambda pair: prompt_order_key(pair[0]),
+        )
+        for prompt_label, f1 in ordered:
+            lines.append(f"|   - {prompt_label}  |       |      |      |f1    |   | {format_float(f1)} |   | 0 |")
+
+    return header + "\n" + "\n".join(lines) + "\n"
+
+def main():
+    """Parse CLI arguments, merge the three score workbooks and write one table per group.
+
+    A text file named ``<model>__<language>__<configuration>.txt`` (each part
+    passed through ``sanitize_filename``) is written into ``--output-dir`` for
+    every distinct (model, language, configuration) combination.
+    """
+    parser = argparse.ArgumentParser(description="Build per-(model,language,configuration) summaries from three prompt Excel files.")
+    parser.add_argument("--p1", required=True, help="Path to llm_scores_p1.xlsx")
+    parser.add_argument("--p2", required=True, help="Path to llm_scores_p2.xlsx")
+    parser.add_argument("--p3", required=True, help="Path to llm_scores_p3.xlsx")
+    parser.add_argument("--output-dir", required=True, help="Directory to write output files")
+    args = parser.parse_args()
+
+    os.makedirs(args.output_dir, exist_ok=True)
+
+    frames = [read_scores(p) for p in (args.p1, args.p2, args.p3)]
+    scores = pd.concat(frames, ignore_index=True)
+
+    # One output file per (model, language, configuration) combination.
+    group_keys = ["model", "language", "configuration"]
+    for (model, language, config), group in scores.groupby(group_keys, sort=False):
+        text = render_group_table(group, model, language, config)
+        stem = "__".join(sanitize_filename(part) for part in (model, language, config))
+        out_path = os.path.join(args.output_dir, f"{stem}.txt")
+        with open(out_path, "w", encoding="utf-8") as f:
+            f.write(text)
+
+# Run the command-line entry point only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..b5685772804c8af4235a8504dc6752bfc9ae5d1d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,13 @@
+# `make style` rewrites files in place; `make quality` only reports problems.
+# Both targets are phony: neither produces a file named after itself.
+.PHONY: style quality
+
+
+# Auto-format the code base and apply ruff's automatic fixes.
+style:
+	python -m black --line-length 119 .
+	python -m isort .
+	ruff check --fix .
+
+
+# Check formatting, import order and lint rules without modifying any file.
+quality:
+	python -m black --check --line-length 119 .
+	python -m isort --check-only .
+	ruff check .
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..f38df7c717e152c32a40642146efb5db18c70532
--- /dev/null
+++ b/app.py
@@ -0,0 +1,1144 @@
+import gradio as gr
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
+from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT, INTRODUCTION_TEXT, LLM_BENCHMARKS_TEXT, TITLE
+from src.tasks import TASK_DESCRIPTIONS, MEASURE_DESCRIPTION
+from src.display.css_html_js import custom_css
+from src.display.utils import BENCHMARK_COLS, COLS, EVAL_COLS, EVAL_TYPES, AutoEvalColumn, ModelType, fields, WeightType, Precision
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.submission.submit import add_new_eval
+import random
+import matplotlib.pyplot as plt
+import re
+import plotly.express as px
+import plotly.graph_objects as go
+import numpy as np
+
+
+
+
+# === NEW: helper for prompt sensitivity (tasks and prompt ids are supplied by the caller) ===
+def calculate_prompt_sensitivity(dataframe, tasks, prompt_ids):
+    """
+    Derive a Prompt Sensitivity Index (PSI) from how often each prompt id is the best one.
+
+    For every task, the share (in %) of non-null '<task> Best Prompt Id' values equal
+    to each id in prompt_ids is computed; the task's coefficient of variation (CV) is
+    the population standard deviation of those shares divided by their mean. A task
+    whose column is missing, or whose mean share is 0, gets a CV of 0.
+
+    Returns a tuple (psi, mean_cv, cv_per_task), where mean_cv is the average of the
+    per-task CVs and psi is mean_cv / 0.5 capped at 1.0.
+    """
+    cv_per_task = []
+    for task in tasks:
+        column = f"{task} Best Prompt Id"
+        has_column = column in dataframe.columns
+        shares = []
+        for pid in prompt_ids:
+            if has_column:
+                n_valid = len(dataframe[column].dropna())
+                n_hits = (dataframe[column] == pid).sum()
+            else:
+                n_valid, n_hits = 0, 0
+            shares.append((n_hits / n_valid * 100) if n_valid > 0 else 0)
+
+        if not shares:
+            cv_per_task.append(0)
+            continue
+
+        mean_share = np.mean(shares)
+        spread = np.std(shares)
+        cv_per_task.append((spread / mean_share) if mean_share > 0 else 0)
+
+    if cv_per_task:
+        mean_cv = np.mean(cv_per_task)
+    else:
+        mean_cv = 0
+
+    if mean_cv >= 0.5:
+        psi = 1.0
+    else:
+        psi = mean_cv / 0.5
+    return psi, mean_cv, cv_per_task
+
+def create_best_model_comparison_table(dataframe, lang: str | None = None, shot: str | None = None):
+    """
+    Build a Plotly table listing, per task, the best overall model and the best-prompt model.
+
+    For each task column (NER, REL, RML, HIS, DIA) the row with the highest numeric
+    value gives 'Best Overall Model' and 'CPS'; the row with the highest numeric
+    '<task> Best Prompt' value gives 'Best Prompt Model' and 'Acc.'. Values are
+    coerced with pd.to_numeric, so non-numeric cells are ignored. A task whose column
+    is missing or holds no numeric value is left out of the table; a missing or empty
+    '<task> Best Prompt' column yields "n/a" in both best-prompt cells.
+
+    Applies optional filters:
+      - lang: value matched against the LANG column; None or "All" disables the filter
+      - shot: "0" or "10", mapped to IS_FS False/True; None or "All" disables the filter
+
+    Returns the plotly Figure holding the table.
+    """
+    tasks = ["NER", "REL", "RML", "HIS", "DIA"]
+    df = dataframe.copy()
+
+    if lang and lang != "All" and "LANG" in df.columns:
+        df = df[df["LANG"] == lang]
+    if shot and shot != "All" and "IS_FS" in df.columns:
+        df = df[df["IS_FS"] == (shot == "10")]
+    # A fresh unique index makes the label returned by idxmax() select exactly one row.
+    df = df.reset_index(drop=True)
+
+    def plain_model_name(cell):
+        # 'Model' cells may hold an HTML link; show only the text between the tags.
+        if isinstance(cell, str) and '<' in cell:
+            match = re.search(r'>([^<]+)<', cell)
+            return match.group(1) if match else cell
+        return str(cell)
+
+    def best_row(column):
+        # Return (model name, numeric score) for the row maximizing `column`,
+        # or None when the column is absent or holds no numeric value.
+        if column not in df.columns or 'Model' not in df.columns:
+            return None
+        scores = pd.to_numeric(df[column], errors='coerce')
+        if not scores.notna().any():
+            return None
+        idx = scores.idxmax()
+        return plain_model_name(df.loc[idx, 'Model']), float(scores.loc[idx])
+
+    table_data = {'Task': [], 'Best Overall Model': [], 'CPS': [], 'Best Prompt Model': [], 'Acc.': []}
+
+    for task in tasks:
+        overall = best_row(task)
+        if overall is None:
+            # Skip only this task: a failed lookup used to `break`, which silently
+            # dropped every later task from the table as well.
+            continue
+        model_name, comb_perf_value = overall
+
+        prompt_best = best_row(f"{task} Best Prompt")
+        if prompt_best is None:
+            best_prompt_model, best_prompt_text = "n/a", "n/a"
+        else:
+            best_prompt_model = prompt_best[0]
+            best_prompt_text = f"{prompt_best[1]:.2f}"
+
+        table_data['Task'].append(task)
+        table_data['Best Overall Model'].append(model_name)
+        # Format the coerced number, not the raw cell, which may be a string.
+        table_data['CPS'].append(f"{comb_perf_value:.2f}")
+        table_data['Best Prompt Model'].append(best_prompt_model)
+        table_data['Acc.'].append(best_prompt_text)
+
+    fig = go.Figure(data=[go.Table(
+        columnwidth=[60, 220, 60, 220, 60],
+        header=dict(
+            values=[f'<b>{col}</b>' for col in table_data.keys()],
+            fill_color=['#2171b5', '#2171b5', '#2171b5', '#4292c6', '#4292c6'],
+            font=dict(color='white', size=12, family='Arial'),
+            align='center', height=30
+        ),
+        cells=dict(
+            values=list(table_data.values()),
+            # Alternate row shading for readability
+            fill_color=[['#f0f0f0' if i % 2 == 0 else 'white' for i in range(len(table_data['Task']))]],
+            font=dict(color='#2c3e50', size=11, family='Arial'),
+            align=['center', 'left', 'center', 'left', 'center'],
+            height=30
+        )
+    )])
+
+    subtitle = []
+    subtitle.append(lang if (lang and lang != "All") else "All languages")
+    subtitle.append(f"{shot}-shot" if (shot and shot != "All") else "All shots")
+
+    fig.update_layout(
+        title={'text': f"Top Model per Task: CPS & Best Prompt  — {', '.join(subtitle)}",
+               'font': {'family': 'Arial', 'size': 14, 'color': '#2c3e50'}},
+        font=dict(family="Arial", size=11),
+        height=420, margin=dict(l=20, r=20, t=50, b=80)
+    )
+    return fig
+
+
+
+# === NEW: Best-model comparison table over all tasks, without language/shot filters ===
+def create_best_model_comparison_table_without_lang(dataframe):
+    """
+    Build a Plotly table with the best overall model per task (NER, REL, RML, HIS, DIA)
+    and the model that achieves the best score with its own best prompt.
+
+    Scores are coerced with pd.to_numeric, so non-numeric cells are ignored. A task
+    whose column is missing or holds no numeric value is left out of the table; a
+    missing or empty '<task> Best Prompt' column yields "n/a" in both best-prompt cells.
+
+    Returns the plotly Figure holding the table.
+    """
+    tasks = ["NER", "REL", "RML", "HIS", "DIA"]
+    # Work on a re-indexed copy so the label returned by idxmax() selects exactly one row.
+    df = dataframe.reset_index(drop=True)
+
+    def plain_model_name(cell):
+        # 'Model' cells may hold an HTML link; show only the text between the tags.
+        if isinstance(cell, str) and '<' in cell:
+            match = re.search(r'>([^<]+)<', cell)
+            return match.group(1) if match else cell
+        return str(cell)
+
+    def best_row(column):
+        # Return (model name, numeric score) for the row maximizing `column`,
+        # or None when the column is absent or holds no numeric value
+        # (idxmax() on such a column does not yield a usable row label).
+        if column not in df.columns or 'Model' not in df.columns:
+            return None
+        scores = pd.to_numeric(df[column], errors='coerce')
+        if not scores.notna().any():
+            return None
+        idx = scores.idxmax()
+        return plain_model_name(df.loc[idx, 'Model']), float(scores.loc[idx])
+
+    table_data = {'Task': [], 'Best Overall Model': [], 'CPS': [], 'Best Prompt Model': [], 'Acc.': []}
+
+    for task in tasks:
+        # Best overall on the task's combined performance
+        overall = best_row(task)
+        if overall is None:
+            continue
+        model_name, comb_perf_value = overall
+
+        # Model with the best prompt for this task
+        prompt_best = best_row(f"{task} Best Prompt")
+        if prompt_best is None:
+            best_prompt_model, best_prompt_text = "n/a", "n/a"
+        else:
+            best_prompt_model = prompt_best[0]
+            best_prompt_text = f"{prompt_best[1]:.2f}"
+
+        table_data['Task'].append(task)
+        table_data['Best Overall Model'].append(model_name)
+        table_data['CPS'].append(f"{comb_perf_value:.2f}")
+        table_data['Best Prompt Model'].append(best_prompt_model)
+        table_data['Acc.'].append(best_prompt_text)
+
+    fig = go.Figure(data=[go.Table(
+        columnwidth=[60, 220, 60, 220, 60],
+        header=dict(
+            values=[f'<b>{col}</b>' for col in table_data.keys()],
+            fill_color=['#2171b5', '#2171b5', '#2171b5', '#4292c6', '#4292c6'],
+            font=dict(color='white', size=12, family='Arial'),
+            align='center', height=30
+        ),
+        cells=dict(
+            values=list(table_data.values()),
+            # Alternate row shading for readability
+            fill_color=[['#f0f0f0' if i % 2 == 0 else 'white' for i in range(len(table_data['Task']))]],
+            font=dict(color='#2c3e50', size=11, family='Arial'),
+            align=['center', 'left', 'center', 'left', 'center'],
+            height=30
+        )
+    )])
+    # NOTE(review): the title still says "(NER/REL)" although five tasks are listed;
+    # left unchanged because it is user-facing text.
+    fig.update_layout(
+        title={'text': "Top Model per Task: CPS & Best Prompt (NER/REL)",
+               'font': {'family': 'Arial', 'size': 14, 'color': '#2c3e50'}},
+        font=dict(family="Arial", size=11),
+        height=420, margin=dict(l=20, r=20, t=50, b=80)
+    )
+    fig.add_annotation(
+        text=("Best Overall Model uses the task's primary metric (CPS). "
+              "Best Prompt Model is the one whose own best prompt yields the highest score."),
+        xref="paper", yref="paper", x=0.5, y=-0.20, showarrow=False,
+        font=dict(size=11, color="gray", family="Arial"), align="center", xanchor="center"
+    )
+    return fig
+
+def create_prompt_heatmap(dataframe, lang: str | None = None, shot: str | None = None):
+    """
+    Draw a heatmap of how often each prompt is a model's best prompt, per task.
+
+    Each cell is the percentage of non-null '<task> Best Prompt Id' values that refer
+    to the row's prompt, for the tasks NER, REL, RML, HIS and DIA. Ids are compared by
+    their digits only, so 1, '1' and 'p1' count as the same prompt. Rows are the ids
+    labelled p1, p2 or p3; when none is found, rows for 1, 2 and 3 are used.
+    Optional filters:
+      - lang: value matched against the LANG column; None or "All" keeps every row
+      - shot: "0" or "10", matched against IS_FS (False/True); None or "All" keeps every row
+    """
+    tasks = ["NER", "REL", "RML", "HIS", "DIA"]
+    allowed_labels = {"p1", "p2", "p3"}
+
+    df = dataframe.copy()
+    if lang and lang != "All" and "LANG" in df.columns:
+        df = df[df["LANG"] == lang]
+    if shot and shot != "All" and "IS_FS" in df.columns:
+        # 10-shot rows have IS_FS True, 0-shot rows have IS_FS False
+        df = df[df["IS_FS"] == (shot == "10")]
+
+    def digits_of(value):
+        # Keep only the digit characters of the value's text form
+        return re.sub(r'[^0-9]', '', str(value))
+
+    def label_for(pid):
+        if isinstance(pid, str):
+            return pid
+        try:
+            return f"p{int(pid)}"
+        except Exception:
+            return str(pid)
+
+    # Non-null id values per task, for the tasks whose id column exists
+    ids_by_task = {}
+    for task in tasks:
+        col = f"{task} Best Prompt Id"
+        if col in df.columns:
+            ids_by_task[task] = df[col].dropna()
+
+    seen_ids = set()
+    for values in ids_by_task.values():
+        seen_ids.update(values.unique())
+
+    ordered_ids = sorted(list(seen_ids), key=lambda x: int(digits_of(x) or 0))
+    prompt_ids_raw = [pid for pid in ordered_ids if label_for(pid) in allowed_labels]
+    if not prompt_ids_raw:
+        prompt_ids_raw = [1, 2, 3]
+    y_tick_labels = [label_for(pid) for pid in prompt_ids_raw]
+
+    matrix, hovers = [], []
+    for pid in prompt_ids_raw:
+        pid_digits = digits_of(pid)
+        pid_label = label_for(pid)
+        row, hover_row = [], []
+        for task in tasks:
+            values = ids_by_task.get(task)
+            if values is None or len(values) == 0:
+                row.append(0)
+                hover_row.append(f"<b>{task} — {pid_label}</b><br>No data")
+                continue
+            total = len(values)
+            # An id without any digit never matches
+            count = sum(digits_of(v) == pid_digits for v in values) if pid_digits != "" else 0
+            pct = count / total * 100
+            row.append(pct)
+            hover_row.append(f"<b>{task} — {pid_label}</b><br>Models: {count}/{total}<br>Percentage: {pct:.1f}%")
+        matrix.append(row)
+        hovers.append(hover_row)
+
+    cell_text = [[f"{val:.0f}%" for val in row] for row in matrix]
+    heatmap = go.Heatmap(
+        z=matrix, x=tasks, y=y_tick_labels,
+        colorscale=[[0,'#f7fbff'],[0.2,'#deebf7'],[0.4,'#9ecae1'],[0.6,'#4292c6'],[0.8,'#2171b5'],[1,'#08519c']],
+        text=cell_text,
+        texttemplate="%{text}", textfont={"size": 11, "family": "Arial"},
+        hovertemplate='%{customdata}<extra></extra>', customdata=hovers,
+        colorbar=dict(title="% Models", ticksuffix="%"),
+        zmin=0, zmax=100
+    )
+    fig = go.Figure(data=heatmap)
+
+    title_parts = [
+        lang if (lang and lang != "All") else "All languages",
+        f"{shot}-shot" if (shot and shot != "All") else "All shots",
+    ]
+    fig.update_layout(
+        title={'text': f"Most Effective Prompts  — {', '.join(title_parts)}",
+               'font': {'family': 'Arial', 'size': 14, 'color': '#2c3e50'}},
+        xaxis_title="Task", yaxis_title="Prompt",
+        font=dict(family="Arial", size=11), margin=dict(b=100),
+        template="plotly_white", dragmode=False, height=420
+    )
+    fig.update_xaxes(fixedrange=True)
+    fig.update_yaxes(fixedrange=True)
+    return fig
+
+
+# === NEW: Prompt heatmap over all tasks (3 prompts p1, p2, p3), without language/shot filters ===
+def create_prompt_heatmap_without_lang(dataframe):
+    """
+    Draw a heatmap of the share of models (in %) whose best prompt is each prompt id.
+
+    Covers the tasks NER, REL, RML, HIS and DIA and the ids labelled p1, p2 and p3
+    (rows for 1, 2 and 3 are used when no such id is found). Ids are compared by their
+    digits only, so integer ids (1/2/3) and string ids ('p1'/'p2'/'p3') are matched
+    alike. The mean coefficient of variation from calculate_prompt_sensitivity is
+    shown as an annotation above the plot.
+    """
+    tasks = ["NER", "REL", "RML", "HIS", "DIA"]
+    allowed_labels = {"p1", "p2", "p3"}
+
+    def digits_of(value):
+        # Keep only the digit characters of the value's text form
+        return re.sub(r'[^0-9]', '', str(value))
+
+    def label_for(pid):
+        # String ids are shown as-is (e.g. 'p1'); other ids are shown as p<int>
+        if isinstance(pid, str):
+            return pid
+        try:
+            return f"p{int(pid)}"
+        except Exception:
+            return str(pid)
+
+    # Non-null id values per task, for the tasks whose id column exists
+    ids_by_task = {}
+    for task in tasks:
+        col = f"{task} Best Prompt Id"
+        if col in dataframe.columns:
+            ids_by_task[task] = dataframe[col].dropna()
+
+    seen_ids = set()
+    for values in ids_by_task.values():
+        seen_ids.update(values.unique())
+
+    ordered_ids = sorted(list(seen_ids), key=lambda x: int(digits_of(x) or 0))
+    # Keep p1/p2/p3 only; fall back to 1..3 when nothing usable was found
+    prompt_ids_raw = [pid for pid in ordered_ids if label_for(pid) in allowed_labels]
+    if not prompt_ids_raw:
+        prompt_ids_raw = [1, 2, 3]
+
+    y_tick_labels = [label_for(pid) for pid in prompt_ids_raw]
+
+    matrix, hovers = [], []
+    for pid in prompt_ids_raw:
+        pid_digits = digits_of(pid)
+        pid_label = label_for(pid)
+        row, hover_row = [], []
+        for task in tasks:
+            values = ids_by_task.get(task)
+            if values is None:
+                row.append(0)
+                hover_row.append(f"<b>{task} — {pid_label}</b><br>No data")
+                continue
+            total = len(values)
+            # An id without any digit never matches
+            count = sum(digits_of(v) == pid_digits for v in values) if pid_digits != "" else 0
+            pct = (count / total * 100) if total > 0 else 0
+            row.append(pct)
+            hover_row.append(
+                f"<b>{task} — {pid_label}</b><br>Models: {count}/{total}<br>Percentage: {pct:.1f}%"
+            )
+        matrix.append(row)
+        hovers.append(hover_row)
+
+    cell_text = [[f"{val:.0f}%" for val in row] for row in matrix]
+    heatmap = go.Heatmap(
+        z=matrix, x=tasks, y=y_tick_labels,
+        colorscale=[[0,'#f7fbff'],[0.2,'#deebf7'],[0.4,'#9ecae1'],[0.6,'#4292c6'],[0.8,'#2171b5'],[1,'#08519c']],
+        text=cell_text,
+        texttemplate="%{text}",
+        textfont={"size": 11, "family": "Arial"},
+        hovertemplate='%{customdata}<extra></extra>',
+        customdata=hovers,
+        colorbar=dict(title="% Models", ticksuffix="%"),
+        zmin=0, zmax=100
+    )
+    fig = go.Figure(data=heatmap)
+    fig.update_layout(
+        title={'text': "Most Effective Prompts Across Models (NER/REL)",
+               'font': {'family': 'Arial', 'size': 14, 'color': '#2c3e50'}},
+        xaxis_title="Task", yaxis_title="Prompt",
+        font=dict(family="Arial", size=11),
+        margin=dict(b=120), template="plotly_white", dragmode=False, height=420
+    )
+
+    # Only the mean CV of the sensitivity result is displayed
+    _psi, mean_cv, _per_task = calculate_prompt_sensitivity(dataframe, tasks, prompt_ids_raw)
+    fig.add_annotation(
+        text=f"Prompt Sensitivity (mean CV): {mean_cv:.2f}",
+        xref="paper", yref="paper", x=0.3, y=1.12, showarrow=False,
+        font=dict(size=11, color="#2c3e50", family="Arial")
+    )
+
+    fig.update_xaxes(fixedrange=True)
+    fig.update_yaxes(fixedrange=True)
+    return fig
+
+
+
+
+
+
+
+def mean_of_max_per_field(df):
+    """
+    Take the maximum of each task column and return the mean of those maxima.
+
+    Args:
+        df (pd.DataFrame): DataFrame providing the columns NER, REL, RML, DIA and HIS.
+            Cells that cannot be parsed as numbers are ignored.
+
+    Returns:
+        float: mean of the per-column maxima
+
+    Raises:
+        ValueError: if any of the expected columns is absent
+    """
+    task_columns = ["NER", "REL", "RML", "DIA", "HIS"]
+
+    # Fail early when an expected column is absent
+    absent = [name for name in task_columns if name not in df.columns]
+    if absent:
+        raise ValueError(f"Le seguenti colonne mancano nel DataFrame: {absent}")
+
+    # Per-column maximum over the numeric view of the data, then their mean
+    numeric = df[task_columns].apply(pd.to_numeric, errors='coerce')
+    per_column_max = numeric.max(skipna=True)
+    return per_column_max.mean()
+
+
+def barplot_mean_few_minus_zero_shot(dataframe, tasks=None):
+    """
+    Bar chart of the mean (few-shot minus zero-shot) score difference per task.
+
+    For every task column present in the dataframe, rows with IS_FS True are joined
+    to rows with IS_FS False on 'Model', pairs with a missing value are discarded, and
+    the mean of the differences becomes the bar height. Tasks without any complete
+    pair are omitted. When tasks is None, NER, REL, RML, DIA and HIS are used.
+    """
+    selected_tasks = ["NER", "REL", "RML", "DIA", "HIS"] if tasks is None else tasks
+
+    mean_delta = {}
+    for task in selected_tasks:
+        if task not in dataframe.columns:
+            continue
+
+        few_col, zero_col = f"{task}_few", f"{task}_zero"
+
+        # Split the rows by shot setting and pair them up per model
+        few_rows = dataframe[dataframe['IS_FS'].eq(True)][["Model", task]]
+        zero_rows = dataframe[dataframe['IS_FS'].eq(False)][["Model", task]]
+        paired = pd.merge(few_rows, zero_rows, on="Model", suffixes=("_few", "_zero"))
+        paired = paired.dropna(subset=[few_col, zero_col])
+
+        if not paired.empty:
+            mean_delta[task] = (paired[few_col] - paired[zero_col]).mean()
+
+    bars = go.Bar(
+        x=list(mean_delta.keys()),
+        y=list(mean_delta.values()),
+        marker_color="#ff7f0e",
+        text=[f"{v:.2f}" for v in mean_delta.values()],
+        textposition="outside",
+        hovertemplate="<b>%{x}</b><br>Mean Delta Accuracy: %{y:.2f}%<extra></extra>"
+    )
+    fig = go.Figure([bars])
+
+    fig.update_layout(
+        title="Mean Accuracy Difference (Few-shot − Zero-shot) per Task",
+        xaxis_title="",
+        yaxis_title="Mean Delta Combined Performance",
+        template="plotly_white",
+        font=dict(family="Arial", size=13),
+    )
+
+    # Caption below the plot
+    fig.add_annotation(
+        text="10-shot learning generally outperforms zero-shot. <br>",
+        xref="paper", yref="paper",
+        x=0, y=-0.2,
+        showarrow=False,
+        font=dict(size=11, color="gray"),
+        align="left"
+    )
+
+    return fig
+
+
+def boxplot_per_task(dataframe=None, baselines=None, references=None):
+    """
+    Box plot of the score distribution of each task (NER, REL, RML, HIS, DIA).
+
+    Args:
+        dataframe: DataFrame with one column per task; tasks without a column are
+            skipped. When None, 20 random values per task are generated after seeding
+            NumPy's global RNG with 42.
+        baselines: not drawn by this function. When None, one random integer per task
+            is drawn from NumPy's global RNG.
+        references: not used by this function.
+
+    Returns the plotly Figure, with the y axis fixed to the range 0-100.
+    """
+    tasks = ["NER", "REL", "RML", "HIS", "DIA"]
+    palette = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
+               "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]
+
+    if dataframe is None:
+        # Demo data: reproducible thanks to the fixed seed
+        np.random.seed(42)
+        demo_columns = {}
+        for task in tasks:
+            demo_columns[task] = np.random.uniform(0.4, 0.9, 20) * 100
+        dataframe = pd.DataFrame(demo_columns)
+
+    if baselines is None:
+        baselines = {}
+        for task in tasks:
+            baselines[task] = np.random.randint(50, 70)
+
+    fig = go.Figure()
+
+    for position, task in enumerate(tasks):
+        if task not in dataframe.columns:
+            continue
+        task_color = palette[position]
+        box = go.Box(
+            y=dataframe[task].dropna().tolist(),
+            name=task,
+            marker=dict(color=task_color),
+            line=dict(color="black", width=2),
+            fillcolor=task_color,
+            opacity=0.7,
+            hovertemplate=f"<b>{task}</b><br>Accuracy: %{{y:.2f}}%<extra></extra>",
+            width=0.6,
+            whiskerwidth=0.2,
+            quartilemethod="linear"
+        )
+        fig.add_trace(box)
+
+    fig.update_layout(
+        title="Distribution of Model Accuracy by Task",
+        xaxis_title="Task",
+        yaxis_title="Combined Performance",
+        template="plotly_white",
+        boxmode="group",
+        dragmode=False,
+        font=dict(family="Arial", size=10),
+        margin=dict(b=80),
+    )
+
+    # Caption slot below the plot; its text is currently empty
+    fig.add_annotation(
+        text="",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.30,
+        showarrow=False,
+        font=dict(size=11, color="gray"),
+        align="left"
+    )
+
+    fig.update_yaxes(range=[0, 100], fixedrange=True)
+
+    return fig
+
+# EVALITA results, keyed by task code
+BASELINES = {
+    "TE": 71.00,
+    "SA": 66.38,
+    "HS": 80.88,
+    "AT": 82.40,
+    "WIC": 85.00,
+    "LS": 38.82,
+    "SU": 38.91,
+    "NER": 88.00,
+    "REL": 62.99,
+}
+
+# GPT-4o results, keyed by task code
+REFERENCES = {"NER": 79.11, "REL": 63.32, "LS": 59.25, "SU": 33.04}
+
+
+def boxplot_prompts_per_task(dataframe, tasks=None):
+    """
+    Grouped bar chart comparing, per task, the mean prompt-average score with the
+    mean best-prompt score; the most frequent best prompt id is written above the
+    best-prompt bar.
+
+    Args:
+        dataframe: DataFrame providing, for each plotted task, the columns
+            '<task> Prompt Average', '<task> Best Prompt' and '<task> Best Prompt Id'.
+            Tasks lacking any of the three are skipped. The caller's DataFrame is
+            not modified.
+        tasks: task codes to plot; defaults to TE, SA, HS, AT, WIC, FAQ, LS, SU,
+            NER and REL.
+
+    Returns the plotly Figure.
+    """
+    if tasks is None:
+        tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
+
+    # Prompt ids 1 and 2 of these tasks are displayed as 7 and 8. Work on a copy so
+    # the caller's DataFrame is left untouched, and remap only the columns that
+    # exist (indexing a missing one raised KeyError).
+    dataframe = dataframe.copy()
+    cols_to_update = ["REL Best Prompt Id", "NER Best Prompt Id", "SU Best Prompt Id", "LS Best Prompt Id"]
+    for col in cols_to_update:
+        if col in dataframe.columns:
+            dataframe[col] = dataframe[col].replace({1: 7, 2: 8})
+
+    fig = go.Figure()
+
+    # Collected per series so that Average and Best each get a single legend entry
+    avg_x, avg_y = [], []
+    best_x, best_y, best_text = [], [], []
+
+    for task in tasks:
+        avg_col = f"{task} Prompt Average"
+        best_col = f"{task} Best Prompt"
+        best_id_col = f"{task} Best Prompt Id"
+
+        if all(col in dataframe.columns for col in [avg_col, best_col, best_id_col]):
+            avg_x.append(task)
+            avg_y.append(dataframe[avg_col].mean())
+
+            best_x.append(task)
+            best_y.append(dataframe[best_col].mean())
+            # Most frequent best prompt id; mode() is empty when the column has no values
+            id_mode = dataframe[best_id_col].mode()
+            best_text.append(f"P:{id_mode.iloc[0]}" if not id_mode.empty else "P:n/a")
+
+    # Average accuracy bars (blue)
+    fig.add_trace(go.Bar(
+        x=avg_x,
+        y=avg_y,
+        name="Avg. Accuracy",
+        marker_color="#1f77b4",
+    ))
+
+    # Best prompt bars (red)
+    fig.add_trace(go.Bar(
+        x=best_x,
+        y=best_y,
+        name="Best Prompt",
+        marker_color="#d62728",
+    ))
+
+    # Prompt id label placed slightly above each best-prompt bar
+    for x, y, text in zip(best_x, best_y, best_text):
+        fig.add_annotation(
+            x=x,
+            y=y + 3,
+            text=text,
+            showarrow=False,
+            font=dict(size=12, color="black")
+        )
+
+    fig.update_layout(
+        title= "Prompt Accuracy: Avg vs Best",
+        xaxis_title="Task",
+        yaxis_title="Combined Performance",
+        barmode='group',
+        template="plotly_white",
+        font=dict(family="Arial", size=10),
+        yaxis=dict(range=[0, 100], fixedrange=True)
+    )
+
+    # Caption added as a separate annotation below the plot
+    fig.add_annotation(
+        text="There is no single prompt that performs best across all tasks.<br>"
+             "Different prompts achieve the highest accuracy on different tasks.",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.3,
+        showarrow=False,
+        font=dict(size=11, color="gray"),
+        align="center",
+        xanchor="center"
+    )
+
+    return fig
+
+
+def line_chart(dataframe):
+    """Scatter plot of average combined performance against parameter count.
+
+    Rows are split on the ``IS_FS`` column into a blue "10-Shot" trace
+    (``IS_FS == True``) and a red "0-Shot" trace (``IS_FS == False``).  Marker
+    size grows with ``#Params (B)`` within each trace, and the model with the
+    highest ``Avg. Comb. Perf. ⬆️`` is labelled with an arrow.
+
+    Args:
+        dataframe: pandas DataFrame with the columns ``IS_FS``,
+            ``#Params (B)``, ``Avg. Comb. Perf. ⬆️`` and ``Model``.
+
+    Returns:
+        plotly.graph_objects.Figure: the scatter plot.  An empty subset yields
+        an empty trace, and the "best model" arrow is omitted when no row has
+        a score.
+    """
+
+    def scale_sizes(values, min_size=8, max_size=30):
+        # Map values linearly onto [min_size, max_size] so markers are neither
+        # tiny nor huge.  Missing values are ignored when finding the range
+        # (a single NaN would otherwise poison min()/max()) and get min_size.
+        midpoint = (min_size + max_size) / 2
+        known = [val for val in values if pd.notna(val)]
+        if not known or max(known) <= min(known):
+            return [midpoint] * len(values)
+        vmin, vmax = min(known), max(known)
+        return [
+            min_size + (val - vmin) / (vmax - vmin) * (max_size - min_size) if pd.notna(val) else min_size
+            for val in values
+        ]
+
+    def display_name(model_cell):
+        # The regex pulls the text between '>' and '<', i.e. the visible part
+        # of a markup-wrapped model name; cells without markup are shown as-is
+        # instead of crashing on a failed match.
+        match = re.search(r'>([^<]+)<', str(model_cell))
+        return match.group(1) if match else str(model_cell)
+
+    fig = go.Figure()
+    hover = '<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>'
+
+    # (performance, params, label) for every plotted row, few-shot rows first.
+    points = []
+    for is_few_shot, legend_name, color in ((True, '10-Shot', 'blue'), (False, '0-Shot', 'red')):
+        subset = dataframe[dataframe['IS_FS'] == is_few_shot]
+        xs = subset['#Params (B)'].tolist()
+        ys = subset['Avg. Comb. Perf. ⬆️'].tolist()
+        labels = [display_name(m) for m in subset['Model'].tolist()]
+
+        fig.add_trace(go.Scatter(
+            x=xs,
+            y=ys,
+            mode='markers',
+            name=legend_name,
+            marker=dict(
+                color=color,
+                size=scale_sizes(xs)
+            ),
+            hovertemplate=hover,
+            customdata=labels
+        ))
+        points.extend(zip(ys, xs, labels))
+
+    # Label the best-scoring model; the first row wins a tie.  Rows without a
+    # score are left out of the comparison.
+    scored = [point for point in points if pd.notna(point[0])]
+    if scored:
+        max_y, max_x, max_label = max(scored, key=lambda point: point[0])
+        fig.add_annotation(
+            x=max_x,
+            y=max_y,
+            text=f"{max_label}",
+            showarrow=True,
+            arrowhead=2,
+            arrowsize=1,
+            arrowwidth=2,
+            arrowcolor="black",
+            font=dict(size=11, color="black"),
+            xshift=10,
+            yshift=10,
+            ax = -30, ay = -20,  # place the label up and to the left of the point
+            xanchor = "right"  # right-align the label relative to the point
+        )
+
+    fig.update_layout(
+        title="Avg. Combined Performance vs #Params",
+        xaxis_title="#Params (B)",
+        yaxis_title="Avg. Combined Performance",
+        template="plotly_white",
+        hovermode="closest",
+        font=dict(family="Arial", size=10),
+        dragmode=False,
+        xaxis=dict(
+            # One label per tick value (the original listed six values but
+            # only five labels).
+            tickvals=[0, 25, 50, 75, 100, 125],
+            ticktext=["0", "25", "50", "75", "100", "125"]
+        ),
+        yaxis=dict(
+            tickvals=[0, 20, 40, 60, 80, 100],  # fixed ticks
+            range=[0, 100]  # fixed range
+        )
+    )
+
+    # Caption, centred below the plot area in paper coordinates.
+    fig.add_annotation(
+        text="Accuracy generally rises with #Params, but smaller models <br>"
+             "with 10-shot can outperform larger zero-shot models.",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.3,
+        showarrow=False,
+        font=dict(size=11, color="gray"),
+        align="center",
+        xanchor="center"
+    )
+
+    fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
+    fig.update_yaxes(fixedrange=True)
+
+    return fig
+
+
+# Tab metadata (icon, name, tooltip) for the multiple-choice tasks.
+# Every entry is currently disabled, so the mapping is empty.
+TASK_METADATA_MULTIPLECHOICE = {
+    #"TE": {"icon": "📊", "name": "Textual Entailment", "tooltip": ""},
+    #"SA": {"icon": "😃", "name": "Sentiment Analysis", "tooltip": ""},
+    #"HS": {"icon": "⚠️", "name": "Hate Speech", "tooltip": ""},
+    #"AT": {"icon": "🏥", "name": "Admission Test", "tooltip": ""},
+    #"WIC": {"icon": "🔤", "name": "Word in Context", "tooltip": ""},
+    #"FAQ": {"icon": "❓", "name": "Frequently Asked Questions", "tooltip": ""}
+}
+
+# Tab metadata for the generative tasks, keyed by task code.  Each row below
+# is (task code, icon, name, tooltip); insertion order is the tab order.
+TASK_METADATA_GENERATIVE = {
+    code: {"icon": icon, "name": name, "tooltip": tooltip}
+    for code, icon, name, tooltip in (
+        ("NER", "🏷️", "Named Entity Recognition", ""),
+        ("REL", "🔗", "Relation Extraction", ""),
+        ("RML", "😃", "CRF RML", "CRF RML"),
+        ("DIA", "🏥", "CRF Diagnosis", "CRF Diagnosis"),
+        ("HIS", "📝", "CRF History", "CRF History"),
+    )
+}
+
+def restart_space():
+    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
+    space_id = REPO_ID
+    API.restart_space(repo_id=space_id)
+
+
+# Size markers eligible for a "best in class" badge.
+_BADGED_SIZES = ("🔵🔵🔵", "🔵🔵", "🔵")
+# Badge appended to the best model of a size class, per evaluation mode.
+_FEW_SHOT_BADGE = "🏆"
+_ZERO_SHOT_BADGE = "🎖️"
+
+
+def _rank_and_badge(sorted_dataframe):
+    """Return a re-indexed copy with a 1-based ``Rank`` column and badged models.
+
+    Rows are walked in their given (already sorted) order.  The first row seen
+    for each combination of evaluation mode (truthiness of ``IS_FS``) and
+    ``Size`` marker gets ``" <size><badge>"`` appended to its ``Model`` value;
+    every other row keeps its ``Model`` value unchanged.
+    """
+    ranked = sorted_dataframe.reset_index(drop=True)
+    ranked["Rank"] = ranked.index + 1
+
+    awarded = set()  # (is_few_shot, size) pairs that already have a badge
+    models = []
+    for _, row in ranked.iterrows():
+        is_few_shot = bool(row["IS_FS"])
+        size = row["Size"]
+        if size in _BADGED_SIZES and (is_few_shot, size) not in awarded:
+            awarded.add((is_few_shot, size))
+            badge = _FEW_SHOT_BADGE if is_few_shot else _ZERO_SHOT_BADGE
+            models.append(f"{row['Model']} {size}{badge}")
+        else:
+            models.append(row["Model"])
+
+    ranked["Model"] = models
+    return ranked
+
+
+def _build_leaderboard(value, hidden_columns, language_label, extra_datatypes=()):
+    """Create the ``Leaderboard`` component shared by the benchmark and task tabs."""
+    field_list = fields(AutoEvalColumn)
+    return Leaderboard(
+        value=value,
+        datatype=[c.type for c in field_list] + list(extra_datatypes),
+        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+        hide_columns=hidden_columns or [c.name for c in field_list if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.fewshot_symbol.name, type="checkboxgroup", label="N-Shot Learning (FS): "),
+            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label=language_label),
+            ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0, max=100, default=[0, 100],
+                         label="Select the number of parameters (B)"),
+        ],
+        bool_checkboxgroup_label="Evaluation Mode",
+        interactive=False,
+    )
+
+
+def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
+    """Build the main benchmark leaderboard.
+
+    The table is sorted by ``"Avg. Comb. Perf. ⬆️"`` in descending order, gets
+    a 1-based ``Rank`` column, and the best model of every size class is
+    badged separately for few-shot and zero-shot rows.
+
+    Args:
+        dataframe: pandas DataFrame with the leaderboard rows.
+        default_selection: accepted for call compatibility; not used.
+        hidden_columns: column names to hide; when falsy, the columns flagged
+            ``hidden`` on ``AutoEvalColumn`` are hidden instead.
+
+    Returns:
+        Leaderboard: the configured, non-interactive component.
+
+    Raises:
+        ValueError: if ``dataframe`` is ``None`` or empty.
+    """
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    sorted_dataframe = _rank_and_badge(
+        dataframe.sort_values(by="Avg. Comb. Perf. ⬆️", ascending=False)
+    )
+    return _build_leaderboard(sorted_dataframe, hidden_columns, "Languages: ")
+
+
+def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=None):
+    """Build the leaderboard for a single task.
+
+    Rows whose ``"Combined Performance"`` is non-numeric, missing or zero are
+    dropped.  The rest are sorted by that column in descending order, ranked,
+    and badged the same way as the main leaderboard.
+
+    Args:
+        dataframe: pandas DataFrame whose task columns were already renamed
+            to the generic names (``"Combined Performance"`` etc.).
+        default_selection: accepted for call compatibility; not used.
+        hidden_columns: column names to hide; when falsy, the columns flagged
+            ``hidden`` on ``AutoEvalColumn`` are hidden instead.
+
+    Returns:
+        Leaderboard: the configured, non-interactive component.
+
+    Raises:
+        ValueError: if ``dataframe`` is ``None`` or empty.
+    """
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    # Coerce scores to numbers, then keep only rows with a real, non-zero score.
+    scores = pd.to_numeric(dataframe["Combined Performance"], errors="coerce")
+    clean_df = dataframe.assign(**{"Combined Performance": scores})
+    clean_df = clean_df.loc[scores.notna() & (scores != 0)]
+
+    sorted_dataframe = _rank_and_badge(
+        clean_df.sort_values(by="Combined Performance", ascending=False)
+    )
+
+    # NOTE(review): this changes a process-wide pandas display option on every
+    # call and looks like a debugging leftover; kept so behaviour is unchanged.
+    pd.set_option('display.max_colwidth', None)
+
+    # One extra int datatype is appended after the AutoEvalColumn types,
+    # presumably for the added Rank column (TODO confirm).
+    return _build_leaderboard(sorted_dataframe, hidden_columns, "Languages:  ", extra_datatypes=[int])
+
+
+
+def download_snapshot(repo, local_dir):
+    """Download a dataset snapshot of ``repo`` into ``local_dir``.
+
+    Any exception raised while announcing or performing the download is
+    printed, after which the Space restart is requested.
+    """
+    try:
+        print(f"Downloading from {repo} to {local_dir}...")
+        download_options = dict(
+            repo_id=repo,
+            local_dir=local_dir,
+            repo_type="dataset",
+            tqdm_class=None,
+            etag_timeout=30,
+            token=TOKEN,
+        )
+        snapshot_download(**download_options)
+    except Exception as e:
+        print(f"Error downloading {repo}: {e}")
+        restart_space()
+
+
+# Initialize the app by downloading snapshots.
+# Both calls run at import time; the request queue is fetched first, then the
+# results.
+download_snapshot(QUEUE_REPO, EVAL_REQUESTS_PATH)
+download_snapshot(RESULTS_REPO, EVAL_RESULTS_PATH)
+
+# Load leaderboard data from the directories populated above.
+LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+# NOTE(review): the three queue frames are not referenced again in the rest of
+# this script.
+finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+#print(LEADERBOARD_DF.columns.tolist())
+
+# Mean of the per-task maxima over the leaderboard.
+# NOTE(review): this value is not referenced again in the rest of this script.
+theoretical_max_combined_perf = mean_of_max_per_field(LEADERBOARD_DF)
+
+# Prepare the main interface.
+# Layout, top to bottom: title banner, introduction text, a row with two
+# overview charts, a row with the language / n-shot filters, a row with the
+# prompt heatmap and best-model table, the tab strip, and finally the
+# citation and credits accordions.
+demo = gr.Blocks(css=custom_css)
+with demo:
+    #gr.HTML(TITLE)
+    # Inline HTML title banner (used instead of the TITLE constant above).
+    gr.HTML(
+        """
+        <div style="display: flex; align-items: center; position: relative; width: 100%; height: 60px; padding: 10px 0;">
+            <h1 style="
+                margin: 0 auto; 
+                font-weight: 900; 
+                font-size: 5.5em; 
+                letter-spacing: 2px; 
+                text-transform: uppercase; 
+                color: red;
+                background: linear-gradient(90deg, #1f77b4, #00c6ff); 
+                -webkit-background-clip: text; 
+                -webkit-text-fill-color: transparent; 
+                text-shadow: 2px 2px 8px rgba(0.2,0,0,0);
+            ">
+                ECREAM-LLM Leaderboard
+            </h1>
+        </div>
+        """
+    )
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+    # The overview charts go right below the title bar and above the tabs.
+    with gr.Row():
+        gr.Plot(value=line_chart(LEADERBOARD_DF), elem_id="line-chart")
+        gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES, REFERENCES), elem_id="boxplot-task")
+
+    # === NEW: second row with the 2 extra plots (NER/REL + p1..p3) ===
+    #with gr.Row():
+        #gr.Plot(value=create_prompt_heatmap(LEADERBOARD_DF), elem_id="prompt-heatmap")
+        #gr.Plot(value=create_best_model_comparison_table(LEADERBOARD_DF), elem_id="best-model-table")
+    # === NEW: gray background wrapper for combos ===
+    # Filter dropdowns; the value "All" means "no filter" (see _update_both).
+    with gr.Row(elem_id="filters-wrap"):
+      lang_dd = gr.Dropdown(
+        choices=["All", "EN", "IT", "SL", "SK", "GR", "PL"],
+        value="All", label="Language: ", scale=1
+      )
+      shot_dd = gr.Dropdown(
+        choices=["All", "0", "10"],
+        value="All", label="N-Shot: ", scale=1
+      )
+
+    # Initial render of both plots with no language and no n-shot filter.
+    with gr.Row():
+      heatmap_plot = gr.Plot(value=create_prompt_heatmap(LEADERBOARD_DF, None, None), elem_id="prompt-heatmap")
+      table_plot = gr.Plot(value=create_best_model_comparison_table(LEADERBOARD_DF, None, None), elem_id="best-model-table")
+
+    def _update_both(lang, shot):
+      # Rebuild both plots for the selected filters; "All" is passed on as None.
+      return (
+        create_prompt_heatmap(LEADERBOARD_DF, None if lang == "All" else lang, None if shot == "All" else shot),
+        create_best_model_comparison_table(LEADERBOARD_DF, None if lang == "All" else lang, None if shot == "All" else shot)
+      )
+
+    # Changing either dropdown refreshes both plots from the current values of
+    # both dropdowns.
+    lang_dd.change(_update_both, inputs=[lang_dd, shot_dd], outputs=[heatmap_plot, table_plot])
+    shot_dd.change(_update_both, inputs=[lang_dd, shot_dd], outputs=[heatmap_plot, table_plot])
+
+
+  
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+
+        # Main leaderboard tab
+        # NOTE(review): `leaderboard` is rebound in every tab below and never
+        # read afterwards in this script.
+        with gr.TabItem("🏅 Benchmark"):
+
+            leaderboard = init_leaderboard(
+                LEADERBOARD_DF,
+                default_selection=['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL", "RML", "DIA", "HIS"],
+                hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL", "RML", "DIA", "HIS"]]
+            )
+
+
+        # About tab
+        with gr.TabItem("📝 About"):
+            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+        # Task-specific leaderboards for the multiple-choice tasks.
+        # TASK_METADATA_MULTIPLECHOICE has every entry commented out, so this
+        # loop currently adds no tabs.
+        for task, metadata in TASK_METADATA_MULTIPLECHOICE.items():
+
+            with gr.TabItem(f"{metadata['icon']}{task}"):
+
+                task_description = TASK_DESCRIPTIONS.get(task, "Description not available.")
+                gr.Markdown(task_description, elem_classes="markdown-text")
+
+                # Rename the task-prefixed columns to the generic names that
+                # update_task_leaderboard sorts and filters on.
+                leaderboard = update_task_leaderboard(
+                    LEADERBOARD_DF.rename(columns={f"{task} Prompt Average": "Prompt Average", f"{task} Prompt Std": "Prompt Std", f"{task} Best Prompt": "Best Prompt", f"{task} Best Prompt Id": "Best Prompt Id", task: "Combined Performance"}),
+                    default_selection=['Rank', 'Size','LANG', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id'],
+                    hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['Rank', 'Size','LANG', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id']]
+                )
+
+        # Non-interactive separator tab between the two task groups
+        # (not an "About" tab).
+        with gr.TabItem("│", interactive=False):
+            gr.Markdown("", elem_classes="markdown-text")
+
+        # Task-specific leaderboards for the generative tasks.
+        for task, metadata in TASK_METADATA_GENERATIVE.items():
+            with gr.TabItem(f"{metadata['icon']}{task}"):
+                task_description = TASK_DESCRIPTIONS.get(task, "Description not available.")
+                gr.Markdown(task_description, elem_classes="markdown-text1")
+                #print (LEADERBOARD_DF)
+                # Same column renaming as for the multiple-choice tabs above.
+                leaderboard = update_task_leaderboard(
+                    LEADERBOARD_DF.rename(columns={f"{task} Prompt Average": "Prompt Average",
+                                                   f"{task} Prompt Std": "Prompt Std",
+                                                   f"{task} Best Prompt": "Best Prompt",
+                                                   f"{task} Best Prompt Id": "Best Prompt Id",
+                                                   task: "Combined Performance"}),
+                    default_selection=['Rank', 'Size', 'LANG', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt',
+                                       'Best Prompt Id'],
+                    hidden_columns=[col for col in LEADERBOARD_DF.columns if
+                                    col not in ['Rank', 'Size','LANG', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std',
+                                                'Best Prompt', 'Best Prompt Id']]
+                )
+
+    # Citation section
+    with gr.Accordion("📙 Citation", open=False):
+        gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, lines=20, elem_id="citation-button", show_copy_button=True)
+
+    # Funding acknowledgement
+    with gr.Accordion("📙 Credits", open=False):
+        gr.Markdown(
+            """
+            ***This project has been funded by the European Union under:
+
+                   Horizon Europe eCREAM Project (Grant Agreement No.101057726)  
+            """
+        )
+
+# Background job to restart the Space: restart_space is scheduled on a fixed
+# interval of 1800 seconds (30 minutes).
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=1800)
+scheduler.start()
+
+# Launch the app with concurrent queueing (default concurrency limit of 40).
+demo.queue(default_concurrency_limit=40).launch(debug=True,  # Enable Gradio debug mode
+        show_error=True)
\ No newline at end of file
diff --git a/app_17_10_2025.py b/app_17_10_2025.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfc58ad1c422019017f845125f491be834d1f1e9
--- /dev/null
+++ b/app_17_10_2025.py
@@ -0,0 +1,815 @@
+import gradio as gr
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
+from src.about import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT, INTRODUCTION_TEXT, LLM_BENCHMARKS_TEXT, TITLE
+from src.tasks import TASK_DESCRIPTIONS, MEASURE_DESCRIPTION
+from src.display.css_html_js import custom_css
+from src.display.utils import BENCHMARK_COLS, COLS, EVAL_COLS, EVAL_TYPES, AutoEvalColumn, ModelType, fields, WeightType, Precision
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.submission.submit import add_new_eval
+import random
+import matplotlib.pyplot as plt
+import re
+import plotly.express as px
+import plotly.graph_objects as go
+import numpy as np
+
+
+def mean_of_max_per_field(df):
+    """Return the mean of the column-wise maxima of the ``NER`` and ``REL`` columns.
+
+    Args:
+        df (pd.DataFrame): frame that must contain the columns ``NER`` and ``REL``.
+
+    Returns:
+        The mean of the two column maxima.
+
+    Raises:
+        ValueError: if either column is missing from ``df``.
+    """
+    # Only the two generative tasks are averaged; the wider task list
+    # (TE, SA, HS, AT, WIC, FAQ, LS, SU) is currently disabled.
+    task_columns = ["NER", "REL"]
+
+    absent = [name for name in task_columns if name not in df.columns]
+    if absent:
+        raise ValueError(f"Le seguenti colonne mancano nel DataFrame: {absent}")
+
+    per_column_max = df[task_columns].max()
+    return per_column_max.mean()
+
+
+def barplot_mean_few_minus_zero_shot(dataframe, tasks=None):
+    """Bar chart of the mean (few-shot minus zero-shot) score difference per task.
+
+    For each task column present in ``dataframe``, rows with ``IS_FS == True``
+    are joined to rows with ``IS_FS == False`` on ``Model``.  Pairs with a
+    missing score on either side are dropped, and the mean of the remaining
+    differences becomes that task's bar.  Tasks without any complete pair are
+    left out of the chart.
+
+    Args:
+        dataframe: pandas DataFrame with ``IS_FS``, ``Model`` and task columns.
+        tasks: task codes to consider; defaults to the ten built-in task codes.
+
+    Returns:
+        plotly.graph_objects.Figure: the bar chart.
+    """
+    task_codes = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"] if tasks is None else tasks
+
+    mean_delta = {}
+    for task in task_codes:
+        if task not in dataframe.columns:
+            continue
+
+        few_col, zero_col = f"{task}_few", f"{task}_zero"
+
+        # Pair every few-shot row with the zero-shot rows of the same model.
+        few_rows = dataframe.loc[dataframe['IS_FS'] == True, ["Model", task]]
+        zero_rows = dataframe.loc[dataframe['IS_FS'] == False, ["Model", task]]
+        pairs = few_rows.merge(zero_rows, on="Model", suffixes=("_few", "_zero"))
+
+        # Keep only pairs that have a score on both sides.
+        pairs = pairs.dropna(subset=[few_col, zero_col])
+        if pairs.empty:
+            continue
+
+        mean_delta[task] = (pairs[few_col] - pairs[zero_col]).mean()
+
+    bars = go.Bar(
+        x=list(mean_delta.keys()),
+        y=list(mean_delta.values()),
+        marker_color="#ff7f0e",
+        text=[f"{v:.2f}" for v in mean_delta.values()],
+        textposition="outside",
+        hovertemplate="<b>%{x}</b><br>Mean Delta Accuracy: %{y:.2f}%<extra></extra>"
+    )
+    fig = go.Figure(data=[bars])
+
+    fig.update_layout(
+        title="Mean Accuracy Difference (Few-shot − Zero-shot) per Task",
+        xaxis_title="",
+        yaxis_title="Mean Delta Combined Performance",
+        template="plotly_white",
+        font=dict(family="Arial", size=13),
+    )
+
+    # Caption below the plot area, in paper coordinates.
+    fig.add_annotation(
+        text="10-shot learning generally outperforms zero-shot. <br>",
+        xref="paper", yref="paper",
+        x=0, y=-0.2,
+        showarrow=False,
+        font=dict(size=11, color="gray"),
+        align="left"
+    )
+
+    return fig
+
+
+def boxplot_per_task(dataframe=None, baselines=None, references=None):
+    """Draw one box plot per task, with optional baseline and reference lines.
+
+    Only the tasks ``NER`` and ``REL`` are considered, and a task is plotted
+    only when ``dataframe`` has a column of that name.  A dotted black line
+    marks the task's baseline and a dash-dotted red line marks its reference
+    score, when those are available.
+
+    Args:
+        dataframe: pandas DataFrame with one score column per task.  When
+            ``None``, seeded random demo data is generated.
+        baselines: mapping of task code to baseline score.  When ``None``,
+            random integer baselines are generated (demo behaviour).
+        references: mapping of task code to reference score.  When ``None``,
+            no reference lines are drawn.
+
+    Returns:
+        plotly.graph_objects.Figure: the box-plot figure.
+    """
+    tasks = ["NER", "REL"]
+    if dataframe is None:
+        np.random.seed(42)
+        dataframe = pd.DataFrame({
+            task: np.random.uniform(0.4, 0.9, 20) * 100
+            for task in tasks
+        })
+
+    if baselines is None:
+        baselines = {task: np.random.randint(50, 70) for task in tasks}
+    if references is None:
+        # The default used to reach `task in references` and raise TypeError.
+        references = {}
+
+    colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
+              "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]
+
+    fig = go.Figure()
+
+    # Position of the next box on the categorical x axis.  It advances only
+    # when a box is actually added, so the marker lines stay aligned with
+    # their box even if an earlier task column is missing.
+    slot = 0
+    for i, task in enumerate(tasks):
+        if task not in dataframe.columns:
+            continue
+
+        color = colors[i % len(colors)]
+        fig.add_trace(go.Box(
+            y=dataframe[task].dropna().tolist(),
+            name=task,
+            marker=dict(color=color),
+            line=dict(color="black", width=2),
+            fillcolor=color,
+            opacity=0.7,
+            hovertemplate="<b>"+task+"</b><br>Accuracy: %{y:.2f}%<extra></extra>",
+            width=0.6,
+            whiskerwidth=0.2,
+            quartilemethod="linear"
+        ))
+
+        # Baseline: dotted black segment across the box.
+        baseline = baselines.get(task)
+        if baseline is not None:
+            fig.add_shape(
+                type="line",
+                x0=slot - 0.3, x1=slot + 0.3,
+                y0=baseline, y1=baseline,
+                line=dict(color="black", width=2, dash="dot"),
+                xref="x", yref="y"
+            )
+
+        # Reference score: dash-dotted red segment across the box.
+        reference = references.get(task)
+        if reference is not None:
+            fig.add_shape(
+                type="line",
+                x0=slot - 0.3, x1=slot + 0.3,
+                y0=reference, y1=reference,
+                line=dict(color="red", width=2, dash="dashdot"),
+                xref="x", yref="y"
+            )
+
+        slot += 1
+
+    fig.update_layout(
+        title="Distribution of Model Accuracy by Task",
+        xaxis_title="Task",
+        yaxis_title="Combined Performance",
+        template="plotly_white",
+        boxmode="group",
+        dragmode=False,
+        font=dict(family="Arial", size=10),
+        margin=dict(b=80),
+    )
+
+    # Caption slot below the plot; the caption text is currently empty.
+    fig.add_annotation(
+        text="",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.30,
+        showarrow=False,
+        font=dict(size=11, color="gray"),
+        align="left"
+    )
+
+    fig.update_yaxes(range=[0, 100], fixedrange=True)
+
+    return fig
+
+# Per-task scores the author labels "EVALITA results".
+BASELINES = dict(
+    TE=71.00,
+    SA=66.38,
+    HS=80.88,
+    AT=82.40,
+    WIC=85.00,
+    LS=38.82,
+    SU=38.91,
+    NER=88.00,
+    REL=62.99,
+)
+
+# Per-task scores the author labels "GPT-4o".
+REFERENCES = dict(NER=79.11, REL=63.32, LS=59.25, SU=33.04)
+
+
+def boxplot_prompts_per_task(dataframe, tasks=None):
+    """Build a grouped bar chart comparing average and best prompt scores per task.
+
+    A task is plotted only when the frame has all three of its columns
+    ``"<task> Prompt Average"``, ``"<task> Best Prompt"`` and
+    ``"<task> Best Prompt Id"``.  One bar shows the column mean of the prompt
+    averages, a second bar the column mean of the best-prompt scores, and a
+    ``P:<id>`` label above the second bar shows the most frequent best-prompt id.
+
+    Args:
+        dataframe: pandas DataFrame with the per-task prompt columns.  It is
+            never modified; the id remapping below works on a copy.
+        tasks: iterable of task codes to consider; defaults to the ten
+            built-in task codes.
+
+    Returns:
+        plotly.graph_objects.Figure: the grouped bar chart.
+    """
+    if tasks is None:
+        tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
+
+    # For these tasks prompt ids 1 and 2 are shown as 7 and 8.  Work on a copy
+    # so the caller's frame keeps its original ids, and skip columns the frame
+    # does not have instead of raising KeyError.
+    dataframe = dataframe.copy()
+    for col in ("REL Best Prompt Id", "NER Best Prompt Id", "SU Best Prompt Id", "LS Best Prompt Id"):
+        if col in dataframe.columns:
+            dataframe[col] = dataframe[col].replace({1: 7, 2: 8})
+
+    # Parallel lists: one entry per task that is actually plotted.
+    plotted_tasks, avg_y, best_y, best_text = [], [], [], []
+
+    for task in tasks:
+        avg_col = f"{task} Prompt Average"
+        best_col = f"{task} Best Prompt"
+        best_id_col = f"{task} Best Prompt Id"
+        if not all(col in dataframe.columns for col in (avg_col, best_col, best_id_col)):
+            continue
+
+        plotted_tasks.append(task)
+        avg_y.append(dataframe[avg_col].mean())
+        best_y.append(dataframe[best_col].mean())
+
+        # mode() is empty when every id is missing; label that case "n/a"
+        # rather than failing on the first-element lookup.
+        modes = dataframe[best_id_col].mode()
+        best_text.append(f"P:{modes.iloc[0]}" if not modes.empty else "P:n/a")
+
+    fig = go.Figure()
+
+    # Average bars (blue) and best-prompt bars (red): one legend entry each.
+    fig.add_trace(go.Bar(
+        x=plotted_tasks,
+        y=avg_y,
+        name="Avg. Accuracy",
+        marker_color="#1f77b4",
+    ))
+    fig.add_trace(go.Bar(
+        x=plotted_tasks,
+        y=best_y,
+        name="Best Prompt",
+        marker_color="#d62728",
+    ))
+
+    # Most frequent best-prompt id, placed slightly above each bar pair.
+    for x, y, text in zip(plotted_tasks, best_y, best_text):
+        fig.add_annotation(
+            x=x,
+            y=y + 3,
+            text=text,
+            showarrow=False,
+            font=dict(size=12, color="black")
+        )
+
+    fig.update_layout(
+        title= "Prompt Accuracy: Avg vs Best",
+        xaxis_title="Task",
+        yaxis_title="Combined Performance",
+        barmode='group',
+        template="plotly_white",
+        font=dict(family="Arial", size=10),
+        yaxis=dict(range=[0, 100], fixedrange=True)
+    )
+
+    # Caption, positioned in paper coordinates below the plot area.
+    fig.add_annotation(
+        text="There is no single prompt that performs best across all tasks.<br>"
+             "Different prompts achieve the highest accuracy on different tasks.",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.3,
+        showarrow=False,
+        font=dict(size=11, color="gray"),
+        align="center",
+        xanchor="center"
+    )
+
+    return fig
+
+
+def line_chart(dataframe):
+    """Scatter plot of "Avg. Comb. Perf. ⬆️" against "#Params (B)".
+
+    Rows are split on the ``IS_FS`` column into a blue "10-Shot" trace
+    (``IS_FS == True``) and a red "0-Shot" trace (``IS_FS == False``). Marker
+    size grows with the parameter count within each trace, and the point with
+    the highest performance overall is annotated with its model name.
+
+    Args:
+        dataframe: Table with the columns ``IS_FS``, ``#Params (B)``,
+            ``Avg. Comb. Perf. ⬆️`` and ``Model``. The model name is taken
+            from the text between ``>`` and ``<`` in the ``Model`` cell; a
+            cell without such markup is shown as-is.
+
+    Returns:
+        The populated ``go.Figure``.
+    """
+
+    def scale_sizes(values, min_size=8, max_size=30):
+        """Map values linearly onto marker sizes in [min_size, max_size]."""
+        if not values:
+            # min()/max() raise ValueError on an empty group.
+            return []
+        vmin, vmax = min(values), max(values)
+        if not vmax > vmin:
+            # All values equal (or not comparable): use the midpoint size.
+            return [(min_size + max_size) / 2] * len(values)
+        span = vmax - vmin
+        return [min_size + (val - vmin) / span * (max_size - min_size) for val in values]
+
+    def display_name(model_cell):
+        """Return the text between '>' and '<', or the cell itself if absent."""
+        found = re.search(r'>([^<]+)<', str(model_cell))
+        return found.group(1) if found else str(model_cell)
+
+    fig = go.Figure()
+
+    # Accumulated across both traces (10-shot first) to locate the best model.
+    all_x, all_y, all_labels = [], [], []
+
+    for flag, trace_name, color in ((True, '10-Shot', 'blue'), (False, '0-Shot', 'red')):
+        subset = dataframe[dataframe['IS_FS'] == flag]
+        xs = subset['#Params (B)'].tolist()
+        ys = subset['Avg. Comb. Perf. ⬆️'].tolist()
+        labels = [display_name(cell) for cell in subset['Model'].tolist()]
+
+        fig.add_trace(go.Scatter(
+            x=xs,
+            y=ys,
+            mode='markers',
+            name=trace_name,
+            marker=dict(
+                color=color,
+                size=scale_sizes(xs)
+            ),
+            hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
+            customdata=labels
+        ))
+
+        all_x.extend(xs)
+        all_y.extend(ys)
+        all_labels.extend(labels)
+
+    # Highlight the best-performing model; skipped when there is no data at
+    # all, where max() would otherwise raise.
+    if all_y:
+        max_idx = all_y.index(max(all_y))
+        fig.add_annotation(
+            x=all_x[max_idx],
+            y=all_y[max_idx],
+            text=f"{all_labels[max_idx]}",
+            showarrow=True,
+            arrowhead=2,
+            arrowsize=1,
+            arrowwidth=2,
+            arrowcolor="black",
+            font=dict(size=11, color="black"),
+            xshift=10,
+            yshift=10,
+            ax=-30, ay=-20,  # place the label to the upper-left of the point
+            xanchor="right"  # right-align the label relative to the point
+        )
+
+    fig.update_layout(
+        title="Avg. Combined Performance vs #Params",
+        xaxis_title="#Params (B)",
+        yaxis_title="Avg. Combined Performance",
+        template="plotly_white",
+        hovermode="closest",
+        font=dict(family="Arial", size=10),
+        dragmode=False,
+        xaxis=dict(
+            # One label per tick value (the original list was one label short).
+            tickvals=[0, 25, 50, 75, 100, 125],
+            ticktext=["0", "25", "50", "75", "100", "125"]
+        ),
+        yaxis=dict(
+            tickvals=[0, 20, 40, 60, 80, 100],  # fixed ticks
+            range=[0, 100]  # locked range
+        )
+    )
+
+    # Caption, centred below the plot
+    fig.add_annotation(
+        text="Accuracy generally rises with #Params, but smaller models <br>"
+             "with 10-shot can outperform larger zero-shot models.",
+        xref="paper", yref="paper",
+        x=0.5, y=-0.3,
+        showarrow=False,
+        font=dict(size=11, color="gray"),
+        align="center",
+        xanchor="center"
+    )
+
+    fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
+    fig.update_yaxes(fixedrange=True)
+
+    return fig
+
+
+# Multiple-choice tasks that get their own leaderboard tab, keyed by task code
+# (icon, display name, tooltip). Every entry is currently commented out, so
+# the dict is empty and no multiple-choice tab is created.
+TASK_METADATA_MULTIPLECHOICE = {
+    #"TE": {"icon": "📊", "name": "Textual Entailment", "tooltip": ""},
+    #"SA": {"icon": "😃", "name": "Sentiment Analysis", "tooltip": ""},
+    #"HS": {"icon": "⚠️", "name": "Hate Speech", "tooltip": ""},
+    #"AT": {"icon": "🏥", "name": "Admission Test", "tooltip": ""},
+    #"WIC": {"icon": "🔤", "name": "Word in Context", "tooltip": ""},
+    #"FAQ": {"icon": "❓", "name": "Frequently Asked Questions", "tooltip": ""}
+}
+
+# Generative tasks that get their own leaderboard tab, keyed by task code.
+# LS and SU are currently disabled.
+TASK_METADATA_GENERATIVE = {
+    #"LS": {"icon": "🔄", "name": "Lexical Substitution", "tooltip": ""},
+    #"SU": {"icon": "📝", "name": "Summarization", "tooltip": ""},
+    "NER": dict(icon="🏷️", name="Named Entity Recognition", tooltip=""),
+    "REL": dict(icon="🔗", name="Relation Extraction", tooltip=""),
+}
+
+def restart_space():
+    """Restart the Hugging Face space.
+
+    Delegates to ``API.restart_space`` for the repository named by
+    ``REPO_ID``; both names are defined outside this function. Used as the
+    failure fallback of ``download_snapshot`` and as the periodic scheduler
+    job at the bottom of the file.
+    """
+    API.restart_space(repo_id=REPO_ID)
+
+
+# Values of the "Size" column that are eligible for a medal.
+_MEDAL_SIZES = ("🔵🔵🔵", "🔵🔵", "🔵")
+
+
+def _models_with_medals(sorted_dataframe):
+    """Return the "Model" values with a medal appended to each category winner.
+
+    Rows are visited in their current order. The first row seen for every
+    (IS_FS truthiness, Size badge) pair gets "<model> <badge>🏆" when IS_FS is
+    truthy and "<model> <badge>🎖️" otherwise; all other rows keep their value.
+    """
+    already_awarded = set()
+    decorated = []
+    for _, row in sorted_dataframe.iterrows():
+        few_shot = bool(row["IS_FS"])
+        size = row["Size"]
+        if size in _MEDAL_SIZES and (few_shot, size) not in already_awarded:
+            already_awarded.add((few_shot, size))
+            medal = "🏆" if few_shot else "🎖️"
+            decorated.append(f"{row['Model']} {size}{medal}")
+        else:
+            decorated.append(row["Model"])
+    return decorated
+
+
+def _rank_and_award(dataframe, sort_column):
+    """Sort descending by ``sort_column``, add a 1-based "Rank", award medals."""
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    ranked = dataframe.sort_values(by=sort_column, ascending=False).reset_index(drop=True)
+    ranked["Rank"] = ranked.index + 1
+    ranked["Model"] = _models_with_medals(ranked)
+    return ranked
+
+
+def _build_leaderboard(ranked, datatype, hidden_columns, field_list):
+    """Create the ``Leaderboard`` component shared by all leaderboard tabs."""
+    return Leaderboard(
+        value=ranked,
+        datatype=datatype,
+        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+        hide_columns=hidden_columns or [c.name for c in field_list if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.fewshot_symbol.name, type="checkboxgroup", label="N-Shot Learning (FS)"),
+            ColumnFilter(AutoEvalColumn.LANG.name, type="checkboxgroup", label="Languages"),
+            ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0, max=100, default=[0, 100],
+                         label="Select the number of parameters (B)"),
+        ],
+        bool_checkboxgroup_label="Evaluation Mode",
+        interactive=False,
+    )
+
+
+def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
+    """
+    Initialize and return the leaderboard shown on the main benchmark tab.
+
+    The table is sorted by "Avg. Comb. Perf. ⬆️" (descending), gets a 1-based
+    "Rank" column, and the best model of every (few-shot mode, size) category
+    has a medal appended to its "Model" cell.
+
+    Args:
+        dataframe: Leaderboard data; must be non-empty.
+        default_selection: Accepted for interface compatibility; currently
+            unused because column selection is disabled.
+        hidden_columns: Columns to hide. When falsy, the columns flagged
+            ``hidden`` in ``AutoEvalColumn`` are hidden instead.
+
+    Raises:
+        ValueError: If ``dataframe`` is None or empty.
+    """
+    ranked = _rank_and_award(dataframe, "Avg. Comb. Perf. ⬆️")
+    field_list = fields(AutoEvalColumn)
+    return _build_leaderboard(ranked, [c.type for c in field_list], hidden_columns, field_list)
+
+
+def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=None):
+    """
+    Build and return the leaderboard shown on a task-specific tab.
+
+    The table is sorted by "Combined Performance" (descending), gets a 1-based
+    "Rank" column, and the best model of every (few-shot mode, size) category
+    has a medal appended to its "Model" cell.
+
+    Args:
+        dataframe: Leaderboard data whose task columns have already been
+            renamed to the generic names (e.g. "Combined Performance");
+            must be non-empty.
+        default_selection: Accepted for interface compatibility; currently
+            unused because column selection is disabled.
+        hidden_columns: Columns to hide. When falsy, the columns flagged
+            ``hidden`` in ``AutoEvalColumn`` are hidden instead.
+
+    Raises:
+        ValueError: If ``dataframe`` is None or empty.
+    """
+    ranked = _rank_and_award(dataframe, "Combined Performance")
+
+    # NOTE(review): process-wide pandas display option, apparently left over
+    # from debugging prints; kept so global state is unchanged.
+    pd.set_option('display.max_colwidth', None)
+
+    field_list = fields(AutoEvalColumn)
+    # One extra int datatype is appended after the AutoEvalColumn types, as in
+    # the original implementation.
+    return _build_leaderboard(ranked, [c.type for c in field_list] + [int], hidden_columns, field_list)
+
+# The triple-quoted block below is an inert string literal, not executed code:
+# it preserves an earlier version of init_leaderboard for reference only.
+'''
+# Helper function for leaderboard initialization
+def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
+    """Initialize and return a leaderboard."""
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    return Leaderboard(
+        value=dataframe,
+        datatype=[c.type for c in fields(AutoEvalColumn)],
+        select_columns=SelectColumns(
+            default_selection=default_selection or [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+        hide_columns=hidden_columns or [c.name for c in fields(AutoEvalColumn) if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.fewshot_type.name, type="checkboxgroup", label="N-Few-Shot Learning (FS)"),
+            ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0, max=150, label="Select the number of parameters (B)"),
+        ],
+        bool_checkboxgroup_label="Hide models",
+        interactive=False,
+    )
+'''
+
+def download_snapshot(repo, local_dir):
+    """Download the dataset repository ``repo`` into ``local_dir``.
+
+    Best-effort: any exception raised while downloading is printed and
+    followed by a call to ``restart_space()``; nothing is re-raised.
+    """
+    try:
+        print(f"Downloading from {repo} to {local_dir}...")
+        request = dict(
+            repo_id=repo,
+            local_dir=local_dir,
+            repo_type="dataset",
+            tqdm_class=None,
+            etag_timeout=30,
+            token=TOKEN,
+        )
+        snapshot_download(**request)
+    except Exception as err:
+        print(f"Error downloading {repo}: {err}")
+        restart_space()
+
+
+# Initialize the app by downloading snapshots.
+# download_snapshot() does not raise: on failure it prints the error and
+# calls restart_space(), so execution continues past these two lines.
+download_snapshot(QUEUE_REPO, EVAL_REQUESTS_PATH)
+download_snapshot(RESULTS_REPO, EVAL_RESULTS_PATH)
+
+# Load leaderboard data
+LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+# NOTE(review): the three queue frames are not referenced again in the rest of
+# this file as far as visible here - confirm before removing.
+finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+#print(LEADERBOARD_DF.columns.tolist())
+
+# NOTE(review): computed at import time but not used by any active
+# (non-commented) code in the interface definition that follows.
+theoretical_max_combined_perf = mean_of_max_per_field(LEADERBOARD_DF)
+
+# Columns displayed on the main benchmark tab; every other column is hidden.
+_BENCHMARK_TAB_COLUMNS = ['Rank', 'Size', 'LANG', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
+
+# Columns displayed on every task tab, after the task's own columns have been
+# renamed to these generic names; every other column is hidden.
+_TASK_TAB_COLUMNS = ['Rank', 'Size', 'LANG', 'FS', 'Model', 'Combined Performance', 'Prompt Average', 'Prompt Std', 'Best Prompt', 'Best Prompt Id']
+
+
+def _add_task_tab(task, metadata, description_classes):
+    """Create the tab (description + leaderboard) for one task.
+
+    Must be called inside the ``gr.Tabs`` context. Returns the leaderboard
+    component built by ``update_task_leaderboard``.
+    """
+    with gr.TabItem(f"{metadata['icon']}{task}"):
+        task_description = TASK_DESCRIPTIONS.get(task, "Description not available.")
+        gr.Markdown(task_description, elem_classes=description_classes)
+
+        # Expose the task-specific columns under generic names so that the
+        # same leaderboard layout works for every task.
+        return update_task_leaderboard(
+            LEADERBOARD_DF.rename(columns={f"{task} Prompt Average": "Prompt Average",
+                                           f"{task} Prompt Std": "Prompt Std",
+                                           f"{task} Best Prompt": "Best Prompt",
+                                           f"{task} Best Prompt Id": "Best Prompt Id",
+                                           task: "Combined Performance"}),
+            default_selection=list(_TASK_TAB_COLUMNS),
+            hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in _TASK_TAB_COLUMNS]
+        )
+
+
+# Prepare the main interface
+demo = gr.Blocks(css=custom_css)
+with demo:
+    # Title banner (replaces the plain gr.HTML(TITLE) header)
+    gr.HTML(
+        """
+        <div style="display: flex; align-items: center; position: relative; width: 100%; height: 60px; padding: 10px 0;">
+            <h1 style="
+                margin: 0 auto; 
+                font-weight: 900; 
+                font-size: 5.5em; 
+                letter-spacing: 2px; 
+                text-transform: uppercase; 
+                color: red;
+                background: linear-gradient(90deg, #1f77b4, #00c6ff); 
+                -webkit-background-clip: text; 
+                -webkit-text-fill-color: transparent; 
+                text-shadow: 2px 2px 8px rgba(0.2,0,0,0);
+            ">
+                ECREAM-LLM Leaderboard
+            </h1>
+        </div>
+        """
+    )
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+    # Charts row, placed right below the title and above the tabs.
+    # The boxplot_prompts_per_task chart is currently disabled.
+    with gr.Row():
+        gr.Plot(value=line_chart(LEADERBOARD_DF), elem_id="line-chart")
+        gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES, REFERENCES), elem_id="boxplot-task")
+
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+
+        # Main leaderboard tab
+        with gr.TabItem("🏅 Benchmark"):
+
+            leaderboard = init_leaderboard(
+                LEADERBOARD_DF,
+                default_selection=list(_BENCHMARK_TAB_COLUMNS),
+                hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in _BENCHMARK_TAB_COLUMNS]
+            )
+
+            # A banner showing `theoretical_max_combined_perf` used to be
+            # rendered here; it is currently disabled.
+
+        # A separate "📈 Charts" tab (line_chart, boxplot_per_task,
+        # boxplot_prompts_per_task, barplot_mean_few_minus_zero_shot) is
+        # currently disabled.
+
+        # About tab
+        with gr.TabItem("📝 About"):
+            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+        # Task-specific leaderboards: multiple-choice tasks
+        for task, metadata in TASK_METADATA_MULTIPLECHOICE.items():
+            leaderboard = _add_task_tab(task, metadata, "markdown-text")
+
+        # Non-interactive separator tab between the two task groups
+        with gr.TabItem("│", interactive=False):
+            gr.Markdown("", elem_classes="markdown-text")
+
+        # Task-specific leaderboards: generative tasks
+        for task, metadata in TASK_METADATA_GENERATIVE.items():
+            leaderboard = _add_task_tab(task, metadata, "markdown-text1")
+
+    # Citation section
+    with gr.Accordion("📙 Citation", open=False):
+        gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, lines=20, elem_id="citation-button", show_copy_button=True)
+
+    # Credits section
+    with gr.Accordion("📙 Credits", open=False):
+        gr.Markdown(
+            """
+            ***This project has been funded by the European Union under:
+
+                   Horizon Europe eCREAM Project (Grant Agreement No.101057726)  
+            """
+        )
+
+# Background job to restart space: restart_space() is scheduled as an
+# "interval" job with seconds=1800 (i.e. every 30 minutes).
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=1800)
+scheduler.start()
+
+# Launch the app with concurrent queueing (default concurrency limit of 40).
+# Runs at import time; there is no `if __name__ == "__main__"` guard.
+demo.queue(default_concurrency_limit=40).launch(debug=True,  # Enable Gradio debug mode
+        show_error=True)
\ No newline at end of file
diff --git a/csv_files/llm_scores_p1.xlsx b/csv_files/llm_scores_p1.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..a6d5790fd6e7b7ce5f7d819925710d57f375f3ce
Binary files /dev/null and b/csv_files/llm_scores_p1.xlsx differ
diff --git a/csv_files/llm_scores_p2.xlsx b/csv_files/llm_scores_p2.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..26af80a6cd45b0ec82097dafc6d4b8105945ecda
Binary files /dev/null and b/csv_files/llm_scores_p2.xlsx differ
diff --git a/csv_files/llm_scores_p3.xlsx b/csv_files/llm_scores_p3.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..3621768a991e0d757b6e5af462af6dc618f9f293
Binary files /dev/null and b/csv_files/llm_scores_p3.xlsx differ
diff --git a/csv_files/outputs/.ipynb_checkpoints/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot-checkpoint.txt b/csv_files/outputs/.ipynb_checkpoints/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot-checkpoint.txt
new file mode 100644
index 0000000000000000000000000000000000000000..37a0a0b3fcec4413d915fb8b17302fe6c93286c4
--- /dev/null
+++ b/csv_files/outputs/.ipynb_checkpoints/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot-checkpoint.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2877 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1963 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3459 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3208 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4430 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4487 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4492 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4311 |   | 0 |
diff --git a/csv_files/outputs/.ipynb_checkpoints/epfl-llm__meditron-7b__it__10shot-checkpoint.txt b/csv_files/outputs/.ipynb_checkpoints/epfl-llm__meditron-7b__it__10shot-checkpoint.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b2aa7fd7dc8637dbd14ef01f078eceecddd04f15
--- /dev/null
+++ b/csv_files/outputs/.ipynb_checkpoints/epfl-llm__meditron-7b__it__10shot-checkpoint.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3288 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2991 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3563 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3311 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0896 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0832 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0887 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0968 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..adb6649f69212f89096131405d496954e948d037
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0918 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0629 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1041 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1083 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2604 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1287 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3394 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3131 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..970790d8c37ee624a941d7838baf752c1418f32c
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2142 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2189 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2243 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1994 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1681 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1189 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1668 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2185 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..256806ae1aae91613bb15e7f61973bb2c3d373e9
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0611 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0620 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0592 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0620 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0863 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1017 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0506 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1065 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e1968c70df7ae59de71b96c9719693f1041cc591
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1474 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1667 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1089 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1667 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0970 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0821 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1053 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1036 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..de76936f997964ba608d49c32bded3be64711fed
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0416 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0435 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0429 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0384 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1413 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0672 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2266 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1300 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1324843bcf3efaa493662c82d523957a6e202c45
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3753 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3299 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4023 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3938 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1331 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0977 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1226 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1789 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..922dce80469337edc75e7835aa6a600369523091
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0379 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0379 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0378 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0379 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0891 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0602 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1293 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0778 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9dee6185f81350fdc85c72cb4a61be93b071cc61
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3966 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3992 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3916 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3992 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1003 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0998 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1055 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0956 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8a25091bbb80e0a2681548322565c52cb0858b07
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0385 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0387 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0380 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0387 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0174 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0121 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0280 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0121 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8c591c7f0a88ced816e237245a16bdc6d688db83
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3507 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3444 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3632 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3444 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0884 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0734 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1045 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0875 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__0shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..af66f9c26430a2440fce61f08cdf1c00204b2cf0
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0438 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0429 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0456 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0429 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1278 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0967 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1900 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0967 |   | 0 |
diff --git a/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f5a52d295a6f2f02b23f1a057560f2abba92d1b8
--- /dev/null
+++ b/csv_files/outputs/Henrychur__MMed-Llama-3-8B__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3720 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3558 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4045 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3558 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0762 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0787 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0781 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0719 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__en__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6f2276bf179a07b2a459292322afb0dd5fbc5788
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0578 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0940 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0331 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0464 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..765d090e468a522437f45edb65fc5d65485264b7
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1317 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1215 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1415 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1322 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0031 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0028 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0016 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0049 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__gr__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4cc7d4a784cb754ed058341765da74fe59e4950e
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0769 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0859 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0591 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0859 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4edd50dc0d05a279ed9a6be3efb12660fc646344
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1448 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1455 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1434 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1455 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0010 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0024 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0007 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__it__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..89aa974a3e785f05de8765a3214c2c1b54683fa4
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__it__0shot.txt
@@ -0,0 +1,10 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0812 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0770 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0920 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0747 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2b5e4935064fdb46893c4e273f664a6bc5a4bf79
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1694 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1616 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1774 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1690 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0048 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0035 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0064 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0046 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__pl__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b3febb68ea3e21f9230cb485500075ba859f318f
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0308 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0244 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0436 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0244 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9c96e416317f7b151616c4982e8c0640322bb615
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1516 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1500 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1548 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1500 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0032 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0040 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0023 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0034 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sk__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..75cf3c4ce337fe7f13221bf8b230f9c267ae3639
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0712 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0880 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0375 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0880 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ce7ca5e76b585007a9dc187a6dd14ae6e22f17cc
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1444 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1485 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1360 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1485 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0027 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0038 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0024 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0020 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sl__0shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8811248dde3d8e1e5d3e5bd0c4d11888b8adad09
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0711 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0777 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0579 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0777 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..97237b461fcde9621e1b414675820a8989f1add9
--- /dev/null
+++ b/csv_files/outputs/HiTZ__Medical-mT5-large__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1422 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1470 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1325 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1470 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0080 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0073 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0074 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0093 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1e6243a974bbae1a9614d27b4d2e11522417cb4d
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2500 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3425 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1181 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2893 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4075 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4135 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3917 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4172 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a27ebddd1aefdbbdb6edd25f8352b15456cf81a7
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5993 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6091 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5646 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6243 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.6164 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6332 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6025 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6133 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..32e68359dde026f73f4bdc753c7293e1d097dd76
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1290 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1339 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1191 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1339 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3957 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3796 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4266 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3810 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8435fa43de5b6d649de6e305295728062df17d85
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6028 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6119 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5847 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6119 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.6056 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5962 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6024 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6183 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..273a22dfa278f3ac9935c7789f1efc8bc0c51068
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2137 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2467 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1709 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2234 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4016 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4173 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3770 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4106 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3e78b0cd1d68e177e93c6ae63d60ff33e934b1cd
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6569 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6719 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6327 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6661 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5952 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5767 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5998 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6093 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2bbaa4e441dac6b8c9ed99f717bd896a34a45e3d
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0586 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0697 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0364 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0697 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4022 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3803 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4464 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3800 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..465d16af61fd9338c7188c53fbf60f164ed3aac6
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6092 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6226 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5824 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6226 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5944 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5991 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5466 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6375 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8660df7e3f0f119e44cf5a67e7a942f913b8aa4d
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0955 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1220 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0426 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1220 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4116 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4027 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4294 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4027 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..63b5158840c219e67fbf758e2ed730ca530afe7d
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6419 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6386 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6486 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6386 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5899 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5894 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5845 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5959 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..52a254555d051acdd5ed2169b161e4db6559e7f6
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3398 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3910 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2375 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3910 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3777 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3775 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3783 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3775 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..11a5d3eb944b1de7399b5736ad5127c36767eac5
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6371 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6467 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6178 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6467 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5837 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5949 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5782 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5781 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6d46997ae1107c126bb8219af5d19f4b69f60a6d
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3279 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3804 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3068 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2964 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4658 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4734 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4649 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4591 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5071ef1d6625c2e56e68013ab891e5757b1af187
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5895 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5970 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5602 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6113 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.6440 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6482 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6469 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6370 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d091d2dd0d08ddd7d9ae2f74d581e4787f4ebf9
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4506 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5976 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1568 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5976 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4104 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4393 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4083 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3834 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fa6241f9b435b69937d53ca833cc5a27fa25c2c0
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6175 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6196 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6131 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6196 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5840 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5913 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5896 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5710 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cb5936448f68978fe3b04ed6fcf29377b929e3d0
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2734 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3758 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1647 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2796 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4370 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4505 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4159 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4447 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9fd68069be248fb9602424d0ab5e675f83263e82
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.7005 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6934 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.7152 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6930 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5641 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5801 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5595 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5526 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d4f8030e0178b97b248945b2973d52689441048e
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2428 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2486 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2311 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2486 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4074 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3865 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4569 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3788 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c0657f5bc039e0ef6c46d0a9ab79ea5c33277f47
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6006 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6008 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6004 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6008 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5888 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5858 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5868 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5938 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8c2c921f81dfd861433916d7a82eae8f0794ee40
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3375 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3578 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2968 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3578 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4031 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3971 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4152 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3971 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ccd3f8f6a3d5adfc50bb93253d2b1a2baddb48ea
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6720 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6743 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6673 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6743 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5643 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5733 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5586 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5609 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__0shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..12d98d519252a33b36dae0af4719974c3d12e5c2
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3183 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3344 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2863 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3344 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4048 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3979 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4186 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3979 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..45927874109b49e1ce1db253c58c78ab3ea1a926
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6373 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6253 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6615 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6253 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5727 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5992 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5849 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5339 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6420cfe9d439aeff3b1a1d5a016a08fc48833326
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__0shot.txt
@@ -0,0 +1,10 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - RE        |       |      |      |f1    |   | 0.4141 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4394 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4031 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3997 |   | 0 |
+| - NER        |       |      |      |f1    |   | 0.4445 |   |0 |
+|   - p2  |       |      |      |f1    |   | 0.4162 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4729 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..09d6c20e438665ebd65645a507c560c6cb20a278
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5907 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5986 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5593 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6143 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5259 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5150 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5261 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5364 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9679a42ef05e59d1976f28f381ab016e9bd01f2b
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4368 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4291 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4521 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4291 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3776 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3733 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3799 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3798 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e7b5e451b837ea640578ca02095bc52a621c7ee1
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5999 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6164 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5669 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6164 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5149 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5015 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5209 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5223 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d32b05f0de3224777b7aae2d6385c38711a575a8
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3572 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0885 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5316 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4514 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3959 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3784 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4123 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3972 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9dbb95dc7c82ed188142cf9ef0a295bfd27bb3e5
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6673 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6793 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6447 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6778 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5982 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6041 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5838 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6065 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a6358c6bb902fb22713a21ed802c947dd78e7ea6
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4235 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4332 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4043 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4332 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4186 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4152 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4220 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4187 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9f116f8c7deee3f443c689514bb8a23fdb8d305c
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6118 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6276 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5803 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6276 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5166 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5103 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5200 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5195 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..82f30fd004efa2674df97a1dae911f0a92ff3e26
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3287 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3231 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3398 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3231 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3943 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3980 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3867 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3980 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..652672223f87eeb324263928437a787e75b87b20
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6030 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6085 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5919 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6085 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5106 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4920 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5025 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5373 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__0shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..126b784a0d6414a7ebb39eb6954f1444f9a726e9
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4501 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4486 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4531 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4486 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4118 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4115 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4126 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4115 |   | 0 |
diff --git a/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..827b5e0d0dd790eea628cc4c77b18800829dd3d5
--- /dev/null
+++ b/csv_files/outputs/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6391 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6615 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5944 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6615 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5356 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5062 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5576 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5429 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..37a0a0b3fcec4413d915fb8b17302fe6c93286c4
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2877 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1963 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3459 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3208 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4430 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4487 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4492 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4311 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d1353488cb49ca2c17d3d3e38c7a9b5efe1528a4
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5963 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6024 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5929 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5935 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5221 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5191 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5199 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5273 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79aa97329e86a168483edd679e8cc64109aed7a6
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3421 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3455 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3354 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3455 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3485 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2406 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3947 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4102 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d10be9c5334b54f3adcb1cee0c3d5a9defc21084
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5884 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5928 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5796 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5928 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4415 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4467 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4210 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4569 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5db52b32f4fc4b7478fd7b3ec2f0063bd67713ec
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3220 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2678 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3568 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3414 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4452 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4519 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4611 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4227 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..df97d37a91573d214913b317e4b83ba9899e389a
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6864 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6982 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6679 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6930 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5530 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5546 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5526 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5518 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..16a21b3d60d0b28e03e3f22502e906d4f9d2586d
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3379 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3204 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3728 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3204 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4131 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3983 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4327 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4083 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7bbdde1853e3107c0e3fa26a80be768aedf20a06
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6189 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6214 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6140 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6214 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5023 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4863 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5129 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5076 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ffa46c1c5e22e9d8e038069c447c1f026cfc61f6
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2521 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2829 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1905 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2829 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3959 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3893 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4091 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3893 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6516944fdac3db3ed3380f5c97391fae7dbc061d
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6302 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6347 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6211 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6347 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4646 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4799 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4451 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4689 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__0shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c0a0fe07a3970ef5a820c76cb9751944d12fdab2
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2604 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2810 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2192 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2810 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4116 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4116 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4115 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4116 |   | 0 |
diff --git a/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..698e2379856e9df40de4014fdbd473b61395c81b
--- /dev/null
+++ b/csv_files/outputs/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6026 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6015 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6049 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6015 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4911 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5137 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4674 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4923 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__en__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a378a1ac602f249f8f7988c361155b300f3edd65
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0612 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0578 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0410 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0848 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0313 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0442 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0497 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..45de7e54ed94cfb5fcc9ab2c265f192e7aa9f981
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1245 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0803 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1479 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1454 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0692 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0722 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0692 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0663 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__gr__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b733c326cb013320727e13c717645ad3b4ff775e
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2426 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2417 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2443 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2417 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0592 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1556 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0161 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0058 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..87b319e4253b8aba65bfcf2e4ade2615fc2ae10e
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__it__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2a0d1cb5b5e49884090c31b808c2bc2a7f01cf4c
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0639 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0773 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0612 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0531 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1072 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0020 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1929 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1268 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b2aa7fd7dc8637dbd14ef01f078eceecddd04f15
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3288 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2991 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3563 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3311 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0896 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0832 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0887 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0968 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__pl__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7c763fab18fe2af421c37a99965e57159fb9f0dd
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1161 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1140 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1203 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1140 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0025 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0076 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..14675a45035d0e69895142e3b0f6800ec9197583
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3222 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3184 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3297 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3184 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0510 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0533 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0461 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0535 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sk__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e3d58f1c091c0ef7928d528ba2d95cfc046831
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0778 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0874 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0586 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0874 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0034 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0036 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0031 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0036 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bad9a6c35cda030096e0a1ffe1e020b004d5263a
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2993 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3004 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2970 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3004 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0404 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0445 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0393 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0375 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sl__0shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cfb36aa25fe8478844f4e4741701e7cd84df3e6c
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0951 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1197 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0460 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1197 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0445 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0598 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0137 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0598 |   | 0 |
diff --git a/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt b/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..eab52b0c7040bdf63e365ec759ff69b327922c10
--- /dev/null
+++ b/csv_files/outputs/epfl-llm__meditron-7b__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3052 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3119 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2916 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3119 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0502 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0477 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0501 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0528 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__en__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..568fb0bef70896f666a03925b261a19ecc918295
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4603 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3267 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5174 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5370 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4211 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4360 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4205 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4067 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fd9ea572467adfa2c454f1539b60635facd6d39f
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5919 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6200 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5639 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5918 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5303 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5163 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5337 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5409 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__gr__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..87226c5046c2278e0f2a6e57fa83aa395a61ca52
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5292 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5549 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4777 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5549 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4008 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4124 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3957 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3943 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..739bad8c7a5639671141f53c0413696e38d96592
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5943 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6083 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5663 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6083 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5162 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5070 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4971 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5444 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__it__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8c1e51ad7fa14eeadd44b3fdc9a10c6ff1ae5784
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6158 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5739 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6524 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6210 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4298 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4585 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4113 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4196 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..51580ad2fec3d1b525363e7391cd794ab01ea59f
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6707 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6910 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6643 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6569 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5209 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4958 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5365 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5305 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__pl__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f187b39ad69bd8bb0dbf697be691b129eadee340
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4092 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4060 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4155 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4060 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3891 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3674 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4271 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3729 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5ac1adba3779f7f68d4bfbf2cf88b163e3b84f4b
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5893 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5908 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5862 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5908 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5033 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5168 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4808 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5124 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__sk__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8f0bc370ed50b39ebc88b5e7e85f8a110f45a283
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4775 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4875 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4575 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4875 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4106 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3989 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4340 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3989 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b909c3988f118020a3d985678e092a54be2f61f1
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6135 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6141 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6122 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6141 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5007 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5153 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4754 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5114 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__sl__0shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..98ce20cc51f351ff18c9e416c89a29a920d6bacd
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4487 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4707 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4046 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4707 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4058 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4079 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4016 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4079 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt b/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e956839ba21e57d9f60059817766b32fe88d80a2
--- /dev/null
+++ b/csv_files/outputs/google__gemma-2-9b-it__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6156 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6365 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5737 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6365 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4883 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4801 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4878 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4972 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__en__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..11bb43caf4d6194fe781b6d25ed4e6d8ba98ec60
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5490 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5446 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5830 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5194 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4623 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4543 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4582 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4743 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9b6e27e1e65dc4e4d80d829f031471763c651a1f
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6187 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6160 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6308 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6094 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5518 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5191 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5600 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5764 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__gr__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e9d713d614c2dbaa4d17389ea32a9b3021a3cf
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5151 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4866 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5721 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4866 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4473 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3955 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4695 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4769 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f1b60273ff3047ec635fd913fc4fe0db8a2ca133
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6570 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6551 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6608 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6551 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5405 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5083 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5550 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5581 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__it__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..20b323e8a1c0fdd3626eab05d6d99055375834fe
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6065 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5543 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6697 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5954 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4737 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4390 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4895 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4927 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f498e38336fb8c77988c54dcd070e31a0ac9a220
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.7115 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.7142 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6992 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.7212 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5615 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5223 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5837 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5786 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__pl__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e9f1519cf73039b96aee8faf352b110818910761
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4508 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4506 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4511 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4506 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4307 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4384 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4267 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4271 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9eed39172f7a13688201364ca71ab665a6378bda
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6618 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6591 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6672 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6591 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5592 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5795 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5601 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5380 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__sk__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..27cb80b8b7a5de7165aa52e89c5f70d0ac61dc23
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2841 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3183 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2157 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3183 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4369 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4373 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4360 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4373 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2f8297965f5ccbab4e4581425fdb9d9628f5cc8c
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6786 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6737 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6885 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6737 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5095 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5121 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5061 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5103 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__sl__0shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..39bd48a7d0643c98d4e640b58b7343ee908f2d64
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4508 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4370 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4783 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4370 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4301 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4255 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4391 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4255 |   | 0 |
diff --git a/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt b/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed1c6926d8cdf2d22c23adf3393d14f1da9cd4d9
--- /dev/null
+++ b/csv_files/outputs/google__gemma-3-27b-it__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6806 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6750 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6918 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6750 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4999 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5149 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4703 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5145 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__en__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..15bfbd55479583afff9de0e140bb5e0cd5970dd2
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5011 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3842 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6035 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5156 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4681 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4836 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4763 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4443 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0166f0aea940a1e11dbe79c1f2cf729f6c18c859
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6324 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6355 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6161 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6455 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5540 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5562 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5494 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5565 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__gr__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d065c4984ef73926a5399a10a41f28371f93fc00
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5585 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5314 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6126 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5314 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4199 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4069 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4332 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4197 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..70f88e2528d3a1ff7cf33f95ceb671fc00a7aa14
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6839 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6836 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6846 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6836 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5680 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5392 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5867 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5780 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__it__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f4af53b4a7ac4e873081dbac1012b7e5319e4714
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5351 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4261 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6212 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5582 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4521 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4042 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4916 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4604 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..de0f1bbac03a767e292f5c6d52f4319e367c44c0
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__it__10shot.txt
@@ -0,0 +1,10 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.7133 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.7262 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.7005 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5960 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5919 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6235 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5726 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__pl__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f9eba07f4234c3cc6cc8a0f05a5cd8f0cd620ac8
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4245 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4216 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4303 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4216 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4332 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4325 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4424 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4246 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0276a07fab4408898651f3f147a274d5d5df3c97
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6791 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6829 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6715 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6829 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5997 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5940 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6133 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5918 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sk__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bd8b98a582ba732f34685230d5c2af7c07ed3a59
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2336 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2971 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1066 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2971 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4440 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4395 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4531 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4395 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..003be3f4ed88a6a499b89ed958c415cc485b70c3
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.7137 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.7143 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.7127 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.7143 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5156 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5111 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5188 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5171 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sl__0shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c70cd19fab747d42922e7a5ecdf7736a81004f9
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4863 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4675 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5238 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4675 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4201 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4182 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4239 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4182 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt b/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c8ad321754c222e65360614da1f6192f3387c7c
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-27b-text-it__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6887 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6947 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6765 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6947 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5469 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5323 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5590 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5494 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__en__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d18ba6fc1bdc423039c71bd42a10e8a09e997db
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2625 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2635 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2503 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2737 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2851 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2095 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3257 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3203 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4bc79186d7029b363381303d141376c728294ab7
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4930 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4833 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5005 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4951 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1198 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0964 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1237 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1391 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__gr__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d0f048d8fb01e8ed8352829fa0179010381f66ca
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2688 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2705 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2654 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2705 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2053 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2381 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3024 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0754 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4352edec2156fd74172ffb93bfc7069ed935cce2
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4953 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4910 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5039 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4910 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1453 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1204 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1605 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1551 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__it__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0726976105566a084aa8c1c51c84c700da3b7752
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2929 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3157 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2627 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3004 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1767 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2154 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2461 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0688 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4b3a8e23520bec5746d9090322128b48cf1af41f
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5454 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5633 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5377 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5352 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1753 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1592 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1917 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1751 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__pl__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1da79eb1fdbb0c791458ff4a9cce9c33a7da6497
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2231 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2255 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2183 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2255 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1173 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1150 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1314 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1054 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79c82263014a0069b6c825385d95cf6477004a4a
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5193 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5186 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5206 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5186 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1055 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1171 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0997 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0997 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__sk__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1bea1720e28d395cc9d1f6ee52968a6965a98c84
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2427 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2447 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2387 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2447 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1212 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1119 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1399 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1119 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..19c6346c5007538093d4b83f945e14ee4616490c
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4654 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4756 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4449 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4756 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1035 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1095 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1009 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1000 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__sl__0shot.txt b/csv_files/outputs/google__medgemma-4b-it__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..56dbab87e4f7fed3a562f04e90d5511d788bdc34
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2569 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2574 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2558 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2574 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1012 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0973 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1089 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0973 |   | 0 |
diff --git a/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt b/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cc424f90dde2288be7dda70e93c0e761287409da
--- /dev/null
+++ b/csv_files/outputs/google__medgemma-4b-it__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5063 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5117 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4955 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5117 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1260 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1178 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1101 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1501 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__en__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..76cdb91c718afaf7089efd18094132a0bf5f121a
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2786 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2502 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3089 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2768 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3248 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2274 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3929 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3542 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fb75acf6585f103c2dc138b976cb9d036ecee6c6
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5008 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5009 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4966 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5049 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1125 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1175 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1095 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1107 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__gr__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a0c5c0a5a00e2cdf36cc9f66ad99c89cd41760ac
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1717 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1641 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1869 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1641 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0977 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0736 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0778 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1418 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..eba564920c2c92d7800080552c6a59b8def8c9b7
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3384 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3375 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3403 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3375 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0606 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0427 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0681 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0711 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__it__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2179db033abe54c1a3211347f00f4fb25ebca628
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3307 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3397 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3300 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3226 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0792 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1489 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0736 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0149 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c42fd3d91bc50bf07de543792051bd0fbd08f0c
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5257 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5195 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5301 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5275 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1499 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2114 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0961 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1422 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__pl__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..398a394b401f757945eec334c5bd3723685aa80b
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2831 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2815 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2861 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2815 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2693 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2109 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2908 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3061 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4fc3f8a0bc1e4a0d38f2f3729779f31828e7b70b
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3986 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3913 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4132 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3913 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1366 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1255 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1207 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1636 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sk__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9aaca4c790d98eb11b6ab172e1b2f9ba297b0a56
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2710 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2571 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2987 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2571 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1062 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1554 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0077 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1554 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b031c9c0bd0f5fe6669a53563e3681aa1a74d890
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4025 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4106 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3861 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4106 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0613 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0509 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0606 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0724 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sl__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cd2d143234ccb78abac42a387aa128d3314e802f
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2892 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2998 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2680 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2998 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0304 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0395 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0121 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0395 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..891a18854ac34e549e2ead223a4f5c50fa589fb3
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Clinical__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4021 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4036 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3990 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4036 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0748 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0829 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0674 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0742 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__en__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d109d8f49c2b55f83170cc9751275a72ff6e1387
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1598 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0761 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2410 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1625 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2982 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1135 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4006 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3804 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..91d54c4d91a393447e1a9033fe06a44c2ea83264
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5216 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5357 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5227 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5063 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1719 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1432 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1888 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1836 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__gr__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..033006f71175ad0e7ba4e4f9b2b91bd4b604c058
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1159 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1294 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0890 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1294 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1184 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0962 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0673 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1916 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..116bb08deaa20d0ae0a961c362d4802b12d2add2
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2881 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2822 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2999 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2822 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0675 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0576 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0674 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0777 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__it__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dd0e6008d2a3198f8ea3a505176acf5dd7c54ac8
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2023 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0867 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2484 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2717 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2623 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1712 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2896 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3261 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..751b2811277dbd2d73fca6f47f097140e427c007
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5715 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5729 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5627 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5790 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2679 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2873 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2307 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2858 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__pl__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9c6bc50efa744a67b9f472d4fdd237432f562068
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1567 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1510 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1680 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1510 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2881 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2683 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3126 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2832 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..72e10fb742c76990d605ee3a2c3c4ef35b670091
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4447 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4417 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4506 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4417 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2291 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1525 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2686 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2662 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sk__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a3c42b94084e72cbff224a863d3f392fb1b26463
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1788 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1641 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2081 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1641 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1221 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1776 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0112 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1776 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2d69cd7d7928f4a6c643567b723646b0ea9b62cc
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4226 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4327 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4023 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4327 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1313 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1070 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1395 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1473 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sl__0shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d1efab70337f7f4131da606243432c236e0a589
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1792 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1758 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1860 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1758 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1325 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1446 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1084 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1446 |   | 0 |
diff --git a/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..679149a7bff8efd0e260ce4a2032de17a86487c6
--- /dev/null
+++ b/csv_files/outputs/microsoft__MediPhi-Instruct__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3837 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3973 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3564 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3973 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1550 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1155 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1468 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2027 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f4ee3894a133ddaa7d959a5cc3ee2a1e1a1647e2
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2278 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2529 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2144 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2162 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3007 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3688 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3642 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1693 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e691afc144b5537fc7c41225a0f9323b2628b66e
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4753 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4725 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4730 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4805 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3592 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2593 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4034 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4148 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0017f0d9afa0d196c05a399d60bf6b69bf801441
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1705 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1603 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1909 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1603 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0592 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0432 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0348 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0994 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b7051f2deb4230647fb3c9bbe0580a2fe84de6d8
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3548 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3498 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3648 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3498 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1862 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1055 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2343 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2189 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b4e44b2cf5da34e762e0db150b8246e857eae345
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2433 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2788 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2030 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2481 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0561 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1382 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0163 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0140 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8a1dc9d3cb03c6344b978ea84a9310c9d638cfb5
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5176 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5147 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5232 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5149 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3958 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3092 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4530 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4252 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..77b2c7212ed034a17baff6431293cdb59c42592c
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2953 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3024 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2811 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3024 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1006 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0863 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1292 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0863 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0a825a34d1e4073d2ae5da7e22e86582b980912c
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4956 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4911 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5046 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4911 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3296 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3895 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3311 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2683 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6eeaac48bff7f356aa2168ad7755b879c69be13a
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2144 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2143 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2146 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2143 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0782 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0756 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0835 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0756 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3952afe3c0317b08c9e06f3caff5ae01eb9aa4e2
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3951 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4029 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3794 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4029 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2132 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2155 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1948 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2293 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__0shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e0158432acf19d26d277f1b57deb076edd05514a
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1826 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1766 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1947 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1766 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1076 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0766 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1695 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0766 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c0a21e1f1e602067347fb6c7ae3af7c47c220eb9
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4194 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4204 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4174 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4204 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2018 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1990 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1950 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2115 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d2af72d7f2124d9fb87aba16aa7ec5979b1490e4
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2271 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2767 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2299 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1748 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3472 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3694 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3482 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3241 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7e232099737212c32a37983c482f6e7bf1aee5d9
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5762 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5777 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5841 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5668 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4313 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3482 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5008 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4449 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a6f9827c9bfca0e3f2fe140aa5bb7f63e64551b9
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0717 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0732 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0687 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0732 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2326 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1575 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2117 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3287 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5c23dd02177855715602246da1ab145d4750a511
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5050 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5081 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4988 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5081 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2549 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2029 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2296 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3323 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94723d98b89e2a142ae06a3699ea88924444d65a
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1960 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2792 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1772 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1316 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2365 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2849 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2384 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1861 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dc5b1148df6fbd0d32f982b72350b9baed9e392d
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6441 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6430 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6437 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6457 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3556 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2708 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4099 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3860 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9a7d8e28e086f27eb9637e6ad992ba36f0de390a
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0468 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0483 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0439 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0483 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1823 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2123 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1686 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1661 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..824dea14ef26e4fd07539f5cdc57cb0d72d7a869
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5375 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5352 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5421 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5352 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1906 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1863 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1855 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2001 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6c38472f1e9cb2c7693144b17179e9dcfe88f159
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0738 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0685 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0844 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0685 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1596 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1696 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1396 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1696 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6e3e1b2ef67ae845db75704f414dd97a01bc4d8a
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5030 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5025 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5040 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5025 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1832 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1237 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2166 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2094 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__0shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..832ca83ee26ae570b1c4e4d781100383be94e147
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0842 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0861 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0805 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0861 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1905 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2309 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1096 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2309 |   | 0 |
diff --git a/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c2b96898a18579d2b16376fa5e4d1159ed4fc544
--- /dev/null
+++ b/csv_files/outputs/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5327 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5323 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5335 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5323 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1725 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1390 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2057 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1727 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0cdeef408ee3cfe2f6230ee84b4f0d454d6847c9
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2658 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2270 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2709 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2996 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3280 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2157 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3835 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3848 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8852cb62bd0e04b214587916191c6b150f925661
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5730 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5840 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5421 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5928 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5145 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4335 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5586 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5515 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b7516c1517104e5be8c21dca55222faa85473fdc
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1585 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2130 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0495 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2130 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0506 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0401 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0250 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0867 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ba5c002a9264a1d56a51c72b4dc642ee87b8c605
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3448 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3345 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3655 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3345 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3591 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3749 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3755 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3268 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9bfdea743aa846d49f2acc6c0ba67e678ce8c4b0
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2011 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1261 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2327 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2444 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1865 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2404 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1699 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1492 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e8b141d311893ba8c76e8ed7cce50f6f06752573
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5625 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5821 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5432 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5622 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5226 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4622 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5458 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5597 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..edaf86b247e1c20e0f7c4138f96ec19d5a571ae4
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2414 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2452 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2338 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2452 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0963 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1501 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0123 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1264 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..068f9654a4427b28cd68c4493756660bf40e63a0
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4244 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4304 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4123 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4304 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5396 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5129 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5571 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5489 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6caf6d188e37dd8d852231914da9bca9053abf92
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2871 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2717 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3178 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2717 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0182 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0143 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0260 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0143 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dc5594c73257786e6edd1f8c852ad343d66e7f30
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4402 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4545 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4116 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4545 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4261 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3750 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4695 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4338 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__0shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8d586cd4e31f21369f0d3c8873dba4eb0ce073b5
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2297 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2519 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1853 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2519 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0050 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0047 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0058 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0047 |   | 0 |
diff --git a/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..999a14510d1ea855adb9835bc9235c19f1a60783
--- /dev/null
+++ b/csv_files/outputs/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4050 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4121 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3909 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4121 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3133 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2323 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3012 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4063 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__en__0shot.txt b/csv_files/outputs/unsloth__phi-4__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ba5d152519748cd98fa31e6e3d3083ba897a70ba
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__en__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0275 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0252 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0572 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4090 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4022 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4219 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4030 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__en__10shot.txt b/csv_files/outputs/unsloth__phi-4__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3f769aedc59b9b6be40222cca25d09daf6ffd0b3
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__en__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5984 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6098 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5711 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6141 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5364 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4912 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5626 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5554 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__gr__0shot.txt b/csv_files/outputs/unsloth__phi-4__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a17e8d575c703b7ccebc72cd8ff6aeca0397f1cc
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2011 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2901 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2208 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0925 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__gr__10shot.txt b/csv_files/outputs/unsloth__phi-4__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1f48b7d4235602d80223abd071f8764d2a1a5bfc
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5682 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5717 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5611 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5717 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5291 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4935 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5261 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5678 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__it__0shot.txt b/csv_files/outputs/unsloth__phi-4__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..85e165655342f1ca2d8da14464c516405b5a51c6
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__it__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1717 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1724 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3428 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3589 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3354 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3737 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3677 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__it__10shot.txt b/csv_files/outputs/unsloth__phi-4__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..be3c82925982b576686dd2db835c6bbc58fc89f7
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__it__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6759 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6647 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6732 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6897 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5705 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5608 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5820 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5688 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__pl__0shot.txt b/csv_files/outputs/unsloth__phi-4__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..50d734915f57e7a4713da8e3d4cb6ae9a653a9a1
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0279 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0236 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0366 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0236 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3814 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3799 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3829 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3813 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__pl__10shot.txt b/csv_files/outputs/unsloth__phi-4__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..13c70462fcbbc4333d7e40ab047995e60782311c
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5474 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5549 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5324 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5549 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5718 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5423 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5760 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5972 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__sk__0shot.txt b/csv_files/outputs/unsloth__phi-4__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..609bfee5abd16055de50dbbc8a5b5e54bf628dde
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0567 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0316 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1070 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0316 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3277 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3252 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3326 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3252 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__sk__10shot.txt b/csv_files/outputs/unsloth__phi-4__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e55439f603a7ee43ebc4fb2b6489d94a69f17b05
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5524 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5561 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5449 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5561 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5214 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5106 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4994 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5541 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__sl__0shot.txt b/csv_files/outputs/unsloth__phi-4__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..29578b4d5063f990ad13a10dcac7d69a04c24725
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2241 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2870 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0981 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2870 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2721 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3209 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1744 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3209 |   | 0 |
diff --git a/csv_files/outputs/unsloth__phi-4__sl__10shot.txt b/csv_files/outputs/unsloth__phi-4__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..debd951319f9e20f02aade8491ff82efa207384f
--- /dev/null
+++ b/csv_files/outputs/unsloth__phi-4__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5577 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5586 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5558 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5586 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5309 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5117 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5232 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5579 |   | 0 |
diff --git a/csv_new/llm_scores_p1_final.xlsx b/csv_new/llm_scores_p1_final.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..923b919a1bab64c07772e60306214192f33545b8
Binary files /dev/null and b/csv_new/llm_scores_p1_final.xlsx differ
diff --git a/csv_new/llm_scores_p2_final.xlsx b/csv_new/llm_scores_p2_final.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..4e6ac35e33f86ef21781bf32c1ff038811d49b85
Binary files /dev/null and b/csv_new/llm_scores_p2_final.xlsx differ
diff --git a/csv_new/llm_scores_p3_final.xlsx b/csv_new/llm_scores_p3_final.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..a1f28fbada7040d1d53b7925e954ec80b5aeeb19
Binary files /dev/null and b/csv_new/llm_scores_p3_final.xlsx differ
diff --git a/csv_new/output/.ipynb_checkpoints/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot-checkpoint.txt b/csv_new/output/.ipynb_checkpoints/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot-checkpoint.txt
new file mode 100644
index 0000000000000000000000000000000000000000..92e6941722e5350b9a314942add661e213655787
--- /dev/null
+++ b/csv_new/output/.ipynb_checkpoints/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot-checkpoint.txt
@@ -0,0 +1,23 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2877 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1963 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3459 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3208 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4430 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4487 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4492 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4311 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__en__0shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1e0d44698fc193727d37adeae0ccf2ccebdf34f3
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0918 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0629 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1041 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1083 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2604 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1287 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3394 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3131 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__en__10shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8d2b0d31313a12b76c1464713902f00fe033d096
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2142 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2189 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2243 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1994 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1681 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1189 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1668 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2185 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1779 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1825 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1612 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1900 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.1500 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2415 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1416 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0668 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0147 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0178 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0068 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0194 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__gr__0shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..256806ae1aae91613bb15e7f61973bb2c3d373e9
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0611 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0620 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0592 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0620 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0863 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1017 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0506 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1065 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__gr__10shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e1968c70df7ae59de71b96c9719693f1041cc591
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1474 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1667 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1089 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1667 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0970 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0821 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1053 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1036 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__it__0shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ddbca560d833645b835f8b9d6440e08d5992aaa9
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0416 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0435 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0429 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0384 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1413 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0672 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2266 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1300 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__it__10shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0ee3bab5e26688cb910d34e5a2d273285ee07ee5
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3753 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3299 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4023 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3938 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1331 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0977 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1226 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1789 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.1044 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0821 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1119 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1190 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0007 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0010 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0002 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0008 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__pl__0shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..922dce80469337edc75e7835aa6a600369523091
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0379 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0379 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0378 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0379 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0891 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0602 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1293 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0778 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__pl__10shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9dee6185f81350fdc85c72cb4a61be93b071cc61
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3966 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3992 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3916 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3992 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1003 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0998 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1055 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0956 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__sk__0shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8a25091bbb80e0a2681548322565c52cb0858b07
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0385 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0387 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0380 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0387 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0174 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0121 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0280 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0121 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__sk__10shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8c591c7f0a88ced816e237245a16bdc6d688db83
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3507 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3444 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3632 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3444 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0884 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0734 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1045 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0875 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__sl__0shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..af66f9c26430a2440fce61f08cdf1c00204b2cf0
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0438 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0429 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0456 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0429 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1278 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0967 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1900 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0967 |   | 0 |
diff --git a/csv_new/output/Henrychur__MMed-Llama-3-8B__sl__10shot.txt b/csv_new/output/Henrychur__MMed-Llama-3-8B__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f5a52d295a6f2f02b23f1a057560f2abba92d1b8
--- /dev/null
+++ b/csv_new/output/Henrychur__MMed-Llama-3-8B__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Henrychur/MMed-Llama-3-8B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3720 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3558 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4045 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3558 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0762 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0787 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0781 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0719 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__en__0shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f52b9ae12df87c2bcbdec0230947eb6d3debbf6b
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0578 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0940 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0331 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0464 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__en__10shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3b0f20c0b7f3db3eb73357dfd3847f11bf4f7a17
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1317 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1215 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1415 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1322 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0031 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0028 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0016 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0049 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__gr__0shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4cc7d4a784cb754ed058341765da74fe59e4950e
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0769 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0859 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0591 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0859 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__gr__10shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4edd50dc0d05a279ed9a6be3efb12660fc646344
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1448 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1455 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1434 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1455 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0010 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0024 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0007 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__it__0shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e4f0865ef187c77cfa71171ded1282690b2a85dd
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__it__0shot.txt
@@ -0,0 +1,22 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0812 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0770 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0920 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0747 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__it__10shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5335a77907e5ae1c8333fe21656b2257d68b9343
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1694 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1616 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1774 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1690 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0048 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0035 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0064 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0046 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__pl__0shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b3febb68ea3e21f9230cb485500075ba859f318f
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0308 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0244 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0436 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0244 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__pl__10shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9c96e416317f7b151616c4982e8c0640322bb615
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1516 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1500 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1548 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1500 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0032 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0040 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0023 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0034 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__sk__0shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..75cf3c4ce337fe7f13221bf8b230f9c267ae3639
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0712 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0880 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0375 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0880 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__sk__10shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ce7ca5e76b585007a9dc187a6dd14ae6e22f17cc
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1444 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1485 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1360 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1485 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0027 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0038 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0024 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0020 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__sl__0shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8811248dde3d8e1e5d3e5bd0c4d11888b8adad09
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0711 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0777 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0579 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0777 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/HiTZ__Medical-mT5-large__sl__10shot.txt b/csv_new/output/HiTZ__Medical-mT5-large__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..97237b461fcde9621e1b414675820a8989f1add9
--- /dev/null
+++ b/csv_new/output/HiTZ__Medical-mT5-large__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=HiTZ/Medical-mT5-large ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1422 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1470 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1325 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1470 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0080 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0073 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0074 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0093 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__en__0shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..86fc15d9c200aecc6911fda3d0517a3a3184138c
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2500 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3425 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1181 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2893 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4075 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4135 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3917 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4172 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0001 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0002 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8047c5166003b6dc32ba1a21ccad0b9b41c646a9
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5993 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6091 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5646 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6243 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.6164 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6332 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6025 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6133 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.2843 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2129 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3222 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3178 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.1658 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3073 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1137 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0764 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.2370 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1244 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4429 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1437 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__gr__0shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..32e68359dde026f73f4bdc753c7293e1d097dd76
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1290 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1339 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1191 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1339 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3957 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3796 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4266 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3810 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8435fa43de5b6d649de6e305295728062df17d85
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6028 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6119 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5847 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6119 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.6056 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5962 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6024 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6183 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__it__0shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dcbfd60fb9ee78af684469935464dcf37905b09a
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2137 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2467 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1709 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2234 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4016 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4173 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3770 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4106 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0002 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0007 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d74aa8ca4967e04a5e3873eda76473d60166904
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6569 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6719 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6327 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6661 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5952 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5767 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5998 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6093 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1557 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1111 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1599 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1960 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.2496 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4407 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1328 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1753 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.2339 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0817 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5103 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1096 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__pl__0shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2bbaa4e441dac6b8c9ed99f717bd896a34a45e3d
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0586 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0697 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0364 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0697 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4022 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3803 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4464 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3800 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..465d16af61fd9338c7188c53fbf60f164ed3aac6
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6092 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6226 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5824 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6226 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5944 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5991 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5466 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6375 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sk__0shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8660df7e3f0f119e44cf5a67e7a942f913b8aa4d
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0955 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1220 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0426 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1220 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4116 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4027 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4294 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4027 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..63b5158840c219e67fbf758e2ed730ca530afe7d
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6419 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6386 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6486 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6386 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5899 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5894 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5845 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5959 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sl__0shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..52a254555d051acdd5ed2169b161e4db6559e7f6
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3398 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3910 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2375 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3910 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3777 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3775 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3783 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3775 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..11a5d3eb944b1de7399b5736ad5127c36767eac5
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-14B-Instruct-1M__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-14B-Instruct-1M ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6371 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6467 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6178 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6467 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5837 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5949 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5782 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5781 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__en__0shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e3541b24061567ac17d18c28588fd9704681fe09
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__en__0shot.txt
@@ -0,0 +1,25 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3279 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3804 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3068 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2964 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4658 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4734 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4649 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4591 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0015 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0005 |   | 0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0057 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0002 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0006 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..65e185d66b8bc12e9c23da0771639caa6c6472cf
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__en__10shot.txt
@@ -0,0 +1,24 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5895 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5970 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5602 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6113 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.6440 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6482 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6469 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6370 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0931 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1501 |   | 0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1383 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0839 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0286 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0311 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0546 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0659 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0247 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1557 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0174 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__gr__0shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d091d2dd0d08ddd7d9ae2f74d581e4787f4ebf9
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4506 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5976 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1568 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5976 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4104 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4393 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4083 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3834 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fa6241f9b435b69937d53ca833cc5a27fa25c2c0
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6175 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6196 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6131 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6196 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5840 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5913 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5896 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5710 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__it__0shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..31fade95d9724693277c788e245ae4ceaaf485ea
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__it__0shot.txt
@@ -0,0 +1,24 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2734 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3758 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1647 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2796 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4370 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4505 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4159 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4447 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0004 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0017 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0003 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0008 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..92d682ce2a72967223ef7140ed1475ff8208673d
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__it__10shot.txt
@@ -0,0 +1,24 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.7005 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6934 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.7152 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6930 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5641 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5801 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5595 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5526 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0762 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0398 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0599 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1025 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1025 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.1086 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2322 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0109 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0828 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0353 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0186 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0602 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0272 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__pl__0shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d4f8030e0178b97b248945b2973d52689441048e
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2428 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2486 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2311 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2486 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4074 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3865 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4569 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3788 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c0657f5bc039e0ef6c46d0a9ab79ea5c33277f47
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6006 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6008 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6004 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6008 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5888 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5858 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5868 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5938 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sk__0shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8c2c921f81dfd861433916d7a82eae8f0794ee40
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3375 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3578 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2968 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3578 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4031 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3971 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4152 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3971 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ccd3f8f6a3d5adfc50bb93253d2b1a2baddb48ea
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6720 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6743 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6673 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6743 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5643 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5733 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5586 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5609 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sl__0shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..12d98d519252a33b36dae0af4719974c3d12e5c2
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3183 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3344 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2863 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3344 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4048 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3979 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4186 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3979 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..45927874109b49e1ce1db253c58c78ab3ea1a926
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen2.5-32B-Instruct__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen2.5-32B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6373 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6253 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6615 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6253 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5727 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5992 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5849 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5339 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__en__0shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d02325d7b876b860ed5ef24dc72e0e111501ef18
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__en__0shot.txt
@@ -0,0 +1,22 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - RE        |       |      |      |f1    |   | 0.4141 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4394 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4031 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3997 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0001 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0003 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0001 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - NER        |       |      |      |f1    |   | 0.4445 |   |0 |
+|   - p2  |       |      |      |f1    |   | 0.4162 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4729 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6db0705a854106d30d7f6a12c9cd0a3cd6148918
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5907 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5986 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5593 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6143 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5259 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5150 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5261 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5364 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.3351 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3206 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3581 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3267 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.3195 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3810 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3651 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2125 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.4256 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4154 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2924 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5690 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__0shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9679a42ef05e59d1976f28f381ab016e9bd01f2b
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4368 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4291 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4521 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4291 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3776 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3733 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3799 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3798 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e7b5e451b837ea640578ca02095bc52a621c7ee1
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5999 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6164 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5669 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6164 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5149 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5015 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5209 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5223 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__it__0shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6aae25b2fed715236df379e0bc39ca9aa0dd12f7
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3572 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0885 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5316 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4514 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3959 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3784 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4123 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3972 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0001 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0002 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..85cb53237a3f4b3ab9e17ad5699c66352f22ca73
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6673 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6793 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6447 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6778 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5982 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6041 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5838 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6065 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1973 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1620 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2566 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1734 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.4794 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4512 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5464 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4407 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.3069 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2147 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5071 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1988 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__0shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a6358c6bb902fb22713a21ed802c947dd78e7ea6
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4235 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4332 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4043 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4332 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4186 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4152 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4220 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4187 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9f116f8c7deee3f443c689514bb8a23fdb8d305c
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6118 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6276 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5803 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6276 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5166 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5103 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5200 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5195 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__0shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..82f30fd004efa2674df97a1dae911f0a92ff3e26
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3287 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3231 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3398 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3231 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3943 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3980 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3867 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3980 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..652672223f87eeb324263928437a787e75b87b20
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6030 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6085 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5919 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6085 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5106 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4920 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5025 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5373 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__0shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..126b784a0d6414a7ebb39eb6954f1444f9a726e9
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4501 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4486 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4531 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4486 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4118 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4115 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4126 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4115 |   | 0 |
diff --git a/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..827b5e0d0dd790eea628cc4c77b18800829dd3d5
--- /dev/null
+++ b/csv_new/output/Qwen__Qwen3-30B-A3B-Instruct-2507__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=Qwen/Qwen3-30B-A3B-Instruct-2507 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6391 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6615 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5944 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6615 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5356 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5062 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5576 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5429 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..92e6941722e5350b9a314942add661e213655787
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2877 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1963 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3459 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3208 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4430 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4487 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4492 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4311 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2a1b8b0df9eb5480ebc575744894b7dd65a6e792
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5963 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6024 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5929 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5935 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5221 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5191 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5199 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5273 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1768 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1169 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1503 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2633 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.2339 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3117 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2416 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1483 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.4828 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3922 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5191 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5371 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__0shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79aa97329e86a168483edd679e8cc64109aed7a6
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3421 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3455 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3354 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3455 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3485 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2406 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3947 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4102 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d10be9c5334b54f3adcb1cee0c3d5a9defc21084
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5884 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5928 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5796 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5928 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4415 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4467 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4210 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4569 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__0shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3b8bc3f3cbb4e83e24bd27ded97151cc85559b8b
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3220 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2678 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3568 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3414 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4452 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4519 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4611 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4227 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..45d0d9c0d3a66e282bc97331a00d9ec040029b26
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6864 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6982 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6679 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6930 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5530 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5546 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5526 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5518 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0570 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0308 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0174 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1228 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.2965 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2795 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2920 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3181 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.2830 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2630 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2967 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2894 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__0shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..16a21b3d60d0b28e03e3f22502e906d4f9d2586d
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3379 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3204 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3728 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3204 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4131 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3983 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4327 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4083 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7bbdde1853e3107c0e3fa26a80be768aedf20a06
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6189 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6214 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6140 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6214 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5023 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4863 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5129 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5076 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__0shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ffa46c1c5e22e9d8e038069c447c1f026cfc61f6
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2521 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2829 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1905 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2829 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3959 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3893 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4091 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3893 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6516944fdac3db3ed3380f5c97391fae7dbc061d
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6302 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6347 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6211 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6347 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4646 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4799 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4451 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4689 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__0shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c0a0fe07a3970ef5a820c76cb9751944d12fdab2
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2604 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2810 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2192 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2810 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4116 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4116 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4115 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4116 |   | 0 |
diff --git a/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..698e2379856e9df40de4014fdbd473b61395c81b
--- /dev/null
+++ b/csv_new/output/deepseek-ai__DeepSeek-R1-Distill-Qwen-32B__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-32B ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6026 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6015 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6049 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6015 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4911 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5137 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4674 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4923 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__en__0shot.txt b/csv_new/output/epfl-llm__meditron-7b__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..813bfc3b784509efa44e172f6ca41a8394dba25f
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0612 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0578 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0410 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0848 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0313 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0442 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0497 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__en__10shot.txt b/csv_new/output/epfl-llm__meditron-7b__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fa0e5a5b8b2a56ea1ecd44e817fcc20657b038e0
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1245 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0803 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1479 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1454 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0692 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0722 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0692 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0663 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__gr__0shot.txt b/csv_new/output/epfl-llm__meditron-7b__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b733c326cb013320727e13c717645ad3b4ff775e
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2426 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2417 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2443 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2417 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0592 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1556 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0161 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0058 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__gr__10shot.txt b/csv_new/output/epfl-llm__meditron-7b__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..87b319e4253b8aba65bfcf2e4ade2615fc2ae10e
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__it__0shot.txt b/csv_new/output/epfl-llm__meditron-7b__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..079f4b749f49a39a5fa21370618f59fca4c06bd5
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0639 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0773 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0612 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0531 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1072 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0020 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1929 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1268 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__it__10shot.txt b/csv_new/output/epfl-llm__meditron-7b__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d8ad9ef5a9a45b0f140c187cbddc77e035f7d352
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3288 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2991 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3563 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3311 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0896 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0832 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0887 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0968 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__pl__0shot.txt b/csv_new/output/epfl-llm__meditron-7b__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7c763fab18fe2af421c37a99965e57159fb9f0dd
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1161 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1140 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1203 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1140 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0025 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0076 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__pl__10shot.txt b/csv_new/output/epfl-llm__meditron-7b__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..14675a45035d0e69895142e3b0f6800ec9197583
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3222 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3184 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3297 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3184 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0510 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0533 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0461 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0535 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__sk__0shot.txt b/csv_new/output/epfl-llm__meditron-7b__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e3d58f1c091c0ef7928d528ba2d95cfc046831
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0778 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0874 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0586 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0874 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0034 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0036 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0031 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0036 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__sk__10shot.txt b/csv_new/output/epfl-llm__meditron-7b__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bad9a6c35cda030096e0a1ffe1e020b004d5263a
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2993 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3004 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2970 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3004 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0404 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0445 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0393 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0375 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__sl__0shot.txt b/csv_new/output/epfl-llm__meditron-7b__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cfb36aa25fe8478844f4e4741701e7cd84df3e6c
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0951 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1197 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0460 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1197 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0445 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0598 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0137 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0598 |   | 0 |
diff --git a/csv_new/output/epfl-llm__meditron-7b__sl__10shot.txt b/csv_new/output/epfl-llm__meditron-7b__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..eab52b0c7040bdf63e365ec759ff69b327922c10
--- /dev/null
+++ b/csv_new/output/epfl-llm__meditron-7b__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=epfl-llm/meditron-7b ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3052 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3119 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2916 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3119 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0502 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0477 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0501 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0528 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__en__0shot.txt b/csv_new/output/google__gemma-2-9b-it__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..81faf740dd7a13ccae7e56e1e2eaa3521b92615c
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4603 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3267 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5174 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5370 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4211 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4360 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4205 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4067 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0267 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0006 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0794 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0046 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0013 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0126 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__en__10shot.txt b/csv_new/output/google__gemma-2-9b-it__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dc6c94f35fa38ff6bd6785d97ec6d136a460733e
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5919 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6200 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5639 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5918 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5303 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5163 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5337 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5409 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.3200 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2951 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3388 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3262 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.2120 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3118 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2737 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0506 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.1624 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1067 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2649 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1158 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__gr__0shot.txt b/csv_new/output/google__gemma-2-9b-it__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..87226c5046c2278e0f2a6e57fa83aa395a61ca52
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5292 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5549 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4777 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5549 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4008 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4124 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3957 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3943 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__gr__10shot.txt b/csv_new/output/google__gemma-2-9b-it__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..739bad8c7a5639671141f53c0413696e38d96592
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5943 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6083 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5663 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6083 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5162 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5070 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4971 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5444 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__it__0shot.txt b/csv_new/output/google__gemma-2-9b-it__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..564507d8be454a27e616ac79f0bf9d60884b836a
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6158 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5739 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6524 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6210 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4298 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4585 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4113 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4196 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0008 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0024 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__it__10shot.txt b/csv_new/output/google__gemma-2-9b-it__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fe4f0d215fe5d0d8e46c5c8ef607fecd8712723b
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6707 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6910 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6643 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6569 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5209 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4958 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5365 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5305 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1509 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1790 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1653 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1084 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.2747 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3288 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4035 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0919 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.1412 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0851 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2653 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0732 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__pl__0shot.txt b/csv_new/output/google__gemma-2-9b-it__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f187b39ad69bd8bb0dbf697be691b129eadee340
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4092 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4060 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4155 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4060 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3891 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3674 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4271 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3729 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__pl__10shot.txt b/csv_new/output/google__gemma-2-9b-it__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5ac1adba3779f7f68d4bfbf2cf88b163e3b84f4b
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5893 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5908 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5862 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5908 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5033 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5168 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4808 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5124 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__sk__0shot.txt b/csv_new/output/google__gemma-2-9b-it__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8f0bc370ed50b39ebc88b5e7e85f8a110f45a283
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4775 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4875 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4575 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4875 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4106 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3989 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4340 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3989 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__sk__10shot.txt b/csv_new/output/google__gemma-2-9b-it__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b909c3988f118020a3d985678e092a54be2f61f1
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6135 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6141 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6122 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6141 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5007 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5153 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4754 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5114 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__sl__0shot.txt b/csv_new/output/google__gemma-2-9b-it__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..98ce20cc51f351ff18c9e416c89a29a920d6bacd
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4487 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4707 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4046 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4707 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4058 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4079 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4016 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4079 |   | 0 |
diff --git a/csv_new/output/google__gemma-2-9b-it__sl__10shot.txt b/csv_new/output/google__gemma-2-9b-it__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e956839ba21e57d9f60059817766b32fe88d80a2
--- /dev/null
+++ b/csv_new/output/google__gemma-2-9b-it__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-2-9b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6156 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6365 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5737 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6365 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4883 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4801 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4878 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4972 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__en__0shot.txt b/csv_new/output/google__gemma-3-27b-it__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1deb30f5e851aa6b7925c9feb20fe9fff8675c1b
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5490 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5446 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5830 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5194 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4623 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4543 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4582 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4743 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0924 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1559 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1213 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0044 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0131 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__en__10shot.txt b/csv_new/output/google__gemma-3-27b-it__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c2289ddf299b48c07abb85a786b9488d1d720da4
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6187 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6160 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6308 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6094 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5518 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5191 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5600 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5764 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.3305 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3271 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3301 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3342 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.2902 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4022 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3858 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0828 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.3034 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2449 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4821 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1832 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__gr__0shot.txt b/csv_new/output/google__gemma-3-27b-it__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..93e9d713d614c2dbaa4d17389ea32a9b3021a3cf
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5151 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4866 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5721 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4866 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4473 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3955 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4695 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4769 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__gr__10shot.txt b/csv_new/output/google__gemma-3-27b-it__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f1b60273ff3047ec635fd913fc4fe0db8a2ca133
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6570 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6551 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6608 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6551 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5405 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5083 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5550 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5581 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__it__0shot.txt b/csv_new/output/google__gemma-3-27b-it__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dae4b670c0c60c3fac20a66d4d9d9bfd6e268f8d
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6065 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5543 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6697 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5954 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4737 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4390 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4895 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4927 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0615 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1234 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0611 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0002 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0007 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__it__10shot.txt b/csv_new/output/google__gemma-3-27b-it__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..56ef2094b14f7961137092ee039dacb1291554e3
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.7115 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.7142 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6992 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.7212 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5615 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5223 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5837 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5786 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.2109 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1965 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2487 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1874 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.3773 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5732 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3443 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2144 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.1043 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1347 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1783 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__pl__0shot.txt b/csv_new/output/google__gemma-3-27b-it__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e9f1519cf73039b96aee8faf352b110818910761
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4508 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4506 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4511 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4506 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4307 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4384 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4267 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4271 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__pl__10shot.txt b/csv_new/output/google__gemma-3-27b-it__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9eed39172f7a13688201364ca71ab665a6378bda
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6618 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6591 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6672 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6591 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5592 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5795 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5601 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5380 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__sk__0shot.txt b/csv_new/output/google__gemma-3-27b-it__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..27cb80b8b7a5de7165aa52e89c5f70d0ac61dc23
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2841 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3183 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2157 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3183 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4369 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4373 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4360 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4373 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__sk__10shot.txt b/csv_new/output/google__gemma-3-27b-it__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2f8297965f5ccbab4e4581425fdb9d9628f5cc8c
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6786 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6737 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6885 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6737 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5095 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5121 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5061 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5103 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__sl__0shot.txt b/csv_new/output/google__gemma-3-27b-it__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..39bd48a7d0643c98d4e640b58b7343ee908f2d64
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4508 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4370 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4783 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4370 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4301 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4255 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4391 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4255 |   | 0 |
diff --git a/csv_new/output/google__gemma-3-27b-it__sl__10shot.txt b/csv_new/output/google__gemma-3-27b-it__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ed1c6926d8cdf2d22c23adf3393d14f1da9cd4d9
--- /dev/null
+++ b/csv_new/output/google__gemma-3-27b-it__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/gemma-3-27b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6806 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6750 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6918 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6750 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4999 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5149 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4703 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5145 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__en__0shot.txt b/csv_new/output/google__medgemma-27b-text-it__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a4f69c367cddedff7d23f6012c6e2f1ccd549c5d
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5011 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3842 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6035 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5156 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4681 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4836 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4763 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4443 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0317 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0623 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0327 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0003 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0009 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__en__10shot.txt b/csv_new/output/google__medgemma-27b-text-it__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6ad6f4457a604f30d69ebc96d8925759bf4ed3e3
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6324 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6355 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6161 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6455 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5540 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5562 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5494 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5565 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.3550 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3711 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3582 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3355 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.3327 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5480 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4010 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0491 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.4941 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4899 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3801 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6124 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__gr__0shot.txt b/csv_new/output/google__medgemma-27b-text-it__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d065c4984ef73926a5399a10a41f28371f93fc00
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5585 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5314 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6126 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5314 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4199 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4069 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4332 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4197 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__gr__10shot.txt b/csv_new/output/google__medgemma-27b-text-it__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..70f88e2528d3a1ff7cf33f95ceb671fc00a7aa14
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6839 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6836 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6846 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6836 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5680 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5392 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5867 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5780 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__it__0shot.txt b/csv_new/output/google__medgemma-27b-text-it__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..919fa792ca00b73012ddfda89685fb88e1768710
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5351 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4261 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6212 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5582 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4521 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4042 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4916 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4604 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0180 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0472 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0064 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0003 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__it__10shot.txt b/csv_new/output/google__medgemma-27b-text-it__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1799b14c9a4fb9bc8f3139242d6a9d639533436e
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__it__10shot.txt
@@ -0,0 +1,22 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.7133 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.7262 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.7005 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5960 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5919 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6235 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5726 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.2282 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2314 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2992 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1541 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.4075 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5898 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5797 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0528 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.3517 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5265 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5285 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__pl__0shot.txt b/csv_new/output/google__medgemma-27b-text-it__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f9eba07f4234c3cc6cc8a0f05a5cd8f0cd620ac8
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4245 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4216 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4303 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4216 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4332 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4325 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4424 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4246 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__pl__10shot.txt b/csv_new/output/google__medgemma-27b-text-it__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0276a07fab4408898651f3f147a274d5d5df3c97
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6791 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6829 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6715 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6829 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5997 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5940 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6133 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5918 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__sk__0shot.txt b/csv_new/output/google__medgemma-27b-text-it__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bd8b98a582ba732f34685230d5c2af7c07ed3a59
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2336 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2971 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1066 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2971 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4440 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4395 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4531 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4395 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__sk__10shot.txt b/csv_new/output/google__medgemma-27b-text-it__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..003be3f4ed88a6a499b89ed958c415cc485b70c3
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.7137 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.7143 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.7127 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.7143 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5156 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5111 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5188 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5171 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__sl__0shot.txt b/csv_new/output/google__medgemma-27b-text-it__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c70cd19fab747d42922e7a5ecdf7736a81004f9
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4863 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4675 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5238 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4675 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4201 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4182 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4239 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4182 |   | 0 |
diff --git a/csv_new/output/google__medgemma-27b-text-it__sl__10shot.txt b/csv_new/output/google__medgemma-27b-text-it__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c8ad321754c222e65360614da1f6192f3387c7c
--- /dev/null
+++ b/csv_new/output/google__medgemma-27b-text-it__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-27b-text-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6887 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6947 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6765 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6947 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5469 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5323 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5590 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5494 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__en__0shot.txt b/csv_new/output/google__medgemma-4b-it__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..91fd7d86851627db8904f7d9d037cbe8bbeaf315
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2625 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2635 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2503 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2737 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2851 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2095 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3257 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3203 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0039 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0061 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0056 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__en__10shot.txt b/csv_new/output/google__medgemma-4b-it__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6e7bfd5b6ef4f3622dc727197872512f2cb6fa5f
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4930 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4833 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5005 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4951 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1198 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0964 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1237 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1391 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.2646 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2659 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2671 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2607 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.2489 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3662 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3800 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0006 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.4228 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4505 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3799 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4378 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__gr__0shot.txt b/csv_new/output/google__medgemma-4b-it__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d0f048d8fb01e8ed8352829fa0179010381f66ca
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2688 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2705 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2654 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2705 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2053 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2381 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3024 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0754 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__gr__10shot.txt b/csv_new/output/google__medgemma-4b-it__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4352edec2156fd74172ffb93bfc7069ed935cce2
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4953 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4910 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5039 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4910 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1453 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1204 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1605 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1551 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__it__0shot.txt b/csv_new/output/google__medgemma-4b-it__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..703bbb937dd58a2fa5aaaf6925656b5c79b03b31
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2929 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3157 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2627 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3004 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1767 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2154 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2461 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0688 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__it__10shot.txt b/csv_new/output/google__medgemma-4b-it__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a8a3b8ab16be63315575f29ea4ab843ed471ded9
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5454 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5633 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5377 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5352 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1753 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1592 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1917 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1751 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1096 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1072 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1355 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0861 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.3524 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5229 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5289 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0054 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.2891 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4314 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0052 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4306 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__pl__0shot.txt b/csv_new/output/google__medgemma-4b-it__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1da79eb1fdbb0c791458ff4a9cce9c33a7da6497
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2231 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2255 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2183 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2255 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1173 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1150 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1314 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1054 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__pl__10shot.txt b/csv_new/output/google__medgemma-4b-it__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..79c82263014a0069b6c825385d95cf6477004a4a
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5193 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5186 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5206 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5186 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1055 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1171 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0997 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0997 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__sk__0shot.txt b/csv_new/output/google__medgemma-4b-it__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1bea1720e28d395cc9d1f6ee52968a6965a98c84
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2427 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2447 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2387 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2447 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1212 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1119 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1399 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1119 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__sk__10shot.txt b/csv_new/output/google__medgemma-4b-it__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..19c6346c5007538093d4b83f945e14ee4616490c
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4654 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4756 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4449 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4756 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1035 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1095 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1009 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1000 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__sl__0shot.txt b/csv_new/output/google__medgemma-4b-it__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..56dbab87e4f7fed3a562f04e90d5511d788bdc34
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2569 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2574 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2558 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2574 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1012 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0973 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1089 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0973 |   | 0 |
diff --git a/csv_new/output/google__medgemma-4b-it__sl__10shot.txt b/csv_new/output/google__medgemma-4b-it__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cc424f90dde2288be7dda70e93c0e761287409da
--- /dev/null
+++ b/csv_new/output/google__medgemma-4b-it__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=google/medgemma-4b-it ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5063 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5117 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4955 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5117 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1260 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1178 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1101 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1501 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__en__0shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..36836266cb450f999e59cc1c8ded4286dd2f967a
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2786 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2502 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3089 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2768 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3248 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2274 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3929 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3542 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0001 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0003 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0001 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0003 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__en__10shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bdbfc7235dd70690e8fedb9cfe08ee7d8ee39e35
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5008 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5009 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4966 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5049 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1125 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1175 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1095 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1107 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.3189 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3052 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3307 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3208 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.2879 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1833 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2803 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4002 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.3722 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3528 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2818 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4820 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__gr__0shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a0c5c0a5a00e2cdf36cc9f66ad99c89cd41760ac
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1717 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1641 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1869 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1641 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0977 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0736 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0778 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1418 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__gr__10shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..eba564920c2c92d7800080552c6a59b8def8c9b7
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3384 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3375 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3403 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3375 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0606 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0427 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0681 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0711 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__it__0shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a96f78aa4cc8265c0f08244dc2c89d552f40dd2b
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3307 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3397 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3300 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3226 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0792 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1489 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0736 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0149 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0021 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0064 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__it__10shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c2d3f133b2005537efcbebceed1f048beff58a25
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5257 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5195 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5301 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5275 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1499 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2114 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0961 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1422 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1299 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1422 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1646 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0829 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.4128 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3221 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3798 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5365 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.2053 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1169 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3103 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1885 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__pl__0shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..398a394b401f757945eec334c5bd3723685aa80b
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2831 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2815 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2861 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2815 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2693 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2109 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2908 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3061 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__pl__10shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4fc3f8a0bc1e4a0d38f2f3729779f31828e7b70b
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3986 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3913 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4132 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3913 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1366 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1255 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1207 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1636 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__sk__0shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9aaca4c790d98eb11b6ab172e1b2f9ba297b0a56
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2710 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2571 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2987 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2571 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1062 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1554 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0077 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1554 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__sk__10shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b031c9c0bd0f5fe6669a53563e3681aa1a74d890
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4025 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4106 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3861 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4106 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0613 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0509 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0606 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0724 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__sl__0shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cd2d143234ccb78abac42a387aa128d3314e802f
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2892 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2998 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2680 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2998 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0304 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0395 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0121 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0395 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Clinical__sl__10shot.txt b/csv_new/output/microsoft__MediPhi-Clinical__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..891a18854ac34e549e2ead223a4f5c50fa589fb3
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Clinical__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Clinical ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4021 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4036 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3990 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4036 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0748 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0829 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0674 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0742 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__en__0shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3006762bb6d519f644dfef9a8d9d38d675ad4a64
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1598 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0761 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2410 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1625 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2982 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1135 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4006 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3804 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0015 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0045 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0004 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0012 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__en__10shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8b6057e20590af0e6a0c136089c6200b588e3e02
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5216 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5357 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5227 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5063 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1719 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1432 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1888 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1836 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.2856 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2742 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3438 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2387 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.3436 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2162 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3002 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5144 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.4173 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4543 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3176 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4801 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__gr__0shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..033006f71175ad0e7ba4e4f9b2b91bd4b604c058
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1159 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1294 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0890 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1294 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1184 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0962 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0673 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1916 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__gr__10shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..116bb08deaa20d0ae0a961c362d4802b12d2add2
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2881 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2822 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2999 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2822 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0675 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0576 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0674 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0777 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__it__0shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..20300e67c6f1830ad29329449c805bf4c30291ed
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2023 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0867 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2484 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2717 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2623 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1712 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2896 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3261 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0013 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0038 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__it__10shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1ee5e41acdcd926d55b6f5cea9a2ac30593bf8ef
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5715 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5729 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5627 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5790 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2679 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2873 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2307 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2858 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1176 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1545 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1508 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0475 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.4763 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5617 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3270 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5404 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.3735 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3291 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4029 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3884 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__pl__0shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9c6bc50efa744a67b9f472d4fdd237432f562068
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1567 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1510 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1680 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1510 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2881 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2683 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3126 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2832 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__pl__10shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..72e10fb742c76990d605ee3a2c3c4ef35b670091
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4447 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4417 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4506 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4417 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2291 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1525 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2686 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2662 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__sk__0shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a3c42b94084e72cbff224a863d3f392fb1b26463
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1788 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1641 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2081 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1641 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1221 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1776 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0112 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1776 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__sk__10shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2d69cd7d7928f4a6c643567b723646b0ea9b62cc
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4226 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4327 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4023 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4327 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1313 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1070 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1395 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1473 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__sl__0shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7d1efab70337f7f4131da606243432c236e0a589
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1792 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1758 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1860 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1758 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1325 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1446 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1084 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1446 |   | 0 |
diff --git a/csv_new/output/microsoft__MediPhi-Instruct__sl__10shot.txt b/csv_new/output/microsoft__MediPhi-Instruct__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..679149a7bff8efd0e260ce4a2032de17a86487c6
--- /dev/null
+++ b/csv_new/output/microsoft__MediPhi-Instruct__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=microsoft/MediPhi-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3837 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3973 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3564 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3973 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1550 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1155 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1468 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2027 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__en__0shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..355a40ec5717374a0494d89524ed0311cbeb1e87
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2278 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2529 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2144 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2162 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3007 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3688 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3642 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1693 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0001 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0002 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..aebef1ac7d25fdce9740914ef074f918c87812a7
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4753 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4725 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4730 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4805 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3592 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2593 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4034 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4148 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1222 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0099 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1388 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2178 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0640 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0137 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0240 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1543 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.1376 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0143 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3929 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0058 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__gr__0shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0017f0d9afa0d196c05a399d60bf6b69bf801441
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1705 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1603 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1909 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1603 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0592 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0432 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0348 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0994 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b7051f2deb4230647fb3c9bbe0580a2fe84de6d8
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3548 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3498 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3648 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3498 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1862 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1055 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2343 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2189 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__it__0shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..286e5a1cdf69131a9fbaed275d245cfdd5eddaa7
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2433 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2788 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2030 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2481 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0561 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1382 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0163 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0140 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0001 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0002 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fb98c1bf90005a71030c1b7421225ad849521246
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5176 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5147 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5232 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5149 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3958 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3092 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4530 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4252 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0917 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0154 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1434 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1162 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.2360 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0163 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5695 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1222 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.1182 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0749 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2141 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0655 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__pl__0shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..77b2c7212ed034a17baff6431293cdb59c42592c
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2953 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3024 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2811 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3024 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1006 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0863 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1292 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0863 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0a825a34d1e4073d2ae5da7e22e86582b980912c
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4956 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4911 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5046 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4911 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3296 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3895 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3311 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2683 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sk__0shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6eeaac48bff7f356aa2168ad7755b879c69be13a
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2144 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2143 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2146 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2143 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0782 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0756 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0835 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0756 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..3952afe3c0317b08c9e06f3caff5ae01eb9aa4e2
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3951 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4029 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3794 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4029 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2132 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2155 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1948 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2293 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sl__0shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e0158432acf19d26d277f1b57deb076edd05514a
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1826 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1766 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1947 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1766 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1076 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0766 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1695 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0766 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c0a21e1f1e602067347fb6c7ae3af7c47c220eb9
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-7B-Instruct-v0.2__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-7B-Instruct-v0.2 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4194 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4204 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4174 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4204 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2018 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1990 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1950 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2115 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__en__0shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c1ebf3a368f471a2301474f0203334d11a2941ea
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2271 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2767 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2299 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1748 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3472 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3694 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3482 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3241 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0129 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0385 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0003 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0008 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0024 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f8bc04b0bceabfdca64b67b86f539f97c755754e
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5762 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5777 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5841 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5668 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4313 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3482 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5008 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4449 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.2524 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2499 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2718 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2356 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.4329 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3034 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4176 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5778 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.2251 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0905 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4043 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1804 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__gr__0shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a6f9827c9bfca0e3f2fe140aa5bb7f63e64551b9
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0717 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0732 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0687 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0732 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2326 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1575 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2117 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3287 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5c23dd02177855715602246da1ab145d4750a511
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5050 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5081 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4988 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5081 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2549 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2029 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2296 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3323 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__it__0shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0677e3e7254c92e03660f5c8741abe105b516256
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1960 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2792 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1772 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1316 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2365 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2849 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2384 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1861 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0010 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0029 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7e10ec1ce7e0e98bf9f8ccba81394cd7caa10c82
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6441 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6430 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6437 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6457 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3556 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2708 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4099 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3860 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1269 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1219 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1299 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1287 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.4612 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4449 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5659 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3728 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.1763 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0690 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3846 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0753 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__pl__0shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9a7d8e28e086f27eb9637e6ad992ba36f0de390a
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0468 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0483 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0439 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0483 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1823 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2123 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1686 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1661 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..824dea14ef26e4fd07539f5cdc57cb0d72d7a869
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5375 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5352 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5421 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5352 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1906 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1863 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1855 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2001 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sk__0shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6c38472f1e9cb2c7693144b17179e9dcfe88f159
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0738 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0685 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0844 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0685 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1596 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1696 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1396 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1696 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6e3e1b2ef67ae845db75704f414dd97a01bc4d8a
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5030 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5025 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5040 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5025 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1832 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1237 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2166 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2094 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sl__0shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..832ca83ee26ae570b1c4e4d781100383be94e147
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0842 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0861 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0805 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0861 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1905 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2309 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1096 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2309 |   | 0 |
diff --git a/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c2b96898a18579d2b16376fa5e4d1159ed4fc544
--- /dev/null
+++ b/csv_new/output/mistralai__Mistral-Nemo-Instruct-2407__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=mistralai/Mistral-Nemo-Instruct-2407 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5327 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5323 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5335 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5323 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1725 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1390 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2057 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1727 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__en__0shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c4860d37479d7974231c73818241e4a7e152fb8d
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2658 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2270 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2709 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2996 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3280 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2157 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3835 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3848 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0018 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0055 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__en__10shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f33e7a0b6986bcfa6bd99ca0830d837019911409
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__en__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5730 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5840 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5421 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5928 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5145 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4335 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5586 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5515 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1652 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2792 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1816 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0350 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.1081 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0708 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1658 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0877 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.1121 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0211 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2678 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0474 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__gr__0shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b7516c1517104e5be8c21dca55222faa85473fdc
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1585 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2130 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0495 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2130 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0506 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0401 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0250 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0867 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ba5c002a9264a1d56a51c72b4dc642ee87b8c605
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.3448 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3345 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3655 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3345 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3591 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3749 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3755 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3268 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__it__0shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a95413e5d32046ccb504362812df25c2eccc14db
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__it__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2011 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1261 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2327 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2444 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.1865 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2404 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1699 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1492 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__it__10shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ca2948f33281baa195f8a52124b6be256e2a66a1
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__it__10shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5625 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5821 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5432 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5622 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5226 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4622 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5458 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5597 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0406 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0721 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0340 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0157 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0543 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0186 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0668 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0774 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0074 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0105 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0044 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0073 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__pl__0shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..edaf86b247e1c20e0f7c4138f96ec19d5a571ae4
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2414 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2452 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2338 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2452 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0963 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1501 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0123 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.1264 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..068f9654a4427b28cd68c4493756660bf40e63a0
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4244 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4304 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4123 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4304 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5396 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5129 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5571 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5489 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__sk__0shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..6caf6d188e37dd8d852231914da9bca9053abf92
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2871 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2717 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3178 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2717 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0182 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0143 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0260 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0143 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dc5594c73257786e6edd1f8c852ad343d66e7f30
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4402 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4545 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4116 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4545 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4261 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3750 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4695 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4338 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__sl__0shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8d586cd4e31f21369f0d3c8873dba4eb0ce073b5
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2297 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2519 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1853 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2519 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.0050 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0047 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0058 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0047 |   | 0 |
diff --git a/csv_new/output/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt b/csv_new/output/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..999a14510d1ea855adb9835bc9235c19f1a60783
--- /dev/null
+++ b/csv_new/output/tiiuae__Falcon3-10B-Instruct__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=tiiuae/Falcon3-10B-Instruct ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.4050 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4121 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3909 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4121 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3133 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2323 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3012 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4063 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__en__0shot.txt b/csv_new/output/unsloth__phi-4__en__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b34e9e9d2a5ce204521f78108da9842620e59111
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__en__0shot.txt
@@ -0,0 +1,23 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0275 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0252 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0572 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.4090 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4022 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4219 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4030 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__en__10shot.txt b/csv_new/output/unsloth__phi-4__en__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ab7e4739278848d7b322fb7d27c1531b7ac36392
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__en__10shot.txt
@@ -0,0 +1,24 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5984 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6098 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5711 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6141 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5364 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4912 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5626 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5554 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.2878 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3841 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3289 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2191 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2191 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.4715 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4262 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5630 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.4254 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.4138 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3736 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5020 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3658 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__gr__0shot.txt b/csv_new/output/unsloth__phi-4__gr__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a17e8d575c703b7ccebc72cd8ff6aeca0397f1cc
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__gr__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2011 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2901 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.2208 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0925 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__gr__10shot.txt b/csv_new/output/unsloth__phi-4__gr__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1f48b7d4235602d80223abd071f8764d2a1a5bfc
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__gr__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5682 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5717 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5611 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5717 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5291 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.4935 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5261 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5678 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__it__0shot.txt b/csv_new/output/unsloth__phi-4__it__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..b20b4e4c6aae9c122b54207688196bec201d8b6c
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__it__0shot.txt
@@ -0,0 +1,24 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.1717 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1724 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3428 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3589 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3354 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3737 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3677 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.0000 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0000 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0000 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__it__10shot.txt b/csv_new/output/unsloth__phi-4__it__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..85053f2ab2acf2c1693a65d4e6852d38d1c1b6a3
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__it__10shot.txt
@@ -0,0 +1,24 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.6759 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.6647 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.6732 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.6897 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5705 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5608 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5820 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5688 |   | 0 |
+| - RML        |       |      |      |f1    |   | 0.1263 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.1759 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1675 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0810 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0810 |   | 0 |
+| - DIA        |       |      |      |f1    |   | 0.5691 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5835 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5676 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5564 |   | 0 |
+| - HIS        |       |      |      |f1    |   | 0.4656 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5102 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5006 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3859 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__pl__0shot.txt b/csv_new/output/unsloth__phi-4__pl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..50d734915f57e7a4713da8e3d4cb6ae9a653a9a1
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__pl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0279 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0236 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0366 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0236 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3814 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3799 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3829 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3813 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__pl__10shot.txt b/csv_new/output/unsloth__phi-4__pl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..13c70462fcbbc4333d7e40ab047995e60782311c
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__pl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5474 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5549 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5324 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5549 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5718 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5423 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5760 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5972 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__sk__0shot.txt b/csv_new/output/unsloth__phi-4__sk__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..609bfee5abd16055de50dbbc8a5b5e54bf628dde
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__sk__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.0567 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.0316 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1070 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.0316 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.3277 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3252 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.3326 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3252 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__sk__10shot.txt b/csv_new/output/unsloth__phi-4__sk__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e55439f603a7ee43ebc4fb2b6489d94a69f17b05
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__sk__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5524 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5561 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5449 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5561 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5214 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5106 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.4994 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5541 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__sl__0shot.txt b/csv_new/output/unsloth__phi-4__sl__0shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..29578b4d5063f990ad13a10dcac7d69a04c24725
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__sl__0shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 0, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.2241 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.2870 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.0981 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.2870 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.2721 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.3209 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.1744 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.3209 |   | 0 |
diff --git a/csv_new/output/unsloth__phi-4__sl__10shot.txt b/csv_new/output/unsloth__phi-4__sl__10shot.txt
new file mode 100644
index 0000000000000000000000000000000000000000..debd951319f9e20f02aade8491ff82efa207384f
--- /dev/null
+++ b/csv_new/output/unsloth__phi-4__sl__10shot.txt
@@ -0,0 +1,11 @@
+hf (pretrained=unsloth/phi-4 ), num_fewshot: 10, batch_size: 1
+|Tasks  |Version|Filter|n-shot|Metric|    |Value |   |Stderr|
+|-------|-------|------|------|------|----|------|---|------|
+| - NER        |       |      |      |f1    |   | 0.5577 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5586 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5558 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5586 |   | 0 |
+| - RE        |       |      |      |f1    |   | 0.5309 |   |0 |
+|   - p1  |       |      |      |f1    |   | 0.5117 |   | 0 |
+|   - p2  |       |      |      |f1    |   | 0.5232 |   | 0 |
+|   - p3  |       |      |      |f1    |   | 0.5579 |   | 0 |
diff --git a/e3c_llm_requests/.gitattributes b/e3c_llm_requests/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..1ef325f1b111266a6b26e0196871bd78baa8c2f3
--- /dev/null
+++ b/e3c_llm_requests/.gitattributes
@@ -0,0 +1,59 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mds filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text
+# Video files - compressed
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.webm filter=lfs diff=lfs merge=lfs -text
diff --git a/e3c_llm_requests/Henrychur/MMed-Llama-3-8B.json b/e3c_llm_requests/Henrychur/MMed-Llama-3-8B.json
new file mode 100644
index 0000000000000000000000000000000000000000..86b0019fb62092325cdc79c7eb8218aed3bad09f
--- /dev/null
+++ b/e3c_llm_requests/Henrychur/MMed-Llama-3-8B.json
@@ -0,0 +1,8 @@
+{
+    "model": "Henrychur/MMed-Llama-3-8B",
+    "base_model": "LlamaForCausalLM",
+    "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+    "submitted_time": "2024-05-22 09:17:24+00:00",
+    "num_params_billion": null,
+    "language": "en_zh_ja_fr_ru_es"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/HiTZ/Medical-mT5-large.json b/e3c_llm_requests/HiTZ/Medical-mT5-large.json
new file mode 100644
index 0000000000000000000000000000000000000000..42da6bb5dbb7d478648d28988f549c4c3e885a7c
--- /dev/null
+++ b/e3c_llm_requests/HiTZ/Medical-mT5-large.json
@@ -0,0 +1,8 @@
+{
+    "model": "HiTZ/Medical-mT5-large",
+    "base_model": "MT5ForConditionalGeneration",
+    "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+    "submitted_time": "2023-10-31 15:15:15+00:00",
+    "num_params_billion": null,
+    "language": "en_es_fr_it"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/Qwen/Qwen2.5-14B-Instruct-1M.json b/e3c_llm_requests/Qwen/Qwen2.5-14B-Instruct-1M.json
new file mode 100644
index 0000000000000000000000000000000000000000..e4460926779e971e3317af33665cf9278980c10d
--- /dev/null
+++ b/e3c_llm_requests/Qwen/Qwen2.5-14B-Instruct-1M.json
@@ -0,0 +1,8 @@
+{
+    "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+    "base_model": "Qwen2ForCausalLM",
+    "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+    "submitted_time": "2025-01-23 13:23:24+00:00",
+    "num_params_billion": 14.770033664,
+    "language": "en"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/Qwen/Qwen2.5-32B-Instruct.json b/e3c_llm_requests/Qwen/Qwen2.5-32B-Instruct.json
new file mode 100644
index 0000000000000000000000000000000000000000..3bfb3ff4c28797c9aad9070719f798119c2784e3
--- /dev/null
+++ b/e3c_llm_requests/Qwen/Qwen2.5-32B-Instruct.json
@@ -0,0 +1,8 @@
+{
+    "model": "Qwen/Qwen2.5-32B-Instruct",
+    "base_model": "Qwen2ForCausalLM",
+    "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+    "submitted_time": "2024-09-17 04:17:55+00:00",
+    "num_params_billion": 32.763876352,
+    "language": "en"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/Qwen/Qwen3-30B-A3B-Instruct-2507.json b/e3c_llm_requests/Qwen/Qwen3-30B-A3B-Instruct-2507.json
new file mode 100644
index 0000000000000000000000000000000000000000..8c19206d89e4c8cc448322bcf562cdeea4f686ba
--- /dev/null
+++ b/e3c_llm_requests/Qwen/Qwen3-30B-A3B-Instruct-2507.json
@@ -0,0 +1,8 @@
+{
+    "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+    "base_model": "Qwen3MoeForCausalLM",
+    "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+    "submitted_time": "2025-07-28 07:31:27+00:00",
+    "num_params_billion": 30.532122624,
+    "language": ""
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B-checkpoint.json b/e3c_llm_requests/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B-checkpoint.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a574a6dcb81e49041fa0f592ee86930e1b43847
--- /dev/null
+++ b/e3c_llm_requests/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B-checkpoint.json
@@ -0,0 +1,8 @@
+{
+    "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+    "base_model": "Qwen2ForCausalLM",
+    "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+    "submitted_time": "2025-01-20 09:19:00+00:00",
+    "num_params_billion": 32.763876352,
+    "language": ""
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B.json b/e3c_llm_requests/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a574a6dcb81e49041fa0f592ee86930e1b43847
--- /dev/null
+++ b/e3c_llm_requests/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B.json
@@ -0,0 +1,8 @@
+{
+    "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+    "base_model": "Qwen2ForCausalLM",
+    "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+    "submitted_time": "2025-01-20 09:19:00+00:00",
+    "num_params_billion": 32.763876352,
+    "language": ""
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/epfl-llm/meditron-7b.json b/e3c_llm_requests/epfl-llm/meditron-7b.json
new file mode 100644
index 0000000000000000000000000000000000000000..773ee8fbfdfa8623fdc11b71a79ef5122ea682b7
--- /dev/null
+++ b/e3c_llm_requests/epfl-llm/meditron-7b.json
@@ -0,0 +1,8 @@
+{
+    "model": "epfl-llm/meditron-7b",
+    "base_model": "LlamaForCausalLM",
+    "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+    "submitted_time": "2023-11-08 16:03:23+00:00",
+    "num_params_billion": 6.73855488,
+    "language": "en"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/google/gemma-2-9b-it.json b/e3c_llm_requests/google/gemma-2-9b-it.json
new file mode 100644
index 0000000000000000000000000000000000000000..bc6b860d3bd567948054e01f7630043dd7220af4
--- /dev/null
+++ b/e3c_llm_requests/google/gemma-2-9b-it.json
@@ -0,0 +1,8 @@
+{
+    "model": "google/gemma-2-9b-it",
+    "base_model": "Gemma2ForCausalLM",
+    "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+    "submitted_time": "2024-06-24 08:05:41+00:00",
+    "num_params_billion": 9.241705984,
+    "language": ""
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/google/gemma-3-27b-it.json b/e3c_llm_requests/google/gemma-3-27b-it.json
new file mode 100644
index 0000000000000000000000000000000000000000..0cbbf8fd214db1aa63941c6685829be3c1ef47a6
--- /dev/null
+++ b/e3c_llm_requests/google/gemma-3-27b-it.json
@@ -0,0 +1,8 @@
+{
+    "model": "google/gemma-3-27b-it",
+    "base_model": "Gemma3ForConditionalGeneration",
+    "revision": "005ad3404e59d6023443cb575daa05336842228a",
+    "submitted_time": "2025-03-01 19:10:19+00:00",
+    "num_params_billion": 27.43240664,
+    "language": ""
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/google/medgemma-27b-text-it.json b/e3c_llm_requests/google/medgemma-27b-text-it.json
new file mode 100644
index 0000000000000000000000000000000000000000..0eadf57aa2ee6563dba193b1a1fd16ebc0362a65
--- /dev/null
+++ b/e3c_llm_requests/google/medgemma-27b-text-it.json
@@ -0,0 +1,8 @@
+{
+    "model": "google/medgemma-27b-text-it",
+    "base_model": "Gemma3ForCausalLM",
+    "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+    "submitted_time": "2025-05-19 20:53:04+00:00",
+    "num_params_billion": 27.00900224,
+    "language": ""
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/google/medgemma-4b-it.json b/e3c_llm_requests/google/medgemma-4b-it.json
new file mode 100644
index 0000000000000000000000000000000000000000..7c6a467cac7dee24972df120b27cdd729a1e75fd
--- /dev/null
+++ b/e3c_llm_requests/google/medgemma-4b-it.json
@@ -0,0 +1,8 @@
+{
+    "model": "google/medgemma-4b-it",
+    "base_model": "Gemma3ForConditionalGeneration",
+    "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+    "submitted_time": "2025-05-19 20:52:44+00:00",
+    "num_params_billion": 4.300079472,
+    "language": ""
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/meta-llama/.ipynb_checkpoints/Llama-3.2-1B-Instruct-checkpoint.json b/e3c_llm_requests/meta-llama/.ipynb_checkpoints/Llama-3.2-1B-Instruct-checkpoint.json
new file mode 100644
index 0000000000000000000000000000000000000000..dd0dab56bad582a995b770ac81c21b4ad4954553
--- /dev/null
+++ b/e3c_llm_requests/meta-llama/.ipynb_checkpoints/Llama-3.2-1B-Instruct-checkpoint.json
@@ -0,0 +1,8 @@
+{
+    "model": "meta-llama/Llama-3.2-1B-Instruct",
+    "base_model": "LlamaForCausalLM",
+    "revision": "9213176726f574b556790deb65791e0c5aa438b6",
+    "submitted_time": "2024-09-18 15:12:47+00:00",
+    "num_params_billion": 1.2358144,
+    "language": "en_de_fr_it_pt_hi_es_th"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/meta-llama/Llama-3.2-1B-Instruct.json b/e3c_llm_requests/meta-llama/Llama-3.2-1B-Instruct.json
new file mode 100644
index 0000000000000000000000000000000000000000..dd0dab56bad582a995b770ac81c21b4ad4954553
--- /dev/null
+++ b/e3c_llm_requests/meta-llama/Llama-3.2-1B-Instruct.json
@@ -0,0 +1,8 @@
+{
+    "model": "meta-llama/Llama-3.2-1B-Instruct",
+    "base_model": "LlamaForCausalLM",
+    "revision": "9213176726f574b556790deb65791e0c5aa438b6",
+    "submitted_time": "2024-09-18 15:12:47+00:00",
+    "num_params_billion": 1.2358144,
+    "language": "en_de_fr_it_pt_hi_es_th"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/microsoft/MediPhi-Clinical.json b/e3c_llm_requests/microsoft/MediPhi-Clinical.json
new file mode 100644
index 0000000000000000000000000000000000000000..24031b2427e47fc919f0fda9c2570e2c55afafa6
--- /dev/null
+++ b/e3c_llm_requests/microsoft/MediPhi-Clinical.json
@@ -0,0 +1,8 @@
+{
+    "model": "microsoft/MediPhi-Clinical",
+    "base_model": "Phi3ForCausalLM",
+    "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+    "submitted_time": "2025-05-29 20:40:05+00:00",
+    "num_params_billion": 3.821079552,
+    "language": "en"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/microsoft/MediPhi-Instruct.json b/e3c_llm_requests/microsoft/MediPhi-Instruct.json
new file mode 100644
index 0000000000000000000000000000000000000000..fd36894ce7cc6bbb938d3639a927ebb3c277254d
--- /dev/null
+++ b/e3c_llm_requests/microsoft/MediPhi-Instruct.json
@@ -0,0 +1,8 @@
+{
+    "model": "microsoft/MediPhi-Instruct",
+    "base_model": "Phi3ForCausalLM",
+    "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+    "submitted_time": "2025-07-11 19:28:15+00:00",
+    "num_params_billion": 3.821079552,
+    "language": "en"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/mistralai/Mistral-7B-Instruct-v0.2.json b/e3c_llm_requests/mistralai/Mistral-7B-Instruct-v0.2.json
new file mode 100644
index 0000000000000000000000000000000000000000..b36579fb429f3b744a46c6a84fed781411b85cc7
--- /dev/null
+++ b/e3c_llm_requests/mistralai/Mistral-7B-Instruct-v0.2.json
@@ -0,0 +1,8 @@
+{
+    "model": "mistralai/Mistral-7B-Instruct-v0.2",
+    "base_model": "MistralForCausalLM",
+    "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+    "submitted_time": "2023-12-11 13:18:44+00:00",
+    "num_params_billion": 7.241732096,
+    "language": ""
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/mistralai/Mistral-Nemo-Instruct-2407.json b/e3c_llm_requests/mistralai/Mistral-Nemo-Instruct-2407.json
new file mode 100644
index 0000000000000000000000000000000000000000..c46a7da06e59d841593280fb14969e4182c18d95
--- /dev/null
+++ b/e3c_llm_requests/mistralai/Mistral-Nemo-Instruct-2407.json
@@ -0,0 +1,8 @@
+{
+    "model": "mistralai/Mistral-Nemo-Instruct-2407",
+    "base_model": "MistralForCausalLM",
+    "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+    "submitted_time": "2024-07-17 17:26:49+00:00",
+    "num_params_billion": 12.2477824,
+    "language": "en_fr_de_es_it_pt_ru_zh_ja"
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/tiiuae/Falcon3-10B-Instruct.json b/e3c_llm_requests/tiiuae/Falcon3-10B-Instruct.json
new file mode 100644
index 0000000000000000000000000000000000000000..6fb21c1511b3e96748b3e779985cbcbbfca44186
--- /dev/null
+++ b/e3c_llm_requests/tiiuae/Falcon3-10B-Instruct.json
@@ -0,0 +1,8 @@
+{
+    "model": "tiiuae/Falcon3-10B-Instruct",
+    "base_model": "LlamaForCausalLM",
+    "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+    "submitted_time": "2024-12-14 05:17:25+00:00",
+    "num_params_billion": 10.30565376,
+    "language": ""
+}
\ No newline at end of file
diff --git a/e3c_llm_requests/unsloth/phi-4.json b/e3c_llm_requests/unsloth/phi-4.json
new file mode 100644
index 0000000000000000000000000000000000000000..1aacd86f19c5a378deb37004884fd52f579b6daf
--- /dev/null
+++ b/e3c_llm_requests/unsloth/phi-4.json
@@ -0,0 +1,8 @@
+{
+    "model": "unsloth/phi-4",
+    "base_model": "LlamaForCausalLM",
+    "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+    "submitted_time": "2025-01-08 21:56:16+00:00",
+    "num_params_billion": 14.6595072,
+    "language": "en"
+}
\ No newline at end of file
diff --git a/e3c_llm_results/.gitattributes b/e3c_llm_results/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..1ef325f1b111266a6b26e0196871bd78baa8c2f3
--- /dev/null
+++ b/e3c_llm_results/.gitattributes
@@ -0,0 +1,59 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.lz4 filter=lfs diff=lfs merge=lfs -text
+*.mds filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+# Audio files - uncompressed
+*.pcm filter=lfs diff=lfs merge=lfs -text
+*.sam filter=lfs diff=lfs merge=lfs -text
+*.raw filter=lfs diff=lfs merge=lfs -text
+# Audio files - compressed
+*.aac filter=lfs diff=lfs merge=lfs -text
+*.flac filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.ogg filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+# Image files - uncompressed
+*.bmp filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.tiff filter=lfs diff=lfs merge=lfs -text
+# Image files - compressed
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text
+# Video files - compressed
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.webm filter=lfs diff=lfs merge=lfs -text
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_EN.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..fb662fe1d53a56922d8a772b9baee55d2a7de480
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 8.3819368,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 6.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.41,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 10.83,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 9.176666666666668,
+            "best_prompt": 10.83,
+            "prompt_id": "p3",
+            "CPS": 10.650944,
+            "is_dummy": false,
+            "std_accuracy": 2.5087314191306596
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.870000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 33.94,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 31.31,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 26.040000000000003,
+            "best_prompt": 33.94,
+            "prompt_id": "p2",
+            "CPS": 31.25874,
+            "is_dummy": false,
+            "std_accuracy": 11.481110573459345
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_GR.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..491615432b2fe12f3d16cd1efedc04c27b0aef0e
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 8.314364166666667,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 6.2,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 5.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 6.2,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.1066666666666665,
+            "best_prompt": 6.2,
+            "prompt_id": "p1",
+            "CPS": 6.194213333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.16165807537309534
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 10.17,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 5.06,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 10.65,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 8.626666666666667,
+            "best_prompt": 10.65,
+            "prompt_id": "p3",
+            "CPS": 10.434515000000001,
+            "is_dummy": false,
+            "std_accuracy": 3.098133846904187
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_IT.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..06cfdebdb9527ec4c4ed146ef72adfae1df4bfed
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 5.013616333333333,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 4.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 3.84,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 4.16,
+            "best_prompt": 4.35,
+            "prompt_id": "p1",
+            "CPS": 4.341735,
+            "is_dummy": false,
+            "std_accuracy": 0.27874719729532704
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 6.72,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 22.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 13.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 14.126666666666665,
+            "best_prompt": 22.66,
+            "prompt_id": "p2",
+            "CPS": 20.726346666666664,
+            "is_dummy": false,
+            "std_accuracy": 8.02950392822205
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_PL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..b5e867f0f3e1ee3e826d798481bb1d32a6350762
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 8.100043833333334,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 3.7900000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.7800000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 3.7900000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 3.786666666666667,
+            "best_prompt": 3.7900000000000005,
+            "prompt_id": "p1",
+            "CPS": 3.7898736666666673,
+            "is_dummy": false,
+            "std_accuracy": 0.005773502691896391
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 6.02,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 12.93,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.779999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 8.909999999999998,
+            "best_prompt": 12.93,
+            "prompt_id": "p2",
+            "CPS": 12.410214,
+            "is_dummy": false,
+            "std_accuracy": 3.590919102402615
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SK.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ce0634530fb6e442ffd9a9b74d168b53503e39b
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 3.3197085,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 3.8699999999999997,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.8,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 3.8699999999999997,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 3.8466666666666662,
+            "best_prompt": 3.8699999999999997,
+            "prompt_id": "p1",
+            "CPS": 3.8690969999999996,
+            "is_dummy": false,
+            "std_accuracy": 0.04041451884327371
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 2.8000000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 1.21,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 1.74,
+            "best_prompt": 2.8000000000000003,
+            "prompt_id": "p2",
+            "CPS": 2.77032,
+            "is_dummy": false,
+            "std_accuracy": 0.9179869280115052
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..3a241f4dce9440551f68e18e8d3c78b6d08218b2
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 11.184996000000002,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 4.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.5600000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 4.29,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 4.38,
+            "best_prompt": 4.5600000000000005,
+            "prompt_id": "p2",
+            "CPS": 4.551792000000001,
+            "is_dummy": false,
+            "std_accuracy": 0.1558845726811992
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 9.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 19.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 9.67,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.780000000000001,
+            "best_prompt": 19.0,
+            "prompt_id": "p2",
+            "CPS": 17.8182,
+            "is_dummy": false,
+            "std_accuracy": 5.3866780115392086
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..9f1e3664b98c96628385befb95c9fa806fce64c4
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 17.1183752,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.89,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 22.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 19.939999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 21.419999999999998,
+            "best_prompt": 22.43,
+            "prompt_id": "p2",
+            "CPS": 22.203457,
+            "is_dummy": false,
+            "std_accuracy": 1.309847319346802
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.89,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.68,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.85,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 16.80666666666667,
+            "best_prompt": 21.85,
+            "prompt_id": "p3",
+            "CPS": 20.74803166666667,
+            "is_dummy": false,
+            "std_accuracy": 4.981208019480148
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 18.25,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.12,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 19.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.790000000000003,
+            "best_prompt": 19.0,
+            "prompt_id": "p3",
+            "CPS": 18.7701,
+            "is_dummy": false,
+            "std_accuracy": 1.4940883508012497
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.15,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 14.16,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 6.68,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 14.996666666666668,
+            "best_prompt": 24.15,
+            "prompt_id": "p1",
+            "CPS": 21.93947,
+            "is_dummy": false,
+            "std_accuracy": 8.765000475375533
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.78,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.6799999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 1.94,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 1.4666666666666668,
+            "best_prompt": 1.94,
+            "prompt_id": "p3",
+            "CPS": 1.9308173333333332,
+            "is_dummy": false,
+            "std_accuracy": 0.6859543230662909
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..318e4c45e7293126c7d154cba320ba1e37fd24e0
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 13.395712833333334,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 16.669999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.89,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.669999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 14.743333333333332,
+            "best_prompt": 16.669999999999998,
+            "prompt_id": "p1",
+            "CPS": 16.348824666666665,
+            "is_dummy": false,
+            "std_accuracy": 3.337084555916036
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.530000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 10.36,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 9.700000000000001,
+            "best_prompt": 10.530000000000001,
+            "prompt_id": "p2",
+            "CPS": 10.442601000000002,
+            "is_dummy": false,
+            "std_accuracy": 1.2931743888586718
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..b961e82268fc6daaae9860c15c42c9d8cf3b1a25
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 13.608121199999996,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.379999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 37.53333333333333,
+            "best_prompt": 40.23,
+            "prompt_id": "p2",
+            "CPS": 39.145131,
+            "is_dummy": false,
+            "std_accuracy": 3.9575286901465803
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 9.77,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 12.26,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.89,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 13.306666666666667,
+            "best_prompt": 17.89,
+            "prompt_id": "p3",
+            "CPS": 17.070041666666665,
+            "is_dummy": false,
+            "std_accuracy": 4.159955929253739
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 11.19,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 11.899999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.433333333333332,
+            "best_prompt": 11.899999999999999,
+            "prompt_id": "p3",
+            "CPS": 11.725466666666664,
+            "is_dummy": false,
+            "std_accuracy": 1.9579155582744954
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.02,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.08,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.06666666666666667,
+            "best_prompt": 0.1,
+            "prompt_id": "p1",
+            "CPS": 0.09996666666666668,
+            "is_dummy": false,
+            "std_accuracy": 0.041633319989322654
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..f6f07b96eaaa37e2f66017b9ae4f8885302efa01
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 25.15700466666667,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 39.160000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.92,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 39.66666666666667,
+            "best_prompt": 39.92,
+            "prompt_id": "p1",
+            "CPS": 39.81886933333334,
+            "is_dummy": false,
+            "std_accuracy": 0.43878620458411444
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 9.98,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.549999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 9.56,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.030000000000001,
+            "best_prompt": 10.549999999999999,
+            "prompt_id": "p2",
+            "CPS": 10.49514,
+            "is_dummy": false,
+            "std_accuracy": 0.49689032995219296
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..701059657ff8182b34ce2e870f3372bb13d56e32
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 23.0736205,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 34.44,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 36.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 34.44,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.06666666666666,
+            "best_prompt": 36.32,
+            "prompt_id": "p2",
+            "CPS": 35.864789333333334,
+            "is_dummy": false,
+            "std_accuracy": 1.085418506076498
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.340000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.45,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.75,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 8.846666666666666,
+            "best_prompt": 10.45,
+            "prompt_id": "p2",
+            "CPS": 10.282451666666665,
+            "is_dummy": false,
+            "std_accuracy": 1.557251852891282
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..af0a9c3c5710908052425d2c7cf6292fa1065084
--- /dev/null
+++ b/e3c_llm_results/Henrychur/MMed-Llama-3-8B_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 23.493655333333336,
+    "config": {
+        "model_name": "Henrychur/MMed-Llama-3-8B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "Henrychur/MMed-Llama-3-8B",
+        "base_model": "LlamaForCausalLM",
+        "revision": "6c3057bb49ac499970eb2891daaef9b5c14f6943",
+        "submitted_time": "2024-05-22 09:17:24+00:00",
+        "num_params_billion": null,
+        "language": "en_zh_ja_fr_ru_es"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 35.58,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.45,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 35.58,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 37.20333333333333,
+            "best_prompt": 40.45,
+            "prompt_id": "p2",
+            "CPS": 39.136723333333336,
+            "is_dummy": false,
+            "std_accuracy": 2.81169581095348
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.870000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 7.8100000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.19,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.623333333333334,
+            "best_prompt": 7.870000000000001,
+            "prompt_id": "p1",
+            "CPS": 7.850587333333335,
+            "is_dummy": false,
+            "std_accuracy": 0.3764748774265469
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_EN.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..4f1bfb1013bf5d413d06d6315c1d7fc213da2cdd
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 1.8120066666666665,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 9.4,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.3099999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 4.64,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.783333333333334,
+            "best_prompt": 9.4,
+            "prompt_id": "p1",
+            "CPS": 9.060033333333333,
+            "is_dummy": false,
+            "std_accuracy": 3.2019421189854973
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_GR.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..d57e9ad3ed157cd7579b7c6862d4ad4077c8b5f9
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 4.256631333333333,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.59,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 5.91,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.59,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.696666666666666,
+            "best_prompt": 8.59,
+            "prompt_id": "p1",
+            "CPS": 8.513262666666666,
+            "is_dummy": false,
+            "std_accuracy": 1.547298721428197
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_IT.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..36793b360e4d99bcd93ffff1cb624f77261c185f
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_IT.json
@@ -0,0 +1,151 @@
+{
+    "average_CPS": 1.820189333333333,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 9.2,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.470000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 8.123333333333333,
+            "best_prompt": 9.2,
+            "prompt_id": "p2",
+            "CPS": 9.100946666666665,
+            "is_dummy": false,
+            "std_accuracy": 0.9394856748952227
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p2",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_PL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..23ab1afe25cc67c6d5d3da4ad7e79858ef67b9cf
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 2.1520960000000002,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 2.44,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.36,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 2.44,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 3.08,
+            "best_prompt": 4.36,
+            "prompt_id": "p2",
+            "CPS": 4.3041920000000005,
+            "is_dummy": false,
+            "std_accuracy": 1.1085125168440817
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_SK.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..b8709249ee55ecec1e413d17c8935c718ea1900e
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 4.3259333333333325,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.799999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.799999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.116666666666666,
+            "best_prompt": 8.799999999999999,
+            "prompt_id": "p1",
+            "CPS": 8.651866666666665,
+            "is_dummy": false,
+            "std_accuracy": 2.9156188594076093
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_0_SL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..72e6cb4c2d5067d737a04008a797598d0a329f51
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 3.859359,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.7700000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 5.79,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.7700000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.11,
+            "best_prompt": 7.7700000000000005,
+            "prompt_id": "p1",
+            "CPS": 7.718718,
+            "is_dummy": false,
+            "std_accuracy": 1.1431535329954592
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbc46eb7beebd4583e1e8d0b8014ea79446bd90d
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 2.900183933333333,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.15,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 14.149999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 13.22,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 13.173333333333332,
+            "best_prompt": 14.149999999999999,
+            "prompt_id": "p2",
+            "CPS": 14.011801666666665,
+            "is_dummy": false,
+            "std_accuracy": 1.0008163334665015
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.27999999999999997,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.16,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.49,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.31,
+            "best_prompt": 0.49,
+            "prompt_id": "p3",
+            "CPS": 0.489118,
+            "is_dummy": false,
+            "std_accuracy": 0.16703293088490065
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..23f2906a587c4ccdd8963e2fcbb3cec004da6db8
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 7.3897435,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 14.549999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 14.34,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.549999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 14.479999999999999,
+            "best_prompt": 14.549999999999999,
+            "prompt_id": "p1",
+            "CPS": 14.539814999999999,
+            "is_dummy": false,
+            "std_accuracy": 0.12124355652982088
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.06999999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.10333333333333333,
+            "best_prompt": 0.24,
+            "prompt_id": "p1",
+            "CPS": 0.239672,
+            "is_dummy": false,
+            "std_accuracy": 0.12342339054382412
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..bb39b345327beca90e2e94c31f66b7d1c084ac61
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 3.6471789333333335,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 16.16,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 17.740000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.900000000000002,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 16.933333333333337,
+            "best_prompt": 17.740000000000002,
+            "prompt_id": "p2",
+            "CPS": 17.596897333333335,
+            "is_dummy": false,
+            "std_accuracy": 0.7905272502155348
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.35000000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.64,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.45999999999999996,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.48333333333333334,
+            "best_prompt": 0.64,
+            "prompt_id": "p2",
+            "CPS": 0.6389973333333333,
+            "is_dummy": false,
+            "std_accuracy": 0.14640127503998498
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..bcadc3afeb7e410f3f540b229a5d97ed1ceeddd1
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 7.915078666666666,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 15.479999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 15.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 15.159999999999998,
+            "best_prompt": 15.479999999999999,
+            "prompt_id": "p2",
+            "CPS": 15.430463999999999,
+            "is_dummy": false,
+            "std_accuracy": 0.2771281292110196
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.4,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.22999999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.33999999999999997,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.3233333333333333,
+            "best_prompt": 0.4,
+            "prompt_id": "p1",
+            "CPS": 0.39969333333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.08621678104251711
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..624fa817e41e538d20238d2ac58d4a7ee9d264fe
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 7.5838598333333325,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 14.85,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.600000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.85,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 14.433333333333335,
+            "best_prompt": 14.85,
+            "prompt_id": "p1",
+            "CPS": 14.788124999999999,
+            "is_dummy": false,
+            "std_accuracy": 0.7216878364870312
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.38,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.2,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.2733333333333334,
+            "best_prompt": 0.38,
+            "prompt_id": "p1",
+            "CPS": 0.3795946666666667,
+            "is_dummy": false,
+            "std_accuracy": 0.09451631252505216
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ab112253b8cabcc3002db3f6700835cdeeb1fc0
--- /dev/null
+++ b/e3c_llm_results/HiTZ/Medical-mT5-large_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 7.7788705,
+    "config": {
+        "model_name": "HiTZ/Medical-mT5-large",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "HiTZ/Medical-mT5-large",
+        "base_model": "MT5ForConditionalGeneration",
+        "revision": "e8ae7101f0ab1ed5b8add8846e44a2d39f6e2c47",
+        "submitted_time": "2023-10-31 15:15:15+00:00",
+        "num_params_billion": null,
+        "language": "en_es_fr_it"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 14.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.25,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.7,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 14.216666666666667,
+            "best_prompt": 14.7,
+            "prompt_id": "p1",
+            "CPS": 14.62895,
+            "is_dummy": false,
+            "std_accuracy": 0.8371578903249569
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.73,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.74,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.9299999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.7999999999999999,
+            "best_prompt": 0.9299999999999999,
+            "prompt_id": "p3",
+            "CPS": 0.9287909999999999,
+            "is_dummy": false,
+            "std_accuracy": 0.11269427669584642
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_EN.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..47893dbc04c87db06236301977b64f06d4b96188
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 14.4829312,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 34.25,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 11.81,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 28.93,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 24.99666666666667,
+            "best_prompt": 34.25,
+            "prompt_id": "p1",
+            "CPS": 31.08073333333333,
+            "is_dummy": false,
+            "std_accuracy": 11.725686902409313
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.349999999999994,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 39.17,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.72,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.74666666666666,
+            "best_prompt": 41.72,
+            "prompt_id": "p3",
+            "CPS": 41.31392533333333,
+            "is_dummy": false,
+            "std_accuracy": 1.3779090439260953
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.02,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.006666666666666667,
+            "best_prompt": 0.02,
+            "prompt_id": "p3",
+            "CPS": 0.019997333333333336,
+            "is_dummy": false,
+            "std_accuracy": 0.011547005383792516
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_GR.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..5860dc4bf439f917359fd7140fe23fc615698fa1
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 27.333585333333332,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 13.389999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 11.91,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 13.389999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.896666666666667,
+            "best_prompt": 13.389999999999999,
+            "prompt_id": "p1",
+            "CPS": 13.323942666666666,
+            "is_dummy": false,
+            "std_accuracy": 0.8544783984006453
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.96,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 42.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.1,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 39.57333333333333,
+            "best_prompt": 42.66,
+            "prompt_id": "p2",
+            "CPS": 41.343227999999996,
+            "is_dummy": false,
+            "std_accuracy": 2.6740481172434647
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_IT.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..eeb003c099aa607d64e75c1b2a9138b41ca4667f
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 13.000253,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 17.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 22.34,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 21.36666666666667,
+            "best_prompt": 24.67,
+            "prompt_id": "p1",
+            "CPS": 23.855067666666667,
+            "is_dummy": false,
+            "std_accuracy": 3.8826065128124094
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.730000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 37.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.06,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.163333333333334,
+            "best_prompt": 41.730000000000004,
+            "prompt_id": "p1",
+            "CPS": 41.07623,
+            "is_dummy": false,
+            "std_accuracy": 2.1594520910020982
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.06999999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.02333333333333333,
+            "best_prompt": 0.06999999999999999,
+            "prompt_id": "p1",
+            "CPS": 0.06996733333333333,
+            "is_dummy": false,
+            "std_accuracy": 0.0404145188432738
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_PL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..6eddabba6de6b14d519a9ed4660583d3bfc38d52
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 24.780516499999997,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 6.97,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.64,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 6.97,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.859999999999999,
+            "best_prompt": 6.97,
+            "prompt_id": "p1",
+            "CPS": 6.892633,
+            "is_dummy": false,
+            "std_accuracy": 1.9225763964014535
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 38.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 44.64,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.223333333333336,
+            "best_prompt": 44.64,
+            "prompt_id": "p2",
+            "CPS": 42.6684,
+            "is_dummy": false,
+            "std_accuracy": 3.8249749454517126
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SK.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..7a3ad59220039bb908bd9284b34d7c0cce9aa1b1
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 27.026387333333332,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.2,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.26,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 12.2,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 9.553333333333333,
+            "best_prompt": 12.2,
+            "prompt_id": "p1",
+            "CPS": 11.877106666666666,
+            "is_dummy": false,
+            "std_accuracy": 4.584161137365628
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 40.27,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 42.94,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.27,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.160000000000004,
+            "best_prompt": 42.94,
+            "prompt_id": "p2",
+            "CPS": 42.175668,
+            "is_dummy": false,
+            "std_accuracy": 1.5415252187362976
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..55f1eea8c1a13da17cf4d2e14ff0d7d6629d2664
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 37.45460366666667,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 23.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.1,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 33.983333333333334,
+            "best_prompt": 39.1,
+            "prompt_id": "p1",
+            "CPS": 37.099383333333336,
+            "is_dummy": false,
+            "std_accuracy": 8.862326632060757
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 37.830000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 37.75,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 37.77666666666667,
+            "best_prompt": 37.830000000000005,
+            "prompt_id": "p2",
+            "CPS": 37.809824000000006,
+            "is_dummy": false,
+            "std_accuracy": 0.04618802153517318
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..30c36e51a607150b40384df22d1709e46e28567e
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 43.13491793333334,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 60.91,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.46,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 62.43,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.93333333333334,
+            "best_prompt": 62.43,
+            "prompt_id": "p3",
+            "CPS": 60.871331000000005,
+            "is_dummy": false,
+            "std_accuracy": 3.1025204807274562
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 63.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 60.25,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.33,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 61.633333333333326,
+            "best_prompt": 63.32,
+            "prompt_id": "p1",
+            "CPS": 62.25200266666666,
+            "is_dummy": false,
+            "std_accuracy": 1.5573160672558843
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 32.22,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 31.78,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.429999999999996,
+            "best_prompt": 32.22,
+            "prompt_id": "p2",
+            "CPS": 30.998862,
+            "is_dummy": false,
+            "std_accuracy": 6.1873338361526935
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 30.73,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 11.37,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.64,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 16.580000000000002,
+            "best_prompt": 30.73,
+            "prompt_id": "p1",
+            "CPS": 26.381705,
+            "is_dummy": false,
+            "std_accuracy": 12.395366069624568
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.44,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 44.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.37,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 23.7,
+            "best_prompt": 44.29,
+            "prompt_id": "p2",
+            "CPS": 35.170689,
+            "is_dummy": false,
+            "std_accuracy": 17.857555823796268
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..d8e1a2407294e5d3f02619c1c78fccf58a3929e9
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 60.84101533333333,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 61.19,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.47,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.19,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 60.28333333333333,
+            "best_prompt": 61.19,
+            "prompt_id": "p1",
+            "CPS": 60.635210666666666,
+            "is_dummy": false,
+            "std_accuracy": 1.5703927321957813
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.62,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 60.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.83,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 60.56333333333333,
+            "best_prompt": 61.83,
+            "prompt_id": "p3",
+            "CPS": 61.046820000000004,
+            "is_dummy": false,
+            "std_accuracy": 1.1399268982409938
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..34c3abc839b897074e72cf3f86bf8f3bf88426ac
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 43.527679133333336,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 67.19000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 63.27,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 66.61,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 65.69,
+            "best_prompt": 67.19000000000001,
+            "prompt_id": "p1",
+            "CPS": 66.18215000000001,
+            "is_dummy": false,
+            "std_accuracy": 2.1157504578754107
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 57.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 59.98,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 60.92999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.526666666666664,
+            "best_prompt": 60.92999999999999,
+            "prompt_id": "p3",
+            "CPS": 60.07494899999999,
+            "is_dummy": false,
+            "std_accuracy": 1.6766136505865978
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.110000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 15.989999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 19.6,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 15.566666666666668,
+            "best_prompt": 19.6,
+            "prompt_id": "p3",
+            "CPS": 18.80946666666667,
+            "is_dummy": false,
+            "std_accuracy": 4.260801958943097
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 44.07,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.28,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.53,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 24.959999999999997,
+            "best_prompt": 44.07,
+            "prompt_id": "p1",
+            "CPS": 35.648223,
+            "is_dummy": false,
+            "std_accuracy": 16.68561356378602
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.17,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 51.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 10.96,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 23.386666666666667,
+            "best_prompt": 51.03,
+            "prompt_id": "p2",
+            "CPS": 36.923607000000004,
+            "is_dummy": false,
+            "std_accuracy": 23.980438555900797
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..452fdffcf83aa97c10140473ffc00f18a70b6776
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 61.2140455,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 62.260000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 62.260000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 60.919999999999995,
+            "best_prompt": 62.260000000000005,
+            "prompt_id": "p1",
+            "CPS": 61.425716,
+            "is_dummy": false,
+            "std_accuracy": 2.3209480821422974
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.91,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 54.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 63.74999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.44,
+            "best_prompt": 63.74999999999999,
+            "prompt_id": "p3",
+            "CPS": 61.002375,
+            "is_dummy": false,
+            "std_accuracy": 4.563189673901358
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d6780459a984f222fd866c1030aefc721a41c5e
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 61.83102316666667,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 63.85999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 64.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 63.85999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 64.19333333333333,
+            "best_prompt": 64.86,
+            "prompt_id": "p2",
+            "CPS": 64.4276,
+            "is_dummy": false,
+            "std_accuracy": 0.5773502691896298
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 58.940000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.45,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.589999999999996,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 58.99333333333334,
+            "best_prompt": 59.589999999999996,
+            "prompt_id": "p3",
+            "CPS": 59.23444633333334,
+            "is_dummy": false,
+            "std_accuracy": 0.57186828320281
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..b6c3a4a3f4b52ed11463c39827448d9f6ad1334b
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-14B-Instruct-1M_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 61.436353666666676,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "Qwen/Qwen2.5-14B-Instruct-1M",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "620fad32de7bdd2293b3d99b39eba2fe63e97438",
+        "submitted_time": "2025-01-23 13:23:24+00:00",
+        "num_params_billion": 14.770033664,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 64.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 61.78,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 64.67,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 63.70666666666667,
+            "best_prompt": 64.67,
+            "prompt_id": "p1",
+            "CPS": 64.04701233333334,
+            "is_dummy": false,
+            "std_accuracy": 1.6685422779580188
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.489999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 57.82000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 57.809999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 58.373333333333335,
+            "best_prompt": 59.489999999999995,
+            "prompt_id": "p1",
+            "CPS": 58.825695,
+            "is_dummy": false,
+            "std_accuracy": 0.967074626558533
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_EN.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..71eaffd7b145ad29dc56910676fe78d6d716255e
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_EN.json
@@ -0,0 +1,169 @@
+{
+    "average_CPS": 16.729891099999996,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 38.04,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 30.680000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 29.64,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.78666666666667,
+            "best_prompt": 38.04,
+            "prompt_id": "p1",
+            "CPS": 36.041632,
+            "is_dummy": false,
+            "std_accuracy": 4.5791411130618505
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 47.339999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 46.489999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 45.910000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 46.57999999999999,
+            "best_prompt": 47.339999999999996,
+            "prompt_id": "p1",
+            "CPS": 46.98021599999999,
+            "is_dummy": false,
+            "std_accuracy": 0.7192357054540571
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.05,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.5700000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.15500000000000003,
+            "best_prompt": 0.5700000000000001,
+            "prompt_id": "p2",
+            "CPS": 0.5676345,
+            "is_dummy": false,
+            "std_accuracy": 0.27766886753829645
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.06,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.015,
+            "best_prompt": 0.06,
+            "prompt_id": "p3",
+            "CPS": 0.059973,
+            "is_dummy": false,
+            "std_accuracy": 0.03
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_GR.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..2a6c06c69e77cfaee82a2299b5651dee82e91bf4
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 46.818379166666666,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.760000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 15.68,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.760000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 45.06666666666666,
+            "best_prompt": 59.760000000000005,
+            "prompt_id": "p1",
+            "CPS": 50.979264,
+            "is_dummy": false,
+            "std_accuracy": 25.449599865878707
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.93,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.83,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.34,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.03333333333333,
+            "best_prompt": 43.93,
+            "prompt_id": "p1",
+            "CPS": 42.65749433333333,
+            "is_dummy": false,
+            "std_accuracy": 2.800541614283445
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_IT.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..6991f293641c30fff627bd6bd487d26d2215fdbc
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_IT.json
@@ -0,0 +1,163 @@
+{
+    "average_CPS": 15.684724516666668,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.580000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.470000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 27.96,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 27.336666666666673,
+            "best_prompt": 37.580000000000005,
+            "prompt_id": "p1",
+            "CPS": 33.730555333333335,
+            "is_dummy": false,
+            "std_accuracy": 10.568795264046578
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 45.050000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.589999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 44.47,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 43.70333333333334,
+            "best_prompt": 45.050000000000004,
+            "prompt_id": "p1",
+            "CPS": 44.44332666666667,
+            "is_dummy": false,
+            "std_accuracy": 1.8530335488957954
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.16999999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.042499999999999996,
+            "best_prompt": 0.16999999999999998,
+            "prompt_id": "p2",
+            "CPS": 0.16978324999999997,
+            "is_dummy": false,
+            "std_accuracy": 0.08499999999999999
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.08,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.02666666666666667,
+            "best_prompt": 0.08,
+            "prompt_id": "p3",
+            "CPS": 0.07995733333333332,
+            "is_dummy": false,
+            "std_accuracy": 0.046188021535170064
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_PL.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..12c6b090f2660ecdb2863ff322e5d5d532625e08
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 34.071664166666665,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 23.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 24.86,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 24.276666666666667,
+            "best_prompt": 24.86,
+            "prompt_id": "p1",
+            "CPS": 24.714983333333333,
+            "is_dummy": false,
+            "std_accuracy": 1.0103629710818451
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 38.65,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 45.69,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 37.88,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.74,
+            "best_prompt": 45.69,
+            "prompt_id": "p2",
+            "CPS": 43.428345,
+            "is_dummy": false,
+            "std_accuracy": 4.304079460233045
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SK.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..b0b8f4bc1d6a597d917e6128ab16aceaf8d51777
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 38.03573266666667,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 35.78,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.68,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 35.78,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 33.74666666666667,
+            "best_prompt": 35.78,
+            "prompt_id": "p1",
+            "CPS": 35.05247333333333,
+            "is_dummy": false,
+            "std_accuracy": 3.521836642056718
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.71,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.52,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.71,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.31333333333333,
+            "best_prompt": 41.52,
+            "prompt_id": "p2",
+            "CPS": 41.018992000000004,
+            "is_dummy": false,
+            "std_accuracy": 1.045003987233224
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SL.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..2245c08641bb15fd75ae546afbc67b47f4e51aa5
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 37.09308866666666,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 33.44,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 28.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 33.44,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 31.836666666666662,
+            "best_prompt": 33.44,
+            "prompt_id": "p1",
+            "CPS": 32.90384533333333,
+            "is_dummy": false,
+            "std_accuracy": 2.777054794802099
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.79,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.79,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.48,
+            "best_prompt": 41.86,
+            "prompt_id": "p2",
+            "CPS": 41.282332,
+            "is_dummy": false,
+            "std_accuracy": 1.1951150572225255
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..a82c4a806ac20ea198f1bf7053ebbb2ace711189
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_EN.json
@@ -0,0 +1,163 @@
+{
+    "average_CPS": 31.59830568333333,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.699999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.02,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.129999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 58.949999999999996,
+            "best_prompt": 61.129999999999995,
+            "prompt_id": "p3",
+            "CPS": 59.79736599999999,
+            "is_dummy": false,
+            "std_accuracy": 2.63626629914354
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 64.82,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 64.69,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 63.7,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 64.40333333333332,
+            "best_prompt": 64.82,
+            "prompt_id": "p1",
+            "CPS": 64.54991666666665,
+            "is_dummy": false,
+            "std_accuracy": 0.6125629219380878
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.010000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.83,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.39,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 9.307500000000001,
+            "best_prompt": 15.010000000000002,
+            "prompt_id": "p1",
+            "CPS": 14.154054750000002,
+            "is_dummy": false,
+            "std_accuracy": 6.842097022599626
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 3.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 5.46,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 2.856666666666667,
+            "best_prompt": 5.46,
+            "prompt_id": "p3",
+            "CPS": 5.317858,
+            "is_dummy": false,
+            "std_accuracy": 2.738801441020019
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 2.4699999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 15.57,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 1.7399999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.593333333333333,
+            "best_prompt": 15.57,
+            "prompt_id": "p2",
+            "CPS": 14.172333,
+            "is_dummy": false,
+            "std_accuracy": 7.782585260267525
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..459d29262498f3a5b8e0c9623d609d58691f53fc
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 60.26106666666667,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 61.96,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 61.309999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.96,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 61.74333333333333,
+            "best_prompt": 61.96,
+            "prompt_id": "p1",
+            "CPS": 61.82575333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.37527767497326003
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.13,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.96,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 57.099999999999994,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 58.39666666666667,
+            "best_prompt": 59.13,
+            "prompt_id": "p1",
+            "CPS": 58.696380000000005,
+            "is_dummy": false,
+            "std_accuracy": 1.1261586625930393
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..974d3edcf50e656124c16ace38699f0496c2f5f2
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_IT.json
@@ -0,0 +1,163 @@
+{
+    "average_CPS": 32.75043995,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 69.34,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 71.52,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 69.3,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 70.05333333333334,
+            "best_prompt": 71.52,
+            "prompt_id": "p2",
+            "CPS": 70.47104,
+            "is_dummy": false,
+            "std_accuracy": 1.2703280416228429
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 58.01,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.26,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 56.406666666666666,
+            "best_prompt": 58.01,
+            "prompt_id": "p1",
+            "CPS": 57.079906333333334,
+            "is_dummy": false,
+            "std_accuracy": 1.4307457263026617
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 3.9800000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 5.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 10.25,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 10.25,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.6175,
+            "best_prompt": 10.25,
+            "prompt_id": "p3",
+            "CPS": 9.980168749999999,
+            "is_dummy": false,
+            "std_accuracy": 3.1485591942982425
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 23.22,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 1.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.28,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.863333333333332,
+            "best_prompt": 23.22,
+            "prompt_id": "p1",
+            "CPS": 20.350782,
+            "is_dummy": false,
+            "std_accuracy": 11.288907534980225
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.8599999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.02,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 2.7199999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 3.5333333333333328,
+            "best_prompt": 6.02,
+            "prompt_id": "p2",
+            "CPS": 5.8703026666666664,
+            "is_dummy": false,
+            "std_accuracy": 2.196026715077331
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..cc4032de29f21671f35367bd4411de6a4d219ff9
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 59.57754466666667,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 60.08,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 60.040000000000006,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 60.08,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 60.06666666666666,
+            "best_prompt": 60.08,
+            "prompt_id": "p1",
+            "CPS": 60.071989333333335,
+            "is_dummy": false,
+            "std_accuracy": 0.023094010767580435
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 58.58,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.68,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.38,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 58.879999999999995,
+            "best_prompt": 59.38,
+            "prompt_id": "p3",
+            "CPS": 59.083099999999995,
+            "is_dummy": false,
+            "std_accuracy": 0.43588989435406944
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..765514bf924769352adb6adf057443658d28ecae
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 62.042391166666675,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 67.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 66.73,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 67.43,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 67.19666666666667,
+            "best_prompt": 67.43,
+            "prompt_id": "p1",
+            "CPS": 67.27266333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.40414518843273967
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 57.330000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 56.089999999999996,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 56.42666666666667,
+            "best_prompt": 57.330000000000005,
+            "prompt_id": "p1",
+            "CPS": 56.812119,
+            "is_dummy": false,
+            "std_accuracy": 0.7907169742286678
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..c80c0447f76d126e739e337f179be7f2071ffcb6
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen2.5-32B-Instruct_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 61.44185133333333,
+    "config": {
+        "model_name": "Qwen/Qwen2.5-32B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "Qwen/Qwen2.5-32B-Instruct",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd",
+        "submitted_time": "2024-09-17 04:17:55+00:00",
+        "num_params_billion": 32.763876352,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 62.529999999999994,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 66.14999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 62.529999999999994,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 63.73666666666666,
+            "best_prompt": 66.14999999999999,
+            "prompt_id": "p2",
+            "CPS": 64.55358,
+            "is_dummy": false,
+            "std_accuracy": 2.090007974466444
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.919999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.489999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.39,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 57.26666666666667,
+            "best_prompt": 59.919999999999995,
+            "prompt_id": "p1",
+            "CPS": 58.33012266666667,
+            "is_dummy": false,
+            "std_accuracy": 3.4325840606361426
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_EN.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..b5caa94b2f5b818eba6053c28c2b5d9462460a2b
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_EN.json
@@ -0,0 +1,151 @@
+{
+    "average_CPS": 17.763235033333334,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.94,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.31,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.97,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.406666666666666,
+            "best_prompt": 43.94,
+            "prompt_id": "p1",
+            "CPS": 42.82685333333333,
+            "is_dummy": false,
+            "std_accuracy": 2.200507517218091
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.01,
+            "best_prompt": 0.03,
+            "prompt_id": "p1",
+            "CPS": 0.029994,
+            "is_dummy": false,
+            "std_accuracy": 0.017320508075688773
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.01,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0033333333333333335,
+            "best_prompt": 0.01,
+            "prompt_id": "p3",
+            "CPS": 0.009999333333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.005773502691896258
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.620000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 47.29,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.455,
+            "best_prompt": 47.29,
+            "prompt_id": "p3",
+            "CPS": 45.9493285,
+            "is_dummy": false,
+            "std_accuracy": 4.009295449327721
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_GR.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..bde9845bf6583ae27fa3d3224befac1f265ea118
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 41.21096783333334,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 42.91,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 45.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 42.91,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 43.67666666666667,
+            "best_prompt": 45.21,
+            "prompt_id": "p2",
+            "CPS": 44.516780000000004,
+            "is_dummy": false,
+            "std_accuracy": 1.3279056191361418
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.330000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 37.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 37.980000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 37.76666666666667,
+            "best_prompt": 37.99,
+            "prompt_id": "p2",
+            "CPS": 37.90515566666667,
+            "is_dummy": false,
+            "std_accuracy": 0.37819747927945296
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_IT.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..352e97dde73f14c661bba1eb59d2bfecb444ec27
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 16.892739600000002,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.85,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 53.16,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 45.14,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.71666666666667,
+            "best_prompt": 53.16,
+            "prompt_id": "p2",
+            "CPS": 43.887124,
+            "is_dummy": false,
+            "std_accuracy": 23.610240010074723
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.84,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.72,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 39.596666666666664,
+            "best_prompt": 41.23,
+            "prompt_id": "p2",
+            "CPS": 40.556576666666665,
+            "is_dummy": false,
+            "std_accuracy": 1.6983619559249796
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.02,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.006666666666666667,
+            "best_prompt": 0.02,
+            "prompt_id": "p1",
+            "CPS": 0.019997333333333336,
+            "is_dummy": false,
+            "std_accuracy": 0.011547005383792516
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_PL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..7670404e0b2aaeb6b0bcca1dc84d540c1baef9ac
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 42.480305333333334,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.32,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.35666666666666,
+            "best_prompt": 43.32,
+            "prompt_id": "p1",
+            "CPS": 42.902684,
+            "is_dummy": false,
+            "std_accuracy": 1.6685422779580188
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.52,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 42.199999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.870000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.86333333333334,
+            "best_prompt": 42.199999999999996,
+            "prompt_id": "p2",
+            "CPS": 42.05792666666667,
+            "is_dummy": false,
+            "std_accuracy": 0.3400490160746401
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SK.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..acf4da7fc3aca030be17f5ec8041c74b3c00b91c
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 36.625888,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.31,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 33.98,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.31,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.86666666666667,
+            "best_prompt": 33.98,
+            "prompt_id": "p2",
+            "CPS": 33.60168933333333,
+            "is_dummy": false,
+            "std_accuracy": 0.9641749495466719
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.800000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 38.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.800000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 39.42333333333334,
+            "best_prompt": 39.800000000000004,
+            "prompt_id": "p1",
+            "CPS": 39.65008666666667,
+            "is_dummy": false,
+            "std_accuracy": 0.6524058041842786
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..3707b703de4ba52063672af91385b56679d01035
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 43.20190633333334,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 44.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 45.31,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 44.86,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 45.01,
+            "best_prompt": 45.31,
+            "prompt_id": "p2",
+            "CPS": 45.17407,
+            "is_dummy": false,
+            "std_accuracy": 0.25980762113533323
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.15,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.260000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.15,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.18666666666667,
+            "best_prompt": 41.260000000000005,
+            "prompt_id": "p2",
+            "CPS": 41.229742666666674,
+            "is_dummy": false,
+            "std_accuracy": 0.0635085296108626
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..50158d879b0a2af51f1436b3ec0e09a244d303b0
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 46.50834546666666,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.93,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.42999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.07333333333332,
+            "best_prompt": 61.42999999999999,
+            "prompt_id": "p3",
+            "CPS": 59.982299666666655,
+            "is_dummy": false,
+            "std_accuracy": 2.833131365350593
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.5,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.64,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 52.583333333333336,
+            "best_prompt": 53.64,
+            "prompt_id": "p3",
+            "CPS": 53.073204000000004,
+            "is_dummy": false,
+            "std_accuracy": 1.0702491921666346
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.06,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 35.809999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.67,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 33.513333333333335,
+            "best_prompt": 35.809999999999995,
+            "prompt_id": "p2",
+            "CPS": 34.98756366666667,
+            "is_dummy": false,
+            "std_accuracy": 2.0122209951526986
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 38.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 36.51,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.25,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 31.953333333333333,
+            "best_prompt": 38.1,
+            "prompt_id": "p1",
+            "CPS": 35.75812,
+            "is_dummy": false,
+            "std_accuracy": 9.303388271663897
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.54,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 56.89999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.559999999999995,
+            "best_prompt": 56.89999999999999,
+            "prompt_id": "p3",
+            "CPS": 48.740539999999996,
+            "is_dummy": false,
+            "std_accuracy": 13.858181698909851
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..571d9a3122c649dd3e13d19400ba065411e15ea4
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 56.23321899999999,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 61.63999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.69,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.63999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.98999999999999,
+            "best_prompt": 61.63999999999999,
+            "prompt_id": "p1",
+            "CPS": 60.62293999999999,
+            "is_dummy": false,
+            "std_accuracy": 2.8578838324886453
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 50.14999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 52.23,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 51.49,
+            "best_prompt": 52.23,
+            "prompt_id": "p3",
+            "CPS": 51.843498,
+            "is_dummy": false,
+            "std_accuracy": 1.1625833303466944
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..547a8bb626bd9855aa58ad261a49bb5db89ed810
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 48.58629273333334,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 67.93,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 64.47,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 67.78,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 66.72666666666667,
+            "best_prompt": 67.93,
+            "prompt_id": "p1",
+            "CPS": 67.11257566666667,
+            "is_dummy": false,
+            "std_accuracy": 1.955769243375441
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 60.41,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.379999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 60.650000000000006,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.81333333333333,
+            "best_prompt": 60.650000000000006,
+            "prompt_id": "p3",
+            "CPS": 60.142561666666666,
+            "is_dummy": false,
+            "std_accuracy": 1.2470899459675484
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 16.2,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 25.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.34,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 19.733333333333334,
+            "best_prompt": 25.66,
+            "prompt_id": "p2",
+            "CPS": 24.139217333333335,
+            "is_dummy": false,
+            "std_accuracy": 5.164197259335989
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 45.12,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 54.64,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 44.07,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 47.94333333333333,
+            "best_prompt": 54.64,
+            "prompt_id": "p2",
+            "CPS": 50.980941333333334,
+            "is_dummy": false,
+            "std_accuracy": 5.82319786142746
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.47,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.71,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 19.88,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 30.686666666666667,
+            "best_prompt": 50.71,
+            "prompt_id": "p2",
+            "CPS": 40.55616766666667,
+            "is_dummy": false,
+            "std_accuracy": 17.358929498483867
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..a24f42db114e235ce3ac2c5028fa17b0c3d44a5b
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 56.796842,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 62.760000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 62.760000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 61.18333333333334,
+            "best_prompt": 62.760000000000005,
+            "prompt_id": "p1",
+            "CPS": 61.770484,
+            "is_dummy": false,
+            "std_accuracy": 2.730866773266932
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.949999999999996,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 51.66,
+            "best_prompt": 52.0,
+            "prompt_id": "p2",
+            "CPS": 51.8232,
+            "is_dummy": false,
+            "std_accuracy": 0.5461684721768532
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..941a107c7b210a7f5e342887ce0ed52078283535
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 56.40435283333333,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 60.85,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 59.19,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 60.85,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 60.29666666666666,
+            "best_prompt": 60.85,
+            "prompt_id": "p1",
+            "CPS": 60.51329666666666,
+            "is_dummy": false,
+            "std_accuracy": 0.9584014468547809
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 49.2,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.24999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.73,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 51.059999999999995,
+            "best_prompt": 53.73,
+            "prompt_id": "p3",
+            "CPS": 52.29540899999999,
+            "is_dummy": false,
+            "std_accuracy": 2.371138966825857
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..2ed21a928d51a4f5feeaca60ee48601fcee48595
--- /dev/null
+++ b/e3c_llm_results/Qwen/Qwen3-30B-A3B-Instruct-2507_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 59.600933166666664,
+    "config": {
+        "model_name": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+        "base_model": "Qwen3MoeForCausalLM",
+        "revision": "61082d4deaa4785f64943b443cbc2b5de7524fad",
+        "submitted_time": "2025-07-28 07:31:27+00:00",
+        "num_params_billion": 30.532122624,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 66.14999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 59.440000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 66.14999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 63.913333333333334,
+            "best_prompt": 66.14999999999999,
+            "prompt_id": "p1",
+            "CPS": 64.670445,
+            "is_dummy": false,
+            "std_accuracy": 3.874020306262381
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 50.62,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.76,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 54.290000000000006,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 53.55666666666667,
+            "best_prompt": 55.76,
+            "prompt_id": "p2",
+            "CPS": 54.531421333333334,
+            "is_dummy": false,
+            "std_accuracy": 2.647306807556189
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B_0_EN-checkpoint.json b/e3c_llm_results/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B_0_EN-checkpoint.json
new file mode 100644
index 0000000000000000000000000000000000000000..dd04460110df910bce59a8f73b3c97d40801845c
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/.ipynb_checkpoints/DeepSeek-R1-Distill-Qwen-32B_0_EN-checkpoint.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 15.443440999999998,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 19.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 34.589999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.08,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.766666666666666,
+            "best_prompt": 34.589999999999996,
+            "prompt_id": "p2",
+            "CPS": 32.575708999999996,
+            "is_dummy": false,
+            "std_accuracy": 8.01149382658024
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 44.87,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 44.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.11,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.29999999999999,
+            "best_prompt": 44.92,
+            "prompt_id": "p2",
+            "CPS": 44.641496,
+            "is_dummy": false,
+            "std_accuracy": 1.030873416089483
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_EN.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..dd04460110df910bce59a8f73b3c97d40801845c
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 15.443440999999998,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 19.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 34.589999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.08,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.766666666666666,
+            "best_prompt": 34.589999999999996,
+            "prompt_id": "p2",
+            "CPS": 32.575708999999996,
+            "is_dummy": false,
+            "std_accuracy": 8.01149382658024
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 44.87,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 44.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.11,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.29999999999999,
+            "best_prompt": 44.92,
+            "prompt_id": "p2",
+            "CPS": 44.641496,
+            "is_dummy": false,
+            "std_accuracy": 1.030873416089483
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_GR.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..25cf2fb05e184ba378fb7302f6d7a140af01235c
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 36.46137383333333,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 34.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 33.54,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 34.55,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 34.21333333333333,
+            "best_prompt": 34.55,
+            "prompt_id": "p1",
+            "CPS": 34.433681666666665,
+            "is_dummy": false,
+            "std_accuracy": 0.5831237718815209
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.060000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 39.47,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.02,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 34.85,
+            "best_prompt": 41.02,
+            "prompt_id": "p3",
+            "CPS": 38.489066,
+            "is_dummy": false,
+            "std_accuracy": 9.376497213778714
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_IT.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..de4b5cc7c3dcd289f74d0459f3eddfc020e1000c
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 15.963344799999998,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 26.779999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 35.68,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 34.14,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.199999999999996,
+            "best_prompt": 35.68,
+            "prompt_id": "p2",
+            "CPS": 34.438336,
+            "is_dummy": false,
+            "std_accuracy": 4.756595421096902
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 45.190000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 46.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 42.27,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.52333333333334,
+            "best_prompt": 46.11,
+            "prompt_id": "p2",
+            "CPS": 45.378388,
+            "is_dummy": false,
+            "std_accuracy": 2.0049272638510676
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_PL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..c3380d143512bf188b6b051e6eb3143b9b708e99
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 39.199796666666664,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.04,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 37.28,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.04,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 33.78666666666666,
+            "best_prompt": 37.28,
+            "prompt_id": "p2",
+            "CPS": 35.97768533333333,
+            "is_dummy": false,
+            "std_accuracy": 3.02531541055364
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.83,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 43.269999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.83,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.309999999999995,
+            "best_prompt": 43.269999999999996,
+            "prompt_id": "p2",
+            "CPS": 42.421907999999995,
+            "is_dummy": false,
+            "std_accuracy": 1.769519708847572
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SK.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..91392100d164b54aff38fbd3b4932bdf3ba9bc29
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 33.894328,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 28.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 19.05,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 28.29,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 25.209999999999997,
+            "best_prompt": 28.29,
+            "prompt_id": "p1",
+            "CPS": 27.418667999999997,
+            "is_dummy": false,
+            "std_accuracy": 5.334716487312141
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 38.93,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.910000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.93,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 39.59,
+            "best_prompt": 40.910000000000004,
+            "prompt_id": "p2",
+            "CPS": 40.369988000000006,
+            "is_dummy": false,
+            "std_accuracy": 1.1431535329954614
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..9e6bb1d1f11b95fdf5901fc0810dd74e60c7d0e5
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 34.339884000000005,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 28.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 21.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 28.1,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 26.040000000000003,
+            "best_prompt": 28.1,
+            "prompt_id": "p1",
+            "CPS": 27.521140000000003,
+            "is_dummy": false,
+            "std_accuracy": 3.568024663591887
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.160000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.15,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.160000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.156666666666666,
+            "best_prompt": 41.160000000000004,
+            "prompt_id": "p1",
+            "CPS": 41.15862800000001,
+            "is_dummy": false,
+            "std_accuracy": 0.005773502691899211
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..936b95d0c5b71f86f5615bc1dd8ed1828dbc0594
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 43.183429333333336,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 60.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 59.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.35,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.626666666666665,
+            "best_prompt": 60.24,
+            "prompt_id": "p1",
+            "CPS": 59.870528,
+            "is_dummy": false,
+            "std_accuracy": 0.5320087718575085
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.910000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 51.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 52.73,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 52.21,
+            "best_prompt": 52.73,
+            "prompt_id": "p3",
+            "CPS": 52.455804,
+            "is_dummy": false,
+            "std_accuracy": 0.45210618221828913
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.690000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 15.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 26.33,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.683333333333334,
+            "best_prompt": 26.33,
+            "prompt_id": "p3",
+            "CPS": 24.053332666666666,
+            "is_dummy": false,
+            "std_accuracy": 7.672192211704117
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 31.169999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 24.16,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.829999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 23.386666666666667,
+            "best_prompt": 31.169999999999998,
+            "prompt_id": "p1",
+            "CPS": 28.743935,
+            "is_dummy": false,
+            "std_accuracy": 8.197404060636106
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.22,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 51.910000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.71,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 48.28,
+            "best_prompt": 53.71,
+            "prompt_id": "p3",
+            "CPS": 50.793547,
+            "is_dummy": false,
+            "std_accuracy": 7.897638887667632
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..e9a1b10c2d587985c1d8fe4e6d8bd1de98a23aa8
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 52.0035325,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.28,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 57.96,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.28,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 58.84,
+            "best_prompt": 59.28,
+            "prompt_id": "p1",
+            "CPS": 59.019168,
+            "is_dummy": false,
+            "std_accuracy": 0.7621023553303061
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 44.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 42.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 45.69,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.153333333333336,
+            "best_prompt": 45.69,
+            "prompt_id": "p3",
+            "CPS": 44.987897,
+            "is_dummy": false,
+            "std_accuracy": 1.8499279265239843
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..4a1146fa77865ec03196283d24cc77823d10eae3
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 39.245102200000005,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 69.82000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 66.79,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 69.3,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 68.63666666666667,
+            "best_prompt": 69.82000000000001,
+            "prompt_id": "p1",
+            "CPS": 68.99379666666667,
+            "is_dummy": false,
+            "std_accuracy": 1.620257181231834
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 55.46,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.26,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.17999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 55.29999999999999,
+            "best_prompt": 55.46,
+            "prompt_id": "p1",
+            "CPS": 55.37126399999999,
+            "is_dummy": false,
+            "std_accuracy": 0.1442220510185634
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 3.08,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 1.7399999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 12.280000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.7,
+            "best_prompt": 12.280000000000001,
+            "prompt_id": "p3",
+            "CPS": 11.471976000000002,
+            "is_dummy": false,
+            "std_accuracy": 5.7376998875856176
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 27.950000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.2,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 31.81,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 29.653333333333336,
+            "best_prompt": 31.81,
+            "prompt_id": "p3",
+            "CPS": 31.123964333333333,
+            "is_dummy": false,
+            "std_accuracy": 1.9695261697508175
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 26.3,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 28.939999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.30333333333333,
+            "best_prompt": 29.67,
+            "prompt_id": "p2",
+            "CPS": 29.26451,
+            "is_dummy": false,
+            "std_accuracy": 1.7729166177046605
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..6cfece54a11e960b9587ebb01ba6abd137f95bff
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 56.36566883333333,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 62.13999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 61.4,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 62.13999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 61.893333333333324,
+            "best_prompt": 62.13999999999999,
+            "prompt_id": "p1",
+            "CPS": 61.98672133333332,
+            "is_dummy": false,
+            "std_accuracy": 0.4272391992003201
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 48.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 51.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 50.760000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 50.22666666666667,
+            "best_prompt": 51.29,
+            "prompt_id": "p2",
+            "CPS": 50.74461633333333,
+            "is_dummy": false,
+            "std_accuracy": 1.4079180847383597
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..d870eae3dfe93c6e3a37518ac5516b4b8939f743
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 55.21981100000001,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 63.470000000000006,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 62.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 63.470000000000006,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 63.01666666666667,
+            "best_prompt": 63.470000000000006,
+            "prompt_id": "p1",
+            "CPS": 63.182269333333345,
+            "is_dummy": false,
+            "std_accuracy": 0.7851963660978948
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 47.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 44.51,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 46.89,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 46.46333333333333,
+            "best_prompt": 47.99,
+            "prompt_id": "p1",
+            "CPS": 47.25735266666666,
+            "is_dummy": false,
+            "std_accuracy": 1.7788010943704022
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..c5ae7fd6a6dea4eff0ea12a15fd6ef4b7731915e
--- /dev/null
+++ b/e3c_llm_results/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 55.28181983333334,
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+        "base_model": "Qwen2ForCausalLM",
+        "revision": "711ad2ea6aa40cfca18895e8aca02ab92df1a746",
+        "submitted_time": "2025-01-20 09:19:00+00:00",
+        "num_params_billion": 32.763876352,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 60.150000000000006,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 60.49,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 60.150000000000006,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 60.26333333333334,
+            "best_prompt": 60.49,
+            "prompt_id": "p2",
+            "CPS": 60.35288933333334,
+            "is_dummy": false,
+            "std_accuracy": 0.19629909152447061
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.370000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 46.739999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 49.230000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 49.11333333333334,
+            "best_prompt": 51.370000000000005,
+            "prompt_id": "p1",
+            "CPS": 50.21075033333334,
+            "is_dummy": false,
+            "std_accuracy": 2.31720377466751
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_EN.json b/e3c_llm_results/epfl-llm/meditron-7b_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..018a81f3289f9a28a0c0a980f65758d256cba978
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 2.6316848,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 5.779999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.1000000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.48,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.12,
+            "best_prompt": 8.48,
+            "prompt_id": "p3",
+            "CPS": 8.279872000000001,
+            "is_dummy": false,
+            "std_accuracy": 2.209705862779026
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.42,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 4.97,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 3.1300000000000003,
+            "best_prompt": 4.97,
+            "prompt_id": "p3",
+            "CPS": 4.878552,
+            "is_dummy": false,
+            "std_accuracy": 2.7245733610971095
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_GR.json b/e3c_llm_results/epfl-llm/meditron-7b_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..194e574bef7a459af27ebd8d155bcbe19e26b253
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 19.223575999999998,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.169999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 24.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 24.169999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 24.256666666666664,
+            "best_prompt": 24.43,
+            "prompt_id": "p2",
+            "CPS": 24.387654666666666,
+            "is_dummy": false,
+            "std_accuracy": 0.1501110699893036
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.559999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 1.6099999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.58,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.916666666666665,
+            "best_prompt": 15.559999999999999,
+            "prompt_id": "p1",
+            "CPS": 14.059497333333331,
+            "is_dummy": false,
+            "std_accuracy": 8.367235704420745
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_IT.json b/e3c_llm_results/epfl-llm/meditron-7b_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..38c4e448e165f577c82fc85cbdebae2679fd0970
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 5.052730066666666,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.7299999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.12,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 5.3100000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.386666666666667,
+            "best_prompt": 7.7299999999999995,
+            "prompt_id": "p1",
+            "CPS": 7.626160333333333,
+            "is_dummy": false,
+            "std_accuracy": 1.2318414400130124
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.2,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 19.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 12.68,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.723333333333334,
+            "best_prompt": 19.29,
+            "prompt_id": "p2",
+            "CPS": 17.63749,
+            "is_dummy": false,
+            "std_accuracy": 9.694247435120136
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_PL.json b/e3c_llm_results/epfl-llm/meditron-7b_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..d8fa46e41e6c78724b244eba4ac7fe27fd493609
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 6.367811666666667,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.4,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 12.030000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 11.4,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 11.61,
+            "best_prompt": 12.030000000000001,
+            "prompt_id": "p2",
+            "CPS": 11.979474000000002,
+            "is_dummy": false,
+            "std_accuracy": 0.3637306695894647
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.76,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.25333333333333335,
+            "best_prompt": 0.76,
+            "prompt_id": "p3",
+            "CPS": 0.7561493333333333,
+            "is_dummy": false,
+            "std_accuracy": 0.4387862045841156
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_SK.json b/e3c_llm_results/epfl-llm/meditron-7b_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..2be265b8f56d0cf4853f7945c234a0b3edf38821
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 4.508018000000001,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.74,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 5.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.74,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.780000000000001,
+            "best_prompt": 8.74,
+            "prompt_id": "p1",
+            "CPS": 8.656096000000002,
+            "is_dummy": false,
+            "std_accuracy": 1.6627687752661222
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.36,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.31,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.36,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.34333333333333327,
+            "best_prompt": 0.36,
+            "prompt_id": "p1",
+            "CPS": 0.35994,
+            "is_dummy": false,
+            "std_accuracy": 0.02886751345948128
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_0_SL.json b/e3c_llm_results/epfl-llm/meditron-7b_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..fc240fc571811ec62943a25b58191a669fa86a0b
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 8.782022166666668,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.97,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.6,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 11.97,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 9.513333333333334,
+            "best_prompt": 11.97,
+            "prompt_id": "p1",
+            "CPS": 11.675937000000001,
+            "is_dummy": false,
+            "std_accuracy": 4.2550714839275425
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 5.9799999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 1.37,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 5.9799999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 4.4433333333333325,
+            "best_prompt": 5.9799999999999995,
+            "prompt_id": "p1",
+            "CPS": 5.888107333333333,
+            "is_dummy": false,
+            "std_accuracy": 2.6615847409641744
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json b/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..ae1d76af70eafafedd20c46cd853652733afa6ba
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 4.328597533333334,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 14.790000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.540000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.453333333333333,
+            "best_prompt": 14.790000000000001,
+            "prompt_id": "p2",
+            "CPS": 14.444407000000002,
+            "is_dummy": false,
+            "std_accuracy": 3.8327579278286468
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.22,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 6.63,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.923333333333333,
+            "best_prompt": 7.22,
+            "prompt_id": "p1",
+            "CPS": 7.1985806666666665,
+            "is_dummy": false,
+            "std_accuracy": 0.29501412395567317
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json b/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..f6eb5f9a3ff99a0bfa6d9e32342db44792911c81
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 0.0,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json b/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d58714176f8b36fa7af44fb3e9a44ef54edc8e4
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 8.8522688,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 29.909999999999997,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 35.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 33.11,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.88333333333333,
+            "best_prompt": 35.63,
+            "prompt_id": "p2",
+            "CPS": 34.651362666666664,
+            "is_dummy": false,
+            "std_accuracy": 2.8667286814997595
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 8.870000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 9.68,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 8.956666666666667,
+            "best_prompt": 9.68,
+            "prompt_id": "p3",
+            "CPS": 9.609981333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.6841296173484472
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json b/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..8336b4911a828762b7c245545f66e08f250a2dd8
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 19.029036333333334,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 31.840000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 32.97,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 31.840000000000003,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.21666666666667,
+            "best_prompt": 32.97,
+            "prompt_id": "p2",
+            "CPS": 32.721626,
+            "is_dummy": false,
+            "std_accuracy": 0.6524058041842745
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 5.33,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 5.35,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.096666666666667,
+            "best_prompt": 5.35,
+            "prompt_id": "p3",
+            "CPS": 5.336446666666666,
+            "is_dummy": false,
+            "std_accuracy": 0.4215843134336631
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json b/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..1886c27e669de562ad32dc3485a3f09609b48b52
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 17.218929,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 30.04,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 30.04,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 29.926666666666666,
+            "best_prompt": 30.04,
+            "prompt_id": "p1",
+            "CPS": 30.005954666666668,
+            "is_dummy": false,
+            "std_accuracy": 0.19629909152447267
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 4.45,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.93,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 3.75,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 4.043333333333334,
+            "best_prompt": 4.45,
+            "prompt_id": "p1",
+            "CPS": 4.4319033333333335,
+            "is_dummy": false,
+            "std_accuracy": 0.36350149013908234
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json b/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..eccf555e2e2f848343b45cbd8f7740a5095c91ec
--- /dev/null
+++ b/e3c_llm_results/epfl-llm/meditron-7b_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 18.122609833333332,
+    "config": {
+        "model_name": "epfl-llm/meditron-7b",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "epfl-llm/meditron-7b",
+        "base_model": "LlamaForCausalLM",
+        "revision": "d7d0a5ed929384a6b059ac74198cf1d71f44ba76",
+        "submitted_time": "2023-11-08 16:03:23+00:00",
+        "num_params_billion": 6.73855488,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 31.19,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.160000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 31.19,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 30.513333333333335,
+            "best_prompt": 31.19,
+            "prompt_id": "p1",
+            "CPS": 30.978947666666667,
+            "is_dummy": false,
+            "std_accuracy": 1.172021046454939
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 4.77,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 5.01,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 5.28,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.02,
+            "best_prompt": 5.28,
+            "prompt_id": "p3",
+            "CPS": 5.266272,
+            "is_dummy": false,
+            "std_accuracy": 0.2551470164434618
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_EN.json b/e3c_llm_results/google/gemma-2-9b-it_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..ca744fc0dbc71cbb66a0e945c34fcdb4a13f2c60
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 20.2609912,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 51.739999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.7,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 46.03666666666667,
+            "best_prompt": 53.7,
+            "prompt_id": "p3",
+            "CPS": 49.584790000000005,
+            "is_dummy": false,
+            "std_accuracy": 11.617281667125631
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.6,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 42.05,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.67,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.10666666666667,
+            "best_prompt": 43.6,
+            "prompt_id": "p1",
+            "CPS": 42.948906666666666,
+            "is_dummy": false,
+            "std_accuracy": 1.4658217263137197
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.06,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 7.9399999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 2.6666666666666665,
+            "best_prompt": 7.9399999999999995,
+            "prompt_id": "p2",
+            "CPS": 7.521297333333333,
+            "is_dummy": false,
+            "std_accuracy": 4.566939164619267
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.13,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 1.26,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.4633333333333334,
+            "best_prompt": 1.26,
+            "prompt_id": "p2",
+            "CPS": 1.249962,
+            "is_dummy": false,
+            "std_accuracy": 0.6929886963965093
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_GR.json b/e3c_llm_results/google/gemma-2-9b-it_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..3c3c8c09ca78762e8e2930ec0efeca6d88ea98e4
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 47.411836666666666,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 55.489999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 47.77,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.489999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 52.916666666666664,
+            "best_prompt": 55.489999999999995,
+            "prompt_id": "p1",
+            "CPS": 54.06205733333333,
+            "is_dummy": false,
+            "std_accuracy": 4.457144078143906
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 39.57,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.43,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.080000000000005,
+            "best_prompt": 41.24,
+            "prompt_id": "p1",
+            "CPS": 40.761616000000004,
+            "is_dummy": false,
+            "std_accuracy": 1.0070253224224317
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_IT.json b/e3c_llm_results/google/gemma-2-9b-it_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..1582693b99165fb210f40f52cf2ca3056ab2a3c1
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 21.524752466666662,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 57.38999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 65.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 62.1,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 61.57666666666666,
+            "best_prompt": 65.24,
+            "prompt_id": "p2",
+            "CPS": 62.85004133333333,
+            "is_dummy": false,
+            "std_accuracy": 3.9510800211250268
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 45.85,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.13,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.959999999999994,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.98,
+            "best_prompt": 45.85,
+            "prompt_id": "p1",
+            "CPS": 44.534105,
+            "is_dummy": false,
+            "std_accuracy": 2.51990079169796
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.08,
+            "best_prompt": 0.24,
+            "prompt_id": "p1",
+            "CPS": 0.23961599999999997,
+            "is_dummy": false,
+            "std_accuracy": 0.13856406460551018
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_PL.json b/e3c_llm_results/google/gemma-2-9b-it_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..5e6d2589bc155ee5a52d512fe4a78b3900afef8d
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 41.18764683333333,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 40.6,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.6,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.916666666666664,
+            "best_prompt": 41.55,
+            "prompt_id": "p2",
+            "CPS": 41.28685,
+            "is_dummy": false,
+            "std_accuracy": 0.548482755730142
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 36.74,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 42.71,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 37.29,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 38.913333333333334,
+            "best_prompt": 42.71,
+            "prompt_id": "p2",
+            "CPS": 41.08844366666666,
+            "is_dummy": false,
+            "std_accuracy": 3.2994898595591007
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_SK.json b/e3c_llm_results/google/gemma-2-9b-it_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..27b21560efcc055b5b36ef06708ed28d4cb7365a
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 45.32347,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 48.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 45.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 48.75,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 47.75,
+            "best_prompt": 48.75,
+            "prompt_id": "p1",
+            "CPS": 48.2625,
+            "is_dummy": false,
+            "std_accuracy": 1.7320508075688772
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.89,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 43.4,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.89,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.059999999999995,
+            "best_prompt": 43.4,
+            "prompt_id": "p2",
+            "CPS": 42.38444,
+            "is_dummy": false,
+            "std_accuracy": 2.0264994448555855
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_0_SL.json b/e3c_llm_results/google/gemma-2-9b-it_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d9bb91731ce43227d4af908c2fe375610387da6
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 43.368616,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 47.07,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.46,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 47.07,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.86666666666667,
+            "best_prompt": 47.07,
+            "prompt_id": "p1",
+            "CPS": 46.032891,
+            "is_dummy": false,
+            "std_accuracy": 3.816285279343426
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 40.79,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.160000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.79,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.580000000000005,
+            "best_prompt": 40.79,
+            "prompt_id": "p1",
+            "CPS": 40.704341,
+            "is_dummy": false,
+            "std_accuracy": 0.3637306695894616
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_EN.json b/e3c_llm_results/google/gemma-2-9b-it_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..f38a35dab7386200cb87d83d2674aaea7b3c5916
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 39.772889533333334,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 62.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.38999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.18,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.19,
+            "best_prompt": 62.0,
+            "prompt_id": "p1",
+            "CPS": 60.257799999999996,
+            "is_dummy": false,
+            "std_accuracy": 2.8050133689521015
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.629999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 53.37,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 54.09,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 53.03,
+            "best_prompt": 54.09,
+            "prompt_id": "p3",
+            "CPS": 53.516646,
+            "is_dummy": false,
+            "std_accuracy": 1.2647529403009938
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 29.509999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 33.879999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.62,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.00333333333333,
+            "best_prompt": 33.879999999999995,
+            "prompt_id": "p2",
+            "CPS": 33.244185333333334,
+            "is_dummy": false,
+            "std_accuracy": 2.2493184152834673
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 31.180000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 27.37,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 5.06,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 21.203333333333337,
+            "best_prompt": 31.180000000000003,
+            "prompt_id": "p1",
+            "CPS": 28.069275333333337,
+            "is_dummy": false,
+            "std_accuracy": 14.109728322449493
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 10.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 26.490000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 11.58,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 16.246666666666666,
+            "best_prompt": 26.490000000000002,
+            "prompt_id": "p2",
+            "CPS": 23.776541,
+            "is_dummy": false,
+            "std_accuracy": 8.882647878495092
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_GR.json b/e3c_llm_results/google/gemma-2-9b-it_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..c26a8a55cdd19623c0ff0f5e712de60c7e6ad80b
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 56.44067866666667,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 60.83,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 60.83,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.43000000000001,
+            "best_prompt": 60.83,
+            "prompt_id": "p1",
+            "CPS": 59.97838,
+            "is_dummy": false,
+            "std_accuracy": 2.4248711305964257
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 50.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 49.71,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 54.44,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 51.61666666666667,
+            "best_prompt": 54.44,
+            "prompt_id": "p3",
+            "CPS": 52.90297733333333,
+            "is_dummy": false,
+            "std_accuracy": 2.494681008332192
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_IT.json b/e3c_llm_results/google/gemma-2-9b-it_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..44ec606d3bfdffeaff629bc4b3693a101b991da2
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 39.260664733333336,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 69.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 66.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 65.69,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 67.07333333333334,
+            "best_prompt": 69.1,
+            "prompt_id": "p1",
+            "CPS": 67.69957333333333,
+            "is_dummy": false,
+            "std_accuracy": 1.7937205282131663
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 49.58,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 53.65,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.05,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 52.09333333333333,
+            "best_prompt": 53.65,
+            "prompt_id": "p2",
+            "CPS": 52.81484833333333,
+            "is_dummy": false,
+            "std_accuracy": 2.197187596299718
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 17.9,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.53,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 10.84,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 15.089999999999998,
+            "best_prompt": 17.9,
+            "prompt_id": "p1",
+            "CPS": 17.397009999999998,
+            "is_dummy": false,
+            "std_accuracy": 3.743808221584006
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.879999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 9.19,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 27.47333333333333,
+            "best_prompt": 40.35,
+            "prompt_id": "p2",
+            "CPS": 35.154265,
+            "is_dummy": false,
+            "std_accuracy": 16.268387545584638
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.51,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 26.529999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.32,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 14.12,
+            "best_prompt": 26.529999999999998,
+            "prompt_id": "p2",
+            "CPS": 23.237627,
+            "is_dummy": false,
+            "std_accuracy": 10.763832960428175
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_PL.json b/e3c_llm_results/google/gemma-2-9b-it_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..19e3421c8fad4a9a3a529ae9e17b4edd75aa67cc
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 54.98672666666667,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.08,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.620000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.08,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 58.92666666666667,
+            "best_prompt": 59.08,
+            "prompt_id": "p1",
+            "CPS": 58.98941066666667,
+            "is_dummy": false,
+            "std_accuracy": 0.26558112382722426
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.68000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 48.08,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.239999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 50.333333333333336,
+            "best_prompt": 51.68000000000001,
+            "prompt_id": "p1",
+            "CPS": 50.98404266666667,
+            "is_dummy": false,
+            "std_accuracy": 1.9638058288266032
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_SK.json b/e3c_llm_results/google/gemma-2-9b-it_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..246c36ffcc5a012bf8167c5bc372e6ba6b8a1a06
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 56.074384499999994,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 61.41,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 61.22,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.41,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 61.346666666666664,
+            "best_prompt": 61.41,
+            "prompt_id": "p1",
+            "CPS": 61.371106999999995,
+            "is_dummy": false,
+            "std_accuracy": 0.10969655114602758
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.53,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 47.54,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.13999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 50.06999999999999,
+            "best_prompt": 51.53,
+            "prompt_id": "p1",
+            "CPS": 50.777662,
+            "is_dummy": false,
+            "std_accuracy": 2.199704525612473
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-2-9b-it_10_SL.json b/e3c_llm_results/google/gemma-2-9b-it_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c09f9ec64339ddf7b3b9651d9148fa1453ebda0
--- /dev/null
+++ b/e3c_llm_results/google/gemma-2-9b-it_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 55.7992,
+    "config": {
+        "model_name": "google/gemma-2-9b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "google/gemma-2-9b-it",
+        "base_model": "Gemma2ForCausalLM",
+        "revision": "11c9b309abf73637e4b6f9a3fa1e92e615547819",
+        "submitted_time": "2024-06-24 08:05:41+00:00",
+        "num_params_billion": 9.241705984,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 63.65,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 57.37,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 63.65,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 61.556666666666665,
+            "best_prompt": 63.65,
+            "prompt_id": "p1",
+            "CPS": 62.31759333333333,
+            "is_dummy": false,
+            "std_accuracy": 3.6257596905108507
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 48.010000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 48.78,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 49.72,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 48.836666666666666,
+            "best_prompt": 49.72,
+            "prompt_id": "p3",
+            "CPS": 49.28080666666666,
+            "is_dummy": false,
+            "std_accuracy": 0.8564072240081397
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_EN.json b/e3c_llm_results/google/gemma-3-27b-it_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..b61e5f4cf20e438a1a1220080d2dbdbdc8fac0f9
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 23.81513066666667,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 54.459999999999994,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.3,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.94,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 54.9,
+            "best_prompt": 58.3,
+            "prompt_id": "p2",
+            "CPS": 56.3178,
+            "is_dummy": false,
+            "std_accuracy": 3.202748819373758
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 45.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 45.82,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 47.43,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 46.22666666666667,
+            "best_prompt": 47.43,
+            "prompt_id": "p3",
+            "CPS": 46.859259,
+            "is_dummy": false,
+            "std_accuracy": 1.0602043828117922
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.590000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 12.13,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 9.24,
+            "best_prompt": 15.590000000000002,
+            "prompt_id": "p1",
+            "CPS": 14.600035000000002,
+            "is_dummy": false,
+            "std_accuracy": 8.186946927884657
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 1.31,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.4366666666666667,
+            "best_prompt": 1.31,
+            "prompt_id": "p3",
+            "CPS": 1.2985593333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.7563288526384098
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_GR.json b/e3c_llm_results/google/gemma-3-27b-it_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..108468c36515deb23bfc1b1c6e5e2eb67bd70a35
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 50.113703,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 48.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 57.21000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 48.66,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 51.51,
+            "best_prompt": 57.21000000000001,
+            "prompt_id": "p2",
+            "CPS": 53.94903000000001,
+            "is_dummy": false,
+            "std_accuracy": 4.936344801571307
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.550000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 46.949999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 47.69,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.73,
+            "best_prompt": 47.69,
+            "prompt_id": "p3",
+            "CPS": 46.278376,
+            "is_dummy": false,
+            "std_accuracy": 4.501244272420679
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_IT.json b/e3c_llm_results/google/gemma-3-27b-it_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..1addb51066625c0689aa893038bd259be21472a6
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 24.543379466666668,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 55.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 66.97,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.540000000000006,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 60.64666666666667,
+            "best_prompt": 66.97,
+            "prompt_id": "p2",
+            "CPS": 62.73526366666667,
+            "is_dummy": false,
+            "std_accuracy": 5.849054054574408
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.9,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 48.949999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 49.27,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 47.373333333333335,
+            "best_prompt": 49.27,
+            "prompt_id": "p3",
+            "CPS": 48.335512333333334,
+            "is_dummy": false,
+            "std_accuracy": 3.0122472231431034
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.34,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.1499999999999995,
+            "best_prompt": 12.34,
+            "prompt_id": "p1",
+            "CPS": 11.576154,
+            "is_dummy": false,
+            "std_accuracy": 6.170097243966256
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.06999999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.02333333333333333,
+            "best_prompt": 0.06999999999999999,
+            "prompt_id": "p3",
+            "CPS": 0.06996733333333333,
+            "is_dummy": false,
+            "std_accuracy": 0.0404145188432738
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_PL.json b/e3c_llm_results/google/gemma-3-27b-it_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..394c056ed0e89819ffe5b326d2a6ea06595b9a44
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 44.29942833333334,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 45.06,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 45.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 45.06,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 45.076666666666675,
+            "best_prompt": 45.11,
+            "prompt_id": "p2",
+            "CPS": 45.09496333333333,
+            "is_dummy": false,
+            "std_accuracy": 0.028867513459479646
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.84,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 42.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 42.71,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 43.07333333333333,
+            "best_prompt": 43.84,
+            "prompt_id": "p1",
+            "CPS": 43.50389333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.6642539674953661
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_SK.json b/e3c_llm_results/google/gemma-3-27b-it_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..47d69384014d165501a8895f4c2647723f737797
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 37.22623216666667,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 31.830000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 21.57,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 31.830000000000002,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.41,
+            "best_prompt": 31.830000000000002,
+            "prompt_id": "p1",
+            "CPS": 30.741414000000002,
+            "is_dummy": false,
+            "std_accuracy": 5.923613761885561
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.730000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 43.6,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.730000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 43.68666666666667,
+            "best_prompt": 43.730000000000004,
+            "prompt_id": "p1",
+            "CPS": 43.71105033333333,
+            "is_dummy": false,
+            "std_accuracy": 0.07505553499465283
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_0_SL.json b/e3c_llm_results/google/gemma-3-27b-it_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..37b56e19ea639bdcaed35eb2d06922b7629a9d0a
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 45.01248166666667,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 47.83,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.7,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 45.076666666666675,
+            "best_prompt": 47.83,
+            "prompt_id": "p2",
+            "CPS": 46.513080666666674,
+            "is_dummy": false,
+            "std_accuracy": 2.384456611753152
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 42.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 43.91,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 42.55,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 43.00333333333333,
+            "best_prompt": 43.91,
+            "prompt_id": "p2",
+            "CPS": 43.511882666666665,
+            "is_dummy": false,
+            "std_accuracy": 0.7851963660978907
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_EN.json b/e3c_llm_results/google/gemma-3-27b-it_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..9171719ae6e1e33c6212b6f7e158092c14b22229
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 45.43019166666667,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 61.6,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 63.080000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 60.940000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 61.873333333333335,
+            "best_prompt": 63.080000000000005,
+            "prompt_id": "p2",
+            "CPS": 62.318834666666675,
+            "is_dummy": false,
+            "std_accuracy": 1.0958710386415615
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.910000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.00000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 57.64,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 55.18333333333334,
+            "best_prompt": 57.64,
+            "prompt_id": "p3",
+            "CPS": 56.22397733333334,
+            "is_dummy": false,
+            "std_accuracy": 2.9510054783638284
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.71,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 33.01,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 33.42,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 33.04666666666667,
+            "best_prompt": 33.42,
+            "prompt_id": "p3",
+            "CPS": 33.295232,
+            "is_dummy": false,
+            "std_accuracy": 0.3564173583501984
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 40.22,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 38.58,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.28,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 29.026666666666667,
+            "best_prompt": 40.22,
+            "prompt_id": "p1",
+            "CPS": 35.71804133333333,
+            "is_dummy": false,
+            "std_accuracy": 17.985842580578016
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.490000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 48.209999999999994,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 18.32,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 30.339999999999993,
+            "best_prompt": 48.209999999999994,
+            "prompt_id": "p2",
+            "CPS": 39.594873,
+            "is_dummy": false,
+            "std_accuracy": 15.78036438109082
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_GR.json b/e3c_llm_results/google/gemma-3-27b-it_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..41270bebf28ef0e6e8f33f0d0e737baca795023d
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 60.327389833333335,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 65.51,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 66.08000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 65.51,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 65.7,
+            "best_prompt": 66.08000000000001,
+            "prompt_id": "p2",
+            "CPS": 65.828896,
+            "is_dummy": false,
+            "std_accuracy": 0.32908965343809093
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 50.83,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.50000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.81,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 54.046666666666674,
+            "best_prompt": 55.81,
+            "prompt_id": "p3",
+            "CPS": 54.82588366666667,
+            "is_dummy": false,
+            "std_accuracy": 2.7900238947602856
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_IT.json b/e3c_llm_results/google/gemma-3-27b-it_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..70a2804c92f0a83e3005afb3f9db21e907f4043d
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 43.0060486,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 71.41999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 69.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 72.11999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 71.15333333333332,
+            "best_prompt": 72.11999999999999,
+            "prompt_id": "p3",
+            "CPS": 71.42284,
+            "is_dummy": false,
+            "std_accuracy": 1.1239810200058178
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 52.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.37,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 57.86,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 56.15333333333333,
+            "best_prompt": 58.37,
+            "prompt_id": "p2",
+            "CPS": 57.07613166666666,
+            "is_dummy": false,
+            "std_accuracy": 3.40726185276878
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 19.650000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 24.87,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 18.740000000000002,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 21.08666666666667,
+            "best_prompt": 24.87,
+            "prompt_id": "p2",
+            "CPS": 23.929085000000004,
+            "is_dummy": false,
+            "std_accuracy": 3.307904674160567
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 57.32000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 34.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.44,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 37.73,
+            "best_prompt": 57.32000000000001,
+            "prompt_id": "p1",
+            "CPS": 46.091012,
+            "is_dummy": false,
+            "std_accuracy": 18.166207639460694
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 13.469999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.83,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.433333333333332,
+            "best_prompt": 17.83,
+            "prompt_id": "p3",
+            "CPS": 16.511174333333333,
+            "is_dummy": false,
+            "std_accuracy": 9.294796035058182
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_PL.json b/e3c_llm_results/google/gemma-3-27b-it_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..bd7b8c17d93e86263091569c6493ab58ca0ed609
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 61.5666635,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 65.91,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 66.72,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 65.91,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 66.17999999999999,
+            "best_prompt": 66.72,
+            "prompt_id": "p2",
+            "CPS": 66.35971199999999,
+            "is_dummy": false,
+            "std_accuracy": 0.4676537180435982
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 57.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.010000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.800000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 55.92000000000001,
+            "best_prompt": 57.95,
+            "prompt_id": "p1",
+            "CPS": 56.77361500000001,
+            "is_dummy": false,
+            "std_accuracy": 2.0764633394307728
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_SK.json b/e3c_llm_results/google/gemma-3-27b-it_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..f3f69756b16bd7a976c5a541a45af486df4bc2b0
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 59.623766999999994,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 67.36999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 68.85,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 67.36999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 67.86333333333333,
+            "best_prompt": 68.85,
+            "prompt_id": "p2",
+            "CPS": 68.17067999999999,
+            "is_dummy": false,
+            "std_accuracy": 0.8544783984006484
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.03,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 50.949999999999996,
+            "best_prompt": 51.21,
+            "prompt_id": "p1",
+            "CPS": 51.076854,
+            "is_dummy": false,
+            "std_accuracy": 0.30789608636681387
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/gemma-3-27b-it_10_SL.json b/e3c_llm_results/google/gemma-3-27b-it_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..bd7c4e6c8176c167ca6687ba039808406d62be64
--- /dev/null
+++ b/e3c_llm_results/google/gemma-3-27b-it_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 59.561417000000006,
+    "config": {
+        "model_name": "google/gemma-3-27b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "google/gemma-3-27b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "005ad3404e59d6023443cb575daa05336842228a",
+        "submitted_time": "2025-03-01 19:10:19+00:00",
+        "num_params_billion": 27.43240664,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 67.5,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 69.17999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 67.5,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 68.06,
+            "best_prompt": 69.17999999999999,
+            "prompt_id": "p2",
+            "CPS": 68.405184,
+            "is_dummy": false,
+            "std_accuracy": 0.9699484522385671
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.49,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 47.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.449999999999996,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 49.99,
+            "best_prompt": 51.49,
+            "prompt_id": "p1",
+            "CPS": 50.71765,
+            "is_dummy": false,
+            "std_accuracy": 2.5635132143213135
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_EN.json b/e3c_llm_results/google/medgemma-27b-text-it_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..4f7b638b4f5148f10d1ac8a6fcc8ac73b0120493
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 21.581613666666666,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 38.42,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 60.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.559999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 50.11000000000001,
+            "best_prompt": 60.35,
+            "prompt_id": "p2",
+            "CPS": 54.17016,
+            "is_dummy": false,
+            "std_accuracy": 11.036670693646702
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 48.36,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 47.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 44.43,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 46.80666666666667,
+            "best_prompt": 48.36,
+            "prompt_id": "p1",
+            "CPS": 47.608808,
+            "is_dummy": false,
+            "std_accuracy": 2.0903667939702197
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 6.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.27,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 3.1666666666666665,
+            "best_prompt": 6.23,
+            "prompt_id": "p1",
+            "CPS": 6.039154333333334,
+            "is_dummy": false,
+            "std_accuracy": 3.116285181643897
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.09,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.03,
+            "best_prompt": 0.09,
+            "prompt_id": "p3",
+            "CPS": 0.089946,
+            "is_dummy": false,
+            "std_accuracy": 0.05196152422706632
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_GR.json b/e3c_llm_results/google/medgemma-27b-text-it_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..1ae8f09585013a20de7ea4216e6178d6ba673b9e
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 50.34454,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 53.14,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 61.260000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.14,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 55.84666666666667,
+            "best_prompt": 61.260000000000005,
+            "prompt_id": "p2",
+            "CPS": 57.943792,
+            "is_dummy": false,
+            "std_accuracy": 4.688084185819764
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 40.69,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 43.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.97,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.99333333333333,
+            "best_prompt": 43.32,
+            "prompt_id": "p2",
+            "CPS": 42.745288,
+            "is_dummy": false,
+            "std_accuracy": 1.3151552506580113
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_IT.json b/e3c_llm_results/google/medgemma-27b-text-it_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..a94b99de846b0b13135cb48d3a85113d66d96f44
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 21.714833866666666,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 42.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 62.12,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.82,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 53.51666666666666,
+            "best_prompt": 62.12,
+            "prompt_id": "p2",
+            "CPS": 56.77560933333333,
+            "is_dummy": false,
+            "std_accuracy": 9.9568586076801
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 40.42,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 49.16,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 46.04,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 45.20666666666667,
+            "best_prompt": 49.16,
+            "prompt_id": "p2",
+            "CPS": 47.21654133333333,
+            "is_dummy": false,
+            "std_accuracy": 4.429191047283162
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 4.72,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.64,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.03,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 1.7966666666666666,
+            "best_prompt": 4.72,
+            "prompt_id": "p1",
+            "CPS": 4.5820186666666665,
+            "is_dummy": false,
+            "std_accuracy": 2.54998692807107
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_PL.json b/e3c_llm_results/google/medgemma-27b-text-it_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..85973c072b147631fc1c74a559b0559f9b693e4a
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 43.305971666666665,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 42.16,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 43.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 42.16,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.449999999999996,
+            "best_prompt": 43.03,
+            "prompt_id": "p2",
+            "CPS": 42.780426,
+            "is_dummy": false,
+            "std_accuracy": 0.502294734194977
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.25,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 44.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 42.46,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 43.31666666666667,
+            "best_prompt": 44.24,
+            "prompt_id": "p2",
+            "CPS": 43.83151733333334,
+            "is_dummy": false,
+            "std_accuracy": 0.8918706931687655
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_SK.json b/e3c_llm_results/google/medgemma-27b-text-it_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..2fd3f810e8e542c5cac10a148bdc23ba143ceec5
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 36.36130216666666,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 29.709999999999997,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 29.709999999999997,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 23.36,
+            "best_prompt": 29.709999999999997,
+            "prompt_id": "p1",
+            "CPS": 27.823414999999997,
+            "is_dummy": false,
+            "std_accuracy": 10.99852262806237
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 45.31,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.95,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.403333333333336,
+            "best_prompt": 45.31,
+            "prompt_id": "p2",
+            "CPS": 44.89918933333333,
+            "is_dummy": false,
+            "std_accuracy": 0.7851963660978907
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_0_SL.json b/e3c_llm_results/google/medgemma-27b-text-it_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..2c55dad3d6016df0e351a987d060e5ae23fbba6b
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 46.321461,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 46.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.38,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 46.75,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 48.626666666666665,
+            "best_prompt": 52.38,
+            "prompt_id": "p2",
+            "CPS": 50.414004,
+            "is_dummy": false,
+            "std_accuracy": 3.2504820155375946
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.82,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 42.39,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.82,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.01,
+            "best_prompt": 42.39,
+            "prompt_id": "p2",
+            "CPS": 42.228918,
+            "is_dummy": false,
+            "std_accuracy": 0.3290896534380868
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json b/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..a7b9e6bfcab853a76d30a8636e03c8eba85e45ab
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 50.544787533333334,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 63.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 61.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 64.55,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 63.23666666666666,
+            "best_prompt": 64.55,
+            "prompt_id": "p3",
+            "CPS": 63.70224333333332,
+            "is_dummy": false,
+            "std_accuracy": 1.4948355539434195
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 55.620000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 54.94,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.65,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 55.403333333333336,
+            "best_prompt": 55.65,
+            "prompt_id": "p3",
+            "CPS": 55.512730000000005,
+            "is_dummy": false,
+            "std_accuracy": 0.4015387071420824
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 35.82,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 33.550000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.49333333333334,
+            "best_prompt": 37.11,
+            "prompt_id": "p1",
+            "CPS": 36.510055,
+            "is_dummy": false,
+            "std_accuracy": 1.8023410702010108
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 54.800000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 4.91,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 33.27,
+            "best_prompt": 54.800000000000004,
+            "prompt_id": "p1",
+            "CPS": 43.00156,
+            "is_dummy": false,
+            "std_accuracy": 25.63668660338149
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 48.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 38.01,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.24000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 49.413333333333334,
+            "best_prompt": 61.24000000000001,
+            "prompt_id": "p3",
+            "CPS": 53.99734933333334,
+            "is_dummy": false,
+            "std_accuracy": 11.620784540354126
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json b/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..a45217ffcb35082fa5eb968b82d5a3430383c72a
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 62.99263766666667,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 68.36,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 68.46,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 68.36,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 68.39333333333333,
+            "best_prompt": 68.46,
+            "prompt_id": "p2",
+            "CPS": 68.41436,
+            "is_dummy": false,
+            "std_accuracy": 0.05773502691895929
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 53.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 57.8,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 56.79666666666666,
+            "best_prompt": 58.67,
+            "prompt_id": "p2",
+            "CPS": 57.57091533333333,
+            "is_dummy": false,
+            "std_accuracy": 2.528958942595417
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json b/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..fa0c4c7ef1eb0f4da3d0d96d993ffc4c2883e03f
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_10_IT.json
@@ -0,0 +1,151 @@
+{
+    "average_CPS": 50.36945153333333,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 72.61999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 70.05,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 71.335,
+            "best_prompt": 72.61999999999999,
+            "prompt_id": "p1",
+            "CPS": 71.686833,
+            "is_dummy": false,
+            "std_accuracy": 1.8172644276494223
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.19,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 62.35000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 57.26,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.6,
+            "best_prompt": 62.35000000000001,
+            "prompt_id": "p2",
+            "CPS": 60.635375,
+            "is_dummy": false,
+            "std_accuracy": 2.569649781585037
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 23.14,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 15.409999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 22.823333333333334,
+            "best_prompt": 29.92,
+            "prompt_id": "p2",
+            "CPS": 27.796677333333335,
+            "is_dummy": false,
+            "std_accuracy": 7.260181356779826
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 58.98,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 57.97,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 5.28,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.74333333333333,
+            "best_prompt": 58.98,
+            "prompt_id": "p1",
+            "CPS": 48.224014,
+            "is_dummy": false,
+            "std_accuracy": 30.71629914773805
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 52.65,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 52.849999999999994,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.166666666666664,
+            "best_prompt": 52.849999999999994,
+            "prompt_id": "p3",
+            "CPS": 43.50435833333333,
+            "is_dummy": false,
+            "std_accuracy": 30.455390874742246
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json b/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..58e3a004980b086e1f5fe175d7884784ff6b0a94
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 64.263205,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 68.28999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 67.15,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 68.28999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 67.91,
+            "best_prompt": 68.28999999999999,
+            "prompt_id": "p1",
+            "CPS": 68.030498,
+            "is_dummy": false,
+            "std_accuracy": 0.6581793068761655
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 59.4,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 61.33,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.18,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.97,
+            "best_prompt": 61.33,
+            "prompt_id": "p2",
+            "CPS": 60.495912000000004,
+            "is_dummy": false,
+            "std_accuracy": 1.1829201156460223
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json b/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..addddd61e05b8976e4a1d3fcae6adf1772776b69
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 61.554673333333334,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 71.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 71.27,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 71.43,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 71.37666666666667,
+            "best_prompt": 71.43,
+            "prompt_id": "p1",
+            "CPS": 71.391904,
+            "is_dummy": false,
+            "std_accuracy": 0.09237604307034636
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 51.88,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.71,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 51.56666666666667,
+            "best_prompt": 51.88,
+            "prompt_id": "p2",
+            "CPS": 51.71744266666667,
+            "is_dummy": false,
+            "std_accuracy": 0.40451617190581457
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json b/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..d15804eb624203d39811e58e97dd13af0a908992
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-27b-text-it_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 62.13607933333333,
+    "config": {
+        "model_name": "google/medgemma-27b-text-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "google/medgemma-27b-text-it",
+        "base_model": "Gemma3ForCausalLM",
+        "revision": "6b08c481126ff65a9b8fa5ab4d691b152b8edb5d",
+        "submitted_time": "2025-05-19 20:53:04+00:00",
+        "num_params_billion": 27.00900224,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 69.47,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 67.65,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 69.47,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 68.86333333333333,
+            "best_prompt": 69.47,
+            "prompt_id": "p1",
+            "CPS": 69.04854866666666,
+            "is_dummy": false,
+            "std_accuracy": 1.050777489925115
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 53.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.900000000000006,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 54.94,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 54.69,
+            "best_prompt": 55.900000000000006,
+            "prompt_id": "p2",
+            "CPS": 55.22361,
+            "is_dummy": false,
+            "std_accuracy": 1.352442235365345
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_0_EN.json b/e3c_llm_results/google/medgemma-4b-it_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..6edf00bce2f17fe607f9aba70e967c27036c0fab
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 11.784388666666667,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 26.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 25.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 27.37,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 26.25,
+            "best_prompt": 27.37,
+            "prompt_id": "p3",
+            "CPS": 27.063456000000002,
+            "is_dummy": false,
+            "std_accuracy": 1.173200750084997
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 20.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 32.57,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.029999999999994,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.516666666666662,
+            "best_prompt": 32.57,
+            "prompt_id": "p2",
+            "CPS": 31.24982933333333,
+            "is_dummy": false,
+            "std_accuracy": 6.558485597554768
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.5599999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.38999999999999996,
+            "best_prompt": 0.61,
+            "prompt_id": "p1",
+            "CPS": 0.608658,
+            "is_dummy": false,
+            "std_accuracy": 0.33867388443752194
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_0_GR.json b/e3c_llm_results/google/medgemma-4b-it_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..f9a4742960aa0eafa76003daeca3bd32305b543d
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 27.1538555,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 27.05,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 26.540000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 27.05,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 26.88,
+            "best_prompt": 27.05,
+            "prompt_id": "p1",
+            "CPS": 27.004015,
+            "is_dummy": false,
+            "std_accuracy": 0.294448637286708
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 23.810000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 30.240000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.539999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 20.53,
+            "best_prompt": 30.240000000000002,
+            "prompt_id": "p2",
+            "CPS": 27.303696000000002,
+            "is_dummy": false,
+            "std_accuracy": 11.70005555542366
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_0_IT.json b/e3c_llm_results/google/medgemma-4b-it_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..38072c731ca8a9c900bca0a8685958a7ed43acf5
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 10.7509926,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 31.569999999999997,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 26.27,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 30.04,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 29.293333333333333,
+            "best_prompt": 31.569999999999997,
+            "prompt_id": "p1",
+            "CPS": 30.851256333333332,
+            "is_dummy": false,
+            "std_accuracy": 2.727752432559327
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.54,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 24.610000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 6.88,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.67666666666667,
+            "best_prompt": 24.610000000000003,
+            "prompt_id": "p2",
+            "CPS": 22.903706666666668,
+            "is_dummy": false,
+            "std_accuracy": 9.47534871829704
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_0_PL.json b/e3c_llm_results/google/medgemma-4b-it_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..56ba3c111cec9f9a021e0fab94a16735dfc6be30
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 17.725084,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 22.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 21.83,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 22.55,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 22.31,
+            "best_prompt": 22.55,
+            "prompt_id": "p1",
+            "CPS": 22.49588,
+            "is_dummy": false,
+            "std_accuracy": 0.41569219381653194
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.5,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.139999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 10.54,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 11.726666666666667,
+            "best_prompt": 13.139999999999999,
+            "prompt_id": "p2",
+            "CPS": 12.954287999999998,
+            "is_dummy": false,
+            "std_accuracy": 1.3147369825685031
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_0_SK.json b/e3c_llm_results/google/medgemma-4b-it_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..e7f23d3cdad5a7506bb4d3d0d7d4e57cc1afbb0a
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 19.074956666666665,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.47,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 23.87,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 24.47,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 24.27,
+            "best_prompt": 24.47,
+            "prompt_id": "p1",
+            "CPS": 24.421059999999997,
+            "is_dummy": false,
+            "std_accuracy": 0.34641016151377424
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.19,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 11.19,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.123333333333333,
+            "best_prompt": 13.99,
+            "prompt_id": "p2",
+            "CPS": 13.728853333333333,
+            "is_dummy": false,
+            "std_accuracy": 1.6165807537309524
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_0_SL.json b/e3c_llm_results/google/medgemma-4b-it_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..4e2b67f04b2f31f440338c2cec8b0ab5c9103308
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 18.266028000000002,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 25.740000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 25.580000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 25.740000000000002,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 25.686666666666667,
+            "best_prompt": 25.740000000000002,
+            "prompt_id": "p1",
+            "CPS": 25.726272,
+            "is_dummy": false,
+            "std_accuracy": 0.09237604307034021
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 9.73,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.89,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 9.73,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.116666666666667,
+            "best_prompt": 10.89,
+            "prompt_id": "p2",
+            "CPS": 10.805784000000001,
+            "is_dummy": false,
+            "std_accuracy": 0.6697263122599659
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_10_EN.json b/e3c_llm_results/google/medgemma-4b-it_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..70626aa417b34b7d2b2ccdddb35a20015f968ed9
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 33.354895866666666,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 48.33,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.05,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 49.51,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 49.29666666666666,
+            "best_prompt": 50.05,
+            "prompt_id": "p2",
+            "CPS": 49.672956666666664,
+            "is_dummy": false,
+            "std_accuracy": 0.8796211305632285
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 9.64,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 12.370000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 13.91,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 11.973333333333334,
+            "best_prompt": 13.91,
+            "prompt_id": "p3",
+            "CPS": 13.640609666666666,
+            "is_dummy": false,
+            "std_accuracy": 2.16246001889823
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 26.590000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 26.71,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 26.07,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 26.456666666666667,
+            "best_prompt": 26.71,
+            "prompt_id": "p2",
+            "CPS": 26.642334666666667,
+            "is_dummy": false,
+            "std_accuracy": 0.3401960219246161
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 36.620000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 38.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.06,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 24.893333333333334,
+            "best_prompt": 38.0,
+            "prompt_id": "p2",
+            "CPS": 33.019466666666666,
+            "is_dummy": false,
+            "std_accuracy": 21.517363531188792
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 45.050000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 37.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.78,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.27333333333333,
+            "best_prompt": 45.050000000000004,
+            "prompt_id": "p1",
+            "CPS": 43.79911166666667,
+            "is_dummy": false,
+            "std_accuracy": 3.763433715815032
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_10_GR.json b/e3c_llm_results/google/medgemma-4b-it_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..753d835f990f9f0fa75fe03181b4e867bda9d7db
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 32.8816105,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 49.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.39,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 49.1,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 49.53,
+            "best_prompt": 50.39,
+            "prompt_id": "p2",
+            "CPS": 49.956646000000006,
+            "is_dummy": false,
+            "std_accuracy": 0.7447818472546168
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.04,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.05,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 15.509999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 14.533333333333331,
+            "best_prompt": 16.05,
+            "prompt_id": "p2",
+            "CPS": 15.806575,
+            "is_dummy": false,
+            "std_accuracy": 2.1761050832469775
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_10_IT.json b/e3c_llm_results/google/medgemma-4b-it_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..8f8f5f9fe3968307cf6519e71d901d58727e7b1e
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 33.5863308,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 56.330000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 53.769999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.52,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 54.54,
+            "best_prompt": 56.330000000000005,
+            "prompt_id": "p1",
+            "CPS": 55.321693,
+            "is_dummy": false,
+            "std_accuracy": 1.5552170266557686
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.920000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 19.17,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.51,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.533333333333335,
+            "best_prompt": 19.17,
+            "prompt_id": "p2",
+            "CPS": 18.856251000000004,
+            "is_dummy": false,
+            "std_accuracy": 1.6251256361688882
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 10.72,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.61,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.96,
+            "best_prompt": 13.55,
+            "prompt_id": "p2",
+            "CPS": 13.199055,
+            "is_dummy": false,
+            "std_accuracy": 2.4787295132789304
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 52.290000000000006,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.89,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.54,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.24,
+            "best_prompt": 52.89,
+            "prompt_id": "p2",
+            "CPS": 43.554915,
+            "is_dummy": false,
+            "std_accuracy": 30.0525789242787
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.14,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.52,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.059999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.906666666666666,
+            "best_prompt": 43.14,
+            "prompt_id": "p1",
+            "CPS": 36.99974,
+            "is_dummy": false,
+            "std_accuracy": 24.583607004126414
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_10_PL.json b/e3c_llm_results/google/medgemma-4b-it_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..d94e371ca25ef75fddd5c86c38c8c5dc2cbc56ba
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 31.782375333333327,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.85999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.059999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.85999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 51.926666666666655,
+            "best_prompt": 52.059999999999995,
+            "prompt_id": "p2",
+            "CPS": 51.99058666666666,
+            "is_dummy": false,
+            "std_accuracy": 0.1154700538379268
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.709999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 9.969999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 9.969999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.549999999999999,
+            "best_prompt": 11.709999999999999,
+            "prompt_id": "p1",
+            "CPS": 11.574163999999998,
+            "is_dummy": false,
+            "std_accuracy": 1.004589468389949
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_10_SK.json b/e3c_llm_results/google/medgemma-4b-it_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..dbaba2b960b60e7987164956beffb9d1bcb5a809
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 28.978618833333336,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 47.56,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 44.49,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 47.56,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 46.53666666666667,
+            "best_prompt": 47.56,
+            "prompt_id": "p1",
+            "CPS": 47.07330266666667,
+            "is_dummy": false,
+            "std_accuracy": 1.7724653264121513
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 10.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 10.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.346666666666666,
+            "best_prompt": 10.95,
+            "prompt_id": "p1",
+            "CPS": 10.883935,
+            "is_dummy": false,
+            "std_accuracy": 0.5244362052083484
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/google/medgemma-4b-it_10_SL.json b/e3c_llm_results/google/medgemma-4b-it_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..fa1a8dbc033fe881961a9e854dabae4c4a91ee7f
--- /dev/null
+++ b/e3c_llm_results/google/medgemma-4b-it_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 32.7709705,
+    "config": {
+        "model_name": "google/medgemma-4b-it",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "google/medgemma-4b-it",
+        "base_model": "Gemma3ForConditionalGeneration",
+        "revision": "efe6cc02361759b6bd501c654ddb7c9d25ec509d",
+        "submitted_time": "2025-05-19 20:52:44+00:00",
+        "num_params_billion": 4.300079472,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.17,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 49.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.17,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 50.629999999999995,
+            "best_prompt": 51.17,
+            "prompt_id": "p1",
+            "CPS": 50.893682,
+            "is_dummy": false,
+            "std_accuracy": 0.9353074360871964
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.78,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 11.01,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 15.010000000000002,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.6,
+            "best_prompt": 15.010000000000002,
+            "prompt_id": "p3",
+            "CPS": 14.648259000000001,
+            "is_dummy": false,
+            "std_accuracy": 2.1223336212763546
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/meta-llama/.DS_Store b/e3c_llm_results/meta-llama/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..446a5c6c78528e5ea6ca14aabc826a97cfbc6bcc
Binary files /dev/null and b/e3c_llm_results/meta-llama/.DS_Store differ
diff --git a/e3c_llm_results/meta-llama/Llama-3.2-1B-Instruct_5.json b/e3c_llm_results/meta-llama/Llama-3.2-1B-Instruct_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..57f929e967ef4193e87800b4c8543700b611a406
--- /dev/null
+++ b/e3c_llm_results/meta-llama/Llama-3.2-1B-Instruct_5.json
@@ -0,0 +1,24 @@
+{
+    "average_CPS": 12.479999999999999,
+    "config": {
+        "model_name": "meta-llama/Llama-3.2-1B-Instruct",
+        "num_fewshot": "5",
+        "batch_size": 8
+    },
+    "tasks": {
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "prompt-1",
+                    "metric": "f1",
+                    "value": 12.479999999999999,
+                    "stderr": null
+                }
+            ],
+            "average_accuracy": 12.479999999999999,
+            "best_prompt": 12.479999999999999,
+            "prompt_id": "prompt-1",
+            "CPS": 12.479999999999999
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_EN.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..4db4a012e0e3da7b6c380f98c64ad248850bde75
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 13.326142266666668,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 25.019999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 30.89,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 27.68,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 27.863333333333333,
+            "best_prompt": 30.89,
+            "prompt_id": "p2",
+            "CPS": 29.955062666666667,
+            "is_dummy": false,
+            "std_accuracy": 2.939291297801793
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 22.74,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 39.290000000000006,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 35.42,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.483333333333334,
+            "best_prompt": 39.290000000000006,
+            "prompt_id": "p2",
+            "CPS": 36.61566066666667,
+            "is_dummy": false,
+            "std_accuracy": 8.65699909514454
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.01,
+            "best_prompt": 0.03,
+            "prompt_id": "p2",
+            "CPS": 0.029994,
+            "is_dummy": false,
+            "std_accuracy": 0.017320508075688773
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.03,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.01,
+            "best_prompt": 0.03,
+            "prompt_id": "p3",
+            "CPS": 0.029994,
+            "is_dummy": false,
+            "std_accuracy": 0.017320508075688773
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_GR.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..5439d8bb6753816be58ecc8557b98adbf7f56db0
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 15.980523333333334,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 16.41,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 18.69,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.41,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.17,
+            "best_prompt": 18.69,
+            "prompt_id": "p2",
+            "CPS": 18.405912,
+            "is_dummy": false,
+            "std_accuracy": 1.3163586137523473
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.359999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 7.779999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.180000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 9.773333333333333,
+            "best_prompt": 14.180000000000001,
+            "prompt_id": "p3",
+            "CPS": 13.555134666666667,
+            "is_dummy": false,
+            "std_accuracy": 3.822058782035324
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_IT.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..722d002e6e2ebd62d2b02df97491535731029afe
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 9.630995666666667,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 33.97,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 33.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.26,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 33.07666666666666,
+            "best_prompt": 33.97,
+            "prompt_id": "p1",
+            "CPS": 33.666534666666664,
+            "is_dummy": false,
+            "std_accuracy": 0.8575740978675451
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 14.89,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 7.359999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 1.49,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.913333333333333,
+            "best_prompt": 14.89,
+            "prompt_id": "p1",
+            "CPS": 13.851174333333335,
+            "is_dummy": false,
+            "std_accuracy": 6.7171149560904
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.64,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.21333333333333335,
+            "best_prompt": 0.64,
+            "prompt_id": "p2",
+            "CPS": 0.6372693333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.3695041722813605
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_PL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..07493d6ba926b87d132a4592225d9e5242419c11
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 29.002397166666665,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 28.15,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 28.610000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 28.15,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.30333333333333,
+            "best_prompt": 28.610000000000003,
+            "prompt_id": "p2",
+            "CPS": 28.522262666666666,
+            "is_dummy": false,
+            "std_accuracy": 0.26558112382723037
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.080000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 30.61,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 26.926666666666666,
+            "best_prompt": 30.61,
+            "prompt_id": "p3",
+            "CPS": 29.482531666666663,
+            "is_dummy": false,
+            "std_accuracy": 5.112263034443096
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_SK.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..06957df0b599a226b72424d6d11c3537fa551e27
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 21.908259666666666,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 25.71,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.87,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 25.71,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 27.096666666666664,
+            "best_prompt": 29.87,
+            "prompt_id": "p2",
+            "CPS": 29.041605333333333,
+            "is_dummy": false,
+            "std_accuracy": 2.4017771198288433
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.540000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.77,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 15.540000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.616666666666667,
+            "best_prompt": 15.540000000000001,
+            "prompt_id": "p1",
+            "CPS": 14.774914,
+            "is_dummy": false,
+            "std_accuracy": 8.527463475930773
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_0_SL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..ddd541d06daf7f909e3feb39637342751a49b26b
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 16.78806766666667,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 29.98,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 26.8,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 29.98,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.92,
+            "best_prompt": 29.98,
+            "prompt_id": "p1",
+            "CPS": 29.662212000000004,
+            "is_dummy": false,
+            "std_accuracy": 1.8359738560230099
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 3.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 1.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 3.95,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 3.0366666666666666,
+            "best_prompt": 3.95,
+            "prompt_id": "p1",
+            "CPS": 3.9139233333333334,
+            "is_dummy": false,
+            "std_accuracy": 1.5819397375795747
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..1696c5bc0fb57f5dd15f5e26d54b2f6936a552b9
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 34.61790526666666,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 50.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 49.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 50.49,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 50.080000000000005,
+            "best_prompt": 50.49,
+            "prompt_id": "p3",
+            "CPS": 50.282991,
+            "is_dummy": false,
+            "std_accuracy": 0.41509035161034796
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 11.07,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 11.256666666666666,
+            "best_prompt": 11.75,
+            "prompt_id": "p1",
+            "CPS": 11.692033333333333,
+            "is_dummy": false,
+            "std_accuracy": 0.4314317249963585
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 30.520000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 33.07,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.08,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 31.89,
+            "best_prompt": 33.07,
+            "prompt_id": "p2",
+            "CPS": 32.679774,
+            "is_dummy": false,
+            "std_accuracy": 1.285573801848807
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 18.33,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 28.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.02,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.793333333333333,
+            "best_prompt": 40.02,
+            "prompt_id": "p3",
+            "CPS": 35.527088,
+            "is_dummy": false,
+            "std_accuracy": 10.86512923684451
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 35.28,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 28.18,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 48.199999999999996,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 37.22,
+            "best_prompt": 48.199999999999996,
+            "prompt_id": "p3",
+            "CPS": 42.907639999999994,
+            "is_dummy": false,
+            "std_accuracy": 10.150014778314363
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..f5d4cbb3022141fb6c72b1cd6aede980b698a613
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 20.501029666666668,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 33.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 34.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 33.75,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 33.843333333333334,
+            "best_prompt": 34.03,
+            "prompt_id": "p2",
+            "CPS": 33.96647733333334,
+            "is_dummy": false,
+            "std_accuracy": 0.16165807537309587
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 4.2700000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.81,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.109999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.063333333333333,
+            "best_prompt": 7.109999999999999,
+            "prompt_id": "p3",
+            "CPS": 7.035582,
+            "is_dummy": false,
+            "std_accuracy": 1.560299116622621
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..2fbe45b0f55683348d2da5b042e05d3ea0817b35
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 32.65775206666667,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.949999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 53.010000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 52.75,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 52.57,
+            "best_prompt": 53.010000000000005,
+            "prompt_id": "p2",
+            "CPS": 52.776756,
+            "is_dummy": false,
+            "std_accuracy": 0.5524490926773298
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.14,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 9.610000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.219999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 14.99,
+            "best_prompt": 21.14,
+            "prompt_id": "p1",
+            "CPS": 19.83989,
+            "is_dummy": false,
+            "std_accuracy": 5.803438635843408
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 14.219999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.46,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.290000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.99,
+            "best_prompt": 16.46,
+            "prompt_id": "p2",
+            "CPS": 15.888838000000002,
+            "is_dummy": false,
+            "std_accuracy": 4.221599223043324
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 37.980000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.65,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.28,
+            "best_prompt": 53.65,
+            "prompt_id": "p3",
+            "CPS": 47.013495000000006,
+            "is_dummy": false,
+            "std_accuracy": 11.094408501583127
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.690000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 31.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 18.85,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 20.523333333333333,
+            "best_prompt": 31.03,
+            "prompt_id": "p2",
+            "CPS": 27.769781333333334,
+            "is_dummy": false,
+            "std_accuracy": 9.777982068572909
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..c0a4491e5fc31cf8df808a8d15d2d0212e34d7bd
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 28.317504,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.129999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.129999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 39.85999999999999,
+            "best_prompt": 41.32,
+            "prompt_id": "p2",
+            "CPS": 40.716727999999996,
+            "is_dummy": false,
+            "std_accuracy": 1.2643970895252832
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 12.07,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.36,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 13.660000000000002,
+            "best_prompt": 16.36,
+            "prompt_id": "p3",
+            "CPS": 15.91828,
+            "is_dummy": false,
+            "std_accuracy": 2.35055312639387
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..b7776b39560d44bb293b6bde77b03a5af6909e73
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 23.942156333333333,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.06,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 38.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.06,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.24333333333333,
+            "best_prompt": 41.06,
+            "prompt_id": "p1",
+            "CPS": 40.72467666666667,
+            "is_dummy": false,
+            "std_accuracy": 1.4145081595145848
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 5.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.0600000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.24,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.13,
+            "best_prompt": 7.24,
+            "prompt_id": "p3",
+            "CPS": 7.159636,
+            "is_dummy": false,
+            "std_accuracy": 1.0767079455451234
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..bb21bc4adcf5b49db6ab02eda0be2e1968689f12
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Clinical_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 24.260621,
+    "config": {
+        "model_name": "microsoft/MediPhi-Clinical",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "microsoft/MediPhi-Clinical",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "0906e64d321a9c4b058137b34fb3ed6e257e05a0",
+        "submitted_time": "2025-05-29 20:40:05+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 40.36,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 39.900000000000006,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.36,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.20666666666667,
+            "best_prompt": 40.36,
+            "prompt_id": "p1",
+            "CPS": 40.29811466666667,
+            "is_dummy": false,
+            "std_accuracy": 0.26558112382722426
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.290000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.74,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.42,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.483333333333334,
+            "best_prompt": 8.290000000000001,
+            "prompt_id": "p1",
+            "CPS": 8.223127333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.7769384359994902
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_EN.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..af7076f74a0b9ae21fef440fd356fd556f331e38
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 11.733952266666666,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 24.099999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.25,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 15.986666666666665,
+            "best_prompt": 24.099999999999998,
+            "prompt_id": "p2",
+            "CPS": 22.144686666666665,
+            "is_dummy": false,
+            "std_accuracy": 8.24815332867505
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.06,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.04,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 29.816666666666666,
+            "best_prompt": 40.06,
+            "prompt_id": "p2",
+            "CPS": 35.95652066666667,
+            "is_dummy": false,
+            "std_accuracy": 16.024463589566214
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.44999999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.15,
+            "best_prompt": 0.44999999999999996,
+            "prompt_id": "p2",
+            "CPS": 0.44864999999999994,
+            "is_dummy": false,
+            "std_accuracy": 0.25980762113533157
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.12,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.04,
+            "best_prompt": 0.12,
+            "prompt_id": "p3",
+            "CPS": 0.119904,
+            "is_dummy": false,
+            "std_accuracy": 0.06928203230275509
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_GR.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..0deadb52317a0d4b4df567d7a0aa11aee92ad91f
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 15.261295333333333,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.94,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 8.9,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 12.94,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 11.593333333333334,
+            "best_prompt": 12.94,
+            "prompt_id": "p1",
+            "CPS": 12.765741333333333,
+            "is_dummy": false,
+            "std_accuracy": 2.3324950875260875
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 9.62,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.7299999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 19.16,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 11.836666666666666,
+            "best_prompt": 19.16,
+            "prompt_id": "p3",
+            "CPS": 17.75684933333333,
+            "is_dummy": false,
+            "std_accuracy": 6.504723924451624
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_IT.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..110c043ee56415495c01f264c6fafcc47b72a79d
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 11.238403133333334,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 24.84,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 27.169999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 20.226666666666663,
+            "best_prompt": 27.169999999999998,
+            "prompt_id": "p3",
+            "CPS": 25.283496333333332,
+            "is_dummy": false,
+            "std_accuracy": 10.075943297445322
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 17.119999999999997,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 28.96,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.61,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 26.23,
+            "best_prompt": 32.61,
+            "prompt_id": "p3",
+            "CPS": 30.529482,
+            "is_dummy": false,
+            "std_accuracy": 8.097820694483179
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.38,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.12666666666666668,
+            "best_prompt": 0.38,
+            "prompt_id": "p2",
+            "CPS": 0.37903733333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.2193931022920578
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_PL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..7951d19547ec0c62157290970c79c1bf1ac4dd10
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 23.550823,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.8,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 15.1,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 15.666666666666666,
+            "best_prompt": 16.8,
+            "prompt_id": "p2",
+            "CPS": 16.6096,
+            "is_dummy": false,
+            "std_accuracy": 0.9814954576223645
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 26.83,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 31.259999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 28.32,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.80333333333333,
+            "best_prompt": 31.259999999999998,
+            "prompt_id": "p2",
+            "CPS": 30.492046,
+            "is_dummy": false,
+            "std_accuracy": 2.2542034809070213
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_SK.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..ca96039fbd7a4d7f1c3c742a43ec6649c3813335
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 18.487242666666667,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 16.41,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 20.810000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.41,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.876666666666665,
+            "best_prompt": 20.810000000000002,
+            "prompt_id": "p2",
+            "CPS": 20.199573333333337,
+            "is_dummy": false,
+            "std_accuracy": 2.5403411844343546
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 17.76,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 1.1199999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.76,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.213333333333333,
+            "best_prompt": 17.76,
+            "prompt_id": "p1",
+            "CPS": 16.774912,
+            "is_dummy": false,
+            "std_accuracy": 9.607108479315373
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_0_SL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..6faae21aaca5273dff9afdea9cc3e3059ac8ed49
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 16.379518,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 17.580000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 18.6,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.580000000000002,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.92,
+            "best_prompt": 18.6,
+            "prompt_id": "p2",
+            "CPS": 18.47352,
+            "is_dummy": false,
+            "std_accuracy": 0.588897274573418
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 14.46,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.84,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.46,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 13.253333333333336,
+            "best_prompt": 14.46,
+            "prompt_id": "p1",
+            "CPS": 14.285516000000001,
+            "is_dummy": false,
+            "std_accuracy": 2.0900079744664457
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..e4a7e0da538646f44356efa40899230fad028abf
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 38.280346866666676,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 53.56999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.27,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 50.629999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 52.156666666666666,
+            "best_prompt": 53.56999999999999,
+            "prompt_id": "p1",
+            "CPS": 52.81287733333333,
+            "is_dummy": false,
+            "std_accuracy": 1.4732730002729741
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 14.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 18.88,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 18.360000000000003,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.186666666666667,
+            "best_prompt": 18.88,
+            "prompt_id": "p2",
+            "CPS": 18.560298666666665,
+            "is_dummy": false,
+            "std_accuracy": 2.496183753919838
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 27.42,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 34.38,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 23.87,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.55666666666667,
+            "best_prompt": 34.38,
+            "prompt_id": "p2",
+            "CPS": 32.377938,
+            "is_dummy": false,
+            "std_accuracy": 5.346403775748081
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.62,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 30.020000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.44,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 34.36,
+            "best_prompt": 51.44,
+            "prompt_id": "p3",
+            "CPS": 42.654048,
+            "is_dummy": false,
+            "std_accuracy": 15.376436518257407
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 45.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 31.759999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 48.010000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.733333333333334,
+            "best_prompt": 48.010000000000005,
+            "prompt_id": "p3",
+            "CPS": 44.99657233333333,
+            "is_dummy": false,
+            "std_accuracy": 8.73296246031857
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..650ea7348936978f27e05aaa8a3c638806fda2f5
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 18.663691,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 28.22,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 29.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 28.22,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.81,
+            "best_prompt": 29.99,
+            "prompt_id": "p2",
+            "CPS": 29.636117999999996,
+            "is_dummy": false,
+            "std_accuracy": 1.0219099764656374
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 5.76,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.74,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.7700000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.756666666666667,
+            "best_prompt": 7.7700000000000005,
+            "prompt_id": "p3",
+            "CPS": 7.691264,
+            "is_dummy": false,
+            "std_accuracy": 1.0051036430803213
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..b93cb657d586266ae5ae595a9d695451b5f99c86
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 38.20042566666667,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 57.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.269999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 57.9,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 57.153333333333336,
+            "best_prompt": 57.9,
+            "prompt_id": "p3",
+            "CPS": 57.46768,
+            "is_dummy": false,
+            "std_accuracy": 0.8235492294534285
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 28.73,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 23.07,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 28.58,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 26.793333333333333,
+            "best_prompt": 28.73,
+            "prompt_id": "p1",
+            "CPS": 28.173595666666667,
+            "is_dummy": false,
+            "std_accuracy": 3.2253733634004806
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.45,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 15.079999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 4.75,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 11.76,
+            "best_prompt": 15.45,
+            "prompt_id": "p1",
+            "CPS": 14.879895,
+            "is_dummy": false,
+            "std_accuracy": 6.073656229982069
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 56.169999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 32.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 54.04,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 47.63666666666666,
+            "best_prompt": 56.169999999999995,
+            "prompt_id": "p1",
+            "CPS": 51.376826666666666,
+            "is_dummy": false,
+            "std_accuracy": 12.979300186579138
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.910000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.84,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 37.34666666666667,
+            "best_prompt": 40.29,
+            "prompt_id": "p2",
+            "CPS": 39.104131,
+            "is_dummy": false,
+            "std_accuracy": 3.910068200598721
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..b66eabed175d08abdde52f8af38292bb62a04113
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 35.295837,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 44.17,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 45.06,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 44.17,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.46666666666667,
+            "best_prompt": 45.06,
+            "prompt_id": "p2",
+            "CPS": 44.792644,
+            "is_dummy": false,
+            "std_accuracy": 0.5138417395787672
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.25,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 26.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 26.619999999999997,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 22.909999999999997,
+            "best_prompt": 26.86,
+            "prompt_id": "p2",
+            "CPS": 25.79903,
+            "is_dummy": false,
+            "std_accuracy": 6.634839862423206
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..061b50d9a6a36a837980da53f31b65f6e4a4a4db
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 28.662679833333332,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.269999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.269999999999996,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.25666666666667,
+            "best_prompt": 43.269999999999996,
+            "prompt_id": "p1",
+            "CPS": 42.831530666666666,
+            "is_dummy": false,
+            "std_accuracy": 1.7551448183364617
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 10.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.950000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.729999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 13.126666666666665,
+            "best_prompt": 14.729999999999999,
+            "prompt_id": "p3",
+            "CPS": 14.493828999999998,
+            "is_dummy": false,
+            "std_accuracy": 2.1374361588906776
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..aef87aa5170b16e3a61df26401fb96d046eca734
--- /dev/null
+++ b/e3c_llm_results/microsoft/MediPhi-Instruct_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 29.24573433333333,
+    "config": {
+        "model_name": "microsoft/MediPhi-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "microsoft/MediPhi-Instruct",
+        "base_model": "Phi3ForCausalLM",
+        "revision": "a94ac478e7c246103d55665a0804684042f3b973",
+        "submitted_time": "2025-07-11 19:28:15+00:00",
+        "num_params_billion": 3.821079552,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 39.73,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 35.64,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 39.73,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 38.36666666666667,
+            "best_prompt": 39.73,
+            "prompt_id": "p1",
+            "CPS": 39.188347666666665,
+            "is_dummy": false,
+            "std_accuracy": 2.3613626009855673
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 11.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 14.680000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 20.27,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 15.5,
+            "best_prompt": 20.27,
+            "prompt_id": "p3",
+            "CPS": 19.303121,
+            "is_dummy": false,
+            "std_accuracy": 4.417454017870474
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_EN.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..aadc2cb582024aa75c6c9d0b9a19fdb6cec678fb
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 11.809398400000003,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 25.290000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 21.44,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.62,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 22.783333333333335,
+            "best_prompt": 25.290000000000003,
+            "prompt_id": "p1",
+            "CPS": 24.656064,
+            "is_dummy": false,
+            "std_accuracy": 2.172701850998737
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 36.88,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 36.42,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.93,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 30.07666666666667,
+            "best_prompt": 36.88,
+            "prompt_id": "p1",
+            "CPS": 34.37093066666667,
+            "is_dummy": false,
+            "std_accuracy": 11.387670232902487
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.02,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.006666666666666667,
+            "best_prompt": 0.02,
+            "prompt_id": "p2",
+            "CPS": 0.019997333333333336,
+            "is_dummy": false,
+            "std_accuracy": 0.011547005383792516
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_GR.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..cb74cbd399ee3933ce898ab6828e2693b10521e4
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 14.120156666666666,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 16.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 19.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.03,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.05,
+            "best_prompt": 19.09,
+            "prompt_id": "p2",
+            "CPS": 18.700564,
+            "is_dummy": false,
+            "std_accuracy": 1.766691823720254
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 4.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.4799999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 9.94,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.913333333333333,
+            "best_prompt": 9.94,
+            "prompt_id": "p3",
+            "CPS": 9.539749333333333,
+            "is_dummy": false,
+            "std_accuracy": 3.5123970922054544
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_IT.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..9a9beab9a845c0ffefb5e20bcd50f075cd21968e
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 7.919311333333335,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 27.88,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 20.3,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 24.81,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 24.33,
+            "best_prompt": 27.88,
+            "prompt_id": "p1",
+            "CPS": 26.89026,
+            "is_dummy": false,
+            "std_accuracy": 3.8127286816661887
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 13.819999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 1.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 1.4000000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.616666666666666,
+            "best_prompt": 13.819999999999999,
+            "prompt_id": "p1",
+            "CPS": 12.686299333333332,
+            "is_dummy": false,
+            "std_accuracy": 7.105225776379898
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.02,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.006666666666666667,
+            "best_prompt": 0.02,
+            "prompt_id": "p1",
+            "CPS": 0.019997333333333336,
+            "is_dummy": false,
+            "std_accuracy": 0.011547005383792516
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_PL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..731f4e2146d96568835ab97ca613fd768326b73f
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 21.287892,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 30.240000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 28.110000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 30.240000000000002,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 29.53,
+            "best_prompt": 30.240000000000002,
+            "prompt_id": "p1",
+            "CPS": 30.025296,
+            "is_dummy": false,
+            "std_accuracy": 1.2297560733739024
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 12.920000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.63,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.060000000000002,
+            "best_prompt": 12.920000000000002,
+            "prompt_id": "p2",
+            "CPS": 12.550488000000001,
+            "is_dummy": false,
+            "std_accuracy": 2.476832654823495
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SK.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..63b3049082e6bb3bd8346a2643161658df5aa232
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 14.880865666666669,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 21.46,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.43,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 21.439999999999998,
+            "best_prompt": 21.46,
+            "prompt_id": "p2",
+            "CPS": 21.455708,
+            "is_dummy": false,
+            "std_accuracy": 0.01732050807568943
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.5600000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 8.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.5600000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.823333333333333,
+            "best_prompt": 8.35,
+            "prompt_id": "p2",
+            "CPS": 8.306023333333334,
+            "is_dummy": false,
+            "std_accuracy": 0.45610671265980385
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..ec8d55691ba462f4f9f19d6fbcb79d46d3a9aa6f
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 17.567646000000003,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 17.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 19.470000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.66,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 18.263333333333335,
+            "best_prompt": 19.470000000000002,
+            "prompt_id": "p2",
+            "CPS": 19.235062000000003,
+            "is_dummy": false,
+            "std_accuracy": 1.045003987233224
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.950000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.66,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.756666666666668,
+            "best_prompt": 16.950000000000003,
+            "prompt_id": "p2",
+            "CPS": 15.900230000000002,
+            "is_dummy": false,
+            "std_accuracy": 5.363584000771625
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..b59774c4de6ace8ec3e1efd922b4f6a655936fe6
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 29.993946066666666,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 47.25,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 47.3,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 48.05,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 47.53333333333333,
+            "best_prompt": 48.05,
+            "prompt_id": "p3",
+            "CPS": 47.801741666666665,
+            "is_dummy": false,
+            "std_accuracy": 0.4481443219916242
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 25.929999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.339999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.48,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.916666666666664,
+            "best_prompt": 41.48,
+            "prompt_id": "p3",
+            "CPS": 39.17232933333333,
+            "is_dummy": false,
+            "std_accuracy": 8.667469834578794
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.9900000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.88,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.78,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.216666666666669,
+            "best_prompt": 21.78,
+            "prompt_id": "p3",
+            "CPS": 19.697106,
+            "is_dummy": false,
+            "std_accuracy": 10.494333391565819
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.37,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 2.4,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 15.43,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 6.3999999999999995,
+            "best_prompt": 15.43,
+            "prompt_id": "p3",
+            "CPS": 14.036670999999998,
+            "is_dummy": false,
+            "std_accuracy": 7.837148716210507
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 39.290000000000006,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.58,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 13.766666666666667,
+            "best_prompt": 39.290000000000006,
+            "prompt_id": "p2",
+            "CPS": 29.261882333333336,
+            "is_dummy": false,
+            "std_accuracy": 22.107940504111493
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..5f74d913bb00dc2c9e4bf88d2e4dbf9fe34675a8
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 29.209499,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 34.98,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 36.480000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 34.98,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.48,
+            "best_prompt": 36.480000000000004,
+            "prompt_id": "p2",
+            "CPS": 36.1152,
+            "is_dummy": false,
+            "std_accuracy": 0.8660254037844427
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 10.549999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 23.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.89,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 18.62333333333333,
+            "best_prompt": 23.43,
+            "prompt_id": "p2",
+            "CPS": 22.303798,
+            "is_dummy": false,
+            "std_accuracy": 7.03398417209858
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..868408c2c0e8dddfde632c93328879776a7a19c2
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 33.12944686666667,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.470000000000006,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 51.49,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 51.76,
+            "best_prompt": 52.32,
+            "prompt_id": "p2",
+            "CPS": 52.027007999999995,
+            "is_dummy": false,
+            "std_accuracy": 0.48507731342539395
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 30.919999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 45.300000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 42.52,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 39.580000000000005,
+            "best_prompt": 45.300000000000004,
+            "prompt_id": "p2",
+            "CPS": 42.70884,
+            "is_dummy": false,
+            "std_accuracy": 7.627502867911624
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.54,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 14.34,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 11.62,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 9.166666666666666,
+            "best_prompt": 14.34,
+            "prompt_id": "p2",
+            "CPS": 13.598144,
+            "is_dummy": false,
+            "std_accuracy": 6.743451144134829
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 12.22,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 23.600000000000005,
+            "best_prompt": 56.95,
+            "prompt_id": "p2",
+            "CPS": 37.95717500000001,
+            "is_dummy": false,
+            "std_accuracy": 29.36330873726597
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.489999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 21.41,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 6.550000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 11.816666666666668,
+            "best_prompt": 21.41,
+            "prompt_id": "p2",
+            "CPS": 19.356067333333336,
+            "is_dummy": false,
+            "std_accuracy": 8.321354056482233
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..13a89c8e4b08849d588f153fdcb8066740e941f0
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 43.31202666666667,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 49.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.46000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 49.11,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 49.56,
+            "best_prompt": 50.46000000000001,
+            "prompt_id": "p2",
+            "CPS": 50.005860000000006,
+            "is_dummy": false,
+            "std_accuracy": 0.7794228634059998
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 38.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 33.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 26.83,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.96333333333333,
+            "best_prompt": 38.95,
+            "prompt_id": "p1",
+            "CPS": 36.61819333333333,
+            "is_dummy": false,
+            "std_accuracy": 6.061330986947781
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..db755fca2eefd4b03af0d9deea859e0616b8b878
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 31.267611000000002,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 40.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 37.940000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.29,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 39.50666666666667,
+            "best_prompt": 40.29,
+            "prompt_id": "p1",
+            "CPS": 39.974395,
+            "is_dummy": false,
+            "std_accuracy": 1.3567731325956172
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 19.48,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 22.93,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 21.32,
+            "best_prompt": 22.93,
+            "prompt_id": "p3",
+            "CPS": 22.560827,
+            "is_dummy": false,
+            "std_accuracy": 1.7364619201122722
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..d8fe976be9e70fd68689e49a6e17ccab037e8e7c
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-7B-Instruct-v0.2_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 31.471755,
+    "config": {
+        "model_name": "mistralai/Mistral-7B-Instruct-v0.2",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "mistralai/Mistral-7B-Instruct-v0.2",
+        "base_model": "MistralForCausalLM",
+        "revision": "63a8b081895390a26e140280378bc85ec8bce07a",
+        "submitted_time": "2023-12-11 13:18:44+00:00",
+        "num_params_billion": 7.241732096,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 42.04,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.74,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 42.04,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.94,
+            "best_prompt": 42.04,
+            "prompt_id": "p1",
+            "CPS": 41.99796,
+            "is_dummy": false,
+            "std_accuracy": 0.1732050807568861
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 19.900000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 19.5,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.15,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 20.183333333333334,
+            "best_prompt": 21.15,
+            "prompt_id": "p3",
+            "CPS": 20.94555,
+            "is_dummy": false,
+            "std_accuracy": 0.8607167555783559
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_EN.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..1bd1b313b513abb3fd3fddc070192d2116dd584a
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 13.2821676,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 27.67,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 22.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.48,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 22.713333333333335,
+            "best_prompt": 27.67,
+            "prompt_id": "p1",
+            "CPS": 26.298490333333337,
+            "is_dummy": false,
+            "std_accuracy": 5.100630679958444
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 36.94,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 34.82,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.41,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 34.72333333333333,
+            "best_prompt": 36.94,
+            "prompt_id": "p1",
+            "CPS": 36.12116333333333,
+            "is_dummy": false,
+            "std_accuracy": 2.26654656544562
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 3.85,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.03,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 1.2933333333333332,
+            "best_prompt": 3.85,
+            "prompt_id": "p1",
+            "CPS": 3.7515683333333336,
+            "is_dummy": false,
+            "std_accuracy": 2.21418909159388
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.08,
+            "best_prompt": 0.24,
+            "prompt_id": "p2",
+            "CPS": 0.23961599999999997,
+            "is_dummy": false,
+            "std_accuracy": 0.13856406460551018
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_GR.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..9f950af6ff240caf2360ab845773046d6b8534b7
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 18.510654333333335,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.87,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.32,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.170000000000001,
+            "best_prompt": 7.32,
+            "prompt_id": "p1",
+            "CPS": 7.30902,
+            "is_dummy": false,
+            "std_accuracy": 0.25980762113533173
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 21.17,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.87,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 23.263333333333332,
+            "best_prompt": 32.87,
+            "prompt_id": "p3",
+            "CPS": 29.712288666666666,
+            "is_dummy": false,
+            "std_accuracy": 8.749864760859639
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_IT.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..81bbd6c64c00d7d4701e0cdef93c518d24aefeda
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 10.599325933333333,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 27.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 17.72,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 13.16,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 19.599999999999998,
+            "best_prompt": 27.92,
+            "prompt_id": "p1",
+            "CPS": 25.597056,
+            "is_dummy": false,
+            "std_accuracy": 7.557459890730484
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 28.49,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 23.84,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 18.61,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 23.646666666666665,
+            "best_prompt": 28.49,
+            "prompt_id": "p1",
+            "CPS": 27.11013433333333,
+            "is_dummy": false,
+            "std_accuracy": 4.942836567532183
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.09666666666666666,
+            "best_prompt": 0.29,
+            "prompt_id": "p1",
+            "CPS": 0.2894393333333333,
+            "is_dummy": false,
+            "std_accuracy": 0.16743157806499145
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_PL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..f6f617f8695146472f3082ad22891853a84acc20
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 12.708361833333333,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 4.83,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.390000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 4.83,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 4.683333333333334,
+            "best_prompt": 4.83,
+            "prompt_id": "p1",
+            "CPS": 4.822916,
+            "is_dummy": false,
+            "std_accuracy": 0.25403411844343504
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.61,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 18.233333333333334,
+            "best_prompt": 21.23,
+            "prompt_id": "p1",
+            "CPS": 20.593807666666667,
+            "is_dummy": false,
+            "std_accuracy": 2.598198093551247
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SK.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..171db4f330cc7f9d6b7d596285ad10637ded4d89
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 12.570468000000002,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 6.8500000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 8.44,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 6.8500000000000005,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 7.38,
+            "best_prompt": 8.44,
+            "prompt_id": "p2",
+            "CPS": 8.350536,
+            "is_dummy": false,
+            "std_accuracy": 0.9179869280115044
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 16.96,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 13.96,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 16.96,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 15.96,
+            "best_prompt": 16.96,
+            "prompt_id": "p1",
+            "CPS": 16.7904,
+            "is_dummy": false,
+            "std_accuracy": 1.7320508075688772
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cacb2f5cb2ed10a7172cdc1fe0420a1e0f1a7c7
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 15.375161166666668,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 8.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 8.05,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.61,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 8.423333333333334,
+            "best_prompt": 8.61,
+            "prompt_id": "p1",
+            "CPS": 8.593928,
+            "is_dummy": false,
+            "std_accuracy": 0.3233161507461897
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 23.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.96,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 23.09,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 19.046666666666667,
+            "best_prompt": 23.09,
+            "prompt_id": "p1",
+            "CPS": 22.156394333333335,
+            "is_dummy": false,
+            "std_accuracy": 7.00325876527016
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..01cbf61242c6ef0be3119462634742a963ea82a0
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 42.75896306666667,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 57.769999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.41,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 56.68,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 57.62,
+            "best_prompt": 58.41,
+            "prompt_id": "p2",
+            "CPS": 57.948561,
+            "is_dummy": false,
+            "std_accuracy": 0.8746999485537866
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 34.82,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.080000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 44.49,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 43.13,
+            "best_prompt": 50.080000000000005,
+            "prompt_id": "p2",
+            "CPS": 46.59944,
+            "is_dummy": false,
+            "std_accuracy": 7.720369162157988
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.990000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 27.18,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 23.56,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 25.243333333333336,
+            "best_prompt": 27.18,
+            "prompt_id": "p2",
+            "CPS": 26.653614,
+            "is_dummy": false,
+            "std_accuracy": 1.8232480175042929
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 30.34,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.760000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 57.78,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 43.29333333333333,
+            "best_prompt": 57.78,
+            "prompt_id": "p3",
+            "CPS": 49.409604,
+            "is_dummy": false,
+            "std_accuracy": 13.784111626555168
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 9.049999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 18.04,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 22.506666666666664,
+            "best_prompt": 40.43,
+            "prompt_id": "p2",
+            "CPS": 33.183596333333334,
+            "is_dummy": false,
+            "std_accuracy": 16.15980919854357
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d47f4da00b8572c6d9d7ff9e6a17522b8f6736d
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 40.65579733333333,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 50.81,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 49.88,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 50.81,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 50.5,
+            "best_prompt": 50.81,
+            "prompt_id": "p1",
+            "CPS": 50.652489,
+            "is_dummy": false,
+            "std_accuracy": 0.5369357503463518
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 20.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 22.96,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 33.23,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 25.49333333333333,
+            "best_prompt": 33.23,
+            "prompt_id": "p3",
+            "CPS": 30.65910566666666,
+            "is_dummy": false,
+            "std_accuracy": 6.831854311483326
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..144ace83125b2722b6bd87adbd3135852773850f
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 39.459138,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 64.3,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 64.37,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 64.57000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 64.41333333333334,
+            "best_prompt": 64.57000000000001,
+            "prompt_id": "p3",
+            "CPS": 64.46884033333333,
+            "is_dummy": false,
+            "std_accuracy": 0.14011899704656258
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 27.08,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 40.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.6,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.556666666666665,
+            "best_prompt": 40.99,
+            "prompt_id": "p2",
+            "CPS": 38.76287666666667,
+            "is_dummy": false,
+            "std_accuracy": 7.437636273261376
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.19,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 12.989999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 12.870000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.683333333333332,
+            "best_prompt": 12.989999999999998,
+            "prompt_id": "p2",
+            "CPS": 12.950164,
+            "is_dummy": false,
+            "std_accuracy": 0.43143172499635823
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 44.49,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.589999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 37.28,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 46.120000000000005,
+            "best_prompt": 56.589999999999996,
+            "prompt_id": "p2",
+            "CPS": 50.665027,
+            "is_dummy": false,
+            "std_accuracy": 9.757648282245059
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 6.9,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 38.46,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.53,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.63,
+            "best_prompt": 38.46,
+            "prompt_id": "p2",
+            "CPS": 30.448781999999998,
+            "is_dummy": false,
+            "std_accuracy": 18.042059195114064
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..b64dc4233493142474036aff130aaaaff276a69f
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 36.890603,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 53.52,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 54.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.52,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 53.75,
+            "best_prompt": 54.21,
+            "prompt_id": "p2",
+            "CPS": 53.960634,
+            "is_dummy": false,
+            "std_accuracy": 0.3983716857408405
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 18.63,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 18.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 20.01,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 19.063333333333333,
+            "best_prompt": 20.01,
+            "prompt_id": "p3",
+            "CPS": 19.820572,
+            "is_dummy": false,
+            "std_accuracy": 0.8208126054912502
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..eada31997236f6a8902716b90e2ebb7174c22701
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 35.643439,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 50.24999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.4,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 50.24999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 50.29999999999999,
+            "best_prompt": 50.4,
+            "prompt_id": "p2",
+            "CPS": 50.349599999999995,
+            "is_dummy": false,
+            "std_accuracy": 0.08660254037844715
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.370000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 21.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 20.94,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 18.323333333333334,
+            "best_prompt": 21.66,
+            "prompt_id": "p2",
+            "CPS": 20.937278,
+            "is_dummy": false,
+            "std_accuracy": 5.16829114246995
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..e58549cd882cffb336a9d6d8a333af7027b45f0b
--- /dev/null
+++ b/e3c_llm_results/mistralai/Mistral-Nemo-Instruct-2407_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 36.596855166666664,
+    "config": {
+        "model_name": "mistralai/Mistral-Nemo-Instruct-2407",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "mistralai/Mistral-Nemo-Instruct-2407",
+        "base_model": "MistralForCausalLM",
+        "revision": "04d8a90549d23fc6bd7f642064003592df51e9b3",
+        "submitted_time": "2024-07-17 17:26:49+00:00",
+        "num_params_billion": 12.2477824,
+        "language": "en_fr_de_es_it_pt_ru_zh_ja"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 53.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 53.349999999999994,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 53.23,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 53.26999999999999,
+            "best_prompt": 53.349999999999994,
+            "prompt_id": "p2",
+            "CPS": 53.30731999999999,
+            "is_dummy": false,
+            "std_accuracy": 0.06928203230275362
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 13.900000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 20.57,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 17.27,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.246666666666666,
+            "best_prompt": 20.57,
+            "prompt_id": "p2",
+            "CPS": 19.886390333333335,
+            "is_dummy": false,
+            "std_accuracy": 3.335061218828423
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_EN.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..10ae518593d1807a174e4fa432fd431d0ae97996
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 13.158133999999999,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 22.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 27.089999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 29.959999999999997,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 26.58333333333333,
+            "best_prompt": 29.959999999999997,
+            "prompt_id": "p3",
+            "CPS": 28.948350666666663,
+            "is_dummy": false,
+            "std_accuracy": 3.6564235713786393
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.57,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 38.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.48,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.800000000000004,
+            "best_prompt": 38.48,
+            "prompt_id": "p3",
+            "CPS": 36.294336,
+            "is_dummy": false,
+            "std_accuracy": 9.725682495331625
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.5499999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.18333333333333332,
+            "best_prompt": 0.5499999999999999,
+            "prompt_id": "p1",
+            "CPS": 0.5479833333333333,
+            "is_dummy": false,
+            "std_accuracy": 0.3175426480542941
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_GR.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..98ef301d1dc0f252a219466e3c5bb99d1df82d9b
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 14.248081500000001,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 21.3,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 4.95,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.3,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 15.85,
+            "best_prompt": 21.3,
+            "prompt_id": "p1",
+            "CPS": 20.13915,
+            "is_dummy": false,
+            "std_accuracy": 9.439676901250381
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 4.01,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 2.5,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.67,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.06,
+            "best_prompt": 8.67,
+            "prompt_id": "p3",
+            "CPS": 8.357013,
+            "is_dummy": false,
+            "std_accuracy": 3.2162244946520757
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_IT.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..a8d2736e1fdc621ba8a7865b0898212c3103f0a6
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 9.225035466666668,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 12.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 23.27,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 24.44,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 20.106666666666666,
+            "best_prompt": 24.44,
+            "prompt_id": "p3",
+            "CPS": 23.380933333333335,
+            "is_dummy": false,
+            "std_accuracy": 6.5186067018446
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.04,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 14.92,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 18.650000000000002,
+            "best_prompt": 24.04,
+            "prompt_id": "p1",
+            "CPS": 22.744244000000002,
+            "is_dummy": false,
+            "std_accuracy": 4.7812446078400965
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_PL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..e0599da2f28b325c997b59c07291b8b641ae9509
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 19.314392833333333,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 24.52,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 23.380000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 24.52,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 24.14,
+            "best_prompt": 24.52,
+            "prompt_id": "p1",
+            "CPS": 24.426824,
+            "is_dummy": false,
+            "std_accuracy": 0.6581793068761717
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 15.010000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 1.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 12.64,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 9.626666666666667,
+            "best_prompt": 15.010000000000002,
+            "prompt_id": "p1",
+            "CPS": 14.201961666666667,
+            "is_dummy": false,
+            "std_accuracy": 7.367647747641939
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SK.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..e049cbb1535fdab83832d8918c7233ae21389dce
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 16.691507333333334,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 27.169999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 31.78,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 27.169999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.706666666666667,
+            "best_prompt": 31.78,
+            "prompt_id": "p2",
+            "CPS": 30.803294666666666,
+            "is_dummy": false,
+            "std_accuracy": 2.6615847409641766
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.43,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 2.6,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 1.43,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 1.82,
+            "best_prompt": 2.6,
+            "prompt_id": "p2",
+            "CPS": 2.57972,
+            "is_dummy": false,
+            "std_accuracy": 0.6754998149518622
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..63e6960b5e06d5b9c360d7afb172496c8bd16cfe
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 12.605178333333333,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 25.19,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 18.529999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 25.19,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 22.97,
+            "best_prompt": 25.19,
+            "prompt_id": "p1",
+            "CPS": 24.630782,
+            "is_dummy": false,
+            "std_accuracy": 3.8451527928029097
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.47000000000000003,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.58,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.47000000000000003,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.5066666666666667,
+            "best_prompt": 0.58,
+            "prompt_id": "p2",
+            "CPS": 0.5795746666666666,
+            "is_dummy": false,
+            "std_accuracy": 0.06350852961085879
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..ad6d65e78f5c187255c34d8ebaebcf4483a86c4f
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 34.895077066666666,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 58.4,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 54.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.28,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 57.29666666666666,
+            "best_prompt": 59.28,
+            "prompt_id": "p3",
+            "CPS": 58.104279999999996,
+            "is_dummy": false,
+            "std_accuracy": 2.709101942218737
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.15,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 51.45333333333334,
+            "best_prompt": 55.86,
+            "prompt_id": "p2",
+            "CPS": 53.398436000000004,
+            "is_dummy": false,
+            "std_accuracy": 7.026665876027785
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 27.92,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 18.16,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 3.5000000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 16.526666666666667,
+            "best_prompt": 27.92,
+            "prompt_id": "p1",
+            "CPS": 24.738981333333335,
+            "is_dummy": false,
+            "std_accuracy": 12.291661129942256
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.08,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.580000000000002,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.77,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 10.810000000000002,
+            "best_prompt": 16.580000000000002,
+            "prompt_id": "p2",
+            "CPS": 15.623334000000002,
+            "is_dummy": false,
+            "std_accuracy": 5.067908838959124
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 2.11,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 26.779999999999998,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 4.74,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 11.209999999999999,
+            "best_prompt": 26.779999999999998,
+            "prompt_id": "p2",
+            "CPS": 22.610354,
+            "is_dummy": false,
+            "std_accuracy": 13.547985090041987
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..5d46686fafe4d41a7d6e122ac2cd8759bab92f23
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 36.36378083333333,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 33.45,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 36.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 33.45,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 34.483333333333334,
+            "best_prompt": 36.55,
+            "prompt_id": "p2",
+            "CPS": 35.79463333333333,
+            "is_dummy": false,
+            "std_accuracy": 1.7897858344878366
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.49,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 37.55,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.68,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.906666666666666,
+            "best_prompt": 37.55,
+            "prompt_id": "p2",
+            "CPS": 36.93292833333333,
+            "is_dummy": false,
+            "std_accuracy": 2.7945363360195072
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..ddef3463e79980f6f3fdb2bb1c641bddd79c3dac
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_IT.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 25.310261866666664,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 58.209999999999994,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 54.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 56.220000000000006,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 56.25,
+            "best_prompt": 58.209999999999994,
+            "prompt_id": "p1",
+            "CPS": 57.069084,
+            "is_dummy": false,
+            "std_accuracy": 1.94517351411127
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 46.22,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 54.58,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.97,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 52.25666666666666,
+            "best_prompt": 55.97,
+            "prompt_id": "p3",
+            "CPS": 53.89164733333333,
+            "is_dummy": false,
+            "std_accuracy": 5.273901149370675
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 7.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.4000000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 1.5699999999999998,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 4.06,
+            "best_prompt": 7.21,
+            "prompt_id": "p1",
+            "CPS": 6.9828850000000005,
+            "is_dummy": false,
+            "std_accuracy": 2.8773425239272434
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.8599999999999999,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 6.68,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 7.739999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.426666666666666,
+            "best_prompt": 7.739999999999999,
+            "prompt_id": "p3",
+            "CPS": 7.560947999999999,
+            "is_dummy": false,
+            "std_accuracy": 3.133964475442141
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 1.05,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.44,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.73,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.7399999999999999,
+            "best_prompt": 1.05,
+            "prompt_id": "p1",
+            "CPS": 1.046745,
+            "is_dummy": false,
+            "std_accuracy": 0.3051229260478472
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..624360653ccd6f84d827609c1915ba0339b31d66
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 48.75862866666667,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 43.04,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.04,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.43666666666667,
+            "best_prompt": 43.04,
+            "prompt_id": "p1",
+            "CPS": 42.78032533333333,
+            "is_dummy": false,
+            "std_accuracy": 1.045003987233224
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.71,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 54.89000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 53.96333333333334,
+            "best_prompt": 55.71,
+            "prompt_id": "p2",
+            "CPS": 54.736932,
+            "is_dummy": false,
+            "std_accuracy": 2.3511982760569863
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..bc4305eba880b843f1805753bd555c7e84842d8a
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 44.8562175,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 45.45,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 41.160000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 45.45,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 44.02,
+            "best_prompt": 45.45,
+            "prompt_id": "p1",
+            "CPS": 44.800065000000004,
+            "is_dummy": false,
+            "std_accuracy": 2.476832654823494
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.5,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 46.949999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 43.38,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 42.60999999999999,
+            "best_prompt": 46.949999999999996,
+            "prompt_id": "p2",
+            "CPS": 44.912369999999996,
+            "is_dummy": false,
+            "std_accuracy": 4.771823550803192
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..ae741da7f8339df7e872b5639bf0c48c40836de3
--- /dev/null
+++ b/e3c_llm_results/tiiuae/Falcon3-10B-Instruct_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 38.88441916666667,
+    "config": {
+        "model_name": "tiiuae/Falcon3-10B-Instruct",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "tiiuae/Falcon3-10B-Instruct",
+        "base_model": "LlamaForCausalLM",
+        "revision": "8799bc6aec0152757221dc6b272d824642db6202",
+        "submitted_time": "2024-12-14 05:17:25+00:00",
+        "num_params_billion": 10.30565376,
+        "language": ""
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 41.21,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 39.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 41.21,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.50333333333334,
+            "best_prompt": 41.21,
+            "prompt_id": "p1",
+            "CPS": 40.918782666666665,
+            "is_dummy": false,
+            "std_accuracy": 1.223982570682005
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 23.23,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 30.12,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.63,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 31.326666666666668,
+            "best_prompt": 40.63,
+            "prompt_id": "p3",
+            "CPS": 36.85005566666667,
+            "is_dummy": false,
+            "std_accuracy": 8.762535782142825
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_0_EN.json b/e3c_llm_results/unsloth/phi-4_0_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..2545b65e5fa76897736dfb11a0171847d68b5456
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_0_EN.json
@@ -0,0 +1,157 @@
+{
+    "average_CPS": 9.439416133333335,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 2.52,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 5.72,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 2.7466666666666666,
+            "best_prompt": 5.72,
+            "prompt_id": "p3",
+            "CPS": 5.549925333333333,
+            "is_dummy": false,
+            "std_accuracy": 2.8667286814997563
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 40.22,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 42.19,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 40.300000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 40.903333333333336,
+            "best_prompt": 42.19,
+            "prompt_id": "p2",
+            "CPS": 41.64715533333334,
+            "is_dummy": false,
+            "std_accuracy": 1.115003736914513
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_0_GR.json b/e3c_llm_results/unsloth/phi-4_0_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..5f40f450f1d4931b4a767b4fd91e91843b836bb7
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_0_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 13.214538500000002,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 29.01,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 22.08,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 9.25,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 20.113333333333333,
+            "best_prompt": 29.01,
+            "prompt_id": "p1",
+            "CPS": 26.429077000000003,
+            "is_dummy": false,
+            "std_accuracy": 10.025728568704288
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_0_IT.json b/e3c_llm_results/unsloth/phi-4_0_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..53e10415ddc7b52a4cde3be0563f84032758c292
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_0_IT.json
@@ -0,0 +1,163 @@
+{
+    "average_CPS": 13.046800866666667,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 17.24,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 34.28,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 17.173333333333332,
+            "best_prompt": 34.28,
+            "prompt_id": "p3",
+            "CPS": 28.415834666666665,
+            "is_dummy": false,
+            "std_accuracy": 17.1400972381528
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 33.54,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 37.37,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 36.77,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 35.89333333333334,
+            "best_prompt": 37.37,
+            "prompt_id": "p2",
+            "CPS": 36.81816966666667,
+            "is_dummy": false,
+            "std_accuracy": 2.0600080905989993
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 0.0,
+            "best_prompt": 0.0,
+            "prompt_id": "p1",
+            "CPS": 0.0,
+            "is_dummy": false,
+            "std_accuracy": 0.0
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_0_PL.json b/e3c_llm_results/unsloth/phi-4_0_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..c43c1e9732a47cc964593f298f5929c8204806b1
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_0_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 20.92978433333333,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 2.36,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 3.66,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 2.36,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 2.793333333333333,
+            "best_prompt": 3.66,
+            "prompt_id": "p2",
+            "CPS": 3.62828,
+            "is_dummy": false,
+            "std_accuracy": 0.7505553499465136
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.99,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 38.29,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.129999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 38.13666666666666,
+            "best_prompt": 38.29,
+            "prompt_id": "p2",
+            "CPS": 38.231288666666664,
+            "is_dummy": false,
+            "std_accuracy": 0.15011106998930138
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_0_SK.json b/e3c_llm_results/unsloth/phi-4_0_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..29845de4500953b65d294d3414a6bf74349b7bff
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_0_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 21.629032,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 3.16,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 10.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 3.16,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 5.673333333333333,
+            "best_prompt": 10.7,
+            "prompt_id": "p2",
+            "CPS": 10.162146666666667,
+            "is_dummy": false,
+            "std_accuracy": 4.353221029689778
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.519999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 33.26,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.519999999999996,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 32.766666666666666,
+            "best_prompt": 33.26,
+            "prompt_id": "p2",
+            "CPS": 33.09591733333333,
+            "is_dummy": false,
+            "std_accuracy": 0.4272391992003242
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_0_SL.json b/e3c_llm_results/unsloth/phi-4_0_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..702cc3093b4d1afceef0fa35986f4156eb9ffd17
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_0_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 28.7078975,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "0",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 28.7,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 9.81,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 28.7,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 22.403333333333332,
+            "best_prompt": 28.7,
+            "prompt_id": "p1",
+            "CPS": 26.892856666666667,
+            "is_dummy": false,
+            "std_accuracy": 10.90614658499203
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 32.09,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 17.44,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 32.09,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 27.206666666666667,
+            "best_prompt": 32.09,
+            "prompt_id": "p1",
+            "CPS": 30.522938333333336,
+            "is_dummy": false,
+            "std_accuracy": 8.45818144362802
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_10_EN.json b/e3c_llm_results/unsloth/phi-4_10_EN.json
new file mode 100644
index 0000000000000000000000000000000000000000..1cb35bbe7e0f2550fa21c9cad46ea26e028595f9
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_10_EN.json
@@ -0,0 +1,163 @@
+{
+    "average_CPS": 49.37233213333333,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "EN",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 60.980000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 57.11000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 61.41,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 59.833333333333336,
+            "best_prompt": 61.41,
+            "prompt_id": "p3",
+            "CPS": 60.441769,
+            "is_dummy": false,
+            "std_accuracy": 2.36825533533302
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 49.120000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.26,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.54,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 53.63999999999999,
+            "best_prompt": 56.26,
+            "prompt_id": "p2",
+            "CPS": 54.785987999999996,
+            "is_dummy": false,
+            "std_accuracy": 3.9309540826623723
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 38.41,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 32.89,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.91,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 21.91,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 28.779999999999998,
+            "best_prompt": 38.41,
+            "prompt_id": "p1",
+            "CPS": 34.711116999999994,
+            "is_dummy": false,
+            "std_accuracy": 8.24667205604782
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 42.620000000000005,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.3,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 42.54,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 47.153333333333336,
+            "best_prompt": 56.3,
+            "prompt_id": "p2",
+            "CPS": 51.15042666666667,
+            "is_dummy": false,
+            "std_accuracy": 7.9213466868540285
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 37.36,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.2,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 36.58,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 41.38,
+            "best_prompt": 50.2,
+            "prompt_id": "p2",
+            "CPS": 45.77236,
+            "is_dummy": false,
+            "std_accuracy": 7.648293927406296
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_10_GR.json b/e3c_llm_results/unsloth/phi-4_10_GR.json
new file mode 100644
index 0000000000000000000000000000000000000000..8c57b4bec48137c4f2a59ad8a1b369ae56f8560c
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_10_GR.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 55.776253,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "GR",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 57.17,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.11000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 57.17,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 56.81666666666666,
+            "best_prompt": 57.17,
+            "prompt_id": "p1",
+            "CPS": 56.96799933333333,
+            "is_dummy": false,
+            "std_accuracy": 0.6119912853410006
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 49.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.61,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 56.779999999999994,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 52.913333333333334,
+            "best_prompt": 56.779999999999994,
+            "prompt_id": "p3",
+            "CPS": 54.58450666666666,
+            "is_dummy": false,
+            "std_accuracy": 3.724276216036252
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_10_IT.json b/e3c_llm_results/unsloth/phi-4_10_IT.json
new file mode 100644
index 0000000000000000000000000000000000000000..5063a9c7a1389e6aa6729b7ed5bf1bb21267733b
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_10_IT.json
@@ -0,0 +1,163 @@
+{
+    "average_CPS": 49.70468556666667,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "IT",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 66.47,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 67.32000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 68.97,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 67.58666666666667,
+            "best_prompt": 68.97,
+            "prompt_id": "p3",
+            "CPS": 68.015915,
+            "is_dummy": false,
+            "std_accuracy": 1.2711543310445554
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 56.08,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 58.199999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 56.879999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 57.053333333333335,
+            "best_prompt": 58.199999999999996,
+            "prompt_id": "p2",
+            "CPS": 57.53264,
+            "is_dummy": false,
+            "std_accuracy": 1.0705761688611095
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 17.59,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 16.75,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.1,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 8.1,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 12.635000000000002,
+            "best_prompt": 17.59,
+            "prompt_id": "p1",
+            "CPS": 16.7184155,
+            "is_dummy": false,
+            "std_accuracy": 5.247783659158725
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 58.35,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 56.76,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.64,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 56.916666666666664,
+            "best_prompt": 58.35,
+            "prompt_id": "p1",
+            "CPS": 57.51365,
+            "is_dummy": false,
+            "std_accuracy": 1.3617758014200925
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.019999999999996,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 50.06,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 38.59,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 46.55666666666667,
+            "best_prompt": 51.019999999999996,
+            "prompt_id": "p1",
+            "CPS": 48.74280733333334,
+            "is_dummy": false,
+            "std_accuracy": 6.916012820500935
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_10_PL.json b/e3c_llm_results/unsloth/phi-4_10_PL.json
new file mode 100644
index 0000000000000000000000000000000000000000..3386ef045aa6738ecea0b40f79a48d241d4e4d66
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_10_PL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 56.63946383333333,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "PL",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 55.489999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 53.239999999999995,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.489999999999995,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 54.73999999999999,
+            "best_prompt": 55.489999999999995,
+            "prompt_id": "p1",
+            "CPS": 55.07382499999999,
+            "is_dummy": false,
+            "std_accuracy": 1.299038105676658
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 54.230000000000004,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 57.599999999999994,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 59.72,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 57.18333333333334,
+            "best_prompt": 59.72,
+            "prompt_id": "p3",
+            "CPS": 58.20510266666667,
+            "is_dummy": false,
+            "std_accuracy": 2.7686157792899535
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_10_SK.json b/e3c_llm_results/unsloth/phi-4_10_SK.json
new file mode 100644
index 0000000000000000000000000000000000000000..07330908077a33052c93af7b88f10bfc739383de
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_10_SK.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 54.49931766666667,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SK",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 55.61000000000001,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 54.49,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.61000000000001,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 55.23666666666667,
+            "best_prompt": 55.61000000000001,
+            "prompt_id": "p1",
+            "CPS": 55.40238933333334,
+            "is_dummy": false,
+            "std_accuracy": 0.6466323014923835
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.06,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 49.94,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.410000000000004,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 52.13666666666666,
+            "best_prompt": 55.410000000000004,
+            "prompt_id": "p3",
+            "CPS": 53.596246,
+            "is_dummy": false,
+            "std_accuracy": 2.8895732095472764
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/e3c_llm_results/unsloth/phi-4_10_SL.json b/e3c_llm_results/unsloth/phi-4_10_SL.json
new file mode 100644
index 0000000000000000000000000000000000000000..6891b0d5b79ca8f4d04c6f4d23fee553cd3c3a75
--- /dev/null
+++ b/e3c_llm_results/unsloth/phi-4_10_SL.json
@@ -0,0 +1,121 @@
+{
+    "average_CPS": 55.04669683333333,
+    "config": {
+        "model_name": "unsloth/phi-4",
+        "num_fewshot": "10",
+        "batch_size": 1,
+        "LANG": "SL",
+        "model": "unsloth/phi-4",
+        "base_model": "LlamaForCausalLM",
+        "revision": "c6220bde10fff762dbd72c3331894aa4cade249d",
+        "submitted_time": "2025-01-08 21:56:16+00:00",
+        "num_params_billion": 14.6595072,
+        "language": "en"
+    },
+    "tasks": {
+        "NER": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 55.86,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 55.58,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.86,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 55.76666666666667,
+            "best_prompt": 55.86,
+            "prompt_id": "p1",
+            "CPS": 55.807864,
+            "is_dummy": false,
+            "std_accuracy": 0.16165807537309587
+        },
+        "RE": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 51.17,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p2",
+                    "metric": "f1",
+                    "value": 52.32,
+                    "stderr": 0.0
+                },
+                {
+                    "prompt": "p3",
+                    "metric": "f1",
+                    "value": 55.78999999999999,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": 53.093333333333334,
+            "best_prompt": 55.78999999999999,
+            "prompt_id": "p3",
+            "CPS": 54.28552966666666,
+            "is_dummy": false,
+            "std_accuracy": 2.405126469301211
+        },
+        "HIS": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "DIA": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        },
+        "RML": {
+            "prompts": [
+                {
+                    "prompt": "p1",
+                    "metric": "f1",
+                    "value": 0.0,
+                    "stderr": 0.0
+                }
+            ],
+            "average_accuracy": null,
+            "std_accuracy": null,
+            "best_prompt": null,
+            "prompt_id": null,
+            "CPS": null,
+            "is_dummy": true
+        }
+    }
+}
\ No newline at end of file
diff --git a/example_app.py b/example_app.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6e712f9ac66b7f5ae4305c0615540fde9141d85
--- /dev/null
+++ b/example_app.py
@@ -0,0 +1,324 @@
+import gradio as gr
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
+
+from src.about import (
+    CITATION_BUTTON_LABEL,
+    CITATION_BUTTON_TEXT,
+    EVALUATION_QUEUE_TEXT,
+    INTRODUCTION_TEXT,
+    LLM_BENCHMARKS_TEXT,
+    TITLE,
+)
+
+from src.tasks import (
+    TE_DESCRIPTION,
+)
+
+from src.display.css_html_js import custom_css
+from src.display.utils import (
+    BENCHMARK_COLS,
+    COLS,
+    EVAL_COLS,
+    EVAL_TYPES,
+    AutoEvalColumn,
+    ModelType,
+    fields,
+    WeightType,
+    Precision
+)
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.submission.submit import add_new_eval
+
+
+def restart_space():  # Ask the API client to restart the Space identified by REPO_ID.
+    API.restart_space(repo_id=REPO_ID)
+
+### Space initialisation: snapshot the queue and results datasets into local directories
+try:
+    print(EVAL_REQUESTS_PATH)
+    snapshot_download(
+        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+    )
+except Exception:  # on any exception, request a Space restart and carry on
+    restart_space()
+try:
+    print(EVAL_RESULTS_PATH)
+    snapshot_download(
+        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+    )
+except Exception:  # same handling for the results dataset
+    restart_space()
+
+# Leaderboard table, computed once at module load from the two local directories.
+LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+# The queue helper returns three frames, unpacked as finished / running / pending.
+(
+    finished_eval_queue_df,
+    running_eval_queue_df,
+    pending_eval_queue_df,
+) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+
+def init_leaderboard(dataframe):
+    print(dataframe)
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+    return Leaderboard(
+        value=dataframe,
+        datatype=[c.type for c in fields(AutoEvalColumn)],
+        select_columns=SelectColumns(
+            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
+            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            ColumnFilter(
+                AutoEvalColumn.params.name,
+                type="slider",
+                min=0.01,
+                max=150,
+                label="Select the number of parameters (B)",
+            ),
+            ColumnFilter(
+                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+            ),
+        ],
+        bool_checkboxgroup_label="Hide models",
+        interactive=False,
+    )
+
+
+def init_leaderboard2(dataframe, default_selection=None, hidden_columns=None):
+
+    print("entrato===============================================")
+
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+    return Leaderboard(
+        value=dataframe,
+        datatype=[c.type for c in fields(AutoEvalColumn)],
+        select_columns=SelectColumns(
+            default_selection=default_selection or [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+        hide_columns=hidden_columns or [c.name for c in fields(AutoEvalColumn) if c.hidden],
+        filter_columns=[
+            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
+            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+            ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0.01, max=150, label="Select the number of parameters (B)"),
+            ColumnFilter(AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True),
+        ],
+        bool_checkboxgroup_label="Hide models",
+        interactive=False,
+    )
+
+
+demo = gr.Blocks(css=custom_css)
+with demo:
+    gr.HTML(TITLE)
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    # Tab group: overall benchmark, About, submission form, then one tab per task code.
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 EVALITA-LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+            # Overall view: 'T', 'Model', the average and the ten task columns are selected;
+            # every other column of LEADERBOARD_DF is passed as hidden.
+            leaderboard = init_leaderboard2(
+                LEADERBOARD_DF,
+                default_selection=['T', 'Model', "Average ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
+                hidden_columns=[col for col in LEADERBOARD_DF.columns if
+                                col not in ['T', 'Model', "Average ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL" ]]
+            )
+
+
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+            with gr.Column():
+                with gr.Row():
+                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+                # One accordion per queue state, created with open=False and titled with the frame's row count.
+                with gr.Column():
+                    with gr.Accordion(
+                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            finished_eval_table = gr.components.Dataframe(
+                                value=finished_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+                    with gr.Accordion(
+                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            running_eval_table = gr.components.Dataframe(
+                                value=running_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+
+                    with gr.Accordion(
+                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                        open=False,
+                    ):
+                        with gr.Row():
+                            pending_eval_table = gr.components.Dataframe(
+                                value=pending_eval_queue_df,
+                                headers=EVAL_COLS,
+                                datatype=EVAL_TYPES,
+                                row_count=5,
+                            )
+            with gr.Row():
+                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+            # Submission form: two columns of inputs that are later handed to add_new_eval.
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                    model_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                        label="Model type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    precision = gr.Dropdown(
+                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                        label="Precision",
+                        multiselect=False,
+                        value="float16",
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=[i.value.name for i in WeightType],
+                        label="Weights type",
+                        multiselect=False,
+                        value="Original",
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+            # The click handler passes the six inputs to add_new_eval in the order listed, with submission_result as output.
+            submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
+            submit_button.click(
+                add_new_eval,
+                [
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    weight_type,
+                    model_type,
+                ],
+                submission_result,
+            )
+
+
+        with gr.TabItem("TE", elem_id="llm-benchmark-tab-table", id=4):
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+            # Rename the TE-specific columns to generic names; the SA and HS tabs below
+            # do the same for their own columns and reuse the same selection list.
+            LEADERBOARD_DF_TE = LEADERBOARD_DF.rename(columns={"TE Prompt Average": "Prompt Average",
+                                                            "TE Best Prompt": "Best Prompt",
+                                                            "TE Best Prompt Id": "Best Prompt Id",
+                                                            "TE": "Combined Performance"})
+            # NOTE(review): hidden_columns iterates LEADERBOARD_DF.columns (names before the rename) — confirm intended.
+            leaderboard = init_leaderboard2(
+                LEADERBOARD_DF_TE,
+                default_selection=['T', 'Model', 'Combined Performance', 'Prompt Average', 'Best Prompt', 'Best Prompt Id'],
+                hidden_columns=[col for col in LEADERBOARD_DF.columns if
+                                col not in ['T', 'Model', 'Combined Performance', 'Prompt Average', 'Best Prompt', 'Best Prompt Id']]
+            )
+
+
+        with gr.TabItem("SA", elem_id="llm-benchmark-tab-table", id=5):
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+            # NOTE(review): this tab renders TE_DESCRIPTION — confirm whether an SA-specific text is intended.
+            LEADERBOARD_DF_SA = LEADERBOARD_DF.rename(columns={"SA Prompt Average": "Prompt Average",
+                                                            "SA Best Prompt": "Best Prompt",
+                                                            "SA Best Prompt Id": "Best Prompt Id",
+                                                            "SA": "Combined Performance"})
+
+            leaderboard = init_leaderboard2(
+                LEADERBOARD_DF_SA,
+                default_selection=['T', 'Model', 'Combined Performance', 'Prompt Average', 'Best Prompt',
+                                   'Best Prompt Id'],
+                hidden_columns=[col for col in LEADERBOARD_DF.columns if
+                                col not in ['T', 'Model', 'Combined Performance', 'Prompt Average', 'Best Prompt',
+                                            'Best Prompt Id']]
+            )
+
+
+
+
+        with gr.TabItem("HS", elem_id="llm-benchmark-tab-table", id=6):
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+            # NOTE(review): this tab renders TE_DESCRIPTION — confirm whether an HS-specific text is intended.
+            LEADERBOARD_DF_HS = LEADERBOARD_DF.rename(columns={"HS Prompt Average": "Prompt Average",
+                                                               "HS Best Prompt": "Best Prompt",
+                                                               "HS Best Prompt Id": "Best Prompt Id",
+                                                               "HS": "Combined Performance"})
+
+            leaderboard = init_leaderboard2(
+                LEADERBOARD_DF_HS,
+                default_selection=['T', 'Model', 'Combined Performance', 'Prompt Average', 'Best Prompt',
+                                   'Best Prompt Id'],
+                hidden_columns=[col for col in LEADERBOARD_DF.columns if
+                                col not in ['T', 'Model', 'Combined Performance', 'Prompt Average', 'Best Prompt',
+                                            'Best Prompt Id']]
+            )
+
+
+
+        with gr.TabItem("AT", elem_id="llm-benchmark-tab-table", id=7):  # AT..REL tabs only render a description; no table is built
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+        # NOTE(review): every tab in this group shows TE_DESCRIPTION — confirm whether per-task texts are intended.
+        with gr.TabItem("WIC", elem_id="llm-benchmark-tab-table", id=8):
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+
+        with gr.TabItem("FAQ", elem_id="llm-benchmark-tab-table", id=9):
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+
+        with gr.TabItem("LS", elem_id="llm-benchmark-tab-table", id=10):
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+
+        with gr.TabItem("SU", elem_id="llm-benchmark-tab-table", id=11):
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+
+        with gr.TabItem("NER", elem_id="llm-benchmark-tab-table", id=12):
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+
+        with gr.TabItem("REL", elem_id="llm-benchmark-tab-table", id=13):
+            gr.Markdown(TE_DESCRIPTION, elem_classes="markdown-text")
+
+
+    with gr.Row():  # Citation accordion, declared after the tab group and created with open=False
+        with gr.Accordion("📙 Citation", open=False):
+            citation_button = gr.Textbox(
+                value=CITATION_BUTTON_TEXT,
+                label=CITATION_BUTTON_LABEL,
+                lines=20,
+                elem_id="citation-button",
+                show_copy_button=True,
+            )
+
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=1800)  # call restart_space every 1800 s (30 minutes)
+scheduler.start()
+demo.queue(default_concurrency_limit=40).launch()  # enable the request queue, then launch the app
\ No newline at end of file
diff --git a/example_app2.py b/example_app2.py
new file mode 100644
index 0000000000000000000000000000000000000000..9268e66807d66f4d99c6c97a748691f46972e4e8
--- /dev/null
+++ b/example_app2.py
@@ -0,0 +1,216 @@
+import gradio as gr
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import snapshot_download
+
+from src.about import (
+    CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, EVALUATION_QUEUE_TEXT,
+    INTRODUCTION_TEXT, LLM_BENCHMARKS_TEXT, TITLE
+)
+from src.tasks import TASK_DESCRIPTIONS, MEASURE_DESCRIPTION
+from src.display.css_html_js import custom_css
+from src.display.utils import (
+    BENCHMARK_COLS, COLS, EVAL_COLS, EVAL_TYPES, AutoEvalColumn,
+    ModelType, fields, WeightType, Precision
+)
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.submission.submit import add_new_eval
+
+
+
+
+# Emoji for each task code; looked up as TASK_ICONS[task] to build the title of
+# every task-specific tab.
+TASK_ICONS = {
+    "TE": "📊",  # Textual Entailment
+    "SA": "😃",  # Sentiment Analysis
+    "HS": "⚠️",  # Hate Speech
+    "AT": "🏥",  # Admission Test
+    "WIC": "🔤",  # Word in Context
+    "FAQ": "❓",  # Frequently Asked Questions
+    "LS": "🔄",  # Lexical Substitution
+    "SU": "📝",  # Summarization
+    "NER": "🏷️",  # Named Entity Recognition
+    "REL": "🔗",  # Relation Extraction
+}
+
+# Full name of each task code.
+# NOTE(review): not referenced anywhere else in this module.
+TASK_NAMES = {
+    "TE": "Textual Entailment",
+    "SA": "Sentiment Analysis",
+    "HS": "Hate Speech",
+    "AT": "Admission Test",
+    "WIC": "Word in Context",
+    "FAQ": "Frequently Asked Questions",
+    "LS": "Lexical Substitution",
+    "SU": "Summarization",
+    "NER": "Named Entity Recognition",
+    "REL": "Relation Extraction",
+}
+
+
+# Tooltip descriptions for each task
+# NOTE(review): not referenced anywhere else in this module.
+TASK_TOOLTIPS = {
+    "TE": "Identify logical relationships between two text segments.",
+    "SA": "Classify the sentiment (positive, negative, neutral) of a text.",
+    "HS": "Detect hate speech in a text.",
+    "AT": "Classify whether a clinical statement pertains to an admission test.",
+    "WIC": "Identify words in context and their meaning.",
+    "FAQ": "Answer frequently asked questions based on given text.",
+    "LS": "Identify alternative words in a given context.",
+    "SU": "Summarize long text into a shorter version.",
+    "NER": "Identify named entities (e.g., persons, locations, organizations) in text.",
+    "REL": "Extract and link laboratory test results to the respective tests in clinical narratives.",
+}
+
+
+
+
+def restart_space():
+    """Restart the Hugging Face space identified by REPO_ID.
+
+    Used both as the periodic background-scheduler job and as the fallback that
+    download_snapshot calls when a snapshot download fails.
+    """
+    API.restart_space(repo_id=REPO_ID)
+
+
+def download_snapshot(repo, local_dir):
+    """Fetch the dataset repository *repo* into *local_dir*.
+
+    Any exception raised while downloading is reported on stdout and answered by
+    restarting the space; it is not re-raised to the caller.
+    """
+    try:
+        print(f"Downloading from {repo} to {local_dir}...")
+        download_options = {
+            "repo_id": repo,
+            "local_dir": local_dir,
+            "repo_type": "dataset",
+            "tqdm_class": None,
+            "etag_timeout": 30,
+            "token": TOKEN,
+        }
+        snapshot_download(**download_options)
+    except Exception as download_error:
+        print(f"Error downloading {repo}: {download_error}")
+        restart_space()
+
+
+# Space initialization: runs at import time and fetches the request-queue and
+# results dataset repositories into their local directories.
+download_snapshot(QUEUE_REPO, EVAL_REQUESTS_PATH)
+download_snapshot(RESULTS_REPO, EVAL_RESULTS_PATH)
+
+# Load leaderboard and evaluation queue data.
+# NOTE(review): the three queue DataFrames are only referenced inside the submission
+# tab, which is currently disabled (wrapped in a string literal) in the UI below.
+LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+
+
+def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
+    """Build the ``Leaderboard`` component for *dataframe*.
+
+    Args:
+        dataframe: Data to display; must be a non-empty DataFrame.
+        default_selection: Column names selected by default. When omitted or empty,
+            the ``AutoEvalColumn`` fields flagged ``displayed_by_default`` are used.
+        hidden_columns: Column names to hide. When omitted or empty, the
+            ``AutoEvalColumn`` fields flagged ``hidden`` are used.
+
+    Returns:
+        The configured ``Leaderboard`` instance.
+
+    Raises:
+        ValueError: If *dataframe* is ``None`` or empty.
+    """
+    if dataframe is None or dataframe.empty:
+        raise ValueError("Leaderboard DataFrame is empty or None.")
+
+    column_specs = list(fields(AutoEvalColumn))
+
+    if not default_selection:
+        default_selection = [spec.name for spec in column_specs if spec.displayed_by_default]
+    if not hidden_columns:
+        hidden_columns = [spec.name for spec in column_specs if spec.hidden]
+
+    # Only the few-shot and parameter-count filters are enabled.
+    column_filters = [
+        ColumnFilter(AutoEvalColumn.fewshot_type.name, type="checkboxgroup", label="Few-Shot Learning (FS)"),
+        ColumnFilter(AutoEvalColumn.params.name, type="slider", min=0.01, max=150, label="Select the number of parameters (B)"),
+    ]
+
+    return Leaderboard(
+        value=dataframe,
+        datatype=[spec.type for spec in column_specs],
+        select_columns=SelectColumns(
+            default_selection=default_selection,
+            cant_deselect=[spec.name for spec in column_specs if spec.never_hidden],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+        hide_columns=hidden_columns,
+        filter_columns=column_filters,
+        bool_checkboxgroup_label="Hide models",
+        interactive=False,
+    )
+
+
+def prepare_leaderboard_df(df, task_prefix):
+    """Return a copy of *df* whose *task_prefix* columns carry task-independent names.
+
+    ``"<prefix> Prompt Average"``, ``"<prefix> Best Prompt"`` and
+    ``"<prefix> Best Prompt Id"`` lose their prefix, and the ``"<prefix>"`` column
+    itself becomes ``"Combined Performance"``. All other columns are left unchanged.
+    """
+    generic_names = ("Prompt Average", "Best Prompt", "Best Prompt Id")
+    column_map = {f"{task_prefix} {generic}": generic for generic in generic_names}
+    column_map[task_prefix] = "Combined Performance"
+    return df.rename(columns=column_map)
+
+
+# Build the Gradio UI: title and introduction, a tab set (main leaderboard, about
+# page, one tab per task) and a collapsible citation box.
+demo = gr.Blocks(css=custom_css)
+with demo:
+    gr.HTML(TITLE)
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        # Main leaderboard tab: shows the overall score and one combined column per task;
+        # every other column of LEADERBOARD_DF is passed as hidden.
+        with gr.TabItem("🏅 EVALITA-LLM Benchmark", elem_id="llm-benchmark-tab-table"):
+            leaderboard = init_leaderboard(
+                LEADERBOARD_DF,
+                default_selection=['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
+                hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in
+                                ['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
+            )
+
+        # About tab
+        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table"):
+            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+        # The submission tab below is disabled: it is wrapped in a bare string literal,
+        # which is evaluated and discarded, so none of it is executed.
+        '''
+        # Submission tab
+        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table"):
+            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+            for queue_name, queue_df in [
+                ("✅ Finished Evaluations", finished_eval_queue_df),
+                ("🔄 Running Evaluation Queue", running_eval_queue_df),
+                ("⏳ Pending Evaluation Queue", pending_eval_queue_df)
+            ]:
+                with gr.Accordion(f"{queue_name} ({len(queue_df)})", open=False):
+                    gr.components.Dataframe(value=queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5)
+
+            gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+            with gr.Row():
+                model_name_textbox = gr.Textbox(label="Model name")
+                revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                model_type = gr.Dropdown(choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                                         label="Model type", multiselect=False, interactive=True)
+                precision = gr.Dropdown(choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                                        label="Precision", multiselect=False, value="float16", interactive=True)
+                weight_type = gr.Dropdown(choices=[i.value.name for i in WeightType],
+                                          label="Weights type", multiselect=False, value="Original", interactive=True)
+                base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+            submit_button = gr.Button("Submit Eval")
+            submission_result = gr.Markdown()
+            submit_button.click(
+                add_new_eval,
+                [model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type],
+                submission_result,
+            )
+        '''
+
+        # Task-specific leaderboards: one tab per task code, each with the task description,
+        # the measure description and a leaderboard restricted to that task's columns.
+        for task in ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]:
+
+            with gr.TabItem(f"{TASK_ICONS[task]}{task}", elem_id="llm-benchmark-tab-table"):
+
+                task_description = TASK_DESCRIPTIONS.get(task, "Description not available.")
+
+
+
+
+                gr.Markdown(task_description, elem_classes="markdown-text")
+
+
+                gr.Markdown(MEASURE_DESCRIPTION, elem_classes="markdown-text")
+
+
+
+                # The task's columns are renamed to generic names before display.
+                # NOTE(review): hidden_columns is derived from the un-renamed
+                # LEADERBOARD_DF.columns, so it also lists this task's original column
+                # names, which no longer exist in the renamed frame.
+                # `leaderboard` is rebound on every iteration.
+                leaderboard = init_leaderboard(
+                    prepare_leaderboard_df(LEADERBOARD_DF, task),
+                    default_selection=['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Best Prompt', 'Best Prompt Id'],
+                    hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in
+                                    ['FS', 'Model', 'Combined Performance', 'Prompt Average', 'Best Prompt', 'Best Prompt Id']]
+                )
+
+    # Citation section: collapsed by default, with a copy button on the text box.
+    with gr.Accordion("📙 Citation", open=False):
+        gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, lines=20, elem_id="citation-button", show_copy_button=True)
+
+# Background job: call restart_space every 1800 seconds (30 minutes).
+scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=1800)
+scheduler.start()
+
+# Enable request queuing (default concurrency limit of 40) and launch the app.
+demo.queue(default_concurrency_limit=40).launch()
\ No newline at end of file
diff --git a/get_model_info.py b/get_model_info.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a1f893faac9c892eff482d8027cd1fe724a1c6c
--- /dev/null
+++ b/get_model_info.py
@@ -0,0 +1,128 @@
+"""
+MODEL METADATA EXTRACTOR
+
+This script processes model evaluation output files (input_folder) from the lm-eval-harness library,
+extracts model identifiers, retrieves detailed metadata from HuggingFace
+and saves the information as structured JSON files (output_folder).
+
+Input: Directory containing .out files from lm-eval-harness
+Output: Directory with JSON files containing model metadata
+"""
+
+# Example input file format (lm-eval-harness output):
+'''
+hf (pretrained=swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA,trust_remote_code=True), gen_kwargs: (None), limit: None, num_fewshot: 5, batch_size: 1
+|         Tasks          |Version|Filter|n-shot| Metric |   |Value |   |Stderr|
+|------------------------|------:|------|-----:|--------|---|-----:|---|------|
+|evalita-mp              |      1|none  |      |acc     |↑  |0.5605|±  |0.0052|
+...
+Job completed
+'''
+
+# Example output JSON format:
+'''
+{
+    "model": "swap-uniba/LLaMAntino-3-ANITA-8B-Inst-DPO-ITA",
+    "base_model": "LlamaForCausalLM",
+    "revision": "2b6e46e4c9d341dc8bf8350a167492c880116b66",
+    "submitted_time": "2024-04-29 09:34:12+00:00",
+    "num_params_billion": 8.030261248,
+    "language": "en_it"
+}
+'''
+
+import os
+import re
+import json
+from huggingface_hub import HfApi
+
+# Configures the Hugging Face token (if needed)
+# TOKEN = "YOUR_HUGGINGFACE_API_TOKEN"
+api = HfApi()
+
+# Directory paths
+# input_folder: Directory containing the output files of the lm-eval-harness library, including model accuracy metrics.
+input_folder = "../evalita_llm_models_output/"
+# output_folder: Directory where JSON files with model characteristics will be saved.
+output_folder = "../evalita_llm_requests/"
+
+# Creates the output folder if it doesn't exist
+os.makedirs(output_folder, exist_ok=True)
+
+# Regular expression to find the model name
+model_pattern = re.compile(r"pretrained=([\w\-./]+)")
+
+# Scans files in the input folder
+for filename in os.listdir(input_folder):
+    if not filename.endswith('.out'):
+        continue
+    file_path = os.path.join(input_folder, filename)
+
+    # Reads the file content
+    with open(file_path, "r", encoding="utf-8") as f:
+        content = f.read()
+
+    # Extracts the model name; files without a "pretrained=" entry are ignored
+    match = model_pattern.search(content)
+    if not match:
+        continue
+    model_name = match.group(1)
+    print(f"Processing model: {model_name}")
+
+    # Separates the model_name into two parts: directory name and file name
+    if "/" in model_name:
+        dir_name, file_name = model_name.split("/", 1)
+    else:
+        dir_name, file_name = model_name, model_name  # If no "/", use the same name
+
+    model_output_folder = os.path.join(output_folder, dir_name)
+    output_file = os.path.join(model_output_folder, f"{file_name}.json")
+
+    # Check for an existing file BEFORE contacting the Hub, so already-processed
+    # models do not cost a network request.
+    if os.path.exists(output_file):
+        print(f"File {output_file} already exists. Skipping...")
+        continue
+
+    try:
+        # Retrieves model information from HuggingFace
+        model_info = api.model_info(model_name)
+
+        # Calculates the number of parameters in billions, if available.
+        # NOTE(review): only the BF16 parameter count is read; models stored in another
+        # dtype get num_params_billion = None.
+        num_params = None
+        if model_info.safetensors and "BF16" in model_info.safetensors.parameters:
+            num_params = model_info.safetensors.parameters["BF16"] / 1e9  # Convert to billions
+
+        # Extracts and concatenates languages. The card may declare a single language as a
+        # plain string (which must not be joined character by character) or not at all.
+        languages = model_info.card_data.get("language") if model_info.card_data else None
+        if isinstance(languages, str):
+            languages = [languages]
+        language = "_".join(languages or [])
+
+        # First declared architecture, or "" when the config or the list is missing/empty
+        architectures = (model_info.config or {}).get("architectures") or [""]
+
+        # Builds the dictionary with required metadata
+        model_data = {
+            "model": model_name,
+            "base_model": architectures[0],
+            "revision": model_info.sha,
+            "submitted_time": str(model_info.created_at),
+            "num_params_billion": num_params,  # Number of parameters in billions
+            "language": language,  # Extracted language
+        }
+
+        # Creates the folder for saving the produced json files
+        os.makedirs(model_output_folder, exist_ok=True)
+
+        # Saves the JSON file in the appropriate folder
+        with open(output_file, "w", encoding="utf-8") as f:
+            json.dump(model_data, f, indent=4)
+
+        print(f"Saved metadata for {model_name} in {output_file}")
+
+    except Exception as e:
+        # Best effort per model: report the failure and carry on with the next file.
+        print(f"Error retrieving info for {model_name}: {e}")
+
+# Printed once, after every input file has been handled
+print("Process finished!")
\ No newline at end of file
diff --git a/preprocess_models_output.py b/preprocess_models_output.py
new file mode 100644
index 0000000000000000000000000000000000000000..62e2a1bcc07c528120cb90c29ab115b41dcc71ca
--- /dev/null
+++ b/preprocess_models_output.py
@@ -0,0 +1,290 @@
+"""
+EVALITA LLM EVALUATION PROCESSOR
+
+Transforms raw model evaluation outputs into structured performance reports for leaderboard integration.
+
+DATA PIPELINE OVERVIEW:
+
+1. Inputs:
+   - Evaluation Results: Raw .out files from lm-eval-harness
+   - Model Metadata: Pre-collected .json files from HuggingFace
+
+2. Output:
+   - Comprehensive evaluation reports in JSON format
+   - Ready for ingestion into the evaluation leaderboard
+
+--------------------------------------------------------------------
+INPUT SPECIFICATION
+
+Evaluation Results (.out format):
+   hf (pretrained=model-org/model-name), num_fewshot: 5, batch_size: 1
+   | Task          | Metric | Value  | Stderr |
+   |---------------|--------|--------|--------|
+   | main-task     | acc    | 0.5605 | 0.0052 |
+   | - sub-task    | acc    | 0.4640 | 0.0088 |
+   |   - prompt-1  | acc    | 0.3720 | 0.0216 |
+
+Model Metadata (.json format):
+   {
+     "model": "model-org/model-name",
+     "base_model": "ModelArchitecture",
+     "revision": "git_commit_hash",
+     "submitted_time": "2024-04-29 09:34:12+00:00",
+     "num_params_billion": 8.03,
+     "language": "en_it"
+   }
+
+--------------------------------------------------------------------
+OUTPUT SPECIFICATION
+
+Evaluation Report (.json format):
+   {
+     "summary_metrics": {
+       "average_CPS": 41.74,
+       "num_tasks": 12
+     },
+     "model_config": {
+       "identifier": "model-org/model-name",
+       "architecture": "ModelArchitecture",
+       "parameters": 8.03,
+       "evaluation_settings": {
+         "fewshot": 5,
+         "batch_size": 1
+       }
+     },
+     "task_results": {
+       "task-name": {
+         "average_score": 52.60,
+         "best_prompt": {
+           "id": "prompt-6",
+           "score": 66.57
+         },
+         "prompt_analysis": [
+           {
+             "prompt_id": "prompt-1",
+             "score": 37.20,
+             "stderr": 0.0216
+           }
+         ]
+       }
+     }
+   }
+"""
+
+import json
+import os
+import re
+import statistics
+
+# Task names every report must contain: extract_data_from_file injects a dummy entry
+# (is_dummy=True, metrics left as None) for each of these that is missing from the input file.
+TASKS ={"NER", "RE", "DIA", "RML", "HIS" }
+
+def safe_float(value):
+    """Convert *value* to float, returning None when it cannot be converted.
+
+    Covers both unparsable strings (ValueError, e.g. "" or "-----:") and values of an
+    unsupported type such as None (TypeError).
+    """
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        return None
+
+
+def calculate_task_metrics(task_info):
+    """Compute prompt statistics and the CPS score for one task, in place.
+
+    Reads ``task_info['prompts']`` (dicts with 'prompt' and 'value' keys) and, using only
+    the prompts whose value is not None, stores on ``task_info``:
+
+    - ``average_accuracy``: mean of the values
+    - ``std_accuracy``: sample standard deviation (0.0 when there is a single value)
+    - ``best_prompt`` / ``prompt_id``: highest value and the name of its prompt
+      (the first one wins on ties)
+    - ``CPS``: ``(1 - (best - average) / 100) * best``
+
+    Returns None in every case; when no prompt has a value, ``task_info`` is left untouched.
+    """
+    # Prompts without a value take part in neither the average nor the best-prompt
+    # search (comparing None with a float would raise TypeError).
+    scored_prompts = [prompt for prompt in task_info['prompts'] if prompt['value'] is not None]
+    if not scored_prompts:
+        return None
+
+    accuracies = [prompt['value'] for prompt in scored_prompts]
+    task_info['average_accuracy'] = sum(accuracies) / len(accuracies)
+    task_info['std_accuracy'] = statistics.stdev(accuracies) if len(accuracies) > 1 else 0.0
+
+    best_prompt_data = max(scored_prompts, key=lambda prompt: prompt['value'])
+    task_info['best_prompt'] = best_prompt_data['value']
+    task_info['prompt_id'] = best_prompt_data['prompt']
+
+    # Calculate CPS: the best score, penalised by its distance from the average
+    avg_acc = task_info['average_accuracy']
+    best_acc = task_info['best_prompt']
+    task_info['CPS'] = (1 - (best_acc - avg_acc) / 100) * best_acc
+
+
+def extract_data_from_file(file_path):
+    """Parse one evaluation results file into a report dictionary.
+
+    The file must contain an ``hf (pretrained=...)`` header line followed by pipe-separated
+    table rows (11 ``|``-separated fields each). A row whose first cell starts with " - "
+    opens a task; a row starting with "   - " adds a prompt result to the current task.
+    Prompt values are stored as percentages (table value * 100). Rows with the metric
+    ``acc_norm`` are ignored, and prompt rows whose value is not numeric are skipped with a
+    warning.
+
+    Args:
+        file_path: Path of the results file. The language is taken from a ``__xx__`` marker
+            in the path (en, sl, it, gr, sk, pl); if none is present an error is printed and
+            ``LANG`` stays empty.
+
+    Returns:
+        dict with the keys ``average_CPS`` (mean CPS over the non-dummy tasks, 0 when there is
+        none), ``config`` (model_name, num_fewshot, batch_size, LANG) and ``tasks`` (per-task
+        data, including a dummy entry for every name in TASKS missing from the file).
+
+    Raises:
+        ValueError: If no model name can be read from an ``hf (pretrained=...)`` header line.
+    """
+    # When several markers occur in the path, the last one listed here wins.
+    LANG = ""
+    for marker, code in (("__en__", "EN"), ("__sl__", "SL"), ("__it__", "IT"),
+                         ("__gr__", "GR"), ("__sk__", "SK"), ("__pl__", "PL")):
+        if marker in file_path:
+            LANG = code
+    if LANG == "":
+        print("ERROR: ", file_path)
+
+    # The result tables contain non-ASCII symbols, so do not rely on the locale encoding.
+    with open(file_path, 'r', encoding='utf-8') as file:
+        lines = file.readlines()
+
+    tasks_data = {}
+    current_task = None
+    pretrained_model = None
+    num_fewshot = None
+    batch_size = None
+
+    for line in lines:
+        line = line.strip()
+
+        # Skips empty lines
+        if not line:
+            continue
+
+        # Skips the table header line
+        if line.startswith("|         Tasks"):
+            continue
+
+        # Extracts model configuration details
+        if line.startswith("hf (pretrained=") or line.startswith("hf(pretrained="):
+            # The model name ends at the first character that cannot be part of a repository
+            # id (space, comma, parenthesis), whatever follows it on the line.
+            model_match = re.search(r"pretrained=([\w\-./]+)", line)
+            pretrained_model = model_match.group(1) if model_match else None
+
+            num_fewshot_match = re.search(r"num_fewshot:\s*([\w\d]+)", line)
+            num_fewshot = num_fewshot_match.group(1) if num_fewshot_match else None
+
+            batch_size_match = re.search(r"batch_size:\s*(\d+)", line)
+            batch_size = int(batch_size_match.group(1)) if batch_size_match else None
+
+            continue
+
+        columns = line.split('|')
+        if len(columns) != 11:
+            continue
+
+        task_name = columns[1]
+        metric = columns[5].strip()
+        value = safe_float(columns[7])
+        stderr = safe_float(columns[9])
+
+        # Skips normalized accuracy metrics
+        if metric == "acc_norm":
+            continue
+
+        # Identifies task and prompt sections in the file
+        if task_name.startswith(" - "):
+            current_task = task_name[3:].strip()
+            tasks_data.setdefault(current_task,
+                                  {'prompts': [], 'average_accuracy': 0, 'best_prompt': None, 'prompt_id': None,
+                                   'CPS': None, 'is_dummy': False})
+
+        elif task_name.startswith("   - ") and current_task:
+            if value is None:
+                # A prompt row without a numeric value cannot be scaled or averaged.
+                print(f"WARNING: skipping prompt row without a numeric value in {file_path}: {line}")
+                continue
+            prompt_name = task_name[4:].strip()
+            prompt_data = {'prompt': prompt_name, 'metric': metric, 'value': value * 100,
+                           'stderr': stderr}
+            tasks_data[current_task]['prompts'].append(prompt_data)
+
+    if pretrained_model is None:
+        raise ValueError(f"No model name found in an 'hf (pretrained=...)' header line of {file_path}")
+
+    # Special handling for evalita NER task to calculate weighted prompt averages
+    if "evalita NER" in tasks_data:
+        task_info = tasks_data["evalita NER"]
+        weight_map = {"ADG prompt-1": 521, "ADG prompt-2": 521, "FIC prompt-1": 1517, "FIC prompt-2": 1517,
+                      "WN prompt-1": 2088, "WN prompt-2": 2088}
+
+        # Each prompt is normalised by the total weight of ITS OWN sub-datasets; dividing by
+        # the weights of both prompts together would halve every weighted average.
+        weighted_values = {"prompt-1": 0, "prompt-2": 0}
+        weight_totals = {"prompt-1": 0, "prompt-2": 0}
+        for weighted_name, weight in weight_map.items():
+            weight_totals[weighted_name.split()[-1]] += weight
+
+        for prompt in task_info['prompts']:
+            weight = weight_map.get(prompt['prompt'])
+            if weight is None:
+                continue
+            weighted_values[prompt['prompt'].split()[-1]] += weight * prompt['value']
+
+        task_info['prompts'] = [
+            {"prompt": prompt_id, "metric": "acc",
+             "value": weighted_values[prompt_id] / weight_totals[prompt_id], 'stderr': None}
+            for prompt_id in ("prompt-1", "prompt-2")]
+
+    # Inject dummy entries for any TASKS not present in the file. Sorted so the key order
+    # of the resulting report does not depend on set iteration order.
+    for must_have in sorted(TASKS):
+        if must_have not in tasks_data:
+            tasks_data[must_have] = {
+                'prompts': [
+                    {'prompt': 'p1', 'metric': 'f1', 'value': 0.0, 'stderr': 0.0}
+                ],
+                'average_accuracy': None,
+                'std_accuracy': None,
+                'best_prompt': None,
+                'prompt_id': None,
+                'CPS': None,
+                'is_dummy': True  # <--- mark as dummy
+            }
+
+    # Calculates task metrics for each task; dummy tasks keep their metrics as None
+    for task_info in tasks_data.values():
+        if task_info.get('is_dummy'):
+            continue
+        calculate_task_metrics(task_info)
+
+    # Calculates the average CPS across all real tasks that obtained one
+    tasks_with_cps = [
+        t['CPS'] for t in tasks_data.values() if not t.get('is_dummy') and t.get('CPS') is not None]
+    average_CPS = (sum(tasks_with_cps) / len(tasks_with_cps)) if tasks_with_cps else 0
+
+    config = {
+        "model_name": pretrained_model,
+        "num_fewshot": num_fewshot,
+        "batch_size": batch_size,
+        "LANG": LANG
+    }
+
+    return {'average_CPS': average_CPS, 'config': config, 'tasks': tasks_data}
+
+
+# MAIN PROCESSING PIPELINE
+#
+# 1. Inputs:
+#    - Evaluation result files (*.txt) read from directory_in_path
+#    - Optional model metadata JSON files read from
+#      directory_in_requests_path/<org>/<model>.json
+#
+# 2. Processing steps:
+#    - Parses each result file with extract_data_from_file
+#    - Merges the model metadata (when the file exists) into the report's "config"
+#
+# 3. Output:
+#    - One JSON report per input file, written to
+#      directory_out_results_path/<org>/<model>_<num_fewshot>_<LANG>.json
+#
+# NOTE(review): the three directories below are absolute paths on one machine; adjust them
+# before running elsewhere.
+directory_in_path = '/home/sfarzi/leaderboard/trail_leaderboard/csv_new/output/'
+directory_in_requests_path = '/home/sfarzi/leaderboard/trail_leaderboard/e3c_llm_requests/'
+directory_out_results_path = '/home/sfarzi/leaderboard/trail_leaderboard/e3c_llm_results/'
+
+for filename in os.listdir(directory_in_path):
+    if not filename.endswith('.txt'):
+        continue
+
+    file_path = os.path.join(directory_in_path, filename)
+    json_output = extract_data_from_file(file_path)
+
+    # Same "<org>/<model>" convention as get_model_info.py: split on the first "/" only,
+    # and reuse the whole name for both parts when there is no organisation prefix.
+    full_model_name = json_output['config']['model_name']
+    if "/" in full_model_name:
+        model_org_name, model_name = full_model_name.split("/", 1)
+    else:
+        model_org_name, model_name = full_model_name, full_model_name
+
+    # Metadata is optional: reports are still written when no JSON exists for the model.
+    config_file_path = os.path.join(directory_in_requests_path, model_org_name, f"{model_name}.json")
+    if os.path.exists(config_file_path):
+        with open(config_file_path, 'r', encoding='utf-8') as config_file:
+            additional_config = json.load(config_file)
+        json_output['config'].update(additional_config)
+
+    org_folder_path = os.path.join(directory_out_results_path, model_org_name)
+    os.makedirs(org_folder_path, exist_ok=True)
+
+    file_suffix = f"{json_output['config']['num_fewshot']}" + "_" + f"{json_output['config']['LANG']}"
+    output_file_path = os.path.join(org_folder_path, f"{model_name}_{file_suffix}.json")
+
+    with open(output_file_path, 'w', encoding='utf-8', newline="\n") as outfile:
+        json.dump(json_output, outfile, indent=4)
+
+    print(f"File {filename} processed and saved to {output_file_path}")
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..3b4737924b5a7d81c962a4e28b66ac6cdcc3b004
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,13 @@
+[tool.ruff]
+# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
+select = ["E", "F"]
+ignore = ["E501"] # line too long (black is taking care of this)
+line-length = 119
+fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
+
+[tool.isort]
+profile = "black"
+line_length = 119
+
+[tool.black]
+line-length = 119
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..46d41ea882da58a810ff984860b8fda48abf8f04
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,17 @@
+APScheduler
+black
+datasets
+gradio
+gradio[oauth]
+gradio_leaderboard==0.0.13
+gradio_client
+huggingface-hub>=0.18.0
+matplotlib
+numpy
+pandas
+python-dateutil
+tqdm
+transformers
+tokenizers>=0.15.0
+sentencepiece
+plotly
diff --git a/run_instructions.txt b/run_instructions.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a750b69dfb0a3a63c8ef77fb6bfef3c5bc9b2f2c
--- /dev/null
+++ b/run_instructions.txt
@@ -0,0 +1,42 @@
+Model Evaluation and Leaderboard
+
+1) Model Evaluation
+Before integrating a model into the leaderboard, it must first be evaluated using the lm-eval-harness library in both zero-shot and 5-shot configurations.
+
+This can be done with the following command:
+
+lm_eval --model hf --model_args pretrained=google/gemma-3-12b-it \
+  --tasks evalita-mp --device cuda:0 --batch_size 1 --trust_remote_code \
+  --output_path model_output --num_fewshot 5
+
+The output generated by the library will include the model's accuracy scores on the benchmark tasks.
+This output is written to the standard output and should be saved to a text file (e.g., slurm-8368.out), which must be placed in the
+evalita_llm_models_output directory for further processing.
+
+2) Extracting Model Metadata
+To display model details on the leaderboard (e.g., organization/group, model name, and parameter count), metadata must be retrieved from Hugging Face.
+
+This can be done by running:
+
+python get_model_info.py
+
+This script processes the evaluation files from Step 1 and saves each model's metadata in a JSON file within the evalita_llm_requests directory.
+
+3) Generating Leaderboard Submission File
+The leaderboard requires a structured file containing each model’s metadata along with its benchmark accuracy scores.
+
+To generate this file, run:
+
+python preprocess_models_output.py
+
+This script combines the accuracy results from Step 1 with the metadata from Step 2 and outputs a JSON file in the evalita_llm_results directory.
+
+4) Updating the Hugging Face Repository
+The evalita_llm_results repository on HuggingFace must be updated with the newly generated files from Step 3.
+
+5) Running the Leaderboard Application
+Finally, execute the leaderboard application by running:
+
+python app.py
+
+
diff --git a/src/.ipynb_checkpoints/about-checkpoint.py b/src/.ipynb_checkpoints/about-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..36db643246b90c32a8c8262d87af92c878ba7cfe
--- /dev/null
+++ b/src/.ipynb_checkpoints/about-checkpoint.py
@@ -0,0 +1,198 @@
+from dataclasses import dataclass
+from enum import Enum
+
+@dataclass
+class Task:  # One leaderboard column; see the Tasks enum for the values actually used.
+    benchmark: str  # task key in the results JSON, e.g. "text-entailment_1"
+    metric: str  # metric key in the results JSON; every Tasks member passes "acc"
+    metric_type: str  # statistic held by the column, e.g. "CPS" or "best_prompt"
+    col_name: str  # name to display in the leaderboard, e.g. "TE Best Prompt"
+
+# Select your tasks here
+# ---------------------------------------------------
+class Tasks(Enum):
+    # Task(task_key in the json file, metric_key in the json file, metric type, name to display in the leaderboard)
+
+    task1 = Task("text-entailment_1", "acc", "CPS", "TE")
+    task2 = Task("text-entailment_2", "acc", "average_accuracy", "TE Prompt Average")
+    task3 = Task("text-entailment_3", "acc", "std_accuracy", "TE Prompt Std")
+    task4 = Task("text-entailment_4", "acc", "best_prompt", "TE Best Prompt")
+    task5 = Task("text-entailment_5", "acc", "prompt_id", "TE Best Prompt Id")
+
+    task6 = Task("sentiment-analysis_1", "acc", "CPS", "SA")
+    task7 = Task("sentiment-analysis_2", "acc", "average_accuracy", "SA Prompt Average")
+    task8 = Task("sentiment-analysis_3", "acc", "std_accuracy", "SA STD Accuracy")  # NOTE(review): label differs from the "<TASK> Prompt Std" pattern of the other tasks — confirm intended
+    task9 = Task("sentiment-analysis_4", "acc", "best_prompt", "SA Best Prompt")
+    task10 = Task("sentiment-analysis_5", "acc", "prompt_id", "SA Best Prompt Id")
+
+    task11 = Task("hate-speech-detection_1", "acc", "CPS", "HS")
+    task12 = Task("hate-speech-detection_2", "acc", "average_accuracy", "HS Prompt Average")
+    task13 = Task("hate-speech-detection_3", "acc", "std_accuracy", "HS Prompt Std")
+    task14 = Task("hate-speech-detection_4", "acc", "best_prompt", "HS Best Prompt")
+    task15 = Task("hate-speech-detection_5", "acc", "prompt_id", "HS Best Prompt Id")
+
+    task16 = Task("admission-test_1", "acc", "CPS", "AT")
+    task17 = Task("admission-test_2", "acc", "average_accuracy", "AT Prompt Average")
+    task18 = Task("admission-test_3", "acc", "std_accuracy", "AT Prompt Std")
+    task19 = Task("admission-test_4", "acc", "best_prompt", "AT Best Prompt")
+    task20 = Task("admission-test_5", "acc", "prompt_id", "AT Best Prompt Id")
+
+    task21 = Task("word-in-context_1", "acc", "CPS", "WIC")
+    task22 = Task("word-in-context_2", "acc", "average_accuracy", "WIC Prompt Average")
+    task23 = Task("word-in-context_3", "acc", "std_accuracy", "WIC Prompt Std")
+    task24 = Task("word-in-context_4", "acc", "best_prompt", "WIC Best Prompt")
+    task25 = Task("word-in-context_5", "acc", "prompt_id", "WIC Best Prompt Id")
+
+    task26 = Task("faq_1", "acc", "CPS", "FAQ")
+    task27 = Task("faq_2", "acc", "average_accuracy", "FAQ Prompt Average")
+    task28 = Task("faq_3", "acc", "std_accuracy", "FAQ Prompt Std")
+    task29 = Task("faq_4", "acc", "best_prompt", "FAQ Best Prompt")
+    task30 = Task("faq_5", "acc", "prompt_id", "FAQ Best Prompt Id")
+
+    task31 = Task("lexical-substitution_1", "acc", "CPS", "LS")
+    task32 = Task("lexical-substitution_2", "acc", "average_accuracy", "LS Prompt Average")
+    task33 = Task("lexical-substitution_3", "acc", "std_accuracy", "LS Prompt Std")
+    task34 = Task("lexical-substitution_4", "acc", "best_prompt", "LS Best Prompt")
+    task35 = Task("lexical-substitution_5", "acc", "prompt_id", "LS Best Prompt Id")
+
+    task36 = Task("summarization-fanpage_1", "acc", "CPS", "SU")
+    task37 = Task("summarization-fanpage_2", "acc", "average_accuracy", "SU Prompt Average")
+    task38 = Task("summarization-fanpage_3", "acc", "std_accuracy", "SU Prompt Std")
+    task39 = Task("summarization-fanpage_4", "acc", "best_prompt", "SU Best Prompt")
+    task40 = Task("summarization-fanpage_5", "acc", "prompt_id", "SU Best Prompt Id")
+
+    task41 = Task("evalita NER_1", "acc", "CPS", "NER")
+    task42 = Task("evalita NER_2", "acc", "average_accuracy", "NER Prompt Average")
+    task43 = Task("evalita NER_3", "acc", "std_accuracy", "NER Prompt Std")
+    task44 = Task("evalita NER_4", "acc", "best_prompt", "NER Best Prompt")
+    task45 = Task("evalita NER_5", "acc", "prompt_id", "NER Best Prompt Id")
+
+    task46 = Task("relation-extraction_1", "acc", "CPS", "REL")
+    task47 = Task("relation-extraction_2", "acc", "average_accuracy", "REL Prompt Average")
+    task48 = Task("relation-extraction_5", "acc", "std_accuracy", "REL Prompt Std")  # NOTE(review): suffixes on these three lines run _5/_3/_4, unlike the _3/_4/_5 order of every other task — confirm against the results JSON
+    task49 = Task("relation-extraction_3", "acc", "best_prompt", "REL Best Prompt")
+    task50 = Task("relation-extraction_4", "acc", "prompt_id", "REL Best Prompt Id")
+
+    '''
+    task0 = Task("TextualEntailment", "acc", "Textual Entailment")
+    task1 = Task("TextualEntailment_best", "acc", "TextualEntailment Best")
+    task2 = Task("Sentiment Analysis", "acc", "Sentiment Analysis")
+    task3 = Task("Sentiment Analysis_best", "acc", "Sentiment Analysis_best")
+    task4 = Task("Hate Speech", "acc", "Hate Speech")
+    task5 = Task("Hate Speech_best", "acc", "Hate Speech_best")
+    task6 = Task("Admission Test", "acc", "Admission Test")
+    task7 = Task("Admission Test_best", "acc", "Admission Test_best")
+    task8 = Task("Word in Context", "acc", "Word in Context")
+    task9 = Task("Word in Context_best", "acc", "Word in Context_best")
+    task10 = Task("FAQ", "acc", "FAQ")
+    task11 = Task("FAQ_best", "acc", "FAQ_best")
+    task12 = Task("Lexical Substitution", "acc", "Lexical Substitution")
+    task13 = Task("Lexical Substitution_best", "acc", "Lexical Substitution_best")
+    task14 = Task("Summarization", "acc", "Summarization")
+    task15 = Task("Summarization_best", "acc", "Summarization_best")
+    task16 = Task("NER", "acc", "NER")
+    task17 = Task("NER_best", "acc", "NER_best")
+    task18 = Task("REL", "acc", "REL")
+    task19 = Task("REL_best", "acc", "REL_best")
+    '''  # The string above is a bare expression holding disabled 3-argument Task entries; it defines no members.
+
+# Your leaderboard name
+TITLE = """<h1 align="center" id="space-title">🚀 EVALITA-LLM Leaderboard 🚀</h1>"""
+
+# What does your leaderboard evaluate?
+INTRODUCTION_TEXT = """
+Evalita-LLM is a benchmark designed to evaluate Large Language Models (LLMs) on Italian tasks. The distinguishing features of Evalita-LLM are the following: (i) **all tasks are native Italian**, avoiding translation issues and potential cultural biases; (ii) the benchmark includes **generative** tasks, enabling more natural interaction with LLMs; (iii) **all tasks are evaluated against multiple prompts**, this way mitigating the model sensitivity to specific prompts and allowing a fairer evaluation.
+ 
+**<small>Multiple-choice tasks:</small>** <small> 📊TE (Textual Entailment), 😃SA (Sentiment Analysis), ⚠️HS (Hate Speech Detection), 🏥AT (Admission Test), 🔤WIC (Word in Context), ❓FAQ (Frequently Asked Questions) </small><br>
+**<small>Generative tasks:</small>** <small>🔄LS (Lexical Substitution), 📝SU (Summarization), 🏷️NER (Named Entity Recognition), 🔗REL (Relation Extraction) </small>
+"""
+
+# Which evaluations are you running? how can people reproduce what you have?
+LLM_BENCHMARKS_TEXT = f"""
+### Groups
+
+- `evalita-mp`: All tasks (perplexity and non-perplexity based).
+- `evalita-mp_gen`: Only generative tasks.
+- `evalita-mp_mc`: Only multiple-choice tasks.
+
+#### Tasks
+
+The following Evalita-LLM tasks can also be evaluated in isolation:
+ - `evalita-mp_te`: Textual Entailment (TE)
+ - `evalita-mp_sa`: Sentiment Analysis (SA)
+ - `evalita-mp_wic`: Word in Context (WIC)
+ - `evalita-mp_hs`: Hate Speech Detection (HS)
+ - `evalita-mp_at`: Admission Tests (AT)
+ - `evalita-mp_faq`: Frequently Asked Questions & Question Answering (FAQ) 
+ - `evalita-mp_sum_fp`:  Summarization (SU)
+ - `evalita-mp_ls`: Lexical Substitution (LS)
+ - `evalita-mp_ner_group`: Named Entity Recognition (NER)
+ - `evalita-mp_re`: Relation Extraction (REL)
+
+
+### Usage
+
+```bash
+
+lm_eval --model hf --model_args pretrained=meta-llama/Llama-2-7b-hf --tasks evalita-mp --device cuda:0 --batch_size 1 
+```
+
+<!-- 
+### Checklist
+
+* [x] Is the task an existing benchmark in the literature?
+ * [x] Have you referenced the original paper that introduced the task?
+ * [x] If yes, does the original paper provide a reference implementation?
+   * [x] Yes, original implementation contributed by author of the benchmark
+
+If other tasks on this dataset are already supported:
+* [x] Is the "Main" variant of this task clearly denoted?
+* [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
+* [x] Have you noted which, if any, published evaluation setups are matched by this variant?
+-->
+
+
+"""
+
+EVALUATION_QUEUE_TEXT = """
+## Some good practices before submitting a model
+
+### 1) Make sure you can load your model and tokenizer using AutoClasses:
+```python
+from transformers import AutoConfig, AutoModel, AutoTokenizer
+config = AutoConfig.from_pretrained("your model name", revision=revision)
+model = AutoModel.from_pretrained("your model name", revision=revision)
+tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
+```
+If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
+
+Note: make sure your model is public!
+Note: if your model needs `trust_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!
+
+### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
+It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
+
+### 3) Make sure your model has an open license!
+This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
+
+### 4) Fill up your model card
+When we add extra information about models to the leaderboard, it will be automatically taken from the model card
+
+## In case of model failure
+If your model is displayed in the `FAILED` category, its execution stopped.
+Make sure you have followed the above steps first.
+If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
+"""
+
+CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
+CITATION_BUTTON_TEXT = r"""
+@misc{magnini2025evalitallmbenchmarkinglargelanguage,
+     title={Evalita-LLM: Benchmarking Large Language Models on Italian}, 
+     author={Bernardo Magnini and Roberto Zanoli and Michele Resta and Martin Cimmino and Paolo Albano and Marco Madeddu and Viviana Patti},
+     year={2025},
+     eprint={2502.02289},
+     archivePrefix={arXiv},
+     primaryClass={cs.CL},
+     url={https://arxiv.org/abs/2502.02289}, 
+}
+"""
diff --git a/src/.ipynb_checkpoints/envs-checkpoint.py b/src/.ipynb_checkpoints/envs-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..9db342d84e248b24bae574cb6cb33a42efa92c04
--- /dev/null
+++ b/src/.ipynb_checkpoints/envs-checkpoint.py
@@ -0,0 +1,46 @@
+import os
+
+from huggingface_hub import HfApi
+
+# Info to change for your repository
+# ----------------------------------
+TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org; None when HF_TOKEN is unset
+
+#OWNER = "giux78" # Change to your org - don't forget to create a results and request dataset, with the correct format!
+OWNER = "saeedfarzi"  # account/org name used as the prefix of the repo ids below
+# ----------------------------------
+
+#REPO_ID = f"{OWNER}/leaderboard-evalita"
+#QUEUE_REPO = f"{OWNER}/evalita-requests"
+#RESULTS_REPO = f"{OWNER}/evalita-results"
+
+REPO_ID = f"{OWNER}/MediLingua_Leaderboard"
+QUEUE_REPO = f"{OWNER}/e3c_llm_requests"
+RESULTS_REPO = f"{OWNER}/e3c_llm_results"
+
+# If you set up a cache later, just change HF_HOME
+#CACHE_PATH=os.getenv("HF_HOME", "/home/sfarzi/leaderboard/")
+
+# Local caches
+#EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
+#EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
+#EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
+#EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
+
+#EVAL_REQUESTS_PATH ='/home/sfarzi/leaderboard/llm_leaderboard/e3c_llm_requests' #os.path.join(CACHE_PATH, "eval-queue")
+#EVAL_RESULTS_PATH = '/home/sfarzi/leaderboard/llm_leaderboard/e3c_llm_results'#os.path.join(CACHE_PATH, "eval-results")
+#EVAL_REQUESTS_PATH_BACKEND = '/home/sfarzi/leaderboard/llm_leaderboard/e3c_llm_requests' #os.path.join(CACHE_PATH, "eval-queue-bk")
+#EVAL_RESULTS_PATH_BACKEND = '/home/sfarzi/leaderboard/llm_leaderboard/e3c_llm_results' #os.path.join(CACHE_PATH, "eval-results-bk")
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))  # directory containing this file
+
+# Go one directory up from BASE_DIR
+PARENT_DIR = os.path.dirname(BASE_DIR)
+
+# Request/result directories live directly under PARENT_DIR
+EVAL_REQUESTS_PATH = os.path.join(PARENT_DIR, "e3c_llm_requests")
+EVAL_RESULTS_PATH = os.path.join(PARENT_DIR, "e3c_llm_results")
+EVAL_REQUESTS_PATH_BACKEND = EVAL_REQUESTS_PATH  # backend paths reuse the same directories
+EVAL_RESULTS_PATH_BACKEND = EVAL_RESULTS_PATH
+
+API = HfApi(token=TOKEN)  # module-level Hub client built with TOKEN
diff --git a/src/.ipynb_checkpoints/populate-checkpoint.py b/src/.ipynb_checkpoints/populate-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bbaf385b52d6edf173633353b9458b76c868158
--- /dev/null
+++ b/src/.ipynb_checkpoints/populate-checkpoint.py
@@ -0,0 +1,59 @@
+import json
+import os
+
+import pandas as pd
+
+from src.display.formatting import has_no_nan_values, make_clickable_model
+from src.display.utils import AutoEvalColumn, EvalQueueColumn
+from src.leaderboard.read_evals import get_raw_eval_results
+
+
+def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+    """Build the leaderboard dataframe from the individual evaluation results.
+
+    Results come from ``get_raw_eval_results``; rows are sorted by the average
+    column in descending order, restricted to ``cols`` and rounded to two
+    decimals, then filtered with ``has_no_nan_values`` over ``benchmark_cols``.
+    """
+    records = [result.to_dict() for result in get_raw_eval_results(results_path, requests_path)]
+    ranked = pd.DataFrame.from_records(records).sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    table = ranked[cols].round(decimals=2)
+    # Drop the rows for which some benchmark has not been produced.
+    complete_rows = has_no_nan_values(table, benchmark_cols)
+    return table[complete_rows]
+
+
+def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
+    """Creates the different dataframes for the evaluation queue requests.
+
+    Reads request JSON files from ``save_path`` and its immediate sub-folders (hidden
+    entries skipped); returns the finished, running and pending frames, in that order.
+    """
+    request_files = []
+    for entry in os.listdir(save_path):
+        entry_path = os.path.join(save_path, entry)
+        if entry.startswith("."):
+            continue
+        if ".json" in entry:
+            request_files.append(entry_path)
+        elif ".md" not in entry and os.path.isdir(entry_path):
+            # os.listdir yields bare names, so test the joined path (a bare name is resolved
+            # against the CWD and made sub-folders look empty); keep JSON files only, as above.
+            request_files.extend(
+                os.path.join(entry_path, e) for e in os.listdir(entry_path)
+                if ".json" in e and not e.startswith(".") and os.path.isfile(os.path.join(entry_path, e))
+            )
+    all_evals = []
+    for file_path in request_files:
+        with open(file_path) as fp:
+            data = json.load(fp)
+        data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+        data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+        all_evals.append(data)
+    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
+    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
+    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
+    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
+    df_running = pd.DataFrame.from_records(running_list, columns=cols)
+    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
+    return df_finished[cols], df_running[cols], df_pending[cols]
diff --git a/src/.ipynb_checkpoints/tasks-checkpoint.py b/src/.ipynb_checkpoints/tasks-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b3c7f6764cf25ec0f0a0b3705ed89617235a505
--- /dev/null
+++ b/src/.ipynb_checkpoints/tasks-checkpoint.py
@@ -0,0 +1,183 @@
+from dataclasses import dataclass
+from enum import Enum
+
+@dataclass
+class Task:  # NOTE(review): not instantiated in the visible part of this file — confirm it is still used
+    benchmark: str  # presumably the task key in the results JSON — TODO confirm
+    # metric: str
+    accuracy: str  # presumably stands in for the disabled `metric` field above — TODO confirm
+    col_name: str  # presumably the column title shown in the leaderboard — TODO confirm
+
+NUM_FEWSHOT = 0  # Change with your few shot
+# ---------------------------------------------------
+
+# Your leaderboard name
+TITLE = """<h1 align="center" id="space-title">🚀 EVALITA-LLM Leaderboard 🚀</h1>"""
+
+# What does your leaderboard evaluate?
+INTRODUCTION_TEXT = """
+Evalita-LLM is a benchmark designed to evaluate Large Language Models (LLMs) on Italian tasks. The distinguishing features of Evalita-LLM are the following: (i) all tasks are native Italian, avoiding translation issues and potential cultural biases; (ii) the benchmark includes generative tasks, enabling more natural interaction with LLMs; (iii) all tasks are evaluated against multiple prompts, this way mitigating the model sensitivity to specific prompts and allowing a fairer evaluation.
+"""
+
+#MEASURE_DESCRIPTION = "Combined Performance = (1 - (Best_Prompt - Prompt_Average) / 100) * Best_Prompt. Prompt Average = accuracy averaged over the six prompts. Best Prompt = accuracy of the best prompt. Prompt ID = ID of the best prompt (see legend above)"
+MEASURE_DESCRIPTION = "<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the assessed prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above).</small>"
+#MEASURE_DESCRIPTION = "<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = avg. accuracy over prompts. **Best Prompt** = accuracy of best prompt. **Prompt ID** = ID of the best prompt (see legend above).</small>"
+
+# Tasks Descriptions
+TE_DESCRIPTION = """### Textual Entailment (TE) --- *Multiple-choice task*
+    The input consists of two sentences: the text (T) and the hypothesis (H). The model has to determine whether the meaning of the hypothesis is logically entailed by the text.
+
+| #   | Prompt | Answer Choices |
+|-----|------------|--------------|
+| 1   | La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera? | ["Sì", "No"] |
+| 2   | Devi risolvere un compito di inferenza semantica. La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera? | ["Sì", "No"] |
+| 3   | La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera?\\nA: Sì\\nB: No\\nRisposta: | ["A", "B"] |
+| 4   | Devi risolvere un compito di inferenza semantica. La frase: '{{text1}}' implica logicamente che la frase: '{{text2}}' sia vera?\\nA: Sì\\nB: No\\nRisposta: | ["A", "B"] |
+| 5   | Frase 1: '{{text1}}' Frase 2: '{{text2}}' | ["La frase 1 implica logicamente che la frase 2 sia vera", "La frase 1 non implica logicamente che la frase 2 sia vera"] |
+| 6   | Devi risolvere un compito di inferenza semantica. Frase 1: '{{text1}}' Frase 2: '{{text2}}' | ["La frase 1 implica logicamente che la frase 2 sia vera", "La frase 1 non implica logicamente che la frase 2 sia vera"] |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the 6 prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+SA_DESCRIPTION = """### Sentiment Analysis (SA) --- *Multiple-choice task*
+    The input is a tweet. The model has to determine the sentiment polarity of the text, categorizing it into one of four classes: positive, negative, neutral, or mixed.
+
+| #   | Prompt                                                                                       | Answer Choices               |
+|-----|--------------------------------------------------------------------------------|-----------------------------|
+| 1   | Qual è il sentiment espresso nel seguente tweet: '{{text}}'?                     | ["Positivo", "Negativo", "Neutro", "Misto"] |
+| 2   | Devi svolgere un compito di analisi del sentiment. Qual è il sentiment espresso nel seguente tweet: '{{text}}'? | ["Positivo", "Negativo", "Neutro", "Misto"] |
+| 3   | Qual è il sentiment espresso nel seguente tweet: '{{text}}'?\\nA: Positivo\\nB: Negativo\\nC: Neutro\\nD: Misto\\nRisposta: | ["A", "B", "C", "D"] |
+| 4   | Devi svolgere un compito di analisi del sentiment. Qual è il sentiment espresso nel seguente tweet: '{{text}}'?\\nA: Positivo\\nB: Negativo\\nC: Neutro\\nD: Misto\\nRisposta: | ["A", "B", "C", "D"] |
+| 5   | Il seguente tweet: '{{text}}' esprime un sentiment                                | ["Positivo", "Negativo", "Neutro", "Misto"]  |
+| 6   | Devi svolgere un compito di analisi del sentiment. Il seguente tweet: '{{text}}' esprime un sentiment | ["Positivo", "Negativo", "Neutro", "Misto"] |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1-macro averaged over the 6 prompts. **Best Prompt** = F1-macro of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+HS_DESCRIPTION = """### Hate Speech (HS) --- *Multiple-choice task*
+    The input is a tweet. The model has to determine whether the text contains hateful content directed towards marginalized or minority groups. The output is a binary classification: hateful or not hateful.
+
+| #   | Prompt                                                                                       | Answer Choices                                   |
+|-----|--------------------------------------------------------------------------------|-------------------------------------------------|
+| 1   | C'è incitamento all'odio nel seguente tweet: '{{full_text}}'?                     | ["Falso", "Vero"] |
+| 2   | Devi svolgere un compito di identificazione di incitamento all'odio. C'è incitamento all'odio nel seguente tweet: '{{full_text}}'? | ["Falso", "Vero"] |
+| 3   | C'è incitamento all'odio nel seguente tweet: '{{full_text}}'?\\nA: Vero\\nB: Falso\\nRisposta: | ["B", "A"]                                      |
+| 4   | Devi svolgere un compito di identificazione di incitamento all'odio. C'è incitamento all'odio nel seguente tweet: '{{full_text}}'?\\nA: Vero\\nB: Falso\\nRisposta: | ["B", "A"]                                      |
+| 5   | Il tweet: '{{full_text}}'                                                      | ["non contiene incitamento all'odio", "contiene incitamento all'odio"] |
+| 6   | Devi svolgere un compito di identificazione di incitamento all'odio. Il tweet: '{{full_text}}' | ["non contiene incitamento all'odio", "contiene incitamento all'odio"] |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1-micro averaged over the 6 prompts. **Best Prompt** = F1-micro of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+AT_DESCRIPTION = """### Admission Tests (AT) --- *Multiple-choice task*
+    The input is a multiple-choice question with five options (A-E) from Italian medical specialty entrance exams, and the model must identify the correct answer.
+
+| #   | Prompt                                                                                       | Answer Choices               |
+|-----|--------------------------------------------------------------------------------|-----------------------------|
+| 1   | Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta? | ["A", "B", "C", "D", "E"]   |
+| 2   | Devi risolvere un compito di risposte a domande. Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta? | ["A", "B", "C", "D", "E"]   |
+| 3   | Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?\\nA: {{A}}\\nB: {{B}}\\nC: {{C}}\\nD: {{D}}\\nE: {{E}}\\nRisposta: | ["A", "B", "C", "D", "E"]   |
+| 4   | Devi risolvere un compito a scelta multipla. Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?\\nA: {{A}}\\nB: {{B}}\\nC: {{C}}\\nD: {{D}}\\nE: {{E}}\\nRisposta: | ["A", "B", "C", "D", "E"]   |
+| 5   | Dato il seguente quesito di medicina '{{Question}}' la risposta corretta è: | ["A", "B", "C", "D", "E"]   |
+| 6   | Devi risolvere un compito di risposte a domande. Dato il seguente quesito di medicina '{{Question}}' la risposta corretta è: | ["A", "B", "C", "D", "E"]   |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the 6 prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+WIC_DESCRIPTION = """### Word in Context (WIC) --- *Multiple-choice task*
+    The input consists of a word (w) and two sentences. The model has to determine whether the word w has the same meaning in both sentences. The output is a binary classification: 1 (same meaning) or 0 (different meaning).
+
+| #   | Prompt                                                                                       | Answer Choices                                   |
+|-----|--------------------------------------------------------------------------------|-------------------------------------------------|
+| 1   | La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'? | ["No", "Sì"]                         |
+| 2   | Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'? | ["No", "Sì"] |
+| 3   | La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?\\nA: Sì\\nB: No\\nRisposta: | ["B", "A"]                                      |
+| 4   | Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?\\nA: Sì\\nB: No\\nRisposta: | ["B", "A"]  |
+| 5   | La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}' | ["non hanno lo stesso significato", "hanno lo stesso significato"] |
+| 6   | Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}' | ["non hanno lo stesso significato", "hanno lo stesso significato"] |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1-macro averaged over the 6 prompts. **Best Prompt** = F1-macro of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+FAQ_DESCRIPTION = """### Frequently Asked Questions & Question Answering (FAQ) --- *Multiple-choice task*
+    The input is a user query regarding the water supply service. The model must identify the correct answer from the 4 available options.
+
+| #   | Prompt                                                                                       | Answer Choices               |
+|-----|--------------------------------------------------------------------------------|-----------------------------|
+| 1   | Rispondi alla seguente domanda: '{{question}}'                                              | {{[A, B, C, D]}}            |
+| 2   | Devi risolvere un compito di risposte a domande. Rispondi alla seguente domanda: '{{question}}' | {{[A, B, C, D]}}            |
+| 3   | Rispondi alla seguente domanda: '{{question}}'\\nA: {{A}}\\nB: {{B}}\\nC: {{C}}\\nD: {{D}}\\nRisposta: | ["A", "B", "C", "D"] |
+| 4   | Devi risolvere un compito a scelta multipla. Rispondi alla seguente domanda: '{{question}}'\\nA: {{A}}\\nB: {{B}}\\nC: {{C}}\\nD: {{D}}\\nRisposta: | ["A", "B", "C", "D"] |
+| 5   | La risposta alla domanda: '{{question}}' è:                                                | {{[A, B, C, D]}}            |
+| 6   | Devi risolvere un compito di risposte a domande. La risposta alla domanda: '{{question}}' è: | {{[A, B, C, D]}}  |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the 6 prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+LS_DESCRIPTION = """### Lexical Substitution (LS) --- *Generative task*
+    The input is a sentence containing a target word (w). The model has to replace the target word w with its most suitable synonyms that are contextually relevant. 
+
+| #   | Prompt                                                                                       |
+|-----|--------------------------------------------------------------------------------|
+| 7   | Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori `<head>` nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. Risposta: |
+| 8   | Devi risolvere un compito di sostituzione lessicale. Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori `<head>` nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. Risposta: |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+SU_DESCRIPTION = """### Summarization (SUM) --- *Generative task*
+    The input is a news article. The model has to generate a concise summary of the input text, capturing the key information and main points.
+
+| #   | Prompt                                                                                       |
+|-----|--------------------------------------------------------------------------------|
+| 7   | Riassumi il seguente articolo di giornale: '{{source}}'\\nRiassunto:             |
+| 8   | Devi risolvere un compito di sintesi automatica del testo. Riassumi il seguente articolo di giornale: '{{source}}'\\nRiassunto: |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+NER_DESCRIPTION = """### Named Entity Recognition (NER) --- *Generative task*
+    The input is a sentence. The model has to identify and classify Named Entities into predefined categories such as person, organization, and location.
+
+| #   | Prompt                                                                                       |
+|-----|--------------------------------------------------------------------------------|
+| 7   | Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'.\\nTesto: '{{text}}'\\nEntità: |
+| 8   | Devi svolgere un compito di riconoscimento delle entità nei testi. Estrai tutte le entità di tipo PER (persona), LOC (luogo) e ORG (organizzazione) dal testo seguente. Riporta ogni entità con il formato: Entità$Tipo, separando ciascuna coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'.\\nTesto: '{{text}}'\\nEntità: |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+REL_DESCRIPTION = """### Relation Extraction (REL) --- *Generative task*
+    The input is a sentence of a clinical text. The model must identify and extract relationships between laboratory test results (e.g., blood pressure) and the corresponding tests or procedures that generated them (e.g., blood pressure test).
+
+| #   | Prompt                                                                                       |
+|-----|--------------------------------------------------------------------------------|
+| 7   | Dato un documento medico devi estrarre tutte le misurazioni degli esami medici presenti. Riporta ogni relazione nel formato: misurazione$esame, separando ciascuna coppia con '%'. Se non ci sono relazioni da estrarre, rispondi con '&&NOREL&&'.\\nTesto: '{{text}}'\\nRelazioni: |
+| 8   | Devi svolgere un compito di estrazione di relazioni da documenti medici. Dato un documento medico devi estrarre tutte le misurazioni degli esami medici presenti. Riporta ogni relazione nel formato: misurazione$esame, separando ciascuna coppia con '%'. Se non ci sono relazioni da estrarre, rispondi con '&&NOREL&&'.\\nTesto: '{{text}}'\\nRelazioni: |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+# Create a dictionary to map task names to their descriptions
+TASK_DESCRIPTIONS = {
+    "TE": TE_DESCRIPTION,
+    "SA": SA_DESCRIPTION,
+    "HS": HS_DESCRIPTION,
+    "AT": AT_DESCRIPTION,
+    "WIC": WIC_DESCRIPTION,
+    "FAQ": FAQ_DESCRIPTION,
+    "LS": LS_DESCRIPTION,
+    "SU": SU_DESCRIPTION,
+    "NER": NER_DESCRIPTION,
+    "REL": REL_DESCRIPTION
+}
\ No newline at end of file
diff --git a/src/__pycache__/about.cpython-310.pyc b/src/__pycache__/about.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cbda2e403d65ed8d4dd1241f0570fd9ba76c09c8
Binary files /dev/null and b/src/__pycache__/about.cpython-310.pyc differ
diff --git a/src/__pycache__/envs.cpython-310.pyc b/src/__pycache__/envs.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6bd39ddac2141e91072782e80e67cb0024ba03bb
Binary files /dev/null and b/src/__pycache__/envs.cpython-310.pyc differ
diff --git a/src/__pycache__/populate.cpython-310.pyc b/src/__pycache__/populate.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..58d0b6452f839a4f869e923bc26c831f6f5e7922
Binary files /dev/null and b/src/__pycache__/populate.cpython-310.pyc differ
diff --git a/src/__pycache__/tasks.cpython-310.pyc b/src/__pycache__/tasks.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7398e8e67ffdf2626fd183050638841f51b03422
Binary files /dev/null and b/src/__pycache__/tasks.cpython-310.pyc differ
diff --git a/src/about.py b/src/about.py
new file mode 100644
index 0000000000000000000000000000000000000000..bca5fccde80ed6f165afbd30825b0f257b43ded0
--- /dev/null
+++ b/src/about.py
@@ -0,0 +1,222 @@
+from dataclasses import dataclass
+from enum import Enum
+
+@dataclass
+class Task:  # one leaderboard score column; instances are the values of the Tasks enum
+    benchmark: str  # task key, e.g. "RE_1" (presumably the key looked up in the results JSON; confirm against the loader)
+    metric: str  # metric key; every member declared in Tasks passes "acc"
+    metric_type: str  # statistic held by the column: "CPS", "average_accuracy", "std_accuracy", "best_prompt" or "prompt_id"
+    col_name: str  # column header shown in the leaderboard, e.g. "REL Prompt Average"
+
+# Select your tasks here
+# ---------------------------------------------------
+class Tasks(Enum):
+    # Task(task_key in the json file, metric_key in the json file, metric type (CPS / average / std / best prompt / prompt id), name to display in the leaderboard)
+
+    #task1 = Task("text-entailment_1", "acc", "CPS", "TE")
+    #task2 = Task("text-entailment_2", "acc", "average_accuracy", "TE Prompt Average")
+    #task3 = Task("text-entailment_3", "acc", "std_accuracy", "TE Prompt Std")
+    #task4 = Task("text-entailment_4", "acc", "best_prompt", "TE Best Prompt")
+    #task5 = Task("text-entailment_5", "acc", "prompt_id", "TE Best Prompt Id")
+
+    #task6 = Task("sentiment-analysis_1", "acc", "CPS", "SA")
+    #task7 = Task("sentiment-analysis_2", "acc", "average_accuracy", "SA Prompt Average")
+    #task8 = Task("sentiment-analysis_3", "acc", "std_accuracy", "SA STD Accuracy")
+    #task9 = Task("sentiment-analysis_4", "acc", "best_prompt", "SA Best Prompt")
+    #task10 = Task("sentiment-analysis_5", "acc", "prompt_id", "SA Best Prompt Id")
+
+    #task11 = Task("hate-speech-detection_1", "acc", "CPS", "HS")
+    #task12 = Task("hate-speech-detection_2", "acc", "average_accuracy", "HS Prompt Average")
+    #task13 = Task("hate-speech-detection_3", "acc", "std_accuracy", "HS Prompt Std")
+    #task14 = Task("hate-speech-detection_4", "acc", "best_prompt", "HS Best Prompt")
+    #task15 = Task("hate-speech-detection_5", "acc", "prompt_id", "HS Best Prompt Id")
+
+    #task16 = Task("admission-test_1", "acc", "CPS", "AT")
+    #task17 = Task("admission-test_2", "acc", "average_accuracy", "AT Prompt Average")
+    #task18 = Task("admission-test_3", "acc", "std_accuracy", "AT Prompt Std")
+    #task19 = Task("admission-test_4", "acc", "best_prompt", "AT Best Prompt")
+    #task20 = Task("admission-test_5", "acc", "prompt_id", "AT Best Prompt Id")
+
+    #task21 = Task("word-in-context_1", "acc", "CPS", "WIC")
+    #task22 = Task("word-in-context_2", "acc", "average_accuracy", "WIC Prompt Average")
+    #task23 = Task("word-in-context_3", "acc", "std_accuracy", "WIC Prompt Std")
+    #task24 = Task("word-in-context_4", "acc", "best_prompt", "WIC Best Prompt")
+    #task25 = Task("word-in-context_5", "acc", "prompt_id", "WIC Best Prompt Id")
+
+    #task26 = Task("faq_1", "acc", "CPS", "FAQ")
+    #task27 = Task("faq_2", "acc", "average_accuracy", "FAQ Prompt Average")
+    #task28 = Task("faq_3", "acc", "std_accuracy", "FAQ Prompt Std")
+    #task29 = Task("faq_4", "acc", "best_prompt", "FAQ Best Prompt")
+    #task30 = Task("faq_5", "acc", "prompt_id", "FAQ Best Prompt Id")
+
+    #task31 = Task("lexical-substitution_1", "acc", "CPS", "LS")
+    #task32 = Task("lexical-substitution_2", "acc", "average_accuracy", "LS Prompt Average")
+    #task33 = Task("lexical-substitution_3", "acc", "std_accuracy", "LS Prompt Std")
+    #task34 = Task("lexical-substitution_4", "acc", "best_prompt", "LS Best Prompt")
+    #task35 = Task("lexical-substitution_5", "acc", "prompt_id", "LS Best Prompt Id")
+
+    #task36 = Task("summarization-fanpage_1", "acc", "CPS", "SU")
+    #task37 = Task("summarization-fanpage_2", "acc", "average_accuracy", "SU Prompt Average")
+    #task38 = Task("summarization-fanpage_3", "acc", "std_accuracy", "SU Prompt Std")
+    #task39 = Task("summarization-fanpage_4", "acc", "best_prompt", "SU Best Prompt")
+    #task40 = Task("summarization-fanpage_5", "acc", "prompt_id", "SU Best Prompt Id")
+
+    #task41 = Task("evalita NER_1", "acc", "CPS", "NER")
+    #task42 = Task("evalita NER_2", "acc", "average_accuracy", "NER Prompt Average")
+    #task43 = Task("evalita NER_3", "acc", "std_accuracy", "NER Prompt Std")
+    #task44 = Task("evalita NER_4", "acc", "best_prompt", "NER Best Prompt")
+    #task45 = Task("evalita NER_5", "acc", "prompt_id", "NER Best Prompt Id")
+
+    #task46 = Task("relation-extraction_1", "acc", "CPS", "REL")
+    #task47 = Task("relation-extraction_2", "acc", "average_accuracy", "REL Prompt Average")
+    #task48 = Task("relation-extraction_5", "acc", "std_accuracy", "REL Prompt Std")
+    #task49 = Task("relation-extraction_3", "acc", "best_prompt", "REL Best Prompt")
+    #task50 = Task("relation-extraction_4", "acc", "prompt_id", "REL Best Prompt Id")
+    task1 = Task("RE_1", "acc", "CPS", "REL")
+    task2 = Task("RE_2", "acc", "average_accuracy", "REL Prompt Average")
+    task3 = Task("RE_5", "acc", "std_accuracy", "REL Prompt Std")
+    task4 = Task("RE_3", "acc", "best_prompt", "REL Best Prompt")
+    task5 = Task("RE_4", "acc", "prompt_id", "REL Best Prompt Id")
+
+    task6 = Task("NER_1", "acc", "CPS", "NER")
+    task7 = Task("NER_2", "acc", "average_accuracy", "NER Prompt Average")
+    task8 = Task("NER_3", "acc", "std_accuracy", "NER Prompt Std")
+    task9 = Task("NER_4", "acc", "best_prompt", "NER Best Prompt")
+    task10 = Task("NER_5", "acc", "prompt_id", "NER Best Prompt Id") 
+
+    task11 = Task("RML_1", "acc", "CPS", "RML")
+    task12 = Task("RML_2", "acc", "average_accuracy", "RML Prompt Average")
+    task13 = Task("RML_3", "acc", "std_accuracy", "RML Prompt Std")
+    task14 = Task("RML_4", "acc", "best_prompt", "RML Best Prompt")
+    task15 = Task("RML_5", "acc", "prompt_id", "RML Best Prompt Id") 
+
+
+
+    task16 = Task("DIA_1", "acc", "CPS", "DIA")
+    task17 = Task("DIA_2", "acc", "average_accuracy", "DIA Prompt Average")
+    task18 = Task("DIA_3", "acc", "std_accuracy", "DIA Prompt Std")
+    task19 = Task("DIA_4", "acc", "best_prompt", "DIA Best Prompt")
+    task20 = Task("DIA_5", "acc", "prompt_id", "DIA Best Prompt Id") 
+
+    task21 = Task("HIS_1", "acc", "CPS", "HIS")
+    task22 = Task("HIS_2", "acc", "average_accuracy", "HIS Prompt Average")
+    task23 = Task("HIS_3", "acc", "std_accuracy", "HIS Prompt Std")
+    task24 = Task("HIS_4", "acc", "best_prompt", "HIS Best Prompt")
+    task25 = Task("HIS_5", "acc", "prompt_id", "HIS Best Prompt Id")
+    '''
+    task0 = Task("TextualEntailment", "acc", "Textual Entailment")
+    task1 = Task("TextualEntailment_best", "acc", "TextualEntailment Best")
+    task2 = Task("Sentiment Analysis", "acc", "Sentiment Analysis")
+    task3 = Task("Sentiment Analysis_best", "acc", "Sentiment Analysis_best")
+    task4 = Task("Hate Speech", "acc", "Hate Speech")
+    task5 = Task("Hate Speech_best", "acc", "Hate Speech_best")
+    task6 = Task("Admission Test", "acc", "Admission Test")
+    task7 = Task("Admission Test_best", "acc", "Admission Test_best")
+    task8 = Task("Word in Context", "acc", "Word in Context")
+    task9 = Task("Word in Context_best", "acc", "Word in Context_best")
+    task10 = Task("FAQ", "acc", "FAQ")
+    task11 = Task("FAQ_best", "acc", "FAQ_best")
+    task12 = Task("Lexical Substitution", "acc", "Lexical Substitution")
+    task13 = Task("Lexical Substitution_best", "acc", "Lexical Substitution_best")
+    task14 = Task("Summarization", "acc", "Summarization")
+    task15 = Task("Summarization_best", "acc", "Summarization_best")
+    task16 = Task("NER", "acc", "NER")
+    task17 = Task("NER_best", "acc", "NER_best")
+    task18 = Task("REL", "acc", "REL")
+    task19 = Task("REL_best", "acc", "REL_best")
+    '''
+
+# Your leaderboard name
+TITLE = """<h1 align="center" id="space-title">🚀 ECREAM-LLM Leaderboard 🚀</h1>"""
+
+# What does your leaderboard evaluate?
+INTRODUCTION_TEXT = """
+<br><br><b>The eCream-LLM leaderboard </b>, developed within <a href='https://ecreamproject.eu/'> the eCream Project </a> (enabling Clinical Research in Emergency and Acute care Medicine), is designed to evaluate Large Language Models (LLMs) on two tasks pertaining to the medical domain. Its distinguishing features are:<b> <br> (i) all tasks are implemented for six languages including English, Italian, Slovak, Slovenian, Polish and Greek; <br> (ii) all tasks are generative, thus allowing for a more natural interaction with LLMs; <br> (iii) all tasks are evaluated against multiple prompts, this way mitigating the model sensitivity to specific prompts and allowing a fairer evaluation.</b>
+<br><br>**<small>Generative tasks:</small>** <small> 🏷️NER (Named Entity Recognition), 🔗REL (Relation Extraction), 😃RML(CRF RML) </small>
+<br>**<small>Multiple-choice task:</small>** <small>   🏥DIA (CRF Diagnosis), 📝HIS (CRF History)  </small>
+
+""" 
+
+
+
+# Which evaluations are you running? how can people reproduce what you have?
+LLM_BENCHMARKS_TEXT = """
+### Groups
+
+- `evalita-mp`: All tasks (perplexity and non-perplexity based).
+- `evalita-mp_gen`: Only generative tasks.
+
+#### Tasks
+
+The following Evalita-LLM tasks can also be evaluated in isolation:
+ - `evalita-mp_ner_group`: Named Entity Recognition (NER)
+ - `evalita-mp_re`: Relation Extraction (REL)
+
+
+### Usage
+
+```bash
+
+lm_eval --model hf --model_args pretrained=meta-llama/Llama-2-7b-hf --tasks evalita-mp_re --device cuda:0 --batch_size 1 
+```
+
+<!-- 
+### Checklist
+
+* [x] Is the task an existing benchmark in the literature?
+ * [x] Have you referenced the original paper that introduced the task?
+ * [x] If yes, does the original paper provide a reference implementation?
+   * [x] Yes, original implementation contributed by author of the benchmark
+
+If other tasks on this dataset are already supported:
+* [x] Is the "Main" variant of this task clearly denoted?
+* [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
+* [x] Have you noted which, if any, published evaluation setups are matched by this variant?
+-->
+
+
+"""  # plain literal on purpose: the text has no placeholders, so braces added later stay literal
+
+EVALUATION_QUEUE_TEXT = """
+## Some good practices before submitting a model
+
+### 1) Make sure you can load your model and tokenizer using AutoClasses:
+```python
+from transformers import AutoConfig, AutoModel, AutoTokenizer
+config = AutoConfig.from_pretrained("your model name", revision=revision)
+model = AutoModel.from_pretrained("your model name", revision=revision)
+tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
+```
+If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
+
+Note: make sure your model is public!
+Note: if your model needs `trust_remote_code=True`, we do not support this option yet but we are working on adding it, stay posted!
+
+### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
+It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
+
+### 3) Make sure your model has an open license!
+This is a leaderboard for Open LLMs, and we'd love for as many people as possible to know they can use your model 🤗
+
+### 4) Fill up your model card
+When we add extra information about models to the leaderboard, it will be automatically taken from the model card
+
+## In case of model failure
+If your model is displayed in the `FAILED` category, its execution stopped.
+Make sure you have followed the above steps first.
+If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
+"""  # submission guidance text; the remote-code note uses the real transformers option name `trust_remote_code`
+
+CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
+CITATION_BUTTON_TEXT = r""" 
+@article{magnini2025cost,
+  title={A cost-effective approach to counterbalance the scarcity of medical datasets},
+  author={Magnini, Bernardo and Farzi, Saeed and Ferrazzi, Pietro and Ghosh, Soumitra and Lavelli, Alberto and Mezzanotte, Giulia and Speranza, Manuela},
+  journal={Frontiers in Disaster and Emergency Medicine},
+  volume={3},
+  pages={1558200},
+  year={2025},
+  publisher={Frontiers Media SA}, 
+  url={https://www.frontiersin.org/journals/disaster-and-emergency-medicine/articles/10.3389/femer.2025.1558200/full}
+}
+"""
diff --git a/src/display/.ipynb_checkpoints/css_html_js-checkpoint.py b/src/display/.ipynb_checkpoints/css_html_js-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..721bc2e346c9e2578013986ee477c46a5fe11b17
--- /dev/null
+++ b/src/display/.ipynb_checkpoints/css_html_js-checkpoint.py
@@ -0,0 +1,122 @@
+custom_css = """
+
+.markdown-text {
+    font-size: 16px !important;
+}
+
+#models-to-add-text {
+    font-size: 18px !important;
+}
+
+#citation-button span {
+    font-size: 16px !important;
+}
+
+#citation-button textarea {
+    font-size: 16px !important;
+}
+
+#citation-button > label > button {
+    margin: 6px;
+    transform: scale(1.3);
+}
+
+#leaderboard-table {
+    margin-top: 15px
+}
+
+#leaderboard-table-lite {
+    margin-top: 15px
+}
+
+#search-bar-table-box > div:first-child {
+    background: none;
+    border: none;
+}
+ 
+#search-bar {
+    padding: 0px;
+}
+
+/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
+#leaderboard-table td:nth-child(2),
+#leaderboard-table th:nth-child(2) {
+    max-width: 400px;
+    overflow: auto;
+    white-space: nowrap;
+}
+
+.tab-buttons button {
+    font-size: 20px;
+}
+
+#scale-logo {
+    border-style: none !important;
+    box-shadow: none;
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+    max-width: 600px;
+}
+
+#scale-logo .download {
+    display: none;
+}
+#filter_type{
+    border: 0;
+    padding-left: 0;
+    padding-top: 0;
+}
+#filter_type label {
+    display: flex;
+}
+#filter_type label > span{
+    margin-top: var(--spacing-lg);
+    margin-right: 0.5em;
+}
+#filter_type label > .wrap{
+    width: 103px;
+}
+#filter_type label > .wrap .wrap-inner{  
+    padding: 2px;
+}
+#filter_type label > .wrap .wrap-inner input{
+    width: 1px
+}
+#filter-columns-type{
+    border:0;
+    padding:0.5;
+}
+#filter-columns-size{
+    border:0;
+    padding:0.5;
+}
+#box-filter > .form{
+    border: 0
+}
+
+/* === Added scaling for plots === */
+#line-chart, 
+#boxplot-task {
+    max-width: 100%;
+    width: 100%;
+    height: auto;
+    margin: 0 auto;
+    display: block;
+}
+
+/* nasconde la barra degli strumenti Plotly */
+.modebar {
+    display: none !important;
+}
+
+"""
+
+get_window_url_params = """
+    function(url_params) {
+        const params = new URLSearchParams(window.location.search);
+        url_params = Object.fromEntries(params);
+        return url_params;
+    }
+    """
+
diff --git a/src/display/.ipynb_checkpoints/formatting-checkpoint.py b/src/display/.ipynb_checkpoints/formatting-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba340b8c51c98be420f01682eedda01099dc92a3
--- /dev/null
+++ b/src/display/.ipynb_checkpoints/formatting-checkpoint.py
@@ -0,0 +1,30 @@
+def model_hyperlink(link, model_name):
+    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+
+
+def make_clickable_model(model_name):
+    """Return an HTML link to the model's Hugging Face page, labelled with the name minus its author prefix."""
+    hub_url = f"https://huggingface.co/{model_name}"
+    # rpartition keeps what follows the last "/" (the whole name when there is no "/")
+    short_name = model_name.rpartition("/")[2]
+    return model_hyperlink(hub_url, short_name)
+
+
+def styled_error(error):
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
+
+
+def styled_warning(warn):
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"
+
+
+def styled_message(message):
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
+
+
+def has_no_nan_values(df, columns):
+    return df[columns].notna().all(axis=1)
+
+
+def has_nan_values(df, columns):
+    return df[columns].isna().any(axis=1)
diff --git a/src/display/.ipynb_checkpoints/utils-checkpoint.py b/src/display/.ipynb_checkpoints/utils-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..f11c07243e402f48a8ba018a8942b2190e5747e7
--- /dev/null
+++ b/src/display/.ipynb_checkpoints/utils-checkpoint.py
@@ -0,0 +1,188 @@
+from dataclasses import dataclass, make_dataclass
+from enum import Enum
+
+import pandas as pd
+
+from src.about import Tasks
+
+def fields(raw_class):  # values of every class attribute whose name neither starts nor ends with "__"
+    return [member for attr, member in raw_class.__dict__.items() if not (attr.startswith("__") or attr.endswith("__"))]
+
+
+# These classes are for user facing column names,
+# to avoid having to change them all around the code
+# when a modif is needed
+@dataclass(frozen=True)  # frozen => hashable, so instances are accepted as make_dataclass field defaults on Python 3.11+
+class ColumnContent:
+    name: str  # user-facing column header
+    type: str  # column datatype tag, e.g. "number", "str", "markdown", "bool"
+    displayed_by_default: bool  # whether the column is shown before the user changes the selection
+    hidden: bool = False  # hidden columns are left out of COLS
+    never_hidden: bool = False  # presumably marks columns the user cannot deselect; confirm in the UI code
+
+## Leaderboard columns
+auto_eval_column_dict = []
+# Init
+#auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+
+auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("Rank", "number", True, never_hidden=True)])
+auto_eval_column_dict.append(["size_symbol", ColumnContent, ColumnContent("Size", "number", True, never_hidden=True)])
+
+auto_eval_column_dict.append(["fewshot_symbol", ColumnContent, ColumnContent("FS", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["is_5fewshot", ColumnContent, ColumnContent("IS_FS", "bool", True)])
+
+auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+#auto_eval_column_dict.append(["fewshot", ColumnContent, ColumnContent("Few-Shot", "str", True)])
+
+#Scores
+auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg. Comb. Perf. ⬆️", "number", True)])
+for task in Tasks:
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+
+# Model information
+#auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+#auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+#auto_eval_column_dict.append(["submitted_time", ColumnContent, ColumnContent("Submitted time", "date", False)])
+
+# We use make dataclass to dynamically fill the scores from Tasks
+AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
+
+## For the queue columns in the submission tab
+@dataclass(frozen=True)
+class EvalQueueColumn:  # Queue column; attributes are plain class-level ColumnContent constants read through fields()
+    model = ColumnContent("model", "markdown", True)
+    revision = ColumnContent("revision", "str", True)
+    private = ColumnContent("private", "bool", True)
+    #precision = ColumnContent("precision", "str", True)
+    weight_type = ColumnContent("weight_type", "str", True)  # was "Original": the third argument is the displayed_by_default flag, not a default value
+    status = ColumnContent("status", "str", True)
+
+## All the model information that we might need
+@dataclass
+class ModelDetails:
+    name: str
+    display_name: str = ""
+    symbol: str = "" # emoji
+
+
+class ModelType(Enum):
+    PT = ModelDetails(name="pretrained", symbol="🟢")
+    FT = ModelDetails(name="fine-tuned", symbol="🔶")
+    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
+    RL = ModelDetails(name="RL-tuned", symbol="🟦")
+    Unknown = ModelDetails(name="", symbol="?")
+
+    def to_str(self, separator=" "):
+        return f"{self.value.symbol}{separator}{self.value.name}"
+
+    @staticmethod
+    def from_str(type):
+        if "fine-tuned" in type or "🔶" in type:
+            return ModelType.FT
+        if "pretrained" in type or "🟢" in type:
+            return ModelType.PT
+        if "RL-tuned" in type or "🟦" in type:
+            return ModelType.RL
+        if "instruction-tuned" in type or "⭕" in type:
+            return ModelType.IFT
+        return ModelType.Unknown
+
+@dataclass
+class FewShotDetails:
+    name: str
+    symbol: str = ""  # emoji
+
+class FewShotType(Enum):
+    ZS = FewShotDetails(name="zero-shot", symbol="🅾️")
+    FS = FewShotDetails(name="5-few-shot", symbol="5️⃣")
+    Unknown = FewShotDetails(name="unknown", symbol="❓")
+
+    def to_str(self, separator=" "):
+        return f"{self.value.symbol}{separator}{self.value.name}"
+
+    @staticmethod
+    def from_num_fewshot(is_5fewshot):
+        """Map the is_5fewshot flag to ZS (exactly False), FS (exactly True) or Unknown (anything else, e.g. None)."""
+        if is_5fewshot is False:
+            return FewShotType.ZS
+        elif is_5fewshot is True:
+            return FewShotType.FS
+        return FewShotType.Unknown
+
+@dataclass
+class SizeDetails:
+    name: str
+    symbol: str = ""  # emoji
+
+class SizeType(Enum):
+    SMALL = SizeDetails(name="small", symbol="🔵")
+    MEDIUM = SizeDetails(name="medium", symbol="🔵🔵")
+    LARGE = SizeDetails(name="large", symbol="🔵🔵🔵")
+    Unknown = SizeDetails(name="unknown", symbol="❓")
+
+    def to_str(self, separator=" "):
+        return f"{self.value.symbol}{separator}{self.value.name}"
+
+    @staticmethod
+    def num2type(size):
+        """Bucket a numeric model size: <= 10 is SMALL, <= 50 is MEDIUM, larger is LARGE (unit presumably billions of parameters; confirm)."""
+        if size <= 10:
+            return SizeType.SMALL
+        elif size > 10 and size <= 50:
+            return SizeType.MEDIUM
+        else:
+            return SizeType.LARGE
+
+class WeightType(Enum):
+    Adapter = ModelDetails("Adapter")
+    Original = ModelDetails("Original")
+    Delta = ModelDetails("Delta")
+
+class Precision(Enum):  # weight precisions the leaderboard distinguishes
+    float16 = ModelDetails("float16")
+    bfloat16 = ModelDetails("bfloat16")
+    Unknown = ModelDetails("?")
+    @staticmethod  # matches ModelType.from_str; without it a call on a member would bind the member to `precision`
+    def from_str(precision):
+        if precision in ["torch.float16", "float16"]:
+            return Precision.float16
+        if precision in ["torch.bfloat16", "bfloat16"]:
+            return Precision.bfloat16
+        return Precision.Unknown  # any other string, including "" and "float32"
+
+# Column selection
+COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
+
+EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
+EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
+
+BENCHMARK_COLS = [t.value.col_name for t in Tasks]
+
+'''
+# Nuovi valori per CPS, AVERAGE, BEST, e ID nella tabella
+@dataclass
+class NewColumnContent:
+    name: str
+    type: str
+    displayed_by_default: bool
+    hidden: bool = False
+    never_hidden: bool = False
+'''
+
+'''
+new_column_dict = []
+# Aggiungi CPS, VERAGE, BEST, ID
+new_column_dict.append(["CPS", NewColumnContent, NewColumnContent("CPS", "number", True)])
+new_column_dict.append(["AVERAGE", NewColumnContent, NewColumnContent("Average ⬆️", "number", True)])
+new_column_dict.append(["BEST", NewColumnContent, NewColumnContent("Best Performance", "number", True)])
+new_column_dict.append(["ID", NewColumnContent, NewColumnContent("ID", "str", True)])
+NewColumn = make_dataclass("NewColumn", new_column_dict, frozen=True)
+NEW_COLS = [c.name for c in fields(NewColumn) if not c.hidden]
+'''
diff --git a/src/display/__pycache__/css_html_js.cpython-310.pyc b/src/display/__pycache__/css_html_js.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1449d56d8d763c9a9c0645e8c9c5598a7b29738f
Binary files /dev/null and b/src/display/__pycache__/css_html_js.cpython-310.pyc differ
diff --git a/src/display/__pycache__/formatting.cpython-310.pyc b/src/display/__pycache__/formatting.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e183b7463adcf188048f008b51c49e84531113a4
Binary files /dev/null and b/src/display/__pycache__/formatting.cpython-310.pyc differ
diff --git a/src/display/__pycache__/utils.cpython-310.pyc b/src/display/__pycache__/utils.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5c7f9272e5f5572d8022b193d6b66762b4d49c24
Binary files /dev/null and b/src/display/__pycache__/utils.cpython-310.pyc differ
diff --git a/src/display/css_html_js.py b/src/display/css_html_js.py
new file mode 100644
index 0000000000000000000000000000000000000000..70f9e59d881e34cf3b6a371bff0a9b56876dba17
--- /dev/null
+++ b/src/display/css_html_js.py
@@ -0,0 +1,139 @@
+custom_css = """
+/* gray background behind the combo row only */
+#filters-wrap {
+  background: #f2f3f5;       /* light gray */
+  border-radius: 12px;       /* optional */
+  padding: 12px 16px;        /* breathing room */
+  margin-bottom: 8px;        /* space before plots */
+  box-sizing: border-box;
+}
+.markdown-text1 {
+    font-size: 16px !important;
+     max-height: 300px;   /* adjust height as you like */
+    overflow-y: auto;    /* vertical scroll when text is too long */
+    overflow-x: hidden;  /* hide horizontal scroll bar completely */
+    white-space: normal; /* allow line wrapping */
+    word-wrap: break-word;
+    display: block;
+    padding-right: 8px;  /* optional: avoid text sticking to scrollbar */
+}
+.markdown-text {
+    font-size: 16px !important;
+     
+}
+#models-to-add-text {
+    font-size: 18px !important;
+}
+
+#citation-button span {
+    font-size: 16px !important;
+}
+
+#citation-button textarea {
+    font-size: 16px !important;
+}
+
+#citation-button > label > button {
+    margin: 6px;
+    transform: scale(1.3);
+}
+
+#leaderboard-table {
+    margin-top: 15px
+}
+
+#leaderboard-table-lite {
+    margin-top: 15px
+}
+
+#search-bar-table-box > div:first-child {
+    background: none;
+    border: none;
+}
+ 
+#search-bar {
+    padding: 0px;
+}
+
+/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
+#leaderboard-table td:nth-child(2),
+#leaderboard-table th:nth-child(2) {
+    max-width: 400px;
+    overflow: auto;
+    white-space: nowrap;
+}
+
+.tab-buttons button {
+    font-size: 20px;
+}
+
+#scale-logo {
+    border-style: none !important;
+    box-shadow: none;
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+    max-width: 600px;
+}
+
+#scale-logo .download {
+    display: none;
+}
+#filter_type{
+    border: 0;
+    padding-left: 0;
+    padding-top: 0;
+}
+#filter_type label {
+    display: flex;
+}
+#filter_type label > span{
+    margin-top: var(--spacing-lg);
+    margin-right: 0.5em;
+}
+#filter_type label > .wrap{
+    width: 103px;
+}
+#filter_type label > .wrap .wrap-inner{  
+    padding: 2px;
+}
+#filter_type label > .wrap .wrap-inner input{
+    width: 1px
+}
+#filter-columns-type{
+    border:0;
+    padding:0.5;
+}
+#filter-columns-size{
+    border:0;
+    padding:0.5;
+}
+#box-filter > .form{
+    border: 0
+}
+
+/* === Added scaling for plots === */
+#line-chart, 
+#boxplot-task {
+    max-width: 100%;
+    width: 100%;
+    height: auto;
+    margin: 0 auto;
+    display: block;
+}
+
+/* nasconde la barra degli strumenti Plotly */
+.modebar {
+    display: none !important;
+}
+
+"""
+
+get_window_url_params = """
+    function(url_params) {
+        const params = new URLSearchParams(window.location.search);
+        url_params = Object.fromEntries(params);
+        return url_params;
+    }
+    """
+
diff --git a/src/display/formatting.py b/src/display/formatting.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba340b8c51c98be420f01682eedda01099dc92a3
--- /dev/null
+++ b/src/display/formatting.py
@@ -0,0 +1,30 @@
+def model_hyperlink(link, model_name):
+    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+
+
+def make_clickable_model(model_name):
+    link = f"https://huggingface.co/{model_name}"
+    #Remove author prefix from model names for EVALITA-LLM
+    model_name = model_name.split("/")[-1]
+    #print(model_name)
+    return model_hyperlink(link, model_name)
+
+
+def styled_error(error):
+    return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
+
+
+def styled_warning(warn):
+    return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"
+
+
+def styled_message(message):
+    return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
+
+
+def has_no_nan_values(df, columns):
+    return df[columns].notna().all(axis=1)
+
+
+def has_nan_values(df, columns):
+    return df[columns].isna().any(axis=1)
diff --git a/src/display/utils.py b/src/display/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..ce06546426f490f8841725ec7b93d7b8c3fab089
--- /dev/null
+++ b/src/display/utils.py
@@ -0,0 +1,189 @@
+from dataclasses import dataclass, make_dataclass
+from enum import Enum
+
+import pandas as pd
+
+from src.about import Tasks
+
+def fields(raw_class):
+    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
+
+
+# These classes are for user facing column names,
+# to avoid having to change them all around the code
+# when a modif is needed
+@dataclass
+class ColumnContent:
+    name: str
+    type: str
+    displayed_by_default: bool
+    hidden: bool = False
+    never_hidden: bool = False
+
+## Leaderboard columns
+auto_eval_column_dict = []
+# Init
+#auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+
+auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("Rank", "number", True, never_hidden=True)])
+auto_eval_column_dict.append(["size_symbol", ColumnContent, ColumnContent("Size", "number", True, never_hidden=True)])
+
+auto_eval_column_dict.append(["fewshot_symbol", ColumnContent, ColumnContent("FS", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["is_5fewshot", ColumnContent, ColumnContent("IS_FS", "bool", True)])
+auto_eval_column_dict.append(["LANG", ColumnContent, ColumnContent("LANG", "str", True, never_hidden=True)])
+
+auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+#auto_eval_column_dict.append(["fewshot", ColumnContent, ColumnContent("Few-Shot", "str", True)])
+
+#Scores
+auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg. Comb. Perf. ⬆️", "number", True)])
+for task in Tasks:
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+
+# Model information
+#auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+#auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
+auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
+auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+#auto_eval_column_dict.append(["submitted_time", ColumnContent, ColumnContent("Submitted time", "date", False)])
+
+# We use make dataclass to dynamically fill the scores from Tasks
+AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
+
+## For the queue columns in the submission tab
+@dataclass(frozen=True)
+class EvalQueueColumn:  # Queue column
+    model = ColumnContent("model", "markdown", True)
+    revision = ColumnContent("revision", "str", True)
+    private = ColumnContent("private", "bool", True)
+    #precision = ColumnContent("precision", "str", True)
+    weight_type = ColumnContent("weight_type", "str", "Original")
+    status = ColumnContent("status", "str", True)
+
+## All the model information that we might need
+@dataclass
+class ModelDetails:
+    name: str
+    display_name: str = ""
+    symbol: str = "" # emoji
+
+
+class ModelType(Enum):
+    PT = ModelDetails(name="pretrained", symbol="🟢")
+    FT = ModelDetails(name="fine-tuned", symbol="🔶")
+    IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
+    RL = ModelDetails(name="RL-tuned", symbol="🟦")
+    Unknown = ModelDetails(name="", symbol="?")
+
+    def to_str(self, separator=" "):
+        return f"{self.value.symbol}{separator}{self.value.name}"
+
+    @staticmethod
+    def from_str(type):
+        if "fine-tuned" in type or "🔶" in type:
+            return ModelType.FT
+        if "pretrained" in type or "🟢" in type:
+            return ModelType.PT
+        if "RL-tuned" in type or "🟦" in type:
+            return ModelType.RL
+        if "instruction-tuned" in type or "⭕" in type:
+            return ModelType.IFT
+        return ModelType.Unknown
+
+@dataclass
+class FewShotDetails:
+    name: str
+    symbol: str = ""  # emoji
+
+class FewShotType(Enum):
+    ZS = FewShotDetails(name="zero-shot", symbol="🅾️")
+    FS = FewShotDetails(name="10-few-shot", symbol="🔟")
+    Unknown = FewShotDetails(name="unknown", symbol="❓")
+
+    def to_str(self, separator=" "):
+        return f"{self.value.symbol}{separator}{self.value.name}"
+
+    @staticmethod
+    def from_num_fewshot(is_5fewshot):
+        """Determines FewShotType based on num_fewshot."""
+        if is_5fewshot is False:
+            return FewShotType.ZS
+        elif is_5fewshot is True:
+            return FewShotType.FS
+        return FewShotType.Unknown
+
+@dataclass
+class SizeDetails:
+    name: str
+    symbol: str = ""  # emoji
+
+class SizeType(Enum):
+    SMALL = SizeDetails(name="small", symbol="🔵")
+    MEDIUM = SizeDetails(name="medium", symbol="🔵🔵")
+    LARGE = SizeDetails(name="large", symbol="🔵🔵🔵")
+    Unknown = SizeDetails(name="unknown", symbol="❓")
+
+    def to_str(self, separator=" "):
+        return f"{self.value.symbol}{separator}{self.value.name}"
+
+    @staticmethod
+    def num2type(size):
+        """Determines FewShotType based on num_fewshot."""
+        if size <= 10:
+            return SizeType.SMALL
+        elif size > 10 and size <= 50:
+            return SizeType.MEDIUM
+        else:
+            return SizeType.LARGE
+
+# Kinds of model weights; each member wraps a ModelDetails carrying only a name.
+class WeightType(Enum):
+    Adapter = ModelDetails("Adapter")
+    Original = ModelDetails("Original")
+    Delta = ModelDetails("Delta")
+
+class Precision(Enum):
+    float16 = ModelDetails("float16")
+    bfloat16 = ModelDetails("bfloat16")
+    Unknown = ModelDetails("?")
+
+    def from_str(precision):
+        if precision in ["torch.float16", "float16"]:
+            return Precision.float16
+        if precision in ["torch.bfloat16", "bfloat16"]:
+            return Precision.bfloat16
+        return Precision.Unknown
+
+# Column selection
+# Names of the leaderboard columns that are not flagged as hidden
+COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
+
+# Names and type tags of the evaluation-queue columns (same order in both lists)
+EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
+EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
+
+# Column names of the per-task scores
+BENCHMARK_COLS = [t.value.col_name for t in Tasks]
+
+# NOTE(review): the two string literals below hold disabled code (an alternative
+# CPS / Average / Best / ID column set). They are evaluated as bare strings and
+# have no effect at runtime.
+'''
+# Nuovi valori per CPS, AVERAGE, BEST, e ID nella tabella
+@dataclass
+class NewColumnContent:
+    name: str
+    type: str
+    displayed_by_default: bool
+    hidden: bool = False
+    never_hidden: bool = False
+'''
+
+'''
+new_column_dict = []
+# Aggiungi CPS, VERAGE, BEST, ID
+new_column_dict.append(["CPS", NewColumnContent, NewColumnContent("CPS", "number", True)])
+new_column_dict.append(["AVERAGE", NewColumnContent, NewColumnContent("Average ⬆️", "number", True)])
+new_column_dict.append(["BEST", NewColumnContent, NewColumnContent("Best Performance", "number", True)])
+new_column_dict.append(["ID", NewColumnContent, NewColumnContent("ID", "str", True)])
+NewColumn = make_dataclass("NewColumn", new_column_dict, frozen=True)
+NEW_COLS = [c.name for c in fields(NewColumn) if not c.hidden]
+'''
diff --git a/src/envs.py b/src/envs.py
new file mode 100644
index 0000000000000000000000000000000000000000..9db342d84e248b24bae574cb6cb33a42efa92c04
--- /dev/null
+++ b/src/envs.py
@@ -0,0 +1,46 @@
+import os
+
+from huggingface_hub import HfApi
+
+# Info to change for your repository
+# ----------------------------------
+# Read from the HF_TOKEN environment variable; None when the variable is not set.
+TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
+
+# Hub account that owns the repositories below. Change to your org - don't forget
+# to create a results and request dataset, with the correct format!
+OWNER = "saeedfarzi"
+# ----------------------------------
+
+# Hub repository ids, all placed under OWNER
+REPO_ID = f"{OWNER}/MediLingua_Leaderboard"
+QUEUE_REPO = f"{OWNER}/e3c_llm_requests"
+RESULTS_REPO = f"{OWNER}/e3c_llm_results"
+
+# Directory that contains this file
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+
+# Go one directory up from BASE_DIR
+PARENT_DIR = os.path.dirname(BASE_DIR)
+
+# Local request/result folders, located in PARENT_DIR (one level above BASE_DIR)
+EVAL_REQUESTS_PATH = os.path.join(PARENT_DIR, "e3c_llm_requests")
+EVAL_RESULTS_PATH = os.path.join(PARENT_DIR, "e3c_llm_results")
+# The backend paths are aliases of the two folders above
+EVAL_REQUESTS_PATH_BACKEND = EVAL_REQUESTS_PATH
+EVAL_RESULTS_PATH_BACKEND = EVAL_RESULTS_PATH
+
+# Hugging Face Hub client created with TOKEN
+API = HfApi(token=TOKEN)
diff --git a/src/leaderboard/.ipynb_checkpoints/read_evals-checkpoint.py b/src/leaderboard/.ipynb_checkpoints/read_evals-checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..dee8db5a6e176e56893d5d1e39b2d009c1835832
--- /dev/null
+++ b/src/leaderboard/.ipynb_checkpoints/read_evals-checkpoint.py
@@ -0,0 +1,231 @@
+import glob
+import json
+import math
+import os
+from dataclasses import dataclass, field
+
+import dateutil
+import numpy as np
+from typing import Dict, Union
+from datetime import datetime
+
+#from get_model_info import num_params
+from src.display.formatting import make_clickable_model
+from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType, FewShotType, SizeType
+from src.submission.check_validity import is_model_on_hub
+
+
+# NOTE(review): this file lives under .ipynb_checkpoints and looks like an older
+# notebook-checkpoint copy of src/leaderboard/read_evals.py - confirm whether it
+# should be tracked at all.
+@dataclass
+class EvalResult:
+    """Represents one full evaluation, built from a model result file.
+
+    The request-file update (``update_with_request_file``) is disabled further down,
+    so ``weight_type``, ``license``, ``likes`` and ``date`` are not set by
+    ``init_from_json_file`` and keep their defaults there.
+    """
+    eval_name: str # unique id: [org_]model_<is_5fewshot>
+    full_model: str # org/model (path on hub)
+    org: str
+    model: str
+    revision: str # commit hash, "" if main
+    #submitted_time: datetime
+    results: Dict[str, Union[float, int]]  # float or int, keyed by task benchmark
+    average_CPS: float
+    is_5fewshot: bool
+    fewshot_symbol: FewShotType = FewShotType.Unknown
+    weight_type: WeightType = WeightType.Original # Original or Adapter
+    architecture: str = "Unknown"
+    license: str = "?"
+    likes: int = 0
+    num_params: int = 0
+    date: str = "" # submission date of request file
+    still_on_hub: bool = False
+    rank: int = 0  # new field, defaults to 0
+    size_symbol: SizeType = SizeType.Unknown
+
+    @classmethod
+    def init_from_json_file(self, json_filepath):
+        """Inits the result from the specific model result file.
+
+        Reads the "config", "average_CPS" and "tasks" entries of the JSON file and
+        calls ``is_model_on_hub`` for the availability flag and the model config.
+        Note that, being a classmethod, the first parameter (named ``self``) is
+        actually the class.
+        """
+        with open(json_filepath) as fp:
+            data = json.load(fp)
+
+        config = data.get("config")
+
+        #average_CPS = f"{data.get('average_CPS'):.2f}"
+        # Get average_CPS
+        average_CPS = float(data.get('average_CPS', 0.0))  # 0.0 is the default value
+        # Get number of fewshot
+        fewshot = config.get("num_fewshot", False)
+
+        rank = 0
+
+        # NOTE(review): nothing inside this try block raises ValueError, so the
+        # except branch looks unreachable. Only the string "5" sets the flag.
+        try:
+            if fewshot == "5":
+                is_5fewshot = True
+            else:
+                is_5fewshot = False
+        except ValueError:
+            is_5fewshot = False
+        # Determine the few-shot type (ZS or FS) based on num_fewshot
+        fewshot_symbol = FewShotType.from_num_fewshot(is_5fewshot)
+
+        # Determine the number of parameters of the models
+        # (billions rounded up; stays 0 when "num_params_billion" is absent)
+        num_params = int(0)
+        num_params_billion = config.get("num_params_billion")
+        if num_params_billion is not None:
+            num_params = math.ceil(num_params_billion)
+
+        size_symbol = SizeType.num2type(num_params)
+
+        # Get model and org ("model_name" is preferred, "model_args" is the fallback)
+        org_and_model = config.get("model_name", config.get("model_args", None))
+        org_and_model = org_and_model.split("/", 1)
+
+        if len(org_and_model) == 1:
+            # No "/" in the name: there is no organisation part
+            org = None
+            model = org_and_model[0]
+            #result_key = f"{model}_{precision.value.name}"
+            result_key = f"{model}_{is_5fewshot}"
+        else:
+            org = org_and_model[0]
+            model = org_and_model[1]
+            #result_key = f"{org}_{model}_{precision.value.name}"
+            result_key = f"{org}_{model}_{is_5fewshot}"
+        full_model = "/".join(org_and_model)
+
+        still_on_hub, _, model_config = is_model_on_hub(
+            full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
+        )
+        # "?" is kept when the config is missing or lists no architectures
+        architecture = "?"
+        if model_config is not None:
+            architectures = getattr(model_config, "architectures", None)
+            if architectures:
+                architecture = ";".join(architectures)
+
+        # Extract the results of the models
+        results = {}
+        for task in Tasks:
+            task = task.value
+
+            for k, v in data["tasks"].items():
+                # A file key matches the task benchmark without its last two characters
+                if task.benchmark[:-2] == k:
+                    if "Best Prompt Id" in task.col_name:
+                        # Only the last character of the metric value is kept, as an int
+                        results[task.benchmark] = int(v[task.metric_type][-1:])
+                    else:
+                        #results[task.benchmark] = f"{v[task.metric_type]:.2f}"  # Ensure two decimals for display
+                        results[task.benchmark] = float(v[task.metric_type])
+                        #value = float(v[task.metric_type])
+                        #results[task.benchmark] = round(value, 2)  # Round to 2 decimals
+
+        return self(
+            eval_name=result_key,
+            full_model=full_model,
+            org=org,
+            model=model,
+            results=results,
+            average_CPS=average_CPS,
+            fewshot_symbol=fewshot_symbol,
+            is_5fewshot=is_5fewshot,
+            revision= config.get("model_sha", ""),
+            still_on_hub=still_on_hub,
+            architecture=architecture,
+            num_params=num_params,
+            rank = rank,
+            size_symbol=size_symbol
+            #submitted_time=config.get("submitted_time", ""),
+        )
+
+    # The string literal below is a disabled method kept for reference; it has no effect.
+    '''
+    def update_with_request_file(self, requests_path):
+        """Finds the relevant request file for the current model and updates info with it"""
+        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
+
+        try:
+            with open(request_file, "r") as f:
+                request = json.load(f)
+            self.model_type = ModelType.from_str(request.get("model_type", ""))
+            self.weight_type = WeightType[request.get("weight_type", "Original")]
+            self.license = request.get("license", "?")
+            self.likes = request.get("likes", 0)
+            self.num_params = request.get("params", 0)
+            self.date = request.get("submitted_time", "")
+        except Exception:
+            print(f"Could not find request file for {self.org}/{self.model} with precision 
+    '''
+
+    def to_dict(self):
+        """Converts the Eval Result to a dict compatible with our dataframe display.
+
+        Raises KeyError when ``self.results`` lacks the benchmark of any task in ``Tasks``.
+        """
+        average = self.average_CPS
+
+        # Fall back to "❓" when the stored value is not an enum member
+        fewshot_symbol = (
+            self.fewshot_symbol.value.symbol if isinstance(self.fewshot_symbol, FewShotType) else "❓"
+        )
+
+        size_symbol = (
+            self.size_symbol.value.symbol if isinstance(self.size_symbol, SizeType) else "❓"
+        )
+
+        data_dict = {
+            "eval_name": self.eval_name,  # not a column, just a save name,
+            #AutoEvalColumn.precision.name: self.precision.value.name,
+            #AutoEvalColumn.model_type.name: self.model_type.value.name,
+            #AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
+            #AutoEvalColumn.model_type.name: self.model_type.value.name if self.model_type else "Unknown",
+            #AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol if self.model_type else "Unknown",
+            AutoEvalColumn.fewshot_symbol.name: fewshot_symbol,
+            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
+            AutoEvalColumn.architecture.name: self.architecture,
+            AutoEvalColumn.model.name: make_clickable_model(self.full_model),
+            AutoEvalColumn.revision.name: self.revision,
+            AutoEvalColumn.average.name: average,
+            AutoEvalColumn.is_5fewshot.name: self.is_5fewshot,
+            AutoEvalColumn.license.name: self.license,
+            AutoEvalColumn.likes.name: self.likes,
+            AutoEvalColumn.params.name: self.num_params,
+            AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            AutoEvalColumn.rank.name: self.rank,
+            AutoEvalColumn.size_symbol.name: size_symbol
+        }
+
+        # One entry per task, keyed by the task's display column name
+        for task in Tasks:
+            data_dict[task.value.col_name] = self.results[task.value.benchmark]
+
+        return data_dict
+
+
+def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
+    """From the path of the results folder root, extract all needed info for results"""
+    model_result_filepaths = []
+
+    for root, _, files in os.walk(results_path):
+        # We should only have json files in model results
+        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
+            continue
+
+        # Sort the files by date
+        try:
+            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
+        except dateutil.parser._parser.ParserError:
+            files = [files[-1]]
+
+        for file in files:
+            model_result_filepaths.append(os.path.join(root, file))
+
+    eval_results = {}
+    for model_result_filepath in model_result_filepaths:
+        # Creation of result
+        eval_result = EvalResult.init_from_json_file(model_result_filepath)
+        #eval_result.update_with_request_file(requests_path)
+
+        # Store results of same eval together
+        eval_name = eval_result.eval_name
+        if eval_name in eval_results.keys():
+            eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
+        else:
+            eval_results[eval_name] = eval_result
+
+    results = []
+    for v in eval_results.values():
+        try:
+            v.to_dict() # we test if the dict version is complete
+            results.append(v)
+        except KeyError:  # not all eval values present
+            continue
+
+    return results
diff --git a/src/leaderboard/__pycache__/read_evals.cpython-310.pyc b/src/leaderboard/__pycache__/read_evals.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..589cc9af8005efdec6ec464513f3be2ce6d3804f
Binary files /dev/null and b/src/leaderboard/__pycache__/read_evals.cpython-310.pyc differ
diff --git a/src/leaderboard/read_evals.py b/src/leaderboard/read_evals.py
new file mode 100644
index 0000000000000000000000000000000000000000..213d6eeb8364e23ec83252752376f6bbe945999f
--- /dev/null
+++ b/src/leaderboard/read_evals.py
@@ -0,0 +1,243 @@
+import glob
+import json
+import math
+import os
+from dataclasses import dataclass, field
+
+import dateutil
+import numpy as np
+from typing import Dict, Union
+from datetime import datetime
+
+#from get_model_info import num_params
+from src.display.formatting import make_clickable_model
+from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType, FewShotType, SizeType
+from src.submission.check_validity import is_model_on_hub
+
+
+@dataclass
+class EvalResult:
+    """Represents one full evaluation. Built from a combination of the result and request file for a given run.
+    """
+    eval_name: str # org_model_precision (uid)
+    full_model: str # org/model (path on hub)
+    org: str 
+    model: str
+    revision: str # commit hash, "" if main
+    #submitted_time: datetime
+    results: Dict[str, Union[float, int]]  # float o int
+    average_CPS: float
+    is_5fewshot: bool
+    fewshot_symbol: FewShotType = FewShotType.Unknown
+    weight_type: WeightType = WeightType.Original # Original or Adapter
+    architecture: str = "Unknown" 
+    license: str = "?"
+    likes: int = 0
+    Lang:str="EN"
+    num_params: int = 0
+    date: str = "" # submission date of request file
+    still_on_hub: bool = False
+    rank: int = 0#str = field(default=0)  # nuovo campo con default = 0
+    size_symbol: SizeType = SizeType.Unknown
+
+    @classmethod
+    def init_from_json_file(self, json_filepath):
+        """Inits the result from the specific model result file"""
+        with open(json_filepath) as fp:
+            data = json.load(fp)
+
+        config = data.get("config")
+
+        #average_CPS = f"{data.get('average_CPS'):.2f}"
+        # Get average_CPS
+        average_CPS = float(data.get('average_CPS', 0.0))  # 0.0 come valore di default
+        # Get number of fewshot
+        fewshot = config.get("num_fewshot", False)
+
+        rank = 0
+        Lang=config.get("LANG", "EN")
+        try:
+            if fewshot == "10":
+                is_5fewshot = True
+            else:
+                is_5fewshot = False
+        except ValueError:
+            is_5fewshot = False
+        # Determine the few-shot type (ZS or FS) based on num_fewshot
+        fewshot_symbol = FewShotType.from_num_fewshot(is_5fewshot)  # Use the new
+
+        # Determine the number of parameters of the models
+        num_params = int(0)
+        num_params_billion = config.get("num_params_billion")
+        if num_params_billion is not None:
+            num_params = math.ceil(num_params_billion)
+
+        size_symbol = SizeType.num2type(num_params)
+
+        # Get model and org
+        org_and_model = config.get("model_name", config.get("model_args", None))
+        org_and_model = org_and_model.split("/", 1)
+
+        if len(org_and_model) == 1:
+            org = None
+            model = org_and_model[0]
+            #result_key = f"{model}_{precision.value.name}"
+            result_key = f"{model}_{is_5fewshot}"
+        else:
+            org = org_and_model[0]
+            model = org_and_model[1]
+            #result_key = f"{org}_{model}_{precision.value.name}"
+            result_key = f"{org}_{model}_{is_5fewshot}"
+        full_model = "/".join(org_and_model)
+
+        still_on_hub, _, model_config = is_model_on_hub(
+            full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
+        )
+        architecture = "?"
+        if model_config is not None:
+            architectures = getattr(model_config, "architectures", None)
+            if architectures:
+                architecture = ";".join(architectures)
+
+        # Extract the results of the models
+        results = {}
+        #print (data)
+        #print ("Tasks", Tasks)
+        for task in Tasks:
+            task = task.value
+            
+            for k, v in data["tasks"].items():
+                #print ("k , v ", k, v)
+                #print ("benchmark: ",task.benchmark) 
+                if task.benchmark[:-2] == k:
+                    if "Best Prompt Id" in task.col_name:
+                        if v["is_dummy"] : results[task.benchmark]="n/a"
+                        else : results[task.benchmark] = int(v[task.metric_type][-1:])
+                    else:
+                        #results[task.benchmark] = f"{v[task.metric_type]:.2f}"  # Ensure two decimals for display
+                        #print (v)
+                        if v["is_dummy"] : results[task.benchmark]="n/a"
+                        else: results[task.benchmark] = round(float(v[task.metric_type]), 2)#float(v[task.metric_type])
+                        #value = float(v[task.metric_type])
+                        #results[task.benchmark] = round(value, 2)  # Arrotonda a 2 decimali
+        #print (results)
+        #print ( "************ End of Reading file ****************")
+        return self(
+            eval_name=result_key+"_"+Lang, #result_key,
+            full_model=full_model,
+            Lang=Lang,
+            org=org,
+            model=model,
+            results=results,
+            average_CPS=average_CPS,
+            fewshot_symbol=fewshot_symbol,
+            is_5fewshot=is_5fewshot,
+            revision= config.get("model_sha", ""),
+            still_on_hub=still_on_hub,
+            architecture=architecture,
+            num_params=num_params,
+            rank = rank,
+            size_symbol=size_symbol
+            #submitted_time=config.get("submitted_time", ""),
+        )
+
+    '''
+    def update_with_request_file(self, requests_path):
+        """Finds the relevant request file for the current model and updates info with it"""
+        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
+
+        try:
+            with open(request_file, "r") as f:
+                request = json.load(f)
+            self.model_type = ModelType.from_str(request.get("model_type", ""))
+            self.weight_type = WeightType[request.get("weight_type", "Original")]
+            self.license = request.get("license", "?")
+            self.likes = request.get("likes", 0)
+            self.num_params = request.get("params", 0)
+            self.date = request.get("submitted_time", "")
+        except Exception:
+            print(f"Could not find request file for {self.org}/{self.model} with precision 
+    '''
+
+    def to_dict(self):
+        """Converts the Eval Result to a dict compatible with our dataframe display"""
+        average = self.average_CPS
+
+        fewshot_symbol = (
+            self.fewshot_symbol.value.symbol if isinstance(self.fewshot_symbol, FewShotType) else "❓"
+        )
+
+        size_symbol = (
+            self.size_symbol.value.symbol if isinstance(self.size_symbol, SizeType) else "❓"
+        )
+
+        data_dict = {
+            "eval_name": self.eval_name,  # not a column, just a save name,
+            #AutoEvalColumn.precision.name: self.precision.value.name,
+            #AutoEvalColumn.model_type.name: self.model_type.value.name,
+            #AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
+            #AutoEvalColumn.model_type.name: self.model_type.value.name if self.model_type else "Unknown",
+            #AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol if self.model_type else "Unknown",
+            AutoEvalColumn.fewshot_symbol.name: fewshot_symbol,
+            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
+            AutoEvalColumn.architecture.name: self.architecture,
+            AutoEvalColumn.model.name: make_clickable_model(self.full_model),
+            AutoEvalColumn.revision.name: self.revision,
+            AutoEvalColumn.average.name: average,
+            AutoEvalColumn.is_5fewshot.name: self.is_5fewshot,
+            AutoEvalColumn.license.name: self.license,
+            AutoEvalColumn.likes.name: self.likes,
+            AutoEvalColumn.params.name: self.num_params,
+            AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            AutoEvalColumn.rank.name: self.rank,
+            AutoEvalColumn.size_symbol.name: size_symbol,
+            AutoEvalColumn.LANG.name:self.Lang
+        }
+
+        for task in Tasks:
+            data_dict[task.value.col_name] = self.results[task.value.benchmark]
+
+        return data_dict
+
+
+def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
+    """From the path of the results folder root, extract all needed info for results"""
+    model_result_filepaths = []
+
+    for root, _, files in os.walk(results_path):
+        # We should only have json files in model results
+        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
+            continue
+
+        # Sort the files by date
+        try:
+            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
+        except dateutil.parser._parser.ParserError:
+            files = [files[-1]]
+
+        for file in files:
+            model_result_filepaths.append(os.path.join(root, file))
+
+    eval_results = {}
+    for model_result_filepath in model_result_filepaths:
+        # Creation of result
+        eval_result = EvalResult.init_from_json_file(model_result_filepath)
+        #eval_result.update_with_request_file(requests_path)
+
+        # Store results of same eval together
+        eval_name = eval_result.eval_name
+        if eval_name in eval_results.keys():
+            eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
+        else:
+            eval_results[eval_name] = eval_result
+
+    results = []
+    for v in eval_results.values():
+        try:
+            v.to_dict() # we test if the dict version is complete
+            results.append(v)
+        except KeyError:  # not all eval values present
+            #print (KeyError)
+            continue
+
+    return results
diff --git a/src/populate.py b/src/populate.py
new file mode 100644
index 0000000000000000000000000000000000000000..54d3a3b775a5c24d898f8e76e10a7d461babbd38
--- /dev/null
+++ b/src/populate.py
@@ -0,0 +1,59 @@
+import json
+import os
+
+import pandas as pd
+
+from src.display.formatting import has_no_nan_values, make_clickable_model
+from src.display.utils import AutoEvalColumn, EvalQueueColumn
+from src.leaderboard.read_evals import get_raw_eval_results
+
+
+def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+    """Create the leaderboard dataframe from all the individual experiment results.
+
+    Args:
+        results_path: Root folder scanned by ``get_raw_eval_results``.
+        requests_path: Forwarded unchanged to ``get_raw_eval_results``.
+        cols: Columns to keep, in display order.
+        benchmark_cols: Columns passed to ``has_no_nan_values`` to filter rows.
+
+    Returns:
+        A dataframe restricted to ``cols``, sorted by the average column in
+        descending order and rounded to two decimals, containing only the rows
+        kept by ``has_no_nan_values``. When no results are available an empty
+        dataframe with columns ``cols`` is returned.
+    """
+    raw_data = get_raw_eval_results(results_path, requests_path)
+    records = [result.to_dict() for result in raw_data]
+
+    # Without records the frame has no columns and sorting by the average
+    # column would raise KeyError; hand back an empty, correctly shaped frame.
+    if not records:
+        return pd.DataFrame(columns=cols)
+
+    df = pd.DataFrame.from_records(records)
+    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    df = df[cols].round(decimals=2)
+
+    # filter out if any of the benchmarks have not been produced
+    df = df[has_no_nan_values(df, benchmark_cols)]
+    return df
+
+
+def _load_queue_entry(file_path: str) -> dict:
+    """Load one request JSON file and fill in the model/revision display columns."""
+    with open(file_path) as fp:
+        data = json.load(fp)
+
+    data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+    data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+    return data
+
+
+def get_evaluation_queue_df(save_path: str, cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    """Create the dataframes for the evaluation queue requests.
+
+    Request files are read from ``save_path`` itself and from its direct
+    sub-folders. Hidden entries (names starting with ``.``) are ignored.
+
+    Args:
+        save_path: Folder holding the request JSON files.
+        cols: Columns of the returned dataframes.
+
+    Returns:
+        ``(finished, running, pending)`` dataframes, split on the ``status``
+        field of each request.
+
+    Raises:
+        KeyError: If a request file has no ``model`` or ``status`` field.
+    """
+    all_evals = []
+
+    for entry in os.listdir(save_path):
+        if entry.startswith("."):
+            continue
+
+        entry_path = os.path.join(save_path, entry)
+        if ".json" in entry:
+            all_evals.append(_load_queue_entry(entry_path))
+        elif ".md" not in entry and os.path.isdir(entry_path):
+            # this is a folder
+            for sub_entry in os.listdir(entry_path):
+                sub_path = os.path.join(entry_path, sub_entry)
+                # The file check must use the full path: a bare name would be
+                # resolved against the current working directory instead.
+                if sub_entry.startswith(".") or ".json" not in sub_entry or not os.path.isfile(sub_path):
+                    continue
+                all_evals.append(_load_queue_entry(sub_path))
+
+    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
+    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
+    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
+    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
+    df_running = pd.DataFrame.from_records(running_list, columns=cols)
+    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
+    return df_finished[cols], df_running[cols], df_pending[cols]
diff --git a/src/submission/__pycache__/check_validity.cpython-310.pyc b/src/submission/__pycache__/check_validity.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..003d1bc369d07a5f69718e741182e92e5a3d16e8
Binary files /dev/null and b/src/submission/__pycache__/check_validity.cpython-310.pyc differ
diff --git a/src/submission/__pycache__/submit.cpython-310.pyc b/src/submission/__pycache__/submit.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1b3c40cb1de073d2011bb643428ca80f61b5e5ba
Binary files /dev/null and b/src/submission/__pycache__/submit.cpython-310.pyc differ
diff --git a/src/submission/check_validity.py b/src/submission/check_validity.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c3ce45c4dacd2d600544c87584ee72c81d3b956
--- /dev/null
+++ b/src/submission/check_validity.py
@@ -0,0 +1,99 @@
+import json
+import os
+import re
+from collections import defaultdict
+from datetime import datetime, timedelta, timezone
+
+import huggingface_hub
+from huggingface_hub import ModelCard
+from huggingface_hub.hf_api import ModelInfo
+from transformers import AutoConfig
+from transformers.models.auto.tokenization_auto import AutoTokenizer
+
+def check_model_card(repo_id: str) -> tuple[bool, str]:
+    """Validate the model card of ``repo_id``.
+
+    The card must load, carry license metadata (either ``license`` or both
+    ``license_name`` and ``license_link``) and have at least 200 characters of text.
+
+    Returns:
+        ``(True, "")`` when every check passes, otherwise ``(False, message)``
+        where ``message`` tells the submitter what to fix.
+    """
+    try:
+        card = ModelCard.load(repo_id)
+    except huggingface_hub.utils.EntryNotFoundError:
+        return False, "Please add a model card to your model to explain how you trained/fine-tuned it."
+
+    # License metadata: a plain `license` is enough, otherwise both custom fields are needed
+    metadata = card.data
+    license_missing = metadata.license is None and (
+        "license_name" not in metadata or "license_link" not in metadata
+    )
+    if license_missing:
+        return False, (
+            "License not found. Please add a license to your model card using the `license` metadata or a"
+            " `license_name`/`license_link` pair."
+        )
+
+    # Card body: reject near-empty descriptions
+    description_too_short = len(card.text) < 200
+    if description_too_short:
+        return False, "Please add a description to your model card, it is too short."
+
+    return True, ""
+
+def is_model_on_hub(model_name: str, revision: str, token: str | None = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str | None, object]:
+    """Check that ``model_name`` (and optionally its tokenizer) can be loaded with the Auto classes.
+
+    Args:
+        model_name: Repository id passed to ``from_pretrained``.
+        revision: Revision passed to ``from_pretrained``.
+        token: Access token passed to ``from_pretrained``.
+        trust_remote_code: Forwarded to ``from_pretrained``.
+        test_tokenizer: When true, also try to load the tokenizer.
+
+    Returns:
+        ``(True, None, config)`` on success, otherwise ``(False, reason, None)``.
+        ``reason`` is a sentence fragment meant to be appended after the model
+        name by the caller.
+    """
+    try:
+        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+    except ValueError:
+        return (
+            False,
+            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
+            None
+        )
+    except Exception:
+        return False, "was not found on hub!", None
+
+    if test_tokenizer:
+        # Only loadability matters here; the tokenizer object itself is discarded.
+        try:
+            AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        except ValueError as e:
+            return (
+                False,
+                f"uses a tokenizer which is not in a transformers release: {e}",
+                None
+            )
+        except Exception:
+            return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
+
+    return True, None, config
+
+
+def get_model_size(model_info: ModelInfo, precision: str):
+    """Return the model size in billions of parameters, read from the safetensors metadata.
+
+    Args:
+        model_info: Hub model info; ``safetensors["total"]`` and ``modelId`` are read.
+        precision: Precision label of the submission; ``"GPTQ"`` triggers the size factor below.
+
+    Returns:
+        ``safetensors["total"] / 1e9`` rounded to 3 decimals, multiplied by 8
+        when the precision is ``"GPTQ"`` or the model id contains ``gptq``.
+        ``0`` when the parameter count is not available.
+    """
+    try:
+        model_size = round(model_info.safetensors["total"] / 1e9, 3)
+    except (AttributeError, TypeError, KeyError):
+        # Missing attribute, `None`/non-subscriptable metadata, or metadata without a "total" entry
+        return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in example_app.py
+
+    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
+    return size_factor * model_size
+
+def get_model_arch(model_info: ModelInfo):
+    """Return the ``architectures`` entry of the model configuration.
+
+    Returns ``"Unknown"`` when the entry is absent or when ``model_info`` has
+    no configuration at all (``config`` missing or ``None``).
+    """
+    config = getattr(model_info, "config", None) or {}
+    return config.get("architectures", "Unknown")
+
+def already_submitted_models(requested_models_dir: str) -> tuple[set[str], dict[str, list[str]]]:
+    """Gather the already submitted models to avoid duplicates.
+
+    Only ``.json`` files located exactly one directory level below
+    ``requested_models_dir`` are read.
+
+    Args:
+        requested_models_dir: Root folder of the request files.
+
+    Returns:
+        A pair ``(submitted, users_to_submission_dates)``:
+        ``submitted`` is the set of ``"{model}_{revision}_{precision}"`` keys
+        (``revision`` defaults to ``"main"`` when absent);
+        ``users_to_submission_dates`` maps the organisation part of
+        ``org/model`` ids to the list of their ``submitted_time`` values.
+
+    Raises:
+        KeyError: If a request file lacks ``model`` or ``precision``.
+    """
+    depth = 1
+    file_names = set()
+    users_to_submission_dates = defaultdict(list)
+
+    for root, _, files in os.walk(requested_models_dir):
+        # Depth relative to the root; unlike counting separators in the raw
+        # strings, this is not thrown off by a trailing separator in the argument.
+        rel_root = os.path.relpath(root, requested_models_dir)
+        current_depth = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1
+        if current_depth != depth:
+            continue
+
+        for file in files:
+            if not file.endswith(".json"):
+                continue
+            with open(os.path.join(root, file), "r") as f:
+                info = json.load(f)
+
+            file_names.add(f"{info['model']}_{info.get('revision', 'main')}_{info['precision']}")
+
+            # Select organisation
+            if "/" not in info["model"] or "submitted_time" not in info:
+                continue
+            # maxsplit keeps ids with more than one "/" from raising on unpacking
+            organisation = info["model"].split("/", 1)[0]
+            users_to_submission_dates[organisation].append(info["submitted_time"])
+
+    return file_names, users_to_submission_dates
diff --git a/src/submission/submit.py b/src/submission/submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..cac6ea48e803a0af42dabe5226191c769dbec71d
--- /dev/null
+++ b/src/submission/submit.py
@@ -0,0 +1,119 @@
+import json
+import os
+from datetime import datetime, timezone
+
+from src.display.formatting import styled_error, styled_message, styled_warning
+from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
+from src.submission.check_validity import (
+    already_submitted_models,
+    check_model_card,
+    get_model_size,
+    is_model_on_hub,
+)
+
+# Module-level caches filled on the first call to `add_new_eval` from
+# `already_submitted_models(EVAL_REQUESTS_PATH)`; `None` means "not loaded yet".
+REQUESTED_MODELS = None
+USERS_TO_SUBMISSION_DATES = None
+
+def add_new_eval(
+    model: str,
+    base_model: str,
+    revision: str,
+    precision: str,
+    weight_type: str,
+    model_type: str,
+):
+    """Validate a submission and push an evaluation request to the queue repository.
+
+    Args:
+        model: Model id, optionally prefixed with ``user/``.
+        base_model: Base model id, checked for ``Delta`` and ``Adapter`` weights.
+        revision: Revision to evaluate; an empty string means ``"main"``.
+        precision: Precision label; only the part before the first space is kept.
+        weight_type: Weight type label; ``"Delta"`` and ``"Adapter"`` are treated specially.
+        model_type: Model type label; must not be empty.
+
+    Returns:
+        The value of ``styled_error`` / ``styled_warning`` / ``styled_message``
+        describing the outcome.
+    """
+    global REQUESTED_MODELS
+    global USERS_TO_SUBMISSION_DATES
+    # Test for `None`, not falsiness: an empty queue yields an empty set and
+    # must not trigger a full re-scan on every call.
+    if REQUESTED_MODELS is None:
+        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
+
+    user_name = ""
+    model_path = model
+    if "/" in model:
+        model_parts = model.split("/")
+        user_name = model_parts[0]
+        model_path = model_parts[1]
+
+    precision = precision.split(" ")[0]
+    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    if model_type is None or model_type == "":
+        return styled_error("Please select a model type.")
+
+    # Does the model actually exist?
+    if revision == "":
+        revision = "main"
+
+    # Is the model on the hub?
+    if weight_type in ["Delta", "Adapter"]:
+        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
+        if not base_model_on_hub:
+            return styled_error(f'Base model "{base_model}" {error}')
+
+    if weight_type != "Adapter":
+        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
+        if not model_on_hub:
+            return styled_error(f'Model "{model}" {error}')
+
+    # Is the model info correctly filled?
+    try:
+        model_info = API.model_info(repo_id=model, revision=revision)
+    except Exception:
+        return styled_error("Could not get your model information. Please fill it up properly.")
+
+    model_size = get_model_size(model_info=model_info, precision=precision)
+
+    # Were the model card and license filled?
+    try:
+        model_license = model_info.cardData["license"]
+    except Exception:
+        return styled_error("Please select a license for your model")
+
+    modelcard_OK, error_msg = check_model_card(model)
+    if not modelcard_OK:
+        return styled_error(error_msg)
+
+    # Seems good, creating the eval
+    print("Adding new eval")
+
+    eval_entry = {
+        "model": model,
+        "base_model": base_model,
+        "revision": revision,
+        "precision": precision,
+        "weight_type": weight_type,
+        "status": "PENDING",
+        "submitted_time": current_time,
+        "model_type": model_type,
+        "likes": model_info.likes,
+        "params": model_size,
+        "license": model_license,
+        "private": False,
+    }
+
+    # Check for duplicate submission
+    submission_key = f"{model}_{revision}_{precision}"
+    if submission_key in REQUESTED_MODELS:
+        return styled_warning("This model has been already submitted.")
+
+    print("Creating eval file")
+    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
+    os.makedirs(OUT_DIR, exist_ok=True)
+    out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
+
+    with open(out_path, "w") as f:
+        f.write(json.dumps(eval_entry))
+
+    print("Uploading eval file")
+    try:
+        API.upload_file(
+            path_or_fileobj=out_path,
+            path_in_repo=out_path.split("eval-queue/")[1],
+            repo_id=QUEUE_REPO,
+            repo_type="dataset",
+            commit_message=f"Add {model} to eval queue",
+        )
+    finally:
+        # Remove the local file, also when the upload fails
+        os.remove(out_path)
+
+    # Remember the submission so a repeat within this process is reported as a duplicate
+    REQUESTED_MODELS.add(submission_key)
+    if user_name:
+        USERS_TO_SUBMISSION_DATES.setdefault(user_name, []).append(current_time)
+
+    return styled_message(
+        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
+    )
diff --git a/src/tasks.py b/src/tasks.py
new file mode 100644
index 0000000000000000000000000000000000000000..085a7e9d0aed6c071974d5882917cb01e9bf36ce
--- /dev/null
+++ b/src/tasks.py
@@ -0,0 +1,255 @@
+from dataclasses import dataclass
+from enum import Enum
+
+@dataclass
+class Task:  # Plain record describing one leaderboard task; every field is a string.
+    benchmark: str  # presumably the task/benchmark key used to look up results -- TODO confirm against callers
+    # metric: str  (commented-out field; not part of the dataclass)
+    accuracy: str  # presumably the name of the score entry to read for this task -- TODO confirm
+    col_name: str  # presumably the column header displayed for this task -- TODO confirm
+
+NUM_FEWSHOT = 0  # Change with your few shot
+# ---------------------------------------------------
+
+# Your leaderboard name
+TITLE = """<h1 align="center" id="space-title">🚀 ECREAM-LLM Leaderboard 🚀</h1>"""
+
+# What does your leaderboard evaluate?
+INTRODUCTION_TEXT = """
+ECREAM-LLM is a benchmark designed to evaluate Large Language Models (LLMs) on Italian tasks. The distinguishing features of ECREAM-LLM are the following: (i) all tasks are native Italian, avoiding translation issues and potential cultural biases; (ii) the benchmark includes generative tasks, enabling more natural interaction with LLMs; (iii) all tasks are evaluated against multiple prompts, which mitigates the model's sensitivity to specific prompts and allows a fairer evaluation.
+"""
+
+#MEASURE_DESCRIPTION = "Combined Performance = (1 - (Best_Prompt - Prompt_Average) / 100) * Best_Prompt. Prompt Average = accuracy averaged over the six prompts. Best Prompt = accuracy of the best prompt. Prompt ID = ID of the best prompt (see legend above)"
+MEASURE_DESCRIPTION = "<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the assessed prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above).</small>"
+#MEASURE_DESCRIPTION = "<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = avg. accuracy over prompts. **Best Prompt** = accuracy of best prompt. **Prompt ID** = ID of the best prompt (see legend above).</small>"
+
+# Tasks Descriptions
+RML_DESCRIPTION = """### RML (RML) --- *Multiple-choice task*
+| #   | Prompt (EN)                                                                    |
+|-----|--------------------------------------------------------------------------------|
+| 1   | You are a clinical medical expert. You must answer a question about laboratory test results and measurements for a patient. To do this, you have access to the patient’s medical history. Here is the patient’s medical history: <<<{{clinical_note}}>>> If the test/exam was performed only once, answer with the test/exam results. If the information is not contained in the medical history, answer with “not available.” Answer this question based on the patient’s medical history: <<<What are the results and measurements of {{item}}?>>> |
+| 2   | Given a clinical note describing the history of a patient, answer a question about laboratory test results and measurements. Answer with the test/exam results exactly as reported in the text. If the information is not contained in the clinical history, answer with “not available.” Here is the patient’s clinical history: <<<{{clinical_note}}>>> What are the results and measurements of {{item}}?|
+| 3   |Here is the clinical history of a patient:<<<{{clinical_note}}>>> What are the results and measurements of {{item}}?|
+
+| #   | Prompt (IT)                                                                    |
+|-----|--------------------------------------------------------------------------------|
+| 1   |Sei un esperto medico clinico. Devi rispondere ad una domanda su risultati e misure di test di laboratorio rispetto ad un paziente. Per farlo, hai a disposizione la storia clinica del paziente. Ecco la storia clinica del paziente:<<<{{clinical_note}}>>> Se il test/esame è stato effettuato solo una volta, rispondi con i risultati del test/esame. Se l'informazione non è contenuta nella storia clinica, rispondi con 'non disponibile'. Rispondi a questa domanda sulla base della storia clinica del paziente: <<<Quali sono i risultati e le misure di {{item}}?>>>  |
+| 2   | Data una nota clinica che descrive la storia di un paziente, rispondi ad una domanda su risultati e misure di test di laboratorio. Rispondi con i risultati del test/esame così come riportati nel testo. Se l'informazione non è contenuta nella storia clinica, rispondi con 'non disponibile'. Ecco la storia clinica del paziente:<<<{{clinical_note}}>>> Quali sono i risultati e le misure di {{item}}?|
+| 3   | Ecco la storia clinica di un paziente:<<<{{clinical_note}}>>> Quali sono i risultati e le misure di {{item}}?|
+
+
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the 6 prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+DIA_DESCRIPTION = """### DIAGNOSIS (DIA) --- *Multiple-choice task*
+| #   | Prompt (EN)                                                                    |
+|-----|--------------------------------------------------------------------------------|
+| 1   | You are a clinical medical expert. You must answer a question about the patient’s diagnosis. To do this, you have access to the patient’s medical history. Answer “Yes” if the patient’s definitive diagnosis is the one indicated. If the information is not contained in the medical history, answer “not available.” Patient medical history: <<<{{clinical_note}}>>> Answer this question based on the patient’s medical history: <<<Is the diagnosis {{item}}?>>> |
+| 2   | Given a patient's medical history, answer a question. Patient medical history: <<<{{clinical_note}}>>>. Question: <<<Is the definitive diagnosis {{item}}?>>>. The answer can be 'Yes', 'No' or 'not available' if the information is not contained in the medical history.|
+| 3   |Hai a disposizione una nota clinica relativa ad un paziente:  <<<{{clinical_note}}>>>. Non è detto che la nota clinica contenga informazioni rilevanti per rispondere alla domanda. In tal caso, rispondi con 'non disponibile'. Data la storia clinica sopra presentata, la diagnosi è {{item}}?|
+
+| #   | Prompt (IT)                                                                    |
+|-----|--------------------------------------------------------------------------------|
+| 1   | Sei un esperto medico clinico. Devi rispondere ad una domanda sulla diagnosi del paziente. Per farlo, hai a disposizione la storia clinica del paziente. Rispondi 'Sì' se la diagnosi definitiva del paziente è quella indicata. Se l'informazione non è contenuta nella storia clinica, rispondi con 'non disponibile'. Storia clinica: <<<{{clinical_note}}>>>. Rispondi a questa domanda sulla base della storia clinica del paziente: <<<La diagnosi è {{item}}?>>> |
+| 2   | Data la storia medica di un paziente, rispondi ad una domanda. Storia clinica: <<<{{clinical_note}}>>>. Domanda: <<<La diagnosi conclusiva è {{item}}?>>>. La risposta può essere 'Sì', 'No' oppure 'non disponibile' se l'informazione non è contenuta nella storia clinica|
+| 3   |Hai a disposizione una nota clinica relativa ad un paziente:  <<<{{clinical_note}}>>>. Non è detto che la nota clinica contenga informazioni rilevanti per rispondere alla domanda. In tal caso, rispondi con 'non disponibile'. Data la storia clinica sopra presentata, la diagnosi è {{item}}? |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1-macro averaged over the 6 prompts. **Best Prompt** = F1-macro of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+HIS_DESCRIPTION = """### HISTORY (HIS) ---  *Multiple-choice task*
+    
+
+| #   | Prompt (EN)                                                                    |
+|-----|--------------------------------------------------------------------------------|
+| 1   | You are a clinical medical expert. You must answer a question about the patient’s history. To do this, you have access to the patient’s medical history. The answer must consist of three components: polarity, contextual modality, and chronicity. You must combine these three components to answer the question. Contextual modality can be: a) “Certainly” if the answer is certain, b) “Possibly” if the answer is hypothetical, c) “Probably” if the answer is probable. Polarity can be: a) “Yes” if the answer is affirmative, b) “No” if the answer is negative. Chronicity can be: a) “Chronic” if the condition in question is certainly permanent, b) “Certainly not chronic” if the condition is temporary or transient, c) “Possibly chronic” otherwise. Patient medical history: <<<{{clinical_note}}>>>. Answer this question based on the patient’s medical history: <<<Does the patient have a history of {{item}}?>>> |
+| 2   | Given the patient’s medical history, answer a question. Patient medical history: <<<{{clinical_note}}>>>. Question: <<<Does the patient have a history of {{item}}?>>>. The possible answers are: Yes, possibly chronic Certainly yes, certainly not chronic Probably yes, possibly chronic Possibly yes, chronic Certainly yes, chronic Possibly yes, possibly chronic Probably yes, chronic Yes, chronic Yes, certainly not chronic Probably yes, certainly not chronic Certainly yes, possibly chronic Possibly yes, certainly not chronic Not available Probably no, chronic Certainly no, chronic Possibly no, chronic Probably no, possibly chronic Certainly no, possibly chronic Possibly no, possibly chronic Certainly no, certainly not chronic Probably no, certainly not chronic Possibly no, certainly not chronic Probably no Certainly no Possibly no Probably no, certainly not chronic Certainly no, possibly chronic Possibly no, certainly not chronic Probably no, possibly chronic Certainly no, certainly not chronic|
+| 3   | You are a clinical medical expert. You must answer a question about the patient’s history. To do this, you have access to the patient’s medical history. Patient medical history: <<<{{clinical_note}}>>>. The answer must consist of three components: polarity, contextual modality, and chronicity. You must combine these three components to answer the question. Contextual modality can be: a) “Certainly” if the answer is certain, b) “Possibly” if the answer is hypothetical, c) “Probably” if the answer is probable. Polarity can be: a) “Yes” if the answer is affirmative, b) “No” if the answer is negative. Chronicity can be: a) “Chronic” if the condition in question is certainly permanent, b) “Certainly not chronic” if the condition is temporary or transient, c) “Possibly chronic” otherwise. Answer this question based on the patient’s medical history: <<<Does the patient have a history of {{item}}?>>>|
+
+| #   | Prompt (IT)                                                                    |
+|-----|--------------------------------------------------------------------------------|
+| 1   | Sei un esperto medico clinico. Devi rispondere ad una domanda su la storia del paziente. Per farlo, hai a disposizione la storia clinica del paziente. La risposta è composta da tre componenti: polarità, modalità contestuale e permanenza. Devi mettere insieme queste tre componenti per rispondere alla domanda. Modalità contestuale può essere:  a)'Certamente' se la risposta è certa,  b)'Possibilmente' se la risposta è ipotetica,  c)'Probabilmente' se la risposta è probabile. Polarità può essere:  a)'sì' se la risposta è affermativa,  b) 'no' se la risposta è negativa.  Permanenza può essere:  a)'cronico' se l'oggetto della domanda è sicuramente permanente per sempre,  b)'certamente non cronico' se l'oggetto della domanda è temporaneo o transitorio,  c)'possibilmente cronico' altrimenti. Storia clinica: <<<{{clinical_note}}>>>. Rispondi a questa domanda sulla base della storia clinica del paziente: <<<Il paziente ha storia di  {{item}}?>>> |
+| 2   | Data la storia medica di un paziente, rispondi ad una domanda. Storia clinica: <<<{{clinical_note}}>>>. Domanda: <<<Il paziente ha storia di  {{item}}?>>>. Le opzioni sono:  - sì, possibilmente cronico - Certamente sì, Certamente non cronico - probabilmente sì, possibilmente cronico - possibilmente sì, cronico - Certamente sì, cronico - possibilmente sì, possibilmente cronico - probabilmente sì, cronico - sì, cronico - sì, Certamente non cronico - probabilmente sì, Certamente non cronico - Certamente sì, possibilmente cronico - possibilmente sì, Certamente non cronico - non disponibile - probabilmente no, cronico - Certamente no, cronico - possibilmente no, cronico - probabilmente no, possibilmente cronico - Certamente no, possibilmente cronico - possibilmente no, possibilmente cronico - Certamente no, Certamente non cronico - probabilmente no, Certamente non cronico - possibilmente no, Certamente non cronico - probabilmente no - Certamente no - possibilmente no - probabilmente no, Certamente non cronico - Certamente no, possibilmente cronico - possibilmente no, Certamente non cronico - probabilmente no, possibilmente cronico - Certamente no, Certamente non cronico|
+| 3   | Sei un esperto medico clinico. Devi rispondere ad una domanda su la storia del paziente. Per farlo, hai a disposizione la storia clinica del paziente. Storia clinica: <<<{{clinical_note}}>>>. La risposta è composta da tre componenti: polarità, modalità contestuale e permanenza. Devi mettere insieme queste tre componenti per rispondere alla domanda. - modalità contestuale può essere: a)'Certamente' se la risposta è certa, b)'Possibilmente' se la risposta è ipotetica, c)'Probabilmente' se la risposta è probabile. - polarità può essere: a)'sì' se la risposta è affermativa, b) 'no' se la risposta è negativa.  - permanenza può essere: a)'cronico' se l'oggetto della domanda è sicuramente permanente per sempre, b)'certamente non cronico' se l'oggetto della domanda è temporaneo o transitorio, c)'possibilmente cronico' altrimenti. Rispondi a questa domanda sulla base della storia clinica del paziente: <<<Il paziente ha storia di  {{item}}?>>>|
+
+
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1-micro averaged over the 6 prompts. **Best Prompt** = F1-micro of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+AT_DESCRIPTION = """### Admission Tests (AT) --- *Multiple-choice task*
+    The input is a multiple-choice question with five options (A-E) from Italian medical specialty entrance exams, and the model must identify the correct answer.
+
+| #   | Prompt                                                                                       | Answer Choices               |
+|-----|--------------------------------------------------------------------------------|-----------------------------|
+| 1   | Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta? | ["A", "B", "C", "D", "E"]   |
+| 2   | Devi risolvere un compito di risposte a domande. Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta? | ["A", "B", "C", "D", "E"]   |
+| 3   | Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?\\nA: {{A}}\\nB: {{B}}\\nC: {{C}}\\nD: {{D}}\\nE: {{E}}\\nRisposta: | ["A", "B", "C", "D", "E"]   |
+| 4   | Devi risolvere un compito a scelta multipla. Dato il seguente caso clinico: '{{background}}', qual è la risposta corretta alla domanda: '{{domanda}}'?\\nA: {{A}}\\nB: {{B}}\\nC: {{C}}\\nD: {{D}}\\nE: {{E}}\\nRisposta:Devi risolvere un compito a scelta multipla. Dato il seguente quesito di medicina: '{{Question}}' qual è la risposta corretta?\\nA: {{A}}\\nB: {{B}}\\nC: {{C}}\\nD: {{D}}\\nE: {{E}}\\nRisposta: | ["A", "B", "C", "D", "E"]   |
+| 5   | Dato il seguente quesito di medicina '{{Question}}' la risposta corretta è: | ["A", "B", "C", "D", "E"]   |
+| 6   | Devi risolvere un compito di risposte a domande. Dato il seguente quesito di medicina '{{Question}}' la risposta corretta è: | ["A", "B", "C", "D", "E"]   |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the 6 prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+WIC_DESCRIPTION = """### Word in Context (WIC) --- *Multiple-choice task*
+    The input consists of a word (w) and two sentences. The model has to determine whether the word w has the same meaning in both sentences. The output is a binary classification: 1 (same meaning) or 0 (different meaning).
+
+| #   | Prompt                                                                                       | Answer Choices                                   |
+|-----|--------------------------------------------------------------------------------|-------------------------------------------------|
+| 1   | La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'? | ["No", "Sì"]                         |
+| 2   | Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'? | ["No", "Sì"] |
+| 3   | La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?\\nA: Sì\\nB: No\\nRisposta: | ["B", "A"]                                      |
+| 4   | Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' ha lo stesso significato della parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}'?\\nA: Sì\\nB: No\\nRisposta: | ["B", "A"]  |
+| 5   | La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}' | ["non hanno lo stesso significato", "hanno lo stesso significato"] |
+| 6   | Devi determinare se una stessa parola usata in due frasi differenti ha lo stesso significato in entrambi i contesti. La parola: '{{sentence1[start1:end1]}}' nella frase: '{{sentence1}}' e la parola: '{{sentence2[start2:end2]}}' nella frase: '{{sentence2}}' | ["non hanno lo stesso significato", "hanno lo stesso significato"] |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1-macro averaged over the 6 prompts. **Best Prompt** = F1-macro of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+FAQ_DESCRIPTION = """### Frequently Asked Questions & Question Answering (FAQ) --- *Multiple-choice task*
+    The input is a user query regarding the water supply service. The model must identify the correct answer from the 4 available options.
+
+| #   | Prompt                                                                                       | Answer Choices               |
+|-----|--------------------------------------------------------------------------------|-----------------------------|
+| 1   | Rispondi alla seguente domanda: '{{question}}'                                              | {{[A, B, C, D]}}            |
+| 2   | Devi risolvere un compito di risposte a domande. Rispondi alla seguente domanda: '{{question}}' | {{[A, B, C, D]}}            |
+| 3   | Rispondi alla seguente domanda: '{{question}}'\\nA: {{A}}\\nB: {{B}}\\nC: {{C}}\\nD: {{D}}\\nRisposta: | ["A", "B", "C", "D"] |
+| 4   | Devi risolvere un compito a scelta multipla. Rispondi alla seguente domanda: '{{question}}'\\nA: {{A}}\\nB: {{B}}\\nC: {{C}}\\nD: {{D}}\\nRisposta: | ["A", "B", "C", "D"] |
+| 5   | La risposta alla domanda: '{{question}}' è:                                                | {{[A, B, C, D]}}            |
+| 6   | Devi risolvere un compito di risposte a domande. La risposta alla domanda: '{{question}}' è: | {{[A, B, C, D]}}  |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = accuracy averaged over the 6 prompts. **Best Prompt** = accuracy of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+LS_DESCRIPTION = """### Lexical Substitution (LS) --- *Generative task*
+    The input is a sentence containing a target word (w). The model has to replace the target word w with its most suitable synonyms that are contextually relevant. 
+
+| #   | Prompt                                                                                  |
+|-----|--------------------------------------------------------------------------------|
+| 7   | Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori `<head>` nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. Risposta: |
+| 8   | Devi risolvere un compito di sostituzione lessicale. Trova 10 parole che possono sostituire la parola racchiusa tra i marcatori `<head>` nella seguente frase: '{{context}}', mantenendo lo stesso significato. Elenca i lemmi (forme base) di queste parole, separandoli con una virgola, ad esempio: lemma1, lemma2, lemma3, lemma4, lemma5. Non aggiungere commenti o altro testo. Risposta: |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+SU_DESCRIPTION = """### Summarization (SUM) --- *Generative task*
+    The input is a news article. The model has to generate a concise summary of the input text, capturing the key information and main points.
+
+| #   | Prompt                                                                                       |
+|-----|--------------------------------------------------------------------------------|
+| 7   | Riassumi il seguente articolo di giornale: '{{source}}'\\nRiassunto:             |
+| 8   | Devi risolvere un compito di sintesi automatica del testo. Riassumi il seguente articolo di giornale: '{{source}}'\\nRiassunto: |
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 2 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+NER_DESCRIPTION = """### Named Entity Recognition (NER) --- *Generative task*
+    The input is a sentence of a clinical text. The model has to identify the clinical entities, which include all occurrences of clinical disorders (i.e. diseases and symptoms).
+
+| #   | Prompt (IT)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   | Devi svolgere un compito di riconoscimento di entità in testi medici. Dalla seguente frase, estrai tutte le entità del tipo CLINENTITY, che include tutti i disturbi di carattere medico in una singola categoria (cioè, sia malattie che sintomi). Riporta ogni entità nel formato: Entity$CLINENTITY, separando ogni coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'. |
+| 2   | Devi svolgere un compito di riconoscimento di entità in testi medici. Dalla seguente frase, estrai tutte le entità del tipo CLINENTITY, che include tutti i disturbi di carattere medico (un disturbo è definito come un processo patologico definito, con un insieme caratteristico di segni e sintomi). Restituisci ogni entità nel seguente formato: Entity$CLINENTITY, separando ogni coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'.|
+| 3   | Devi svolgere un compito di riconoscimento di entità in note cliniche. Dalla seguente frase, estrai tutte le entità del tipo CLINENTITY, che include tutti i disturbi di carattere medico in una singola categoria (cioè, sia malattie che sintomi). Restituisci ogni entità nel seguente formato: Entity$CLINENTITY, separando ogni coppia con ','. Se non ci sono entità da estrarre, rispondi con '&&NOENT&&'.|
+
+| #   | Prompt (SK)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   |  Máš za úlohu rozpoznať entity v lekárskych textoch. Z nasledujúcej vety vyber všetky entity typu CLINENTITY, ktoré zahŕňajú všetky zdravotné poruchy v jednej kategórii (t. j. choroby aj symptómy). Každú entitu uveď vo formáte: Entity$CLINENTITY, pričom každú dvojicu oddeľ znakom „,“. Ak nie sú žiadne entity, ktoré by sa mohli/dali vybrať, odpovedz/vráť „&&NOENT&&“.|
+| 2   | Máš za úlohu rozpoznať entity v lekárskych textoch. Z nasledujúcej vety vyber všetky entity typu CLINENTITY, ktoré zahŕňajú všetky lekárske poruchy (porucha je definovaná ako určitý patologický proces s charakteristickým súborom príznakov a symptómov). Vráť každú entitu v nasledujúcom formáte: Entity$CLINENTITY, pričom každú dvojicu oddeľ znakom „,“. Ak nie sú žiadne entity, ktoré by sa mohli/dali vybrať, odpovedz/vráť „&&NOENT&&“.|
+| 3   | Máš za úlohu rozpoznať entity v klinických poznámkach. Z nasledujúcej vety vyber všetky entity typu CLINENTITY, ktoré zahŕňajú všetky zdravotné poruchy v jednej kategórii (t. j. choroby aj symptómy). Vráť každú entitu v nasledujúcom formáte: Entity$CLINENTITY, pričom každú dvojicu oddeľ znakom „,“. Ak nie sú žiadne entity, ktoré by bolo možné vybrať, odpovedz/vráť „&&NOENT&&“.|
+
+| #   | Prompt (SL)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   |  Tvoja naloga je prepoznavanje entitet v medicinskih besedilih. Iz naslednjega stavka izlušči vse entitete tipa CLINENTITY, kamor spadajo vse medicinske motnje v posamezni kategoriji (tj. tako bolezni kot simptomi). Vsako entiteto zapiši v obliki: Entity$CLINENTITY, posamezne pare pa loči z vejico ','. Če ni nobene entitete za izluščiti, vrni &&NOENT&&.|
+| 2   | Tvoja naloga je prepoznavanje entitet v medicinskih besedilih. Iz naslednjega stavka izlušči vse entitete tipa CLINENTITY, kamor spadajo vse medicinske motnje (motnja je opredeljena kot določen patološki proces s značilnim naborom znakov in simptomov). Vsako entiteto zapiši v obliki: Entity$CLINENTITY, posamezne pare pa loči z vejico ','. Če ni nobene entitete za izluščiti, vrni &&NOENT&&.|
+| 3   | Tvoja naloga je prepoznavanje entitet v kliničnih zapisih. Iz naslednjega stavka izlušči vse entitete tipa CLINENTITY, kamor spadajo vse medicinske motnje v posamezni kategoriji (tako bolezni kot simptomi). Vsako entiteto zapiši v obliki: Entity$CLINENTITY, posamezne pare pa loči z vejico ','. Če ni nobene entitete za izluščiti, vrni &&NOENT&&.|
+
+| #   | Prompt (GR)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   |  Έχεις να εκτελέσεις τη δραστηριότητα του να εντοπίσεις οντότητες μέσα σε ιατρικά κείμενα. Στην παρακάτω πρόταση, να εξάγεις όλες τις οντότητες του τύπου CLINENTITY, η οποία περιλαμβάνει όλες τις ιατρικές διαταραχές σε μία μόνο κατηγορία (δλδ τόσο νοσήματα όσο και συμπτώματα). Να αναφέρεις κάθε οντότητα με την μορφή: Οντότητα$CLINENTITY, χωρίζοντας κάθε ζευγάρι με ','. Αν δεν υπάρχουν οντότητες για να εξαχθούν, απάντησε με το '&&NOENT&&'.|
+| 2   | Έχεις να εκτελέσεις μία δραστηριότητα αναγνώρισης οντοτήτων σε ιατρικά κείμενα. Από τις ακόλουθες προτάσεις, να εξάγεις όλες τις οντότητες του τύπου CLINENTITY, ο οποίος περιλαμβάνει όλες τις ιατρικές διαταραχές (μια διαταραχή ορίζεται ως μία ξεκάθαρα παθολογική διαδικασία με ένα χαρακτηριστικό συνδυασμό σημείων και συμπτωμάτων). Επέστρεφε κάθε οντότητα με την ακόλουθη μορφή: Οντότητα$CLINENTITY, χωρίζοντας κάθε ζευγάρι με ','. Αν δεν υπάρχουν οντότητες να εξαχθούν, απάντησε με το '&&NOENT&&'.|
+| 3   | Έχεις να εκτελέσεις μια δραστηριότητα αναγνώρισης οντοτήτων σε κλινικά σημειώματα. Από την ακόλουθη πρόταση να εξάγεις όλες τις οντότητες του τύπου CLINENTITY, που περιλαμβάνει όλες τις ιατρικές διαταραχές σε μία μόνο κατηγορία  (δλδ τόσο νοσήματα όσο και συμπτώματα). Επέστρεψε κάθε οντότητα με την ακόλουθη μορφή: Οντότητα$CLINENTITY, χωρίζοντας κάθε ζευγάρι με ','. Αν δεν υπάρχουν οντότητες για να εξαχθούν, απάντησε με το  '&&NOENT&&'.|
+
+| #   | Prompt (PL)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   | Zadanie polega na rozpoznawania jednostek (chorobowych) w tekstach medycznych. Z poniższego zdania wyodrębnij wszystkie jednostki typu CLINENTITY, które obejmują wszystkie schorzenia medyczne danej kategorii (tj. zarówno choroby jak i objawy). Każda jednostka powinna być zgłoszona w formacie: Entity$CLINENTITY, z oddzieleniem każdej pary znakiem ”,”. Jeśli nie ma żadnych jednostek do wyodrębnienia, odpowiedz '&&NOENT&&'. |
+| 2   |Zadanie polega na rozpoznawaniu jednostek (chorobowych) w tekstach medycznych. Z poniższego zdania wyodrębnij wszystkie jednostki typu CLINENTITY, które obejmują wszystkie schorzenia medyczne (schorzenie definiuje się jako określony proces patologiczny z charakterystycznym zestawem objawów). Zwróć każdą jednostkę w następującym formacie: Entity$CLINENTITY, oddzielając każdą parę znakiem ”,”. Jeśli nie ma jednostek do wyodrębnienia, odpowiedz '&&NOENT&&'. |
+| 3   | Zadanie polega na rozpoznawania jednostek (chorobowych) w notatkach klinicznych. Z poniższego zdania wyodrębnij wszystkie jednostki typu CLINENTITY, które obejmują wszystkie schorzenia medyczne z danej kategorii (tj. zarówno choroby jak i objawy). Zapisz każdą jednostkę w następującym formacie: Entity$CLINENTITY, oddzielając każdą parę znakiem ”,”. Jeśli nie ma jednostek do wyodrębnienia, odpowiedz '&&NOENT&&'.|
+
+| #   | Prompt (EN)                                                                                                                          |
+|-----|--------------------------------------------------------------------------------------------------------------------------------------|
+| 1   | You have to perform a task of entity recognition in medical texts. From the following sentence, extract all the entities of type CLINENTITY, which includes all medical disorders in a single category (i.e. both diseases and symptoms). Report each entity with the format: Entity$CLINENTITY, separating each pair with ','. If there are no entities to extract, answer with '&&NOENT&&'. |
+| 2   | You have to perform a task of entity recognition in medical texts. From the following sentence, extract all the entities of type CLINENTITY, which includes all medical disorders (a disorder is defined as a definite pathologic process with a characteristic set of signs and symptoms). Return each entity in the following format: Entity$CLINENTITY, separating each pair with ','. If there are no entities to extract, answer with '&&NOENT&&'.|
+| 3   | You have to perform a task of entity recognition in clinical notes. From the following sentence, extract all the entities of type CLINENTITY, which includes all medical disorders in a single category (i.e. both diseases and symptoms). Return each entity in the following format: Entity$CLINENTITY, separating each pair with ','. If there are no entities to extract, answer with '&&NOENT&&'.|
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 3 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+REL_DESCRIPTION = """### Relation Extraction (REL) --- *Generative task*
+    The input is a sentence of a clinical text. The model must identify and extract relations between laboratory test results (e.g., 122/81 mmHg) and the corresponding tests or procedures (e.g., blood pressure).
+
+| #   | Prompt (IT)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   | Devi estrarre relazioni da una frase nel campo medico. Data una frase in italiano, estrai tutti i test di laboratorio con i loro valori. Ritorna i risultati come: valore$voce_medica&valore$voce_medica. Usa '&&NOREL&&' se non trovi nessuna relazione. |
+| 2   | Estrai tutte le coppie test-valore menzionate nella seguente frase nel campo medico. Includi solamente misurazioni esplicite in cui il nome di un test di laboratorio è chiaramente collegato alla sua misurazione. Scrivi ciascuna coppia nel formato:  valore$nome_test. Congiungi coppie multiple usando '&'. Se nessuna coppia valida esiste, ritorna esattamente: '&&NOREL&&'.|
+| 3   | Estrai tutte le coppie test-valore dalla seguente frase medica.  Includi solamente test di laboratorio e i valori delle corrispondenti misurazioni. Formatta ciascuna coppia come  valore$nome_test, e separa coppie multiple usando '&'. Se non c'è nessuna coppia, ritorna '&&NOREL&&'.|
+
+| #   | Prompt (SK)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   | Vyber súvislosti z lekárskej vety. Na základe slovenskej vety vyber všetky položky laboratórnych testov spolu s ich hodnotami. Vráť výsledky v tvare: value$medical_item&value$medical_item. Ak sa nenašli žiadne súvislosti, použi ‚&&NOREL&&‘. |
+| 2   | Vyber všetky dvojice laboratórnych testov a hodnôt uvedené v nasledujúcej lekárskej vete. Zahrň iba explicitné/jednoznačné merania, kde je názov laboratórneho testu jasne prepojený/spätý s nameranou hodnotou. Každú dvojicu zapíš vo formáte: value$test_name. Viaceré dvojice spoj pomocou znaku '&'. Ak neexistujú žiadne platné dvojice, vráť: „&&NOREL&&“.|
+| 3   | Vyber všetky dvojice laboratórnych testov a hodnôt z nasledujúcej lekárskej vety. Zahrň iba laboratórne testy a ich zodpovedajúce namerané hodnoty. Každý pár formátuj ako value$test_name a viacero párov oddeľ znakom „&“. Ak takéto páry neexistujú, vráť „&&NOREL&&“.|
+
+| #   | Prompt (SL)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   |  Iz medicinskega stavka je treba izluščiti relacije. Iz slovenskega stavka izlušči vse laboratorijske preiskave skupaj z njihovimi vrednostmi. Rezultat vrni v obliki: value$medical_item&value$medical_item. Če v stavku ni mogoče najti nobene relacije, uporabi '&&NOREL&&'.|
+| 2   |Iz danega medicinskega stavka izlušči vse pare laboratorijska preiskava–vrednost. Vključi samo tiste vrednosti, kjer je ime laboratorijske preiskave jasno povezano z izmerjeno vrednostjo. Vsak par zapiši v obliki: value$test_name. Če je parov več, jih poveži z znakom '&'. Če veljavnih parov ni, vrni natanko: &&NOREL&&. |
+| 3   | Iz naslednjega medicinskega stavka izlušči vse pare laboratorijska preiskava–vrednost. Vključi samo laboratorijske preiskave in njihove pripadajoče izmerjene vrednosti. Vsak par zapiši v obliki: value$test_name, več parov pa loči z znakom '&'. Če takih parov ni, vrni &&NOREL&&.|
+
+| #   | Prompt (GR)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   | Πρέπει να εξάγεις σχέσεις από μια πρόταση ενός ιατρικού αρχείου. Θα σου δοθεί μία πρόταση στα Ελληνικά και θα πρέπει να εξάγεις όλες τις τιμές εργαστηριακών αποτελεσμάτων με τις τιμές τους. Παρουσίασε τα αποτελέσματα με την παρακάτω μορφή: τιμή$ιατρικό_αντικείμενο&τιμή$ιατρικό_αντικείμενο. Χρησιμοποίησε '&&NOREL&&' αν δεν βρεθούν σχέσεις.|
+| 2   | Πρέπει να εξάγεις όλα τα ζευγάρια εργαστηριακών εξετάσεων και αποτελεσμάτων από την παρακάτω πρόταση ενός ιατρικού αρχείου. Να περιλάβεις μόνο συγκεκριμένες μετρήσεις όπου βρεις ότι το όνομα μιας εργαστηριακής  εξέτασης συνδέεται ξεκάθαρα με την τιμή που μετρήθηκε. Γράψε κάθε ζευγάρι με την μορφή: τιμή$όνομα_εξέτασης. Ένωσε πολλαπλά ζευγάρια χρησιμοποιώντας το σύμβολο'&'. Αν δεν υπάρχουν ζευγάρια να επιστρέψεις την τιμή: '&&NOREL&&'.|
+| 3   | Να εξάγεις όλα τα ζευγάρια εργαστηριακών εξετάσεων-τιμών από την παρακάτω πρόταση ενός ιατρικού αρχείου. Να συμπεριλάβεις μόνο εργαστηριακές εξετάσεις και τις αντίστοιχες μετρημένες τιμές τους. Φτιάξε κάθε ζευγάρι με τη μορφή τιμή$όνομα_εξέτασης, και χώρισε πολλαπλά ζευγάρια χρησιμοποιώντας το σύμβολο '&'. Αν δεν υπάρχουν τέτοια ζευγάρια να επιστρέψεις την τιμή '&&NOREL&&'.|
+
+| #   | Prompt (PL)                                                                                      |
+|-----|--------------------------------------------------------------------------------|
+| 1   |  Wyodrębnij zależności z wyrażenia medycznego. Dla danego polskiego zdania wyodrębnij wszystkie pozycje badań laboratoryjnych wraz z ich wartościami. Wyniki należy zapisać w formacie: value$medical_item&value$medical_item. Jeśli nie znaleziono żadnych zależności, napisz '&&NOREL&&'.|
+| 2   |Wyodrębnij wszystkie pary badań laboratoryjnych – wartość wymienione w poniższym wyrażeniu medycznym. Uwzględnij tylko wyraźne pomiary, w których nazwa badania laboratoryjnego jest wyraźnie powiązana z jego wartością pomiarową. Zapisz każdą parę w formacie: value$test_name. wyodrębnij pary za pomocą znaku '&'. Jeśli nie ma żadnych par, napisz '&&NOREL&&'. |
+| 3   | Wyodrębnij wszystkie pary badań laboratoryjnych – wartość z poniższego wyrażenia medycznego. Uwzględnij tylko badania laboratoryjne i odpowiadające im wartości pomiarowe. Zapisz każdą parę jako value$test_name i oddziel pary za pomocą znaku '&'. Jeśli nie ma żadnych par, napisz '&&NOREL&&'.|
+
+| #   | Prompt (EN)                                                                                                                          |
+|-----|--------------------------------------------------------------------------------------------------------------------------------------|
+| 1   | You have to extract relations from a medical sentence. Given an English sentence, extract all lab test items with their values. Return results like: value$medical_item&value$medical_item. Use '&&NOREL&&' if no relations are found. |
+| 2   | Extract all lab test–value pairs mentioned in the following medical sentence. Include only explicit measurements where a lab test name is clearly linked to its measured value. Write each pair in the format: value$test_name. Join multiple pairs using '&'. If no valid pairs exist, return exactly: '&&NOREL&&'.|
+| 3   | Extract all lab test–value pairs from the following medical sentence. Only include lab tests and their corresponding measured values. Format each pair as value$test_name, and separate multiple pairs using '&'. If there are no such pairs, return '&&NOREL&&'.|
+
+
+<small>**Combined Performance** = (1 - (**Best Prompt** - **Prompt Average**) / 100) * **Best Prompt**. **Prompt Average** = F1 averaged over the 3 prompts. **Best Prompt** = F1 of the best prompt. **Prompt ID** = ID of the best prompt (see legend above). </small>
+
+"""
+
+# Lookup table: task abbreviation (e.g. "REL") -> the markdown description string (*_DESCRIPTION constant) defined for that task earlier in this module.
+TASK_DESCRIPTIONS = {
+    "RML": RML_DESCRIPTION,
+    "DIA": DIA_DESCRIPTION,
+    "HIS": HIS_DESCRIPTION,
+    "AT": AT_DESCRIPTION,
+    "WIC": WIC_DESCRIPTION,
+    "FAQ": FAQ_DESCRIPTION,
+    "LS": LS_DESCRIPTION,
+    "SU": SU_DESCRIPTION,
+    "NER": NER_DESCRIPTION,
+    "REL": REL_DESCRIPTION
+}
\ No newline at end of file