Spaces:
Build error
Build error
translation time analyzed
Browse files- .gitattributes +4 -0
- llm_toolkit/translation_utils.py +80 -0
- logs/l40-1gpu.txt +0 -0
- logs/l40-4gpu.txt +0 -0
- logs/openai-gpt-4o-mini.txt +0 -0
- logs/openai-gpt-4o.txt +3 -74
- notebooks/00b_Data Analysis_Few_Shots.ipynb +2 -2
- results/mac-results_few_shots_metrics.csv +29 -23
.gitattributes
CHANGED
@@ -59,3 +59,7 @@ notebooks/00a_Data[[:space:]]Analysis_greedy_decoding.ipynb filter=lfs diff=lfs
|
|
59 |
notebooks/00b_Data[[:space:]]Analysis_Few_Shots.ipynb filter=lfs diff=lfs merge=lfs -text
|
60 |
notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
61 |
notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
59 |
notebooks/00b_Data[[:space:]]Analysis_Few_Shots.ipynb filter=lfs diff=lfs merge=lfs -text
|
60 |
notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
61 |
notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
62 |
+
logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
|
63 |
+
logs/l40-4gpu.txt filter=lfs diff=lfs merge=lfs -text
|
64 |
+
logs/openai-gpt-4o-mini.txt filter=lfs diff=lfs merge=lfs -text
|
65 |
+
logs/openai-gpt-4o.txt filter=lfs diff=lfs merge=lfs -text
|
llm_toolkit/translation_utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import os
|
2 |
import re
|
|
|
3 |
import pandas as pd
|
4 |
import evaluate
|
5 |
import seaborn as sns
|
@@ -482,3 +483,82 @@ def eval_openai(num_shots, datasets, model="gpt-4o-mini", max_new_tokens=300):
|
|
482 |
predictions.append(output)
|
483 |
|
484 |
return predictions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import re
|
3 |
+
import glob
|
4 |
import pandas as pd
|
5 |
import evaluate
|
6 |
import seaborn as sns
|
|
|
483 |
predictions.append(output)
|
484 |
|
485 |
return predictions
|
486 |
+
|
487 |
+
|
488 |
+
def convert_time_to_minutes(time_str):
    """Convert a colon-separated duration string into minutes.

    Recognised layouts are ``HH:MM:SS``, ``MM:SS`` and ``SS``; every field is
    parsed with ``int``. A string with more than three fields is not a
    recognised layout and yields ``0``.

    Args:
        time_str: Duration text such as ``"28:52"`` or ``"1:02:30"``.

    Returns:
        The duration expressed in minutes (``0`` for unrecognised layouts).

    Raises:
        ValueError: If any field is not a valid integer literal.
    """
    fields = [int(part) for part in time_str.split(":")]

    # Anything longer than HH:MM:SS is not a recognised layout.
    if len(fields) > 3:
        return 0

    # Left-pad with zeros so the shorter layouts read as 0 hours / 0 minutes.
    hours, minutes, seconds = [0] * (3 - len(fields)) + fields
    return hours * 60 + minutes + seconds / 60
|
508 |
+
|
509 |
+
|
510 |
+
# Captures the 5-10 characters between "[" and "<00:00", e.g. the "28:52" in a
# progress-bar line ending with "[28:52<00:00, 1.53s/it]".
time_pattern = re.compile(r"\[(.{5,10})<00:00")
|
511 |
+
# Captures (model, shots) from result lines shaped like
# "<model>/shots-<shots> metrics:", e.g. "gpt-4o/shots-00 metrics: {...}".
metrics_pattern = re.compile(r"(.*)/shots-(.*) metrics:")
|
512 |
+
|
513 |
+
|
514 |
+
def process_log_file(log_file):
    """Extract one evaluation-time row per metrics line from a log file.

    The log is scanned line by line. A line matching ``time_pattern`` updates
    the most recently seen elapsed-time string. A line matching
    ``metrics_pattern`` emits one row pairing its model name and shot count
    with that elapsed time, converted by ``convert_time_to_minutes``. The
    elapsed time is not cleared after use, so a metrics line with no progress
    bar of its own reuses the previous one.

    Rows are appended only once all three values are known, so the resulting
    columns always have equal length. A metrics line that cannot be paired
    with a parsable time is reported and skipped; the rest of the file is
    still processed.

    Args:
        log_file: Path of the log file to read.

    Returns:
        pandas.DataFrame with columns ``model``, ``shots`` (the string
        captured from the log) and ``eval_time`` (minutes).
    """
    rows = []
    last_time_str = None

    # The logs contain non-ASCII text (progress-bar blocks, Chinese samples),
    # so decode explicitly as UTF-8 instead of the platform default, and
    # replace undecodable bytes rather than abandoning the file.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            time_match = time_pattern.search(line)
            if time_match:
                last_time_str = time_match.group(1)
                continue

            metrics_match = metrics_pattern.search(line)
            if not metrics_match:
                continue

            model_name, num_shots = metrics_match.groups()

            if last_time_str is None:
                # Metrics reported before any completed progress bar.
                print(f"Error processing log file: {log_file}")
                print(
                    "no elapsed time found before metrics line for "
                    f"{model_name}/shots-{num_shots}"
                )
                continue

            try:
                minutes = convert_time_to_minutes(last_time_str)
            except ValueError as e:
                print(f"Error processing log file: {log_file}")
                print(e)
                continue

            rows.append((model_name, num_shots, minutes))

    return pd.DataFrame(rows, columns=["model", "shots", "eval_time"])
|
549 |
+
|
550 |
+
|
551 |
+
def load_eval_times(logs_folder):
    """Collect evaluation times from every log file in a folder.

    Each regular file directly inside ``logs_folder`` is passed to
    ``process_log_file`` in sorted path order, and the resulting frames are
    concatenated into one table.

    Args:
        logs_folder: Directory containing the log files.

    Returns:
        pandas.DataFrame with columns ``model``, ``shots`` (converted to
        ``int``) and ``eval_time``. The frame is empty when no rows are found.
    """
    # glob("*") also returns sub-directories; opening one would raise, so keep
    # regular files only.
    log_files = sorted(
        path
        for path in glob.glob(os.path.join(logs_folder, "*"))
        if os.path.isfile(path)
    )

    frames = []
    for log_file in log_files:
        print(f"Loading content of {log_file}")
        df = process_log_file(log_file)
        # Skip empty frames so they cannot influence the concatenated dtypes.
        if not df.empty:
            frames.append(df)

    # Concatenate once instead of growing a frame inside the loop.
    if frames:
        time_df = pd.concat(frames, ignore_index=True)
    else:
        time_df = pd.DataFrame({"model": [], "shots": [], "eval_time": []})

    time_df["shots"] = time_df["shots"].apply(int)
    return time_df
|
logs/l40-1gpu.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
logs/l40-4gpu.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
logs/openai-gpt-4o-mini.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
logs/openai-gpt-4o.txt
CHANGED
@@ -1,74 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
|
5 |
-
the same time. Both libraries are known to be incompatible and this
|
6 |
-
can cause random crashes or deadlocks on Linux when loaded in the
|
7 |
-
same Python program.
|
8 |
-
Using threadpoolctl may cause crashes or deadlocks. For more
|
9 |
-
information and possible workarounds, please see
|
10 |
-
https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md
|
11 |
-
|
12 |
-
warnings.warn(msg, RuntimeWarning)
|
13 |
-
[nltk_data] Downloading package wordnet to
|
14 |
-
[nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
|
15 |
-
[nltk_data] Package wordnet is already up-to-date!
|
16 |
-
[nltk_data] Downloading package punkt to
|
17 |
-
[nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
|
18 |
-
[nltk_data] Package punkt is already up-to-date!
|
19 |
-
[nltk_data] Downloading package omw-1.4 to
|
20 |
-
[nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
|
21 |
-
[nltk_data] Package omw-1.4 is already up-to-date!
|
22 |
-
loading: D:\code\projects\rapget-translation\eval_modules\calc_repetitions.py
|
23 |
-
loading D:\code\projects\rapget-translation\llm_toolkit\translation_utils.py
|
24 |
-
[nltk_data] Downloading package wordnet to
|
25 |
-
[nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
|
26 |
-
[nltk_data] Package wordnet is already up-to-date!
|
27 |
-
[nltk_data] Downloading package punkt to
|
28 |
-
[nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
|
29 |
-
[nltk_data] Package punkt is already up-to-date!
|
30 |
-
[nltk_data] Downloading package omw-1.4 to
|
31 |
-
[nltk_data] C:\Users\dongh\AppData\Roaming\nltk_data...
|
32 |
-
[nltk_data] Package omw-1.4 is already up-to-date!
|
33 |
-
gpt-4o datasets/mac/mac.tsv results/mac-results_few_shots_openai.csv 300
|
34 |
-
Evaluating model: gpt-4o
|
35 |
-
loading train/test data files
|
36 |
-
DatasetDict({
|
37 |
-
train: Dataset({
|
38 |
-
features: ['chinese', 'english'],
|
39 |
-
num_rows: 4528
|
40 |
-
})
|
41 |
-
test: Dataset({
|
42 |
-
test: Dataset({
|
43 |
-
features: ['chinese', 'english'],
|
44 |
-
num_rows: 1133
|
45 |
-
})
|
46 |
-
})
|
47 |
-
--------------------------------------------------
|
48 |
-
chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
|
49 |
-
chinese: 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞迸着,嚓嚓有声。
|
50 |
-
--------------------------------------------------
|
51 |
-
english: Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.
|
52 |
-
*** Evaluating with num_shots: 0
|
53 |
-
*** Evaluating with num_shots: 0
|
54 |
-
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [28:52<00:00, 1.53s/it]
|
55 |
-
gpt-4o/shots-00 metrics: {'meteor': 0.3797419877414444, 'bleu_scores': {'bleu': 0.12054600115274576, 'precisions': [0.4395170970950372, 0.1657507850413931, 0.08008175399479747, 0.041705426356589144], 'brevity_penalty': 0.965191371371961, 'length_ratio': 0.965783371977476, 'translation_length': 29157, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42488525198918325, 'rouge2': 0.17659595999851255, 'rougeL': 0.37036814222422193, 'rougeLsum': 0.37043557409027883}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
|
56 |
-
*** Evaluating with num_shots: 1
|
57 |
-
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [22:44<00:00, 1.20s/it]
|
58 |
-
gpt-4o/shots-01 metrics: {'meteor': 0.37588586538591867, 'bleu_scores': {'bleu': 0.12049862468096047, 'precisions': [0.4438186524872315, 0.16850617418861327, 0.08162258566387129, 0.043228692450813504], 'brevity_penalty': 0.9454338245859127, 'length_ratio': 0.9468698244451805, 'translation_length': 28586, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4200247346821462, 'rouge2': 0.17611482166851536, 'rougeL': 0.36555347015620193, 'rougeLsum': 0.36597227925335113}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
|
59 |
-
*** Evaluating with num_shots: 3
|
60 |
-
100%|���████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [38:45<00:00, 2.05s/it]
|
61 |
-
gpt-4o/shots-03 metrics: {'meteor': 0.3768512103553621, 'bleu_scores': {'bleu': 0.12408746322526747, 'precisions': [0.4504073680481757, 0.17455806915894748, 0.08641500730375952, 0.04606687515034881], 'brevity_penalty': 0.9329257300005195, 'length_ratio': 0.9350778403444849, 'translation_length': 28230, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42185440095437376, 'rouge2': 0.18099296897772787, 'rougeL': 0.36683121325656565572, 'rougeLsum': 0.36692420445626067}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
|
62 |
-
*** Evaluating with num_shots: 5
|
63 |
-
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [31:48<00:00, 1.68s/it]
|
64 |
-
gpt-4o/shots-05 metrics: {'meteor': 0.35772544915145654, 'bleu_scores': {'bleu': 0.12169683347842021, 'precisions': [0.45675271230826786, 0.1799429620658671, 0.0908092273892347, 0.04932145886344359], 'brevity_penalty': 0.8785850406914042, 'length_ratio': 0.8853925140775091, 'translation_length': 26730, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3989536343087876, 'rouge2': 0.17450105082463535, 'rougeL': 0.348320055666115, 'rougeLsum': 0.3483328999510906}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
|
65 |
-
*** Evaluating with num_shots: 10
|
66 |
-
'rougeLsum': 0.3483328999510906}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
|
67 |
-
*** Evaluating with num_shots: 10
|
68 |
-
'rougeLsum': 0.3483328999510906}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
|
69 |
-
*** Evaluating with num_shots: 10
|
70 |
-
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [33:48<00:00, 1.79s/it]
|
71 |
-
gpt-4o/shots-10 metrics: {'meteor': 0.3746444651189953, 'bleu_scores': {'bleu': 0.12498238983123719, 'precisions': [0.45538813929351135, 0.17677558937630558, 0.08810041971086585, 0.04747233145498034], 'brevity_penalty': 0.9226631755170949, 'length_ratio': 0.9255051341503809, 'translation_length': 27941, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42057276805902843, 'rouge2': 0.182701868068981, 'rougeL': 0.3668754130715727, 'rougeLsum': 0.3673183260659394}, 'accuracy': 0.00176522506619594, 'correct_ids': [77, 364]}
|
72 |
-
*** Evaluating with num_shots: 50
|
73 |
-
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [38:15<00:00, 2.03s/it]
|
74 |
-
gpt-4o/shots-50 metrics: {'meteor': 0.40413933252744955, 'bleu_scores': {'bleu': 0.13782450337569063, 'precisions': [0.4695234708392603, 0.19261125727201986, 0.09873251410464487, 0.05424823410696267], 'brevity_penalty': 0.9290310787259491, 'length_ratio': 0.9314342497515734, 'translation_length': 28120, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.44343703034704307, 'rouge2': 0.20310004059554654, 'rougeL': 0.3908878454222482, 'rougeLsum': 0.39082492657743595}, 'accuracy': 0.00353045013239188, 'correct_ids': [77, 364, 567, 1000]}
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad80c611cc68e4bca4120d818e49a283694a72c8b1dc71abe908ec8dd68e4497
|
3 |
+
size 9795
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
notebooks/00b_Data Analysis_Few_Shots.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c9785edd77e41276edd7d949b04796bb3a2b98fbf4efc56049f074ef0c74e39
|
3 |
+
size 645130
|
results/mac-results_few_shots_metrics.csv
CHANGED
@@ -1,23 +1,29 @@
|
|
1 |
-
model,shots,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
Qwen/Qwen2-
|
8 |
-
Qwen/Qwen2-
|
9 |
-
Qwen/Qwen2-
|
10 |
-
Qwen/Qwen2-
|
11 |
-
Qwen/Qwen2-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
gpt-4o,0,0.
|
19 |
-
gpt-4o,1,0.
|
20 |
-
gpt-4o,3,0.
|
21 |
-
gpt-4o,5,0.
|
22 |
-
gpt-4o,10,0.
|
23 |
-
gpt-4o,50,0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model,shots,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens,eval_time
|
2 |
+
Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.3850228285100058,0.0,0.19593998234774934,0.19593998234774934,0.3970180421898014,1,167.96666666666667
|
3 |
+
Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.39473841744219135,0.0,0.15798764342453664,0.15798764342453664,0.4041216347207881,1,169.63333333333333
|
4 |
+
Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.3982274035420223,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0,220.13333333333333
|
5 |
+
Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.4038550675754348,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0,324.18333333333334
|
6 |
+
Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.40675496346798323,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0,561.3666666666667
|
7 |
+
Qwen/Qwen2-7B-Instruct,0,0.377477070949433,0.11783492823424507,0.36803838669677474,0.0,0.07149161518093557,0.07149161518093557,0.3763128359886437,0,18.516666666666666
|
8 |
+
Qwen/Qwen2-7B-Instruct,1,0.38000752971097884,0.11731917392837622,0.3717522235071834,0.0,0.07413945278022947,0.07413945278022947,0.37879237953430883,0,19.883333333333333
|
9 |
+
Qwen/Qwen2-7B-Instruct,3,0.38678180999660744,0.12368875746156333,0.37795982050404114,0.0,0.1412180052956752,0.1412180052956752,0.38444052153933106,0,30.233333333333334
|
10 |
+
Qwen/Qwen2-7B-Instruct,5,0.38784856371389564,0.1227725469820483,0.38271501720823375,0.0,0.09179170344218888,0.09179170344218888,0.38631555618548774,0,43.233333333333334
|
11 |
+
Qwen/Qwen2-7B-Instruct,10,0.38526484346757095,0.12535252418966952,0.381921964717894,0.0,0.10326566637246248,0.10326566637246248,0.3835535147682633,0,75.65
|
12 |
+
internlm/internlm2_5-7b-chat,0,0.36816799960793073,0.11360521358693174,0.3604145848797059,0.0,0.2144748455428067,0.2144748455428067,0.3648059323539847,0,23.116666666666667
|
13 |
+
internlm/internlm2_5-7b-chat,1,0.3719587471180722,0.1157707566176535,0.3640593681230251,0.0,0.14033539276257723,0.14033539276257723,0.36972107700643503,0,24.783333333333335
|
14 |
+
internlm/internlm2_5-7b-chat,3,0.3747105229822289,0.1154826016668525,0.3690935886519632,0.0,0.17740511915269197,0.17740511915269197,0.37187052462735126,0,35.083333333333336
|
15 |
+
internlm/internlm2_5-7b-chat,5,0.37285562384505977,0.11541534709366409,0.36890113755410703,0.0,0.14827890556045895,0.14827890556045895,0.37048732274065205,0,54.016666666666666
|
16 |
+
internlm/internlm2_5-7b-chat,10,0.3750895095392996,0.11696492920010637,0.3680889860873816,0.0,0.13062665489849956,0.13062665489849956,0.37298723763770353,0,108.06666666666666
|
17 |
+
internlm/internlm2_5-7b-chat,50,0.37213069871716603,0.11404688073207249,0.3626467635078598,0.0,0.16857899382171226,0.16857899382171226,0.3694484047441432,8,798.6166666666667
|
18 |
+
gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.3705758962787884,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0,30.1
|
19 |
+
gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.36747964103348624,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0,29.583333333333332
|
20 |
+
gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.3682804376759824,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0,24.3
|
21 |
+
gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.34720271618869103,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0,22.733333333333334
|
22 |
+
gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.3659023214917385,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0,22.433333333333334
|
23 |
+
gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.3916652577211933,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0,24.35
|
24 |
+
gpt-4o,0,0.3797419877414444,0.12054600115274576,0.3704105629830775,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0,28.866666666666667
|
25 |
+
gpt-4o,1,0.37588586538591867,0.12049862468096047,0.3658901615314057,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0,22.733333333333334
|
26 |
+
gpt-4o,3,0.3768512103553621,0.12408746322526747,0.3668998399542892,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0,38.75
|
27 |
+
gpt-4o,5,0.35772544915145654,0.12169683347842021,0.3483810267695463,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0,31.8
|
28 |
+
gpt-4o,10,0.3746444651189953,0.12498238983123719,0.36712547974906085,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0,33.8
|
29 |
+
gpt-4o,50,0.40413933252744955,0.13782450337569063,0.3909558691963182,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0,38.25
|