machine-translation

Build error

App Files Files Community

dh-mc committed on Aug 6, 2024

Commit

95ba169

1 Parent(s): 007b986

translation time analyzed

Browse files

Files changed (8) hide show

.gitattributes +4 -0
llm_toolkit/translation_utils.py +80 -0
logs/l40-1gpu.txt +0 -0
logs/l40-4gpu.txt +0 -0
logs/openai-gpt-4o-mini.txt +0 -0
logs/openai-gpt-4o.txt +3 -74
notebooks/00b_Data Analysis_Few_Shots.ipynb +2 -2
results/mac-results_few_shots_metrics.csv +29 -23

.gitattributes CHANGED Viewed

@@ -59,3 +59,7 @@ notebooks/00a_Data[[:space:]]Analysis_greedy_decoding.ipynb filter=lfs diff=lfs
 notebooks/00b_Data[[:space:]]Analysis_Few_Shots.ipynb filter=lfs diff=lfs merge=lfs -text
 notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
 notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text

 notebooks/00b_Data[[:space:]]Analysis_Few_Shots.ipynb filter=lfs diff=lfs merge=lfs -text
 notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
 notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
+logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
+logs/l40-4gpu.txt filter=lfs diff=lfs merge=lfs -text
+logs/openai-gpt-4o-mini.txt filter=lfs diff=lfs merge=lfs -text
+logs/openai-gpt-4o.txt filter=lfs diff=lfs merge=lfs -text

llm_toolkit/translation_utils.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import re
 import pandas as pd
 import evaluate
 import seaborn as sns
@@ -482,3 +483,82 @@ def eval_openai(num_shots, datasets, model="gpt-4o-mini", max_new_tokens=300):
         predictions.append(output)
     return predictions

 import os
 import re
+import glob
 import pandas as pd
 import evaluate
 import seaborn as sns
         predictions.append(output)
     return predictions
def convert_time_to_minutes(time_str):
    """Convert a colon-separated elapsed-time string into minutes.

    Three layouts are accepted: ``HH:MM:SS``, ``MM:SS`` and ``SS``. Every
    component must parse as an integer.

    Args:
        time_str: Elapsed time such as ``"28:52"`` or ``"1:02:03"``.

    Returns:
        The elapsed time in minutes as a float (``"38:45"`` -> ``38.75``).

    Raises:
        ValueError: If a component is not an integer, or if there are more
            than three components. Such input used to yield a silent ``0``,
            which is indistinguishable from a real zero-length duration.
    """
    parts = [int(part) for part in time_str.split(":")]
    if len(parts) > 3:
        raise ValueError(
            f"unsupported time format (expected HH:MM:SS, MM:SS or SS): {time_str!r}"
        )

    # Left-pad with zeros so the shorter layouts share a single code path.
    hours, minutes, seconds = [0] * (3 - len(parts)) + parts
    return hours * 60 + minutes + seconds / 60
# Progress-bar line whose "remaining" field reads 00:00, i.e. a finished run,
# e.g. "1133/1133 [28:52<00:00,  1.53s/it]". Group 1 is the elapsed field.
time_pattern = re.compile(r"\[(.{5,10})<00:00")
# Summary line such as "gpt-4o/shots-00 metrics: {...}".
# Group 1 is the model name, group 2 the shot count.
metrics_pattern = re.compile(r"(.*)/shots-(.*) metrics:")


def process_log_file(log_file):
    """Extract per-run evaluation times from one evaluation log.

    The log is scanned line by line. Each line matching ``time_pattern``
    updates the most recent elapsed time; each line matching
    ``metrics_pattern`` is paired with that elapsed time to form one record.

    A metrics line is skipped, with a message printed, when no finished
    progress bar has been seen since the previous record or when the captured
    elapsed time cannot be converted. Skipping keeps the three output columns
    aligned and avoids attributing one run's time to another run.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A ``pandas.DataFrame`` with columns ``model`` (str), ``shots`` (str,
        exactly as written in the log) and ``eval_time`` (minutes, float).
        The frame is empty when the log contains no usable records.

    Raises:
        OSError: If ``log_file`` cannot be opened.
    """
    records = []
    elapsed = None  # elapsed field of the latest finished progress bar

    # The logs contain non-ASCII bar glyphs and text; decode as UTF-8 and
    # replace undecodable bytes, since both patterns only need ASCII.
    with open(log_file, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            timing = time_pattern.search(line)
            if timing:
                elapsed = timing.group(1)
                continue

            metrics = metrics_pattern.search(line)
            if not metrics:
                continue

            model_name, num_shots = metrics.groups()
            if elapsed is None:
                print(f"Error processing log file: {log_file}")
                print(
                    "no finished progress bar found before metrics of "
                    f"{model_name}/shots-{num_shots}"
                )
                continue

            try:
                minutes = convert_time_to_minutes(elapsed)
            except ValueError as e:
                print(f"Error processing log file: {log_file}")
                print(e)
            else:
                records.append((model_name, num_shots, minutes))
            # Each timing belongs to exactly one metrics line.
            elapsed = None

    return pd.DataFrame(records, columns=["model", "shots", "eval_time"])
def load_eval_times(logs_folder):
    """Collect evaluation times from every log file in a folder.

    Regular files directly inside ``logs_folder`` are processed in sorted
    path order with ``process_log_file``. Sub-directories are ignored.

    Args:
        logs_folder: Directory containing the log files.

    Returns:
        A ``pandas.DataFrame`` with columns ``model``, ``shots`` (converted
        to ``int``) and ``eval_time``. The frame is empty when the folder
        holds no usable log records.

    Raises:
        ValueError: If a ``shots`` value cannot be converted to ``int``.
    """
    columns = ["model", "shots", "eval_time"]
    frames = []

    for log_file in sorted(glob.glob(os.path.join(logs_folder, "*"))):
        # The "*" wildcard also matches directories, which cannot be opened.
        if not os.path.isfile(log_file):
            continue
        print(f"Loading content of {log_file}")
        df = process_log_file(log_file)
        # Leave empty frames out so they do not influence the merged dtypes.
        if not df.empty:
            frames.append(df)

    if frames:
        # One concat at the end instead of re-copying the frame per file.
        time_df = pd.concat(frames, ignore_index=True)
    else:
        time_df = pd.DataFrame({name: [] for name in columns})

    time_df["shots"] = time_df["shots"].astype(int)
    return time_df

logs/l40-1gpu.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

logs/l40-4gpu.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

logs/openai-gpt-4o-mini.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

logs/openai-gpt-4o.txt CHANGED Viewed

@@ -1,74 +1,3 @@
-loading env vars from: D:\code\projects\rapget-translation\.env
-Adding D:\code\projects\rapget-translation to sys.path
-C:\Users\dongh\.conda\envs\rapget\Lib\site-packages\threadpoolctl.py:1214: RuntimeWarning:
-Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
-the same time. Both libraries are known to be incompatible and this
-can cause random crashes or deadlocks on Linux when loaded in the
-same Python program.
-Using threadpoolctl may cause crashes or deadlocks. For more
-information and possible workarounds, please see
-    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md
-  warnings.warn(msg, RuntimeWarning)
-[nltk_data] Downloading package wordnet to
-[nltk_data]     C:\Users\dongh\AppData\Roaming\nltk_data...
-[nltk_data]   Package wordnet is already up-to-date!
-[nltk_data] Downloading package punkt to
-[nltk_data]     C:\Users\dongh\AppData\Roaming\nltk_data...
-[nltk_data]   Package punkt is already up-to-date!
-[nltk_data] Downloading package omw-1.4 to
-[nltk_data]     C:\Users\dongh\AppData\Roaming\nltk_data...
-[nltk_data]   Package omw-1.4 is already up-to-date!
-loading: D:\code\projects\rapget-translation\eval_modules\calc_repetitions.py
-loading D:\code\projects\rapget-translation\llm_toolkit\translation_utils.py
-[nltk_data] Downloading package wordnet to
-[nltk_data]     C:\Users\dongh\AppData\Roaming\nltk_data...
-[nltk_data]   Package wordnet is already up-to-date!
-[nltk_data] Downloading package punkt to
-[nltk_data]     C:\Users\dongh\AppData\Roaming\nltk_data...
-[nltk_data]   Package punkt is already up-to-date!
-[nltk_data] Downloading package omw-1.4 to
-[nltk_data]     C:\Users\dongh\AppData\Roaming\nltk_data...
-[nltk_data]   Package omw-1.4 is already up-to-date!
-gpt-4o datasets/mac/mac.tsv results/mac-results_few_shots_openai.csv 300
-Evaluating model: gpt-4o
-loading train/test data files
-DatasetDict({
-    train: Dataset({
-        features: ['chinese', 'english'],
-        num_rows: 4528
-    })
-    test: Dataset({
-    test: Dataset({
-        features: ['chinese', 'english'],
-        num_rows: 1133
-    })
-})
---------------------------------------------------
-chinese: 老耿端起枪，眯缝起一只三角眼，一搂扳机响了枪，冰雹般的金麻雀劈哩啪啦往下落，铁砂子在柳枝间飞迸着，嚓嚓有声。
-chinese: 老耿端起枪，眯缝起一只三角眼，一搂扳机响了枪，冰雹般的金麻雀劈哩啪啦往下落，铁砂子在柳枝间飞迸着，嚓嚓有声。
---------------------------------------------------
-english: Old Geng picked up his shotgun, squinted, and pulled the trigger. Two sparrows crashed to the ground like hailstones as shotgun pellets tore noisily through the branches.
-*** Evaluating with num_shots: 0
-*** Evaluating with num_shots: 0
-100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [28:52<00:00,  1.53s/it]
-gpt-4o/shots-00 metrics: {'meteor': 0.3797419877414444, 'bleu_scores': {'bleu': 0.12054600115274576, 'precisions': [0.4395170970950372, 0.1657507850413931, 0.08008175399479747, 0.041705426356589144], 'brevity_penalty': 0.965191371371961, 'length_ratio': 0.965783371977476, 'translation_length': 29157, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42488525198918325, 'rouge2': 0.17659595999851255, 'rougeL': 0.37036814222422193, 'rougeLsum': 0.37043557409027883}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
-*** Evaluating with num_shots: 1
-100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [22:44<00:00,  1.20s/it]
-gpt-4o/shots-01 metrics: {'meteor': 0.37588586538591867, 'bleu_scores': {'bleu': 0.12049862468096047, 'precisions': [0.4438186524872315, 0.16850617418861327, 0.08162258566387129, 0.043228692450813504], 'brevity_penalty': 0.9454338245859127, 'length_ratio': 0.9468698244451805, 'translation_length': 28586, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4200247346821462, 'rouge2': 0.17611482166851536, 'rougeL': 0.36555347015620193, 'rougeLsum': 0.36597227925335113}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
-*** Evaluating with num_shots: 3
-100%|���████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [38:45<00:00,  2.05s/it]
-gpt-4o/shots-03 metrics: {'meteor': 0.3768512103553621, 'bleu_scores': {'bleu': 0.12408746322526747, 'precisions': [0.4504073680481757, 0.17455806915894748, 0.08641500730375952, 0.04606687515034881], 'brevity_penalty': 0.9329257300005195, 'length_ratio': 0.9350778403444849, 'translation_length': 28230, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42185440095437376, 'rouge2': 0.18099296897772787, 'rougeL': 0.36683121325656565572, 'rougeLsum': 0.36692420445626067}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
-*** Evaluating with num_shots: 5
-100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [31:48<00:00,  1.68s/it]
-gpt-4o/shots-05 metrics: {'meteor': 0.35772544915145654, 'bleu_scores': {'bleu': 0.12169683347842021, 'precisions': [0.45675271230826786, 0.1799429620658671, 0.0908092273892347, 0.04932145886344359], 'brevity_penalty': 0.8785850406914042, 'length_ratio': 0.8853925140775091, 'translation_length': 26730, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3989536343087876, 'rouge2': 0.17450105082463535, 'rougeL': 0.348320055666115, 'rougeLsum': 0.3483328999510906}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
-*** Evaluating with num_shots: 10
- 'rougeLsum': 0.3483328999510906}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
-*** Evaluating with num_shots: 10
- 'rougeLsum': 0.3483328999510906}, 'accuracy': 0.00088261253309797, 'correct_ids': [77]}
-*** Evaluating with num_shots: 10
-100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [33:48<00:00,  1.79s/it]
-gpt-4o/shots-10 metrics: {'meteor': 0.3746444651189953, 'bleu_scores': {'bleu': 0.12498238983123719, 'precisions': [0.45538813929351135, 0.17677558937630558, 0.08810041971086585, 0.04747233145498034], 'brevity_penalty': 0.9226631755170949, 'length_ratio': 0.9255051341503809, 'translation_length': 27941, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42057276805902843, 'rouge2': 0.182701868068981, 'rougeL': 0.3668754130715727, 'rougeLsum': 0.3673183260659394}, 'accuracy': 0.00176522506619594, 'correct_ids': [77, 364]}
-*** Evaluating with num_shots: 50
-100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1133/1133 [38:15<00:00,  2.03s/it]
-gpt-4o/shots-50 metrics: {'meteor': 0.40413933252744955, 'bleu_scores': {'bleu': 0.13782450337569063, 'precisions': [0.4695234708392603, 0.19261125727201986, 0.09873251410464487, 0.05424823410696267], 'brevity_penalty': 0.9290310787259491, 'length_ratio': 0.9314342497515734, 'translation_length': 28120, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.44343703034704307, 'rouge2': 0.20310004059554654, 'rougeL': 0.3908878454222482, 'rougeLsum': 0.39082492657743595}, 'accuracy': 0.00353045013239188, 'correct_ids': [77, 364, 567, 1000]}

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad80c611cc68e4bca4120d818e49a283694a72c8b1dc71abe908ec8dd68e4497
+size 9795

notebooks/00b_Data Analysis_Few_Shots.ipynb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4062aa5271a5e14210e73d9ce344cb49dcbb126429fa370c68f8e38725840121
-size 593498

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c9785edd77e41276edd7d949b04796bb3a2b98fbf4efc56049f074ef0c74e39
+size 645130

results/mac-results_few_shots_metrics.csv CHANGED Viewed

@@ -1,23 +1,29 @@
-model,shots,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens
-01-ai/Yi-1.5-9B-Chat,0,0.2624042529095214,0.052402107437040435,0.22702109917009206,0.0088261253309797,1.593115622241836,1.6019417475728155,0.24649759532229093,18
-01-ai/Yi-1.5-9B-Chat,1,0.34870107586750904,0.08089424511255362,0.32734221074629044,0.0,0.41394527802294795,0.41394527802294795,0.3426649332614599,17
-01-ai/Yi-1.5-9B-Chat,3,0.32640977691198636,0.055279846527263934,0.2928978370489262,0.0,0.8570167696381289,0.8570167696381289,0.3151554166830832,41
-01-ai/Yi-1.5-9B-Chat,5,0.34766805202103457,0.08282971728232061,0.3267409773412665,0.0,0.1703442188879082,0.1703442188879082,0.3451362525721807,12
-01-ai/Yi-1.5-9B-Chat,10,0.3404245874451134,0.0874799371333584,0.3186285587310857,0.0,0.33451015004413065,0.33451015004413065,0.335628491165567,9
-Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.3843308919636922,0.0,0.19593998234774934,0.19593998234774934,0.3970180421898014,1
-Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.39419477888585397,0.0,0.15798764342453664,0.15798764342453664,0.4041216347207881,1
-Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.3975872454980886,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0
-Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.40319922813685904,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0
-Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.4061550950232767,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0
-gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.3703414668036082,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0
-gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.3672849018610451,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0
-gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.3678727405759652,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0
-gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.3467666649149247,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0
-gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.3655393297085069,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0
-gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.39119808964775155,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0
-gpt-4o,0,0.3797419877414444,0.12054600115274576,0.3701547457064372,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0
-gpt-4o,1,0.37588586538591867,0.12049862468096047,0.3655088353382996,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0
-gpt-4o,3,0.3768512103553621,0.12408746322526747,0.36675999670221837,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0
-gpt-4o,5,0.35772544915145654,0.12169683347842021,0.348000637544411,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0
-gpt-4o,10,0.3746444651189953,0.12498238983123719,0.36675868342577317,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0
-gpt-4o,50,0.40413933252744955,0.13782450337569063,0.39068912530823663,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0

+model,shots,meteor,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rap,num_max_output_tokens,eval_time
+Qwen/Qwen2-72B-Instruct,0,0.4003638205699929,0.12223832517678616,0.3850228285100058,0.0,0.19593998234774934,0.19593998234774934,0.3970180421898014,1,167.96666666666667
+Qwen/Qwen2-72B-Instruct,1,0.4068727655718769,0.13151008586303575,0.39473841744219135,0.0,0.15798764342453664,0.15798764342453664,0.4041216347207881,1,169.63333333333333
+Qwen/Qwen2-72B-Instruct,3,0.4086244766794449,0.13771788946915253,0.3982274035420223,0.0,0.12709620476610767,0.12709620476610767,0.4063954239173824,0,220.13333333333333
+Qwen/Qwen2-72B-Instruct,5,0.4132330811975005,0.1439773872150899,0.4038550675754348,0.0,0.11915269196822595,0.11915269196822595,0.41111822769434864,0,324.18333333333334
+Qwen/Qwen2-72B-Instruct,10,0.41598174489789025,0.14493475334416772,0.40675496346798323,0.0,0.09620476610767872,0.09620476610767872,0.4142591929807702,0,561.3666666666667
+Qwen/Qwen2-7B-Instruct,0,0.377477070949433,0.11783492823424507,0.36803838669677474,0.0,0.07149161518093557,0.07149161518093557,0.3763128359886437,0,18.516666666666666
+Qwen/Qwen2-7B-Instruct,1,0.38000752971097884,0.11731917392837622,0.3717522235071834,0.0,0.07413945278022947,0.07413945278022947,0.37879237953430883,0,19.883333333333333
+Qwen/Qwen2-7B-Instruct,3,0.38678180999660744,0.12368875746156333,0.37795982050404114,0.0,0.1412180052956752,0.1412180052956752,0.38444052153933106,0,30.233333333333334
+Qwen/Qwen2-7B-Instruct,5,0.38784856371389564,0.1227725469820483,0.38271501720823375,0.0,0.09179170344218888,0.09179170344218888,0.38631555618548774,0,43.233333333333334
+Qwen/Qwen2-7B-Instruct,10,0.38526484346757095,0.12535252418966952,0.381921964717894,0.0,0.10326566637246248,0.10326566637246248,0.3835535147682633,0,75.65
+internlm/internlm2_5-7b-chat,0,0.36816799960793073,0.11360521358693174,0.3604145848797059,0.0,0.2144748455428067,0.2144748455428067,0.3648059323539847,0,23.116666666666667
+internlm/internlm2_5-7b-chat,1,0.3719587471180722,0.1157707566176535,0.3640593681230251,0.0,0.14033539276257723,0.14033539276257723,0.36972107700643503,0,24.783333333333335
+internlm/internlm2_5-7b-chat,3,0.3747105229822289,0.1154826016668525,0.3690935886519632,0.0,0.17740511915269197,0.17740511915269197,0.37187052462735126,0,35.083333333333336
+internlm/internlm2_5-7b-chat,5,0.37285562384505977,0.11541534709366409,0.36890113755410703,0.0,0.14827890556045895,0.14827890556045895,0.37048732274065205,0,54.016666666666666
+internlm/internlm2_5-7b-chat,10,0.3750895095392996,0.11696492920010637,0.3680889860873816,0.0,0.13062665489849956,0.13062665489849956,0.37298723763770353,0,108.06666666666666
+internlm/internlm2_5-7b-chat,50,0.37213069871716603,0.11404688073207249,0.3626467635078598,0.0,0.16857899382171226,0.16857899382171226,0.3694484047441432,8,798.6166666666667
+gpt-4o-mini,0,0.3797696357415517,0.1208238389018596,0.3705758962787884,0.0,0.09532215357458076,0.09532215357458076,0.37821133607113916,0,30.1
+gpt-4o-mini,1,0.37721414424357197,0.12013402254992751,0.36747964103348624,0.0,0.09179170344218888,0.09179170344218888,0.37572317024740703,0,29.583333333333332
+gpt-4o-mini,3,0.3772985230936086,0.12400311006855895,0.3682804376759824,0.0,0.09179170344218888,0.09179170344218888,0.3758072155821894,0,24.3
+gpt-4o-mini,5,0.35541821046691263,0.1202464326274801,0.34720271618869103,0.0,0.05030891438658429,0.05030891438658429,0.3546452926906339,0,22.733333333333334
+gpt-4o-mini,10,0.37335968903521094,0.1257600824824953,0.3659023214917385,0.0,0.0706090026478376,0.0706090026478376,0.37222227656264567,0,22.433333333333334
+gpt-4o-mini,50,0.4044690970661121,0.13972883920222515,0.3916652577211933,0.0,0.08473080317740513,0.08473080317740513,0.4029924080114739,0,24.35
+gpt-4o,0,0.3797419877414444,0.12054600115274576,0.3704105629830775,0.0,0.09532215357458076,0.09532215357458076,0.37818380151840997,0,28.866666666666667
+gpt-4o,1,0.37588586538591867,0.12049862468096047,0.3658901615314057,0.0,0.09179170344218888,0.09179170344218888,0.3744001415355042,0,22.733333333333334
+gpt-4o,3,0.3768512103553621,0.12408746322526747,0.3668998399542892,0.0,0.09355692850838482,0.09355692850838482,0.3753332737090981,0,38.75
+gpt-4o,5,0.35772544915145654,0.12169683347842021,0.3483810267695463,0.0,0.0353045013239188,0.0353045013239188,0.3571787674657609,0,31.8
+gpt-4o,10,0.3746444651189953,0.12498238983123719,0.36712547974906085,0.0,0.0706090026478376,0.0706090026478376,0.37350313867182305,0,33.8
+gpt-4o,50,0.40413933252744955,0.13782450337569063,0.3909558691963182,0.0,0.07590467784642542,0.07590467784642542,0.402816463024093,0,38.25