Spaces:
Build error
Build error
disable lfs for 00b notebook
Browse files- .gitattributes +0 -1
- llm_toolkit/translation_utils.py +35 -10
- notebooks/00b_Data Analysis_Few_Shots.ipynb +0 -0
- requirements.txt +1 -0
.gitattributes
CHANGED
@@ -56,7 +56,6 @@ results/mac-results_few_shots.csv filter=lfs diff=lfs merge=lfs -text
|
|
56 |
results/mac-results_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
57 |
notebooks/00_Data[[:space:]]Analysis.ipynb filter=lfs diff=lfs merge=lfs -text
|
58 |
notebooks/00a_Data[[:space:]]Analysis_greedy_decoding.ipynb filter=lfs diff=lfs merge=lfs -text
|
59 |
-
notebooks/00b_Data[[:space:]]Analysis_Few_Shots.ipynb filter=lfs diff=lfs merge=lfs -text
|
60 |
notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
61 |
notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
62 |
logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
|
|
|
56 |
results/mac-results_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
57 |
notebooks/00_Data[[:space:]]Analysis.ipynb filter=lfs diff=lfs merge=lfs -text
|
58 |
notebooks/00a_Data[[:space:]]Analysis_greedy_decoding.ipynb filter=lfs diff=lfs merge=lfs -text
|
|
|
59 |
notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
60 |
notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
|
61 |
logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
|
llm_toolkit/translation_utils.py
CHANGED
@@ -19,6 +19,7 @@ rouge = evaluate.load("rouge")
|
|
19 |
meteor = evaluate.load("meteor")
|
20 |
accuracy = evaluate.load("accuracy")
|
21 |
sacrebleu = evaluate.load("sacrebleu")
|
|
|
22 |
|
23 |
|
24 |
def extract_answer(text, debug=False):
|
@@ -43,7 +44,7 @@ def extract_answer(text, debug=False):
|
|
43 |
return text
|
44 |
|
45 |
|
46 |
-
def calc_metrics(references, predictions, debug=False):
|
47 |
assert len(references) == len(
|
48 |
predictions
|
49 |
), f"lengths are difference: {len(references)} != {len(predictions)}"
|
@@ -51,6 +52,10 @@ def calc_metrics(references, predictions, debug=False):
|
|
51 |
predictions = [extract_answer(text) for text in predictions]
|
52 |
results = {}
|
53 |
|
|
|
|
|
|
|
|
|
54 |
results["meteor"] = meteor.compute(predictions=predictions, references=references)[
|
55 |
"meteor"
|
56 |
]
|
@@ -233,7 +238,18 @@ def detect_repetition_scores(row, col, debug=False):
|
|
233 |
)
|
234 |
|
235 |
|
236 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
chinese_char_pattern = re.compile(r"[\u4e00-\u9fff]")
|
238 |
return 1 if chinese_char_pattern.search(text) else 0
|
239 |
|
@@ -264,7 +280,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
|
|
264 |
repetition_score = []
|
265 |
total_repetitions = []
|
266 |
num_max_output_tokens = []
|
267 |
-
|
268 |
columns = df.columns[2:]
|
269 |
|
270 |
df[
|
@@ -275,8 +291,13 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
|
|
275 |
]
|
276 |
] = df["english"].apply(detect_scores)
|
277 |
|
|
|
|
|
|
|
278 |
for col in columns:
|
279 |
-
metrics = calc_metrics(
|
|
|
|
|
280 |
print(f"{col}: {metrics}")
|
281 |
|
282 |
meteor.append(metrics["meteor"])
|
@@ -298,9 +319,11 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
|
|
298 |
lambda x: len(tokenizers[model](x)["input_ids"])
|
299 |
)
|
300 |
|
301 |
-
new_col = f"
|
302 |
-
df[new_col] = df[col].apply(
|
303 |
-
|
|
|
|
|
304 |
|
305 |
new_col = f"output_tokens-{col}"
|
306 |
df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
|
@@ -320,7 +343,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
|
|
320 |
lambda x: x["meteor"] / math.log10(10 + x["total_repetitions"]), axis=1
|
321 |
)
|
322 |
|
323 |
-
metrics_df["
|
324 |
metrics_df["num_max_output_tokens"] = num_max_output_tokens
|
325 |
|
326 |
if variant != "rpp":
|
@@ -359,8 +382,10 @@ def analyze_translation_results(df, col, max_new_tokens=300, repetition_threshol
|
|
359 |
)
|
360 |
print_row_details(df2, range(len(df2)))
|
361 |
|
362 |
-
|
363 |
-
df3 = df[df[
|
|
|
|
|
364 |
|
365 |
print(f"\n*** Found {len(df3)} rows with incomplete translations for {col}")
|
366 |
print_row_details(df3, range(len(df3)))
|
|
|
19 |
meteor = evaluate.load("meteor")
|
20 |
accuracy = evaluate.load("accuracy")
|
21 |
sacrebleu = evaluate.load("sacrebleu")
|
22 |
+
comet = evaluate.load("comet")
|
23 |
|
24 |
|
25 |
def extract_answer(text, debug=False):
|
|
|
44 |
return text
|
45 |
|
46 |
|
47 |
+
def calc_metrics(references, predictions, sources=None, debug=False):
|
48 |
assert len(references) == len(
|
49 |
predictions
|
50 |
), f"lengths are difference: {len(references)} != {len(predictions)}"
|
|
|
52 |
predictions = [extract_answer(text) for text in predictions]
|
53 |
results = {}
|
54 |
|
55 |
+
results["comet"] = comet.compute(
|
56 |
+
predictions=predictions, references=references, sources=sources
|
57 |
+
)["mean_score"]
|
58 |
+
|
59 |
results["meteor"] = meteor.compute(predictions=predictions, references=references)[
|
60 |
"meteor"
|
61 |
]
|
|
|
238 |
)
|
239 |
|
240 |
|
241 |
+
def count_chinese_characters(text):
|
242 |
+
# Define a regular expression pattern for Chinese characters
|
243 |
+
chinese_char_pattern = r"[\u4e00-\u9fff]"
|
244 |
+
|
245 |
+
# Use re.findall to find all Chinese characters in the text
|
246 |
+
chinese_chars = re.findall(chinese_char_pattern, text)
|
247 |
+
|
248 |
+
# Return the count of Chinese characters
|
249 |
+
return len(chinese_chars)
|
250 |
+
|
251 |
+
|
252 |
+
def count_chinese_characters(text):
|
253 |
chinese_char_pattern = re.compile(r"[\u4e00-\u9fff]")
|
254 |
return 1 if chinese_char_pattern.search(text) else 0
|
255 |
|
|
|
280 |
repetition_score = []
|
281 |
total_repetitions = []
|
282 |
num_max_output_tokens = []
|
283 |
+
translation_completeness = []
|
284 |
columns = df.columns[2:]
|
285 |
|
286 |
df[
|
|
|
291 |
]
|
292 |
] = df["english"].apply(detect_scores)
|
293 |
|
294 |
+
new_col = f"count_chinese_characters-ground_truth"
|
295 |
+
df[new_col] = df["chinese"].apply(count_chinese_characters)
|
296 |
+
|
297 |
for col in columns:
|
298 |
+
metrics = calc_metrics(
|
299 |
+
df["english"], df[col], sources=df["chinese"], debug=True
|
300 |
+
)
|
301 |
print(f"{col}: {metrics}")
|
302 |
|
303 |
meteor.append(metrics["meteor"])
|
|
|
319 |
lambda x: len(tokenizers[model](x)["input_ids"])
|
320 |
)
|
321 |
|
322 |
+
new_col = f"count_chinese_characters-{col}"
|
323 |
+
df[new_col] = df[col].apply(count_chinese_characters)
|
324 |
+
translation_completeness.append(
|
325 |
+
1 - df[new_col].sum() / df["count_chinese_characters-ground_truth"].sum()
|
326 |
+
)
|
327 |
|
328 |
new_col = f"output_tokens-{col}"
|
329 |
df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
|
|
|
343 |
lambda x: x["meteor"] / math.log10(10 + x["total_repetitions"]), axis=1
|
344 |
)
|
345 |
|
346 |
+
metrics_df["translation_completeness"] = translation_completeness
|
347 |
metrics_df["num_max_output_tokens"] = num_max_output_tokens
|
348 |
|
349 |
if variant != "rpp":
|
|
|
382 |
)
|
383 |
print_row_details(df2, range(len(df2)))
|
384 |
|
385 |
+
count_chinese_characters = f"count_chinese_characters-{col}"
|
386 |
+
df3 = df[df[count_chinese_characters] > 0][
|
387 |
+
["chinese", "english", col, count_chinese_characters]
|
388 |
+
]
|
389 |
|
390 |
print(f"\n*** Found {len(df3)} rows with incomplete translations for {col}")
|
391 |
print_row_details(df3, range(len(df3)))
|
notebooks/00b_Data Analysis_Few_Shots.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -19,3 +19,4 @@ einops==0.8.0
|
|
19 |
accelerate==0.32.0
|
20 |
peft==0.11.1
|
21 |
sacrebleu==2.4.2
|
|
|
|
19 |
accelerate==0.32.0
|
20 |
peft==0.11.1
|
21 |
sacrebleu==2.4.2
|
22 |
+
unbabel-comet==2.2.2
|