dh-mc committed
Commit 553af0d
1 Parent(s): 80027ea

disable lfs for 00b notebook

.gitattributes CHANGED
@@ -56,7 +56,6 @@ results/mac-results_few_shots.csv filter=lfs diff=lfs merge=lfs -text
 results/mac-results_metrics.csv filter=lfs diff=lfs merge=lfs -text
 notebooks/00_Data[[:space:]]Analysis.ipynb filter=lfs diff=lfs merge=lfs -text
 notebooks/00a_Data[[:space:]]Analysis_greedy_decoding.ipynb filter=lfs diff=lfs merge=lfs -text
-notebooks/00b_Data[[:space:]]Analysis_Few_Shots.ipynb filter=lfs diff=lfs merge=lfs -text
 notebooks/01_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
 notebooks/01a_Few-shot_Prompting.ipynb filter=lfs diff=lfs merge=lfs -text
 logs/l40-1gpu.txt filter=lfs diff=lfs merge=lfs -text
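
Removing the filter line takes the 00b notebook out of Git LFS, so its diff can render as regular JSON. A minimal sketch of the equivalent edit via the git-lfs CLI, assuming git-lfs is installed (git lfs untrack rewrites .gitattributes much like the deletion above):

import subprocess

# Drop the LFS rule for the 00b notebook; this edits .gitattributes,
# mirroring the deleted line in the hunk above.
subprocess.run(
    ["git", "lfs", "untrack", "notebooks/00b_Data Analysis_Few_Shots.ipynb"],
    check=True,
)

After untracking, running "git add --renormalize ." re-stages the notebook as a regular blob instead of an LFS pointer.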
llm_toolkit/translation_utils.py CHANGED
@@ -19,6 +19,7 @@ rouge = evaluate.load("rouge")
 meteor = evaluate.load("meteor")
 accuracy = evaluate.load("accuracy")
 sacrebleu = evaluate.load("sacrebleu")
+comet = evaluate.load("comet")
 
 
 def extract_answer(text, debug=False):
@@ -43,7 +44,7 @@ def extract_answer(text, debug=False):
     return text
 
 
-def calc_metrics(references, predictions, debug=False):
+def calc_metrics(references, predictions, sources=None, debug=False):
     assert len(references) == len(
         predictions
     ), f"lengths are different: {len(references)} != {len(predictions)}"
@@ -51,6 +52,10 @@ def calc_metrics(references, predictions, debug=False):
     predictions = [extract_answer(text) for text in predictions]
     results = {}
 
+    results["comet"] = comet.compute(
+        predictions=predictions, references=references, sources=sources
+    )["mean_score"]
+
     results["meteor"] = meteor.compute(predictions=predictions, references=references)[
         "meteor"
     ]
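
COMET needs the source segments as well as the references, which is why calc_metrics now accepts sources and get_metrics threads the Chinese column through (see the later hunks). A minimal usage sketch of the metric as loaded above, with toy strings rather than repo data; note that evaluate downloads a COMET checkpoint on first use:

import evaluate

# COMET scores (source, prediction, reference) triples; "mean_score" is the
# corpus-level average that calc_metrics stores above.
comet = evaluate.load("comet")
result = comet.compute(
    sources=["你好，世界。"],
    predictions=["Hello, world."],
    references=["Hello, world."],
)
print(result["mean_score"])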
@@ -233,7 +238,18 @@ def detect_repetition_scores(row, col, debug=False):
     )
 
 
-def contains_chinese(text):
+def count_chinese_characters(text):
+    # Define a regular expression pattern for Chinese characters
+    chinese_char_pattern = r"[\u4e00-\u9fff]"
+
+    # Use re.findall to find all Chinese characters in the text
+    chinese_chars = re.findall(chinese_char_pattern, text)
+
+    # Return the count of Chinese characters
+    return len(chinese_chars)
+
+
+def count_chinese_characters(text):
     chinese_char_pattern = re.compile(r"[\u4e00-\u9fff]")
     return 1 if chinese_char_pattern.search(text) else 0
 
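
Note that the second definition shadows the first, so count_chinese_characters resolves to the 0/1 flag variant at import time and the sums in the next hunks count flagged rows rather than characters. A short sketch of the difference between the two variants (helper names here are illustrative, toy input):

import re

def count_variant(text):
    # first definition: number of CJK unified ideographs in the text
    return len(re.findall(r"[\u4e00-\u9fff]", text))

def flag_variant(text):
    # second (surviving) definition: 1 if any Chinese character remains
    return 1 if re.search(r"[\u4e00-\u9fff]", text) else 0

print(count_variant("你好 world"))  # 2
print(flag_variant("你好 world"))   # 1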
@@ -264,7 +280,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
     repetition_score = []
     total_repetitions = []
     num_max_output_tokens = []
-    num_incomplete_translations = []
+    translation_completeness = []
     columns = df.columns[2:]
 
     df[
@@ -275,8 +291,13 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
         ]
     ] = df["english"].apply(detect_scores)
 
+    new_col = f"count_chinese_characters-ground_truth"
+    df[new_col] = df["chinese"].apply(count_chinese_characters)
+
     for col in columns:
-        metrics = calc_metrics(df["english"], df[col], debug=True)
+        metrics = calc_metrics(
+            df["english"], df[col], sources=df["chinese"], debug=True
+        )
         print(f"{col}: {metrics}")
 
         meteor.append(metrics["meteor"])
@@ -298,9 +319,11 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
             lambda x: len(tokenizers[model](x)["input_ids"])
         )
 
-        new_col = f"contains_chinese-{col}"
-        df[new_col] = df[col].apply(contains_chinese)
-        num_incomplete_translations.append(df[new_col].sum())
+        new_col = f"count_chinese_characters-{col}"
+        df[new_col] = df[col].apply(count_chinese_characters)
+        translation_completeness.append(
+            1 - df[new_col].sum() / df["count_chinese_characters-ground_truth"].sum()
+        )
 
         new_col = f"output_tokens-{col}"
         df[new_col] = df[col].apply(lambda x: len(tokenizers[model](x)["input_ids"]))
@@ -320,7 +343,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
         lambda x: x["meteor"] / math.log10(10 + x["total_repetitions"]), axis=1
     )
 
-    metrics_df["num_incomplete_translations"] = num_incomplete_translations
+    metrics_df["translation_completeness"] = translation_completeness
     metrics_df["num_max_output_tokens"] = num_max_output_tokens
 
     if variant != "rpp":
@@ -359,8 +382,10 @@ def analyze_translation_results(df, col, max_new_tokens=300, repetition_threshol
     )
     print_row_details(df2, range(len(df2)))
 
-    contains_chinese = f"contains_chinese-{col}"
-    df3 = df[df[contains_chinese] > 0][["chinese", "english", col, contains_chinese]]
+    count_chinese_characters = f"count_chinese_characters-{col}"
+    df3 = df[df[count_chinese_characters] > 0][
+        ["chinese", "english", col, count_chinese_characters]
+    ]
 
     print(f"\n*** Found {len(df3)} rows with incomplete translations for {col}")
     print_row_details(df3, range(len(df3)))
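
translation_completeness thus reduces to 1 minus the ratio of Chinese characters (or, given the shadowing noted above, flagged rows) left in a model's output to Chinese characters in the source column. A worked sketch with made-up counts:

# Hypothetical sums, standing in for df[new_col].sum() and
# df["count_chinese_characters-ground_truth"].sum() above.
leftover_in_output = 120
chinese_in_sources = 48000

completeness = 1 - leftover_in_output / chinese_in_sources
print(f"{completeness:.4f}")  # 0.9975; values near 1.0 mean few untranslated spans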
notebooks/00b_Data Analysis_Few_Shots.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -19,3 +19,4 @@ einops==0.8.0
 accelerate==0.32.0
 peft==0.11.1
 sacrebleu==2.4.2
+unbabel-comet==2.2.2
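
The new pin supplies the backend that evaluate.load("comet") resolves at runtime. A quick sanity check, assuming the pinned package is installed:

from importlib.metadata import version

# Confirm the COMET backend is present before running the evaluation.
print(version("unbabel-comet"))  # expect "2.2.2" per the pin above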