Spaces:
Sleeping
Sleeping
Alex Cabrera
committed on
Commit
•
95b368a
1
Parent(s):
8ceca60
config
Browse files- modeling.py +29 -2
modeling.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
from __future__ import annotations
|
3 |
|
4 |
import os
|
|
|
5 |
from dataclasses import dataclass
|
6 |
|
7 |
import config
|
@@ -58,6 +59,27 @@ def process_data(
|
|
58 |
return data
|
59 |
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
def process_output(
|
62 |
input_dir: str,
|
63 |
lang_pairs: list[str],
|
@@ -66,7 +88,8 @@ def process_output(
|
|
66 |
"""Load model outputs."""
|
67 |
# Load the data
|
68 |
data: list[str] = []
|
69 |
-
|
|
|
70 |
system_dir = os.path.join(input_dir, "evaluation", "system-outputs", model_path)
|
71 |
for lang_pair in lang_pairs:
|
72 |
src_lang, trg_lang = lang_pair[:2], lang_pair[2:]
|
@@ -75,5 +98,9 @@ def process_output(
|
|
75 |
)
|
76 |
with open(sys_file, "r") as sys_in:
|
77 |
for sys_line in sys_in:
|
78 |
-
|
|
|
|
|
|
|
|
|
79 |
return data
|
|
|
2 |
from __future__ import annotations
|
3 |
|
4 |
import os
|
5 |
+
import re
|
6 |
from dataclasses import dataclass
|
7 |
|
8 |
import config
|
|
|
59 |
return data
|
60 |
|
61 |
|
62 |
+
def remove_leading_language(line: str) -> str:
    """Remove a language name at the beginning of the string.

    Some zero-shot models output the name of the language at the beginning of the
    string. This is a manual post-processing function that removes the language name
    (partly as an example of how you can do simple fixes to issues that come up during
    analysis using Zeno).

    Only a prefix of the exact form ``"<Language>: "`` (name, colon, one space)
    anchored at the start of the string is removed; anything else is returned
    unchanged.

    Args:
        line: The line to process.

    Returns:
        The line with the language removed.
    """
    # "Ukrai?nian" matches the correct spelling "Ukrainian" and also the
    # misspelling "Ukranian" that the pattern previously required, so lines
    # that were stripped before are still stripped.
    return re.sub(
        r"^(English|Japanese|Chinese|Hausa|Icelandic|French|German|Russian"
        r"|Ukrai?nian): ",
        "",
        line,
    )
|
81 |
+
|
82 |
+
|
83 |
def process_output(
|
84 |
input_dir: str,
|
85 |
lang_pairs: list[str],
|
|
|
88 |
"""Load model outputs."""
|
89 |
# Load the data
|
90 |
data: list[str] = []
|
91 |
+
model_config = config.model_configs[model_preset]
|
92 |
+
model_path = model_config.path
|
93 |
system_dir = os.path.join(input_dir, "evaluation", "system-outputs", model_path)
|
94 |
for lang_pair in lang_pairs:
|
95 |
src_lang, trg_lang = lang_pair[:2], lang_pair[2:]
|
|
|
98 |
)
|
99 |
with open(sys_file, "r") as sys_in:
|
100 |
for sys_line in sys_in:
|
101 |
+
sys_line = sys_line.strip()
|
102 |
+
if model_config.post_processors is not None:
|
103 |
+
for postprocessor in model_config.post_processors:
|
104 |
+
sys_line = postprocessor(sys_line)
|
105 |
+
data.append(sys_line)
|
106 |
return data
|