Spaces:
Build error
Build error
Sam Passaglia
committed on
Commit
·
b6dd3a3
1
Parent(s):
470c02e
minor
Browse files
- requirements.txt +0 -1
- yomikata/utils.py +3 -49
requirements.txt
CHANGED
@@ -9,7 +9,6 @@ unidic-lite
|
|
9 |
sudachipy
|
10 |
sudachidict_full
|
11 |
spacy
|
12 |
-
scikit-learn==1.2.0
|
13 |
speach==0.1a15.post1
|
14 |
torch>=1.13.1
|
15 |
transformers>=4.25.1
|
|
|
9 |
sudachipy
|
10 |
sudachidict_full
|
11 |
spacy
|
|
|
12 |
speach==0.1a15.post1
|
13 |
torch>=1.13.1
|
14 |
transformers>=4.25.1
|
yomikata/utils.py
CHANGED
@@ -8,7 +8,6 @@ import unicodedata
|
|
8 |
|
9 |
import numpy as np
|
10 |
import pynvml
|
11 |
-
from sklearn.metrics import precision_recall_fscore_support
|
12 |
from speach.ttlig import RubyFrag, RubyToken
|
13 |
|
14 |
"""
|
@@ -30,9 +29,7 @@ def load_dict(filepath: str) -> dict:
|
|
30 |
return d
|
31 |
|
32 |
|
33 |
-
def save_dict(
|
34 |
-
d: dict, filepath: str, cls: json.JSONEncoder = None, sortkeys: bool = False
|
35 |
-
) -> None:
|
36 |
"""Save a dictionary to a specified location.
|
37 |
|
38 |
Args:
|
@@ -276,10 +273,7 @@ def remove_furigana(s: str) -> str:
|
|
276 |
"""
|
277 |
rubytoken = parse_furigana(s)
|
278 |
return "".join(
|
279 |
-
[
|
280 |
-
token.text if isinstance(token, RubyFrag) else token
|
281 |
-
for token in rubytoken.groups
|
282 |
-
]
|
283 |
)
|
284 |
|
285 |
|
@@ -295,10 +289,7 @@ def furigana_to_kana(s: str) -> str:
|
|
295 |
"""
|
296 |
rubytoken = parse_furigana(s)
|
297 |
return "".join(
|
298 |
-
[
|
299 |
-
token.furi if isinstance(token, RubyFrag) else token
|
300 |
-
for token in rubytoken.groups
|
301 |
-
]
|
302 |
)
|
303 |
|
304 |
|
@@ -315,40 +306,3 @@ def has_kanji(s: str) -> bool:
|
|
315 |
if code >= UNICODE_KANJI_START and code <= UNICODE_KANJI_END:
|
316 |
return True
|
317 |
return False
|
318 |
-
|
319 |
-
|
320 |
-
"""
|
321 |
-
Performance Metrics
|
322 |
-
"""
|
323 |
-
|
324 |
-
|
325 |
-
def get_label_performance(y_true, y_pred, classes):
|
326 |
-
"""Per-class performance metrics.
|
327 |
-
|
328 |
-
MIT License
|
329 |
-
Copyright (c) 2020 Made With ML
|
330 |
-
"""
|
331 |
-
|
332 |
-
# Performance
|
333 |
-
performance = {"overall": {}, "class": {}}
|
334 |
-
|
335 |
-
# Overall performance
|
336 |
-
metrics = precision_recall_fscore_support(y_true, y_pred, average="weighted")
|
337 |
-
performance["overall"]["precision"] = metrics[0]
|
338 |
-
performance["overall"]["recall"] = metrics[1]
|
339 |
-
performance["overall"]["f1"] = metrics[2]
|
340 |
-
performance["overall"]["num_samples"] = np.float64(len(y_true))
|
341 |
-
|
342 |
-
# Per-class performance
|
343 |
-
metrics = precision_recall_fscore_support(
|
344 |
-
y_true, y_pred, average=None, labels=classes
|
345 |
-
)
|
346 |
-
for i in range(len(classes)):
|
347 |
-
performance["class"][classes[i]] = {
|
348 |
-
"precision": metrics[0][i],
|
349 |
-
"recall": metrics[1][i],
|
350 |
-
"f1": metrics[2][i],
|
351 |
-
"num_samples": np.float64(metrics[3][i]),
|
352 |
-
}
|
353 |
-
|
354 |
-
return performance
|
|
|
8 |
|
9 |
import numpy as np
|
10 |
import pynvml
|
|
|
11 |
from speach.ttlig import RubyFrag, RubyToken
|
12 |
|
13 |
"""
|
|
|
29 |
return d
|
30 |
|
31 |
|
32 |
+
def save_dict(d: dict, filepath: str, cls: json.JSONEncoder = None, sortkeys: bool = False) -> None:
|
|
|
|
|
33 |
"""Save a dictionary to a specified location.
|
34 |
|
35 |
Args:
|
|
|
273 |
"""
|
274 |
rubytoken = parse_furigana(s)
|
275 |
return "".join(
|
276 |
+
[token.text if isinstance(token, RubyFrag) else token for token in rubytoken.groups]
|
|
|
|
|
|
|
277 |
)
|
278 |
|
279 |
|
|
|
289 |
"""
|
290 |
rubytoken = parse_furigana(s)
|
291 |
return "".join(
|
292 |
+
[token.furi if isinstance(token, RubyFrag) else token for token in rubytoken.groups]
|
|
|
|
|
|
|
293 |
)
|
294 |
|
295 |
|
|
|
306 |
if code >= UNICODE_KANJI_START and code <= UNICODE_KANJI_END:
|
307 |
return True
|
308 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|