Sam Passaglia committed on
Commit
b6dd3a3
·
1 Parent(s): 470c02e
Files changed (2) hide show
  1. requirements.txt +0 -1
  2. yomikata/utils.py +3 -49
requirements.txt CHANGED
@@ -9,7 +9,6 @@ unidic-lite
9
  sudachipy
10
  sudachidict_full
11
  spacy
12
- scikit-learn==1.2.0
13
  speach==0.1a15.post1
14
  torch>=1.13.1
15
  transformers>=4.25.1
 
9
  sudachipy
10
  sudachidict_full
11
  spacy
 
12
  speach==0.1a15.post1
13
  torch>=1.13.1
14
  transformers>=4.25.1
yomikata/utils.py CHANGED
@@ -8,7 +8,6 @@ import unicodedata
8
 
9
  import numpy as np
10
  import pynvml
11
- from sklearn.metrics import precision_recall_fscore_support
12
  from speach.ttlig import RubyFrag, RubyToken
13
 
14
  """
@@ -30,9 +29,7 @@ def load_dict(filepath: str) -> dict:
30
  return d
31
 
32
 
33
- def save_dict(
34
- d: dict, filepath: str, cls: json.JSONEncoder = None, sortkeys: bool = False
35
- ) -> None:
36
  """Save a dictionary to a specified location.
37
 
38
  Args:
@@ -276,10 +273,7 @@ def remove_furigana(s: str) -> str:
276
  """
277
  rubytoken = parse_furigana(s)
278
  return "".join(
279
- [
280
- token.text if isinstance(token, RubyFrag) else token
281
- for token in rubytoken.groups
282
- ]
283
  )
284
 
285
 
@@ -295,10 +289,7 @@ def furigana_to_kana(s: str) -> str:
295
  """
296
  rubytoken = parse_furigana(s)
297
  return "".join(
298
- [
299
- token.furi if isinstance(token, RubyFrag) else token
300
- for token in rubytoken.groups
301
- ]
302
  )
303
 
304
 
@@ -315,40 +306,3 @@ def has_kanji(s: str) -> bool:
315
  if code >= UNICODE_KANJI_START and code <= UNICODE_KANJI_END:
316
  return True
317
  return False
318
-
319
-
320
- """
321
- Performance Metrics
322
- """
323
-
324
-
325
- def get_label_performance(y_true, y_pred, classes):
326
- """Per-class performance metrics.
327
-
328
- MIT License
329
- Copyright (c) 2020 Made With ML
330
- """
331
-
332
- # Performance
333
- performance = {"overall": {}, "class": {}}
334
-
335
- # Overall performance
336
- metrics = precision_recall_fscore_support(y_true, y_pred, average="weighted")
337
- performance["overall"]["precision"] = metrics[0]
338
- performance["overall"]["recall"] = metrics[1]
339
- performance["overall"]["f1"] = metrics[2]
340
- performance["overall"]["num_samples"] = np.float64(len(y_true))
341
-
342
- # Per-class performance
343
- metrics = precision_recall_fscore_support(
344
- y_true, y_pred, average=None, labels=classes
345
- )
346
- for i in range(len(classes)):
347
- performance["class"][classes[i]] = {
348
- "precision": metrics[0][i],
349
- "recall": metrics[1][i],
350
- "f1": metrics[2][i],
351
- "num_samples": np.float64(metrics[3][i]),
352
- }
353
-
354
- return performance
 
8
 
9
  import numpy as np
10
  import pynvml
 
11
  from speach.ttlig import RubyFrag, RubyToken
12
 
13
  """
 
29
  return d
30
 
31
 
32
+ def save_dict(d: dict, filepath: str, cls: json.JSONEncoder = None, sortkeys: bool = False) -> None:
 
 
33
  """Save a dictionary to a specified location.
34
 
35
  Args:
 
273
  """
274
  rubytoken = parse_furigana(s)
275
  return "".join(
276
+ [token.text if isinstance(token, RubyFrag) else token for token in rubytoken.groups]
 
 
 
277
  )
278
 
279
 
 
289
  """
290
  rubytoken = parse_furigana(s)
291
  return "".join(
292
+ [token.furi if isinstance(token, RubyFrag) else token for token in rubytoken.groups]
 
 
 
293
  )
294
 
295
 
 
306
  if code >= UNICODE_KANJI_START and code <= UNICODE_KANJI_END:
307
  return True
308
  return False