unijoh committed on
Commit
ea38473
1 Parent(s): 9aa67b4

Update lid.py

Browse files
Files changed (1) hide show
  1. lid.py +5 -5
lid.py CHANGED
@@ -16,15 +16,13 @@ with open(f"data/lid/all_langs.tsv") as f:
16
  iso, name = line.split(" ", 1)
17
  LID_LANGUAGES[iso] = name.strip()
18
 
19
- def identify_language(audio_source=None, microphone=None, file_upload=None):
20
- audio_fp = file_upload if "upload" in str(audio_source or "").lower() else microphone
21
- if audio_fp is None:
22
  return "ERROR: You have to either use the microphone or upload an audio file"
23
 
24
- audio_samples = librosa.load(audio_fp, sr=LID_SAMPLING_RATE, mono=True)[0]
25
  inputs = processor(audio_samples, sampling_rate=LID_SAMPLING_RATE, return_tensors="pt")
26
 
27
- # Set device
28
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
29
  model.to(device)
30
  inputs = inputs.to(device)
@@ -36,6 +34,8 @@ def identify_language(audio_source=None, microphone=None, file_upload=None):
36
  scores, indices = torch.topk(logit_lsm, 5, dim=-1)
37
  scores, indices = torch.exp(scores).to("cpu").tolist(), indices.to("cpu").tolist()
38
  iso2score = {model.config.id2label[int(i)]: s for s, i in zip(scores, indices)}
 
39
  if max(iso2score.values()) < LID_THRESHOLD:
40
  return "Low confidence in the language identification predictions. Output is not shown!"
 
41
  return {LID_LANGUAGES[iso]: score for iso, score in iso2score.items()}
 
16
  iso, name = line.split(" ", 1)
17
  LID_LANGUAGES[iso] = name.strip()
18
 
19
+ def identify_language(audio=None):
20
+ if audio is None:
 
21
  return "ERROR: You have to either use the microphone or upload an audio file"
22
 
23
+ audio_samples = librosa.load(audio, sr=LID_SAMPLING_RATE, mono=True)[0]
24
  inputs = processor(audio_samples, sampling_rate=LID_SAMPLING_RATE, return_tensors="pt")
25
 
 
26
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27
  model.to(device)
28
  inputs = inputs.to(device)
 
34
  scores, indices = torch.topk(logit_lsm, 5, dim=-1)
35
  scores, indices = torch.exp(scores).to("cpu").tolist(), indices.to("cpu").tolist()
36
  iso2score = {model.config.id2label[int(i)]: s for s, i in zip(scores, indices)}
37
+
38
  if max(iso2score.values()) < LID_THRESHOLD:
39
  return "Low confidence in the language identification predictions. Output is not shown!"
40
+
41
  return {LID_LANGUAGES[iso]: score for iso, score in iso2score.items()}