TifinLab committed on
Commit
bff529d
1 Parent(s): 423869b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -7
app.py CHANGED
@@ -7,13 +7,14 @@ import json
7
  with open('ISO_codes.json', 'r') as file:
8
  iso_codes = json.load(file)
9
 
10
- languages = list(iso_codes.keys())
11
 
12
- model_id = "facebook/mms-1b-all"
 
13
  processor = AutoProcessor.from_pretrained(model_id)
14
  model = Wav2Vec2ForCTC.from_pretrained(model_id)
15
 
16
- def transcribe(audio_file_mic=None, audio_file_upload=None, language="English (eng)"):
 
17
  if audio_file_mic:
18
  audio_file = audio_file_mic
19
  elif audio_file_upload:
@@ -26,10 +27,10 @@ def transcribe(audio_file_mic=None, audio_file_upload=None, language="English (e
26
  if sample_rate != 16000:
27
  speech = librosa.resample(speech, orig_sr=sample_rate, target_sr=16000)
28
 
29
- # Keep the same model in memory and simply switch out the language adapters by calling load_adapter() for the model and set_target_lang() for the tokenizer
30
- language_code = iso_codes[language]
31
- processor.tokenizer.set_target_lang(language_code)
32
- model.load_adapter(language_code)
33
 
34
  inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")
35
 
 
7
  with open('ISO_codes.json', 'r') as file:
8
  iso_codes = json.load(file)
9
 
 
10
 
11
+ model_id = "TifinLab/mms-1b-berber"
12
+
13
  processor = AutoProcessor.from_pretrained(model_id)
14
  model = Wav2Vec2ForCTC.from_pretrained(model_id)
15
 
16
+
17
+ def transcribe(audio_file_mic=None, audio_file_upload=None):
18
  if audio_file_mic:
19
  audio_file = audio_file_mic
20
  elif audio_file_upload:
 
27
  if sample_rate != 16000:
28
  speech = librosa.resample(speech, orig_sr=sample_rate, target_sr=16000)
29
 
30
+
31
+ processor.tokenizer.set_target_lang("ber")
32
+ model.load_adapter("ber")
33
+
34
 
35
  inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")
36