Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,11 @@ import os
|
|
7 |
|
8 |
# Assuming other necessary imports and setup are already done
|
9 |
|
|
|
|
|
|
|
|
|
|
|
10 |
# Helper function to format and group word timestamps
|
11 |
def format_and_group_timestamps(chunks, interval=5.0):
|
12 |
grouped = {}
|
@@ -35,11 +40,11 @@ def transcribe_audio(input_file, language, chunk_length_s=10, stride_length_s=(4
|
|
35 |
if target_lang_code == "eng":
|
36 |
model_id = "facebook/mms-1b-all"
|
37 |
else:
|
38 |
-
model_id = "
|
39 |
|
40 |
-
auth_token = os.environ.get("HF_TOKEN")
|
41 |
pipe = pipeline(model=model_id, device=device, token=auth_token)
|
42 |
-
|
|
|
43 |
|
44 |
output = pipe(input_file, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
|
45 |
formatted_output = format_and_group_timestamps(output['chunks'])
|
|
|
7 |
|
8 |
# Assuming other necessary imports and setup are already done
|
9 |
|
10 |
+
auth_token = os.environ.get("HF_TOKEN")
|
11 |
+
target_lang_options = {"English": "eng", "Luganda": "lug", "Acholi": "ach", "Runyankole": "nyn", "Lugbara": "lgg"}
|
12 |
+
languages = list(target_lang_options.keys())
|
13 |
+
|
14 |
+
|
15 |
# Helper function to format and group word timestamps
|
16 |
def format_and_group_timestamps(chunks, interval=5.0):
|
17 |
grouped = {}
|
|
|
40 |
if target_lang_code == "eng":
|
41 |
model_id = "facebook/mms-1b-all"
|
42 |
else:
|
43 |
+
model_id = "Sunbird/sunbird-mms"
|
44 |
|
|
|
45 |
pipe = pipeline(model=model_id, device=device, token=auth_token)
|
46 |
+
pipe.tokenizer.set_target_lang(target_lang_code)
|
47 |
+
pipe.model.load_adapter(target_lang_code)
|
48 |
|
49 |
output = pipe(input_file, chunk_length_s=chunk_length_s, stride_length_s=stride_length_s, return_timestamps=return_timestamps)
|
50 |
formatted_output = format_and_group_timestamps(output['chunks'])
|