Update app.py
Browse files
app.py
CHANGED
@@ -17,11 +17,11 @@ YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
|
|
17 |
|
18 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
19 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
20 |
-
model_id = "Kushtrim/whisper-large-v3-turbo-shqip
|
21 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype, use_safetensors=True).to(device)
|
22 |
processor = AutoProcessor.from_pretrained(model_id)
|
23 |
pipe = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor,
|
24 |
-
max_new_tokens=256, chunk_length_s=
|
25 |
token=os.environ["HF"])
|
26 |
|
27 |
@spaces.GPU
|
@@ -31,7 +31,7 @@ def transcribe(inputs, task):
|
|
31 |
"No audio file submitted! Please upload or record an audio file before submitting your request.")
|
32 |
|
33 |
text = pipe(inputs, generate_kwargs={
|
34 |
-
"task": task, 'language': 'sq'}, return_timestamps=True)["text"]
|
35 |
return text
|
36 |
|
37 |
|
@@ -109,7 +109,7 @@ file_transcribe = gr.Interface(
|
|
109 |
title="Whisper Large V3 Turbo Shqip: Transcribe Audio",
|
110 |
description=("This fine-tuned Whisper model provides reliable transcription for Albanian audio, whether from a microphone or an uploaded file. "
|
111 |
"Key details about this project:"
|
112 |
-
"\n\n- Fine-tuned on
|
113 |
"\n- This is the third training run, reflecting continuous improvements as the dataset evolves. "
|
114 |
f"\n- Hosted on Hugging Face. Repository: [{model_id}](https://huggingface.co/{model_id}). "
|
115 |
),
|
@@ -125,7 +125,7 @@ mf_transcribe = gr.Interface(
|
|
125 |
title="Whisper Large V3 Turbo Shqip: Transcribe Audio",
|
126 |
description=("This fine-tuned Whisper model provides reliable transcription for Albanian audio, whether from a microphone or an uploaded file. "
|
127 |
"Key details about this project:"
|
128 |
-
"\n\n- Fine-tuned on
|
129 |
"\n- This is the third training run, reflecting continuous improvements as the dataset evolves. "
|
130 |
f"\n- Hosted on Hugging Face. Repository: [{model_id}](https://huggingface.co/{model_id}). "
|
131 |
),
|
@@ -142,7 +142,7 @@ yt_transcribe = gr.Interface(
|
|
142 |
title="Whisper Large V3 Turbo Shqip: Transcribe Audio",
|
143 |
description=("This fine-tuned Whisper model provides reliable transcription for Albanian audio, whether from a microphone or an uploaded file. "
|
144 |
"Key details about this project:"
|
145 |
-
"\n\n- Fine-tuned on
|
146 |
"\n- This is the third training run, reflecting continuous improvements as the dataset evolves. "
|
147 |
f"\n- Hosted on Hugging Face. Repository: [{model_id}](https://huggingface.co/{model_id}). "
|
148 |
),
|
|
|
17 |
|
18 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
19 |
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
|
20 |
+
model_id = "Kushtrim/whisper-large-v3-turbo-shqip"
|
21 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id, torch_dtype=torch_dtype, use_safetensors=True).to(device)
|
22 |
processor = AutoProcessor.from_pretrained(model_id)
|
23 |
pipe = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor,
|
24 |
+
max_new_tokens=256, chunk_length_s=28, batch_size=16, torch_dtype=torch_dtype, device=device,
|
25 |
token=os.environ["HF"])
|
26 |
|
27 |
@spaces.GPU
|
|
|
31 |
"No audio file submitted! Please upload or record an audio file before submitting your request.")
|
32 |
|
33 |
text = pipe(inputs, generate_kwargs={
|
34 |
+
'num_beams': 5, "task": task, 'language': 'sq'}, return_timestamps=True)["text"]
|
35 |
return text
|
36 |
|
37 |
|
|
|
109 |
title="Whisper Large V3 Turbo Shqip: Transcribe Audio",
|
110 |
description=("This fine-tuned Whisper model provides reliable transcription for Albanian audio, whether from a microphone or an uploaded file. "
|
111 |
"Key details about this project:"
|
112 |
+
"\n\n- Fine-tuned on 200 hours of carefully curated Albanian audio data. "
|
113 |
"\n- This is the third training run, reflecting continuous improvements as the dataset evolves. "
|
114 |
f"\n- Hosted on Hugging Face. Repository: [{model_id}](https://huggingface.co/{model_id}). "
|
115 |
),
|
|
|
125 |
title="Whisper Large V3 Turbo Shqip: Transcribe Audio",
|
126 |
description=("This fine-tuned Whisper model provides reliable transcription for Albanian audio, whether from a microphone or an uploaded file. "
|
127 |
"Key details about this project:"
|
128 |
+
"\n\n- Fine-tuned on 200 hours of carefully curated Albanian audio data. "
|
129 |
"\n- This is the third training run, reflecting continuous improvements as the dataset evolves. "
|
130 |
f"\n- Hosted on Hugging Face. Repository: [{model_id}](https://huggingface.co/{model_id}). "
|
131 |
),
|
|
|
142 |
title="Whisper Large V3 Turbo Shqip: Transcribe Audio",
|
143 |
description=("This fine-tuned Whisper model provides reliable transcription for Albanian audio, whether from a microphone or an uploaded file. "
|
144 |
"Key details about this project:"
|
145 |
+
"\n\n- Fine-tuned on 200 hours of carefully curated Albanian audio data. "
|
146 |
"\n- This is the third training run, reflecting continuous improvements as the dataset evolves. "
|
147 |
f"\n- Hosted on Hugging Face. Repository: [{model_id}](https://huggingface.co/{model_id}). "
|
148 |
),
|