# Page header captured together with this file: "Spaces: Runtime error" (shown twice).
import gradio as gr | |
import torch | |
import numpy as np | |
from transformers import pipeline | |
from utils.thai_word import ThaiWord | |
from pythainlp.tokenize import word_tokenize | |
from collections import deque | |
from copy import deepcopy | |
# Model identifier handed to the transformers ASR pipeline below.
MODEL_NAME = "biodatlab/whisper-th-medium-combined"

# Use CUDA device index 0 when a GPU is visible to torch, else run on the CPU.
DEVICE = "cpu" if not torch.cuda.is_available() else 0

# Shared ThaiWord helper; its ``pretty`` method formats the tokenized output.
thw = ThaiWord()
# Long recordings are processed in 30-second chunks. No ``stride_length_s`` is
# passed here: it is a (left, right) tuple, a single number applies to both
# sides, and by default each side gets 1/6th of ``chunk_length_s``.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=DEVICE,
)
def transcribe(audio):
    """Transcribe a Gradio audio recording into Thai text.

    Args:
        audio: ``(sampling_rate, samples)`` tuple as delivered by the
            ``gr.Audio`` input, or ``None`` when nothing was recorded.

    Returns:
        A string with the prettified and the original transcription; a
        "please try speaking again" prompt when there is no usable audio or
        the model produced no text; or an error message (including the
        exception text) when transcription fails.
    """
    retry_message = 'โปรดลองพูดอีกครั้ง'

    # Nothing was recorded: ask the user to retry instead of surfacing an
    # unpacking error.
    if audio is None:
        return retry_message

    try:
        sr, y = audio
        y = np.asarray(y).astype(np.float32)
        if y.size == 0:
            return retry_message

        # The ASR pipeline expects single-channel input; multi-channel
        # recordings are assumed to be shaped (samples, channels) and are
        # averaged down to mono.
        if y.ndim > 1:
            y = y.mean(axis=1)

        # Peak-normalize, but skip the division for an all-zero (silent)
        # signal so the model is never fed NaNs.
        peak = np.max(np.abs(y))
        if peak > 0:
            y /= peak

        text = transcriber(
            {"sampling_rate": sr, "raw": y},
            generate_kwargs={"language": "<|th|>", "task": "transcribe"},
            return_timestamps=False,
            batch_size=16,
        )["text"]

        # An empty or whitespace-only transcription is not worth showing.
        if not text or not text.strip():
            return retry_message

        # Tokenize so the ThaiWord helper can build the "pretty" rendering.
        tokens = word_tokenize(text, engine="attacut", join_broken_num=True)
        print(tokens)
        # Hand ``pretty`` its own deque copy so ``tokens`` is left untouched.
        return f'pretty: {thw.pretty(deque(deepcopy(tokens)))}\n\n original: {text}'
    except Exception as e:
        # UI boundary: report the failure to the user rather than crashing.
        return f'ไม่สามารถแปลงข้อความเสียงได้ โปรดลองอีกครั้ง\n\nพบข้อผิดพลาด: {str(e)}'
# Minimal UI: microphone recording in, transcription text out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"]),
    outputs="text",
)

# Start the Gradio server as soon as the script runs.
demo.launch()