ThaiSpeech-to-Text-v1.0 / requirements.txt
Aekanun's picture
rev req
4fceee3
raw
history blame
434 Bytes
# Dependency manifest. Entries written with `>=` declare only a minimum version;
# entries with no specifier accept any version.
# Core dependencies
torch>=2.0.0
transformers>=4.34.0
gradio>=4.13.0
# Audio processing and model dependencies
# (`datasets[audio]` requests the `audio` extra of the `datasets` package.)
datasets[audio]>=2.14.0
evaluate>=0.4.0
jiwer>=3.0.0
# Optimization and acceleration
# NOTE(review): xformers releases are presumably built against specific torch
# versions -- confirm the `torch>=2.0.0` lower bound above is still compatible.
bitsandbytes>=0.41.1
accelerate>=0.24.1
xformers>=0.0.27
# Hugging Face integration
# NOTE(review): `peft` and `spaces` carry no version bound, unlike the other
# entries in this file -- consider adding one for reproducible installs.
huggingface_hub>=0.19.3
peft
spaces
# Image processing
# NOTE(review): the use of Pillow is not visible from this file -- verify it is needed.
Pillow>=9.0.0
# Additional packages required for the Whisper model
# NOTE(review): none of these three carry a version bound. `ffmpeg-python` is
# presumably only a Python wrapper, so the ffmpeg binary likely has to be
# installed separately -- confirm against the deployment environment.
librosa
soundfile
ffmpeg-python