"""Classify a sample audio clip with an OpenVINO audio-classification model.

Loads the model and its feature extractor by Hugging Face model id, wraps
them in a ``transformers`` audio-classification pipeline, runs the pipeline
on one remote audio file, and prints the result.
"""

from optimum.intel.openvino import OVModelForAudioClassification
from transformers import AutoFeatureExtractor, pipeline

# Model repository id; the same id is used for both the feature extractor
# and the model so the preprocessing matches the weights.
model_id = "helenai/MIT-ast-finetuned-speech-commands-v2-ov"

# Sample clip passed to the pipeline as a URL.
# NOTE(review): running this needs network access to fetch the model and
# the clip — decoding the mp3 presumably also needs an audio backend such
# as ffmpeg; confirm in the target environment.
audio_url = "https://datasets-server.huggingface.co/assets/speech_commands/--/v0.01/test/38/audio/audio.mp3"

feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
model = OVModelForAudioClassification.from_pretrained(model_id)

# The OpenVINO model object is handed to the standard transformers pipeline
# in place of a regular transformers model.
pipe = pipeline(
    "audio-classification",
    model=model,
    feature_extractor=feature_extractor,
)

result = pipe(audio_url)
print(result)