"""Listen on the microphone for a spoken wake word with an audio-classification pipeline."""

# Earlier Streamlit sentiment-analysis prototype, left disabled as in the original:
# from transformers import pipeline
# import streamlit as st
#
# pipe = pipeline('sentiment-analysis')
# text = st.text_area('Enter some text here!')
#
# if text:
#     out = pipe(text)
#     st.json(out)

from typing import Optional

import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_microphone_live

# `device` was previously used without ever being defined (NameError at import).
# Pick the first CUDA device when one is available, otherwise run on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

classifier = pipeline(
    "audio-classification",
    model="MIT/ast-finetuned-speech-commands-v2",
    device=device,
)


def launch_fn(
    wake_word: str = "marvin",
    prob_threshold: float = 0.5,
    chunk_length_s: float = 2.0,
    stream_chunk_s: float = 1,
    debug: bool = False,
) -> Optional[bool]:
    """Block until ``wake_word`` is detected in the live microphone stream.

    Args:
        wake_word: Class label to wait for. Must be one of the labels in
            ``classifier.model.config.label2id``.
        prob_threshold: Score the wake-word prediction must strictly exceed
            to count as a detection.
        chunk_length_s: Forwarded to ``ffmpeg_microphone_live`` as
            ``chunk_length_s``.
        stream_chunk_s: Forwarded to ``ffmpeg_microphone_live`` as
            ``stream_chunk_s``.
        debug: When true, print every prediction examined.

    Returns:
        ``True`` as soon as a prediction labelled ``wake_word`` scores above
        ``prob_threshold``; ``None`` if the prediction stream is exhausted
        without a detection.

    Raises:
        ValueError: If ``wake_word`` is not a label known to the classifier.
    """
    labels = classifier.model.config.label2id
    if wake_word not in labels:
        raise ValueError(
            f"Wake word {wake_word} not in set of valid class labels, pick a wake word in the set {labels.keys()}."
        )

    # Capture audio at the rate the classifier's feature extractor reports.
    sampling_rate = classifier.feature_extractor.sampling_rate

    mic = ffmpeg_microphone_live(
        sampling_rate=sampling_rate,
        chunk_length_s=chunk_length_s,
        stream_chunk_s=stream_chunk_s,
    )

    print("Listening for wake word...")
    for prediction in classifier(mic):
        # Only the first entry of each result is inspected.
        # NOTE(review): presumably the highest-scoring class — confirm against
        # the pipeline's output ordering.
        prediction = prediction[0]
        if debug:
            print(prediction)
        if prediction["label"] == wake_word and prediction["score"] > prob_threshold:
            return True

    # Stream ended without a detection (same result as the original's
    # implicit fall-through).
    return None


launch_fn(debug=True)