"""Gradio demo that downloads a Soundcloud track and transcribes its start.

The app resolves a Soundcloud URL, downloads the track as MP3, decodes the
first ``MAX_SECONDS`` seconds at ``SAMPLE_RATE`` Hz and feeds them to a
Hugging Face automatic-speech-recognition pipeline.
"""

import contextlib
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
from sclib import SoundcloudAPI, Track
from transformers import pipeline

# Sample rate (Hz) the audio is resampled to before transcription.
SAMPLE_RATE = 16000
# Only this many seconds from the start of the track are transcribed.
# NOTE(review): presumably chosen to match Whisper's 30 s input window.
MAX_SECONDS = 30

# Loaded once at import time so every request reuses the same model.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="ID2223/whisper-small-swedish",
)

# do not pass a Soundcloud client ID that did not come from this library,
# but you can save a client_id that this lib found and reuse it
api = SoundcloudAPI()


def get_audio(url: str) -> tuple:
    """Download the track at *url* and return its first seconds as samples.

    Args:
        url: Soundcloud track URL.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is the array
        returned by ``librosa.load`` truncated to ``MAX_SECONDS`` seconds.

    Raises:
        ValueError: If *url* does not resolve to a single track.
    """
    filename = url_to_file(url)
    try:
        audio_original, sr = librosa.load(filename, sr=SAMPLE_RATE)
    finally:
        # Remove the download even when decoding fails, so failed requests
        # do not leave files behind.
        os.remove(filename)
    return (sr, audio_original[:MAX_SECONDS * sr])


def url_to_file(url: str) -> str:
    """Download the Soundcloud track at *url* to a temporary MP3 file.

    Each call writes to its own uniquely named temporary file, so concurrent
    requests cannot overwrite or delete each other's download. The caller
    owns the returned file and is responsible for deleting it.

    Args:
        url: Soundcloud track URL.

    Returns:
        Path of the MP3 file that was written.

    Raises:
        ValueError: If *url* does not resolve to a ``Track``.
    """
    track = api.resolve(url)
    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    if not isinstance(track, Track):
        raise ValueError(
            f"URL did not resolve to a single Soundcloud track: {url!r}"
        )

    # Default mode is "w+b", the same read/write binary access the original
    # "wb+" open gave to write_mp3_to.
    tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    try:
        with tmp as file:
            track.write_mp3_to(file)
    except BaseException:
        # Do not leave a partial download behind; the original error is
        # what the caller needs to see, so cleanup failures are ignored.
        with contextlib.suppress(OSError):
            os.remove(tmp.name)
        raise
    return tmp.name


def transcribe(url: str):
    """Transcribe the beginning of the Soundcloud track at *url*.

    Args:
        url: Soundcloud track URL.

    Returns:
        ``((sample_rate, samples), text)``: the un-normalised audio, in the
        tuple form passed to the Gradio "audio" output, and the transcribed
        text.

    Raises:
        ValueError: If the URL is not a single track or the decoded audio
            contains no samples.
    """
    sr, audio_original = get_audio(url)
    if audio_original.size == 0:
        raise ValueError("The downloaded track contains no audio samples.")

    # astype() copies, so the in-place scaling below leaves the audio that
    # is returned for playback untouched.
    audio = audio_original.astype(np.float32)
    peak = float(np.max(np.abs(audio)))
    # Peak-normalise to [-1, 1]. A completely silent clip has peak 0;
    # dividing by it would fill the buffer with NaNs, so it is left as-is.
    if peak > 0:
        audio /= peak

    text = transcriber({"sampling_rate": sr, "raw": audio})["text"]
    return ((sr, audio_original), text)


demo = gr.Interface(
    transcribe,
    inputs=[
        gr.Textbox(
            label="Soundcloud URL",
            value="https://soundcloud.com/user-668319862/sa-gar-borsen-2024",
        )
    ],
    outputs=["audio", "text"],
    title="Whisper Soundcloud",
)

# Kept at module level so running or importing this file starts the app
# exactly as it did before.
demo.launch()