Enric Perpinyà Pitarch committed on
Commit
aa2cd6e
1 Parent(s): 5b6c323

Init commit

Browse files
Files changed (2) hide show
  1. app.py +42 -0
  2. requirements.txt +101 -0
app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sclib import SoundcloudAPI, Track
2
+ from transformers import pipeline
3
+ import gradio as gr
4
+ import numpy as np
5
+ import librosa
6
+ import os
7
+
8
# Speech-recognition pipeline, created once at import time and reused by every request.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="ID2223/whisper-small-swedish",
)

# Do not pass a SoundCloud client ID that did not come from this library.
# A client_id that the library itself discovered may be saved and reused.
api = SoundcloudAPI()
12
+
13
def get_audio(url: str):
    """Download a SoundCloud track and return its first 30 seconds of audio.

    Args:
        url: SoundCloud track URL, forwarded unchanged to ``url_to_file``.

    Returns:
        A ``(sr, samples)`` tuple where ``sr`` is the sample rate reported by
        ``librosa.load`` (requested as 16000 Hz) and ``samples`` is the decoded
        audio truncated to at most ``30 * sr`` samples.

    Raises:
        Whatever ``url_to_file`` or ``librosa.load`` raise; the downloaded
        file is removed in either case once it has been created.
    """
    filename = url_to_file(url)
    try:
        # duration=30 stops decoding after the first 30 seconds, so a long
        # track is not fully decoded and resampled just to be thrown away.
        audio_original, sr = librosa.load(filename, sr=16000, duration=30)
    finally:
        # Always delete the download, even when decoding fails.
        os.remove(filename)
    # Keep the explicit cap so the result never exceeds 30 seconds of samples.
    return (sr, audio_original[:30 * sr])
19
+
20
def url_to_file(url: str) -> str:
    """Download the SoundCloud track at *url* into a fresh temporary MP3 file.

    Each call writes to its own uniquely named file, so concurrent requests
    cannot overwrite or delete one another's download.

    Args:
        url: URL handed to ``api.resolve``.

    Returns:
        Path of the written ``.mp3`` file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: if the URL does not resolve to a ``Track``.
    """
    import tempfile

    track = api.resolve(url)
    # Explicit check instead of ``assert``: asserts are stripped under ``-O``.
    if not isinstance(track, Track):
        raise ValueError(
            f"URL did not resolve to a single Soundcloud track: {url!r}"
        )

    # NamedTemporaryFile opens in 'w+b' (read/write binary), matching the
    # read/write mode the original code passed to write_mp3_to.
    handle = tempfile.NamedTemporaryFile(
        prefix="to_transcribe_", suffix=".mp3", delete=False
    )
    filename = handle.name
    try:
        with handle as file:
            track.write_mp3_to(file)
    except BaseException:
        # Do not leave a partial download behind when the write fails.
        os.remove(filename)
        raise
    return filename
27
+
28
def transcribe(url: str):
    """Fetch a SoundCloud track and transcribe its audio.

    Args:
        url: SoundCloud track URL, forwarded to ``get_audio``.

    Returns:
        A ``((sr, audio_original), text)`` tuple: the un-normalized audio as
        returned by ``get_audio``, and the ``"text"`` field of the
        ``transcriber`` result.

    Raises:
        ValueError: if no audio samples were decoded.
    """
    sr, audio_original = get_audio(url)
    if audio_original.size == 0:
        # np.max on an empty array raises an opaque reduction error; fail
        # with a message that names the actual problem instead.
        raise ValueError(f"No audio could be decoded from {url!r}")

    # astype returns a copy, so the in-place scaling below leaves
    # audio_original (which is returned for playback) untouched.
    audio = audio_original.astype(np.float32)
    peak = np.max(np.abs(audio))
    if peak > 0:
        # Peak-normalize; skipped for an all-zero clip, where dividing by
        # zero would fill the model input with NaNs.
        audio /= peak

    text = transcriber({"sampling_rate": sr, "raw": audio})["text"]
    return ((sr, audio_original), text)
33
+
34
+
35
# Gradio UI: a single textbox for the track URL; the outputs are the audio
# clip and the transcription text returned by ``transcribe``.
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Textbox(
            label="Soundcloud URL",
            value="https://soundcloud.com/user-668319862/sa-gar-borsen-2024",
        ),
    ],
    outputs=["audio", "text"],
    title="Whisper Soundcloud",
)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.9.1
3
+ aiosignal==1.3.1
4
+ altair==5.2.0
5
+ annotated-types==0.6.0
6
+ anyio==3.7.1
7
+ attrs==23.1.0
8
+ audioread==3.0.1
9
+ beautifulsoup4==4.12.2
10
+ bs4==0.0.1
11
+ certifi==2023.11.17
12
+ cffi==1.16.0
13
+ charset-normalizer==3.3.2
14
+ click==8.1.7
15
+ colorama==0.4.6
16
+ contourpy==1.2.0
17
+ cycler==0.12.1
18
+ decorator==5.1.1
19
+ fastapi==0.104.1
20
+ ffmpy==0.3.1
21
+ filelock==3.13.1
22
+ fonttools==4.46.0
23
+ frozenlist==1.4.0
24
+ fsspec==2023.12.1
25
+ gradio==4.8.0
26
+ gradio_client==0.7.1
27
+ h11==0.14.0
28
+ httpcore==1.0.2
29
+ httpx==0.25.2
30
+ huggingface-hub==0.19.4
31
+ idna==3.6
32
+ importlib-resources==6.1.1
33
+ Jinja2==3.1.2
34
+ joblib==1.3.2
35
+ jsonschema==4.20.0
36
+ jsonschema-specifications==2023.11.2
37
+ kiwisolver==1.4.5
38
+ lazy_loader==0.3
39
+ librosa==0.10.1
40
+ llvmlite==0.41.1
41
+ markdown-it-py==3.0.0
42
+ MarkupSafe==2.1.3
43
+ matplotlib==3.8.2
44
+ mdurl==0.1.2
45
+ mpmath==1.3.0
46
+ msgpack==1.0.7
47
+ multidict==6.0.4
48
+ mutagen==1.47.0
49
+ networkx==3.2.1
50
+ numba==0.58.1
51
+ numpy==1.26.2
52
+ orjson==3.9.10
53
+ packaging==23.2
54
+ pandas==2.1.4
55
+ Pillow==10.1.0
56
+ platformdirs==4.1.0
57
+ pooch==1.8.0
58
+ pycparser==2.21
59
+ pydantic==2.5.2
60
+ pydantic_core==2.14.5
61
+ pydub==0.25.1
62
+ Pygments==2.17.2
63
+ pyparsing==3.1.1
64
+ python-dateutil==2.8.2
65
+ python-multipart==0.0.6
66
+ pytz==2023.3.post1
67
+ PyYAML==6.0.1
68
+ referencing==0.32.0
69
+ regex==2023.10.3
70
+ requests==2.31.0
71
+ rich==13.7.0
72
+ rpds-py==0.13.2
73
+ safetensors==0.4.1
74
+ scikit-learn==1.3.2
75
+ scipy==1.11.4
76
+ semantic-version==2.10.0
77
+ shellingham==1.5.4
78
+ six==1.16.0
79
+ sniffio==1.3.0
80
+ soundcloud-lib==0.6.1
81
+ soundfile==0.12.1
82
+ soupsieve==2.5
83
+ soxr==0.3.7
84
+ starlette==0.27.0
85
+ sympy==1.12
86
+ threadpoolctl==3.2.0
87
+ tokenizers==0.15.0
88
+ tomlkit==0.12.0
89
+ toolz==0.12.0
90
+ torch==2.1.1
91
+ torchaudio==2.1.1
92
+ torchvision==0.16.1
93
+ tqdm==4.66.1
94
+ transformers==4.35.2
95
+ typer==0.9.0
96
+ typing_extensions==4.9.0
97
+ tzdata==2023.3
98
+ urllib3==2.1.0
99
+ uvicorn==0.24.0.post1
100
+ websockets==11.0.3
101
+ yarl==1.9.4