Merge pull request #1 from VFluger/fix_dependencies

Files changed:
- README.md +5 -3
- TODO.md +0 -20
- app.py +4 -3
- environment.yml +3 -1
- requirements.txt +1 -0
README.md (CHANGED)

```diff
@@ -16,9 +16,11 @@ Classifies the dance style that best accompanies a provided song. Users record o
 
 ## Getting Started
 
-1.
-2.
-3.
+1. Clone this repo: `git clone https://github.com/Waidhoferj/dance-classifier`
+2. Download Git LFS files: `git lfs pull`
+3. Download dependencies: `conda env create --file environment.yml`
+4. Open the environment: `conda activate dancer-classifier`
+5. Start the demo application: `python app.py`
 
 ## Training
 
```
TODO.md (DELETED)

```diff
@@ -1,20 +0,0 @@
-- ✅ Ensure app.py audio input sounds like training data
-- ✅ Use a huggingface transformer with the dataset
-- Verify that the training spectrogram matches the predict spectrogram
-- Count number of example misses in dataset loading
-- Verify windowing and jitter params in Song Dataset
-- Create an attention-based network
-- ✅ Increase parameter count in network
-- Verify that labels really match what is on the music4dance site
-- ✅ Read the Medium series about audio DL
-- double check \_rectify_duration
-- ✅ Filter out songs that have only one vote
-- ✅ Download songs from [Best Ballroom](https://www.youtube.com/channel/UC0bYSnzAFMwPiEjmVsrvmRg)
-
-- ✅ fix nan values
-- Try higher mels (224) and more ffts (2048)
-- Verify random sample of dataset outputs by hand.
-
-- Train with non music data and add a non music category
-- Add back class weights
-- Add back multi label classification
```
app.py (CHANGED)

```diff
@@ -85,9 +85,10 @@ class DancePredictor:
         if waveform.ndim == 1:
             waveform = np.stack([waveform, waveform]).T
         waveform = torch.from_numpy(waveform.T)
-
-
-
+        # Convert to proper format instead of using deprecated apply_codec
+        # The apply_codec was mainly used for format conversion, but since we're already
+        # working with tensor data, we can skip this step
+        waveform = waveform.float()
 
         waveform = torchaudio.functional.resample(
             waveform, sample_rate, self.resample_frequency
```
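The three removed lines were not captured above, but per the added comment they held the deprecated `apply_codec` call. For context, here is a minimal runnable sketch of the resulting preprocessing path; the sample rates and the 3-second input are assumptions, not values taken from `DancePredictor`:

```python
import numpy as np
import torch
import torchaudio

# Assumed values; the real ones come from DancePredictor's configuration.
sample_rate = 44_100
resample_frequency = 16_000

# Stand-in for a recorded clip: 3 seconds of mono float64 noise.
waveform = np.random.randn(sample_rate * 3)

if waveform.ndim == 1:
    waveform = np.stack([waveform, waveform]).T  # duplicate mono into two channels
waveform = torch.from_numpy(waveform.T)          # -> (channels, samples) tensor

# Replaces the deprecated apply_codec call: cast to float32, since no codec
# round-trip is needed when the data is already an in-memory tensor.
waveform = waveform.float()

waveform = torchaudio.functional.resample(waveform, sample_rate, resample_frequency)
print(waveform.shape)  # torch.Size([2, 48000])
```

The `.float()` cast also keeps the pipeline safe if the recording arrives as an integer PCM array, since the resampling kernel operates on floating-point tensors.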
environment.yml (CHANGED)

```diff
@@ -9,7 +9,9 @@ dependencies:
   - pytorch
   - torchaudio
   - librosa
-  - numpy
+  - numpy<2
+  - sounddevice
+  - gradio
   - pandas
   - bs4
   - requests
```
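The `numpy<2` pin likely guards against NumPy 2.0's binary-compatibility break, which can crash extensions compiled against the 1.x ABI (e.g. numba, which librosa depends on). A quick post-install sanity check, as a hypothetical step outside the repo:

```python
# Hypothetical check: confirm the solver actually pinned NumPy below 2.0.
import numpy as np

major = int(np.__version__.split(".")[0])
assert major < 2, f"expected numpy<2, environment resolved {np.__version__}"
print(f"NumPy {np.__version__} satisfies the numpy<2 pin")
```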
requirements.txt (CHANGED)

```diff
@@ -1,3 +1,4 @@
+sounddevice
 torch
 torchaudio
 pytorch-lightning
```
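`sounddevice` is the newly listed runtime dependency. A minimal sketch of how microphone capture with it typically looks; the duration and sample rate are assumptions, not values taken from `app.py`:

```python
import numpy as np
import sounddevice as sd

SAMPLE_RATE = 44_100  # assumed rate; app.py may use a different one
DURATION = 3.0        # seconds of audio to capture

# Record mono float32 audio from the default input device;
# sd.rec returns an ndarray of shape (frames, channels).
recording = sd.rec(int(DURATION * SAMPLE_RATE), samplerate=SAMPLE_RATE,
                   channels=1, dtype="float32")
sd.wait()  # block until the recording finishes
waveform = np.squeeze(recording)  # (frames,) — hits the ndim == 1 branch above
```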