github-actions[bot] committed
Commit 9df2e22 (0 parents)

Sync to HuggingFace Spaces

Files changed (15)
  1. .gitattributes +3 -0
  2. .github/workflows/sync.yaml +29 -0
  3. .gitignore +163 -0
  4. Dockerfile +23 -0
  5. LICENSE +21 -0
  6. README.md +13 -0
  7. app.py +122 -0
  8. asset/don.wav +3 -0
  9. asset/ka.wav +3 -0
  10. model.py +72 -0
  11. odcnn.py +87 -0
  12. preprocess.py +163 -0
  13. requirements.txt +11 -0
  14. synthesize.py +148 -0
  15. youtube.py +43 -0
.gitattributes ADDED
@@ -0,0 +1,3 @@
+ # Auto detect text files and perform LF normalization
+ * text=auto
+ *.wav filter=lfs diff=lfs merge=lfs -text
.github/workflows/sync.yaml ADDED
@@ -0,0 +1,29 @@
+ name: Sync to Hugging Face Spaces
+
+ on:
+   push:
+     branches:
+       - main
+   workflow_dispatch:
+
+ jobs:
+   sync:
+     name: Sync
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout Repository
+         uses: actions/checkout@v4
+         with:
+           lfs: true
+
+       - name: Sync to Hugging Face Spaces
+         uses: JacobLinCool/huggingface-sync@v1
+         with:
+           github: ${{ secrets.GITHUB_TOKEN }}
+           user: jacoblincool # Hugging Face username or organization name
+           space: tja-generator # Hugging Face space name
+           token: ${{ secrets.HF_TOKEN }} # Hugging Face token
+           title: "TJA Generator"
+           sdk: "gradio"
+           models: "[ JacobLinCool/odcnn-320k-100 ]"
.gitignore ADDED
@@ -0,0 +1,163 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ models/
+ flagged/
Dockerfile ADDED
@@ -0,0 +1,23 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.11
+
+ RUN useradd -m -u 1000 user
+
+ WORKDIR /app
+
+ RUN apt update && apt install -y curl libsndfile1 ffmpeg
+
+ COPY --chown=user ./requirements.txt requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+
+ RUN mkdir -p /app/models && \
+     curl -L https://huggingface.co/JacobLinCool/odcnn-320k-100/resolve/main/don_model.pth -o /app/models/don_model.pth && \
+     curl -L https://huggingface.co/JacobLinCool/odcnn-320k-100/resolve/main/ka_model.pth -o /app/models/ka_model.pth && \
+     chown -R user /app/models
+
+ CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 JacobLinCool
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: TJA Generator
+ emoji: 🦄
+ colorFrom: indigo
+ colorTo: pink
+ sdk: gradio
+ short_description: HuggingFace Space for https://github.com/seiichiinoue/odcnn
+ models: [ JacobLinCool/odcnn-320k-100 ]
+ ---
+
+ # TJA Generator
+
+ Use [ODCNN](https://github.com/seiichiinoue/odcnn) to generate TJA from music.
app.py ADDED
@@ -0,0 +1,122 @@
+ import os
+ from tempfile import NamedTemporaryFile
+ from typing import Tuple
+ from zipfile import ZipFile
+
+ import gradio as gr
+ from accelerate import Accelerator
+ from huggingface_hub import hf_hub_download
+
+ from odcnn import ODCNN
+ from youtube import youtube
+
+ accelerator = Accelerator()
+ device = accelerator.device
+
+ DON_MODEL = hf_hub_download(
+     repo_id="JacobLinCool/odcnn-320k-100", filename="don_model.pth"
+ )
+ KA_MODEL = hf_hub_download(
+     repo_id="JacobLinCool/odcnn-320k-100", filename="ka_model.pth"
+ )
+
+
+ models = {"odcnn-320k-100": ODCNN(DON_MODEL, KA_MODEL, device)}
+
+
+ def run(file: str, model: str, delta: float, trim: bool) -> Tuple[str, str, str]:
+     preview, tja = models[model].run(file, delta, trim)
+
+     with NamedTemporaryFile(
+         "w", suffix=".tja", delete=True
+     ) as tjafile, NamedTemporaryFile("w", suffix=".zip", delete=False) as zfile:
+         tjafile.write(tja)
+
+         with ZipFile(zfile.name, "w") as z:
+             z.write(file, os.path.basename(file))
+             z.write(tjafile.name, f"{os.path.basename(file)}-{model}.tja")
+
+     return preview, tja, zfile.name
+
+
+ def from_youtube(
+     url: str, model: str, delta: float, trim: bool
+ ) -> Tuple[str, str, str, str]:
+     audio = youtube(url)
+     return audio, *run(audio, model, delta, trim)
+
+
+ with gr.Blocks() as app:
+     with open(os.path.join(os.path.dirname(__file__), "README.md"), "r") as f:
+         README = f.read()
+         # remove yaml front matter
+         blocks = README.split("---")
+         if len(blocks) > 1:
+             README = "---".join(blocks[2:])
+
+     gr.Markdown(README)
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown("## Upload an audio file")
+             audio = gr.Audio(label="Upload an audio file", type="filepath")
+         with gr.Column():
+             gr.Markdown(
+                 "## or use a YouTube URL\n\nTry something on [The First Take](https://www.youtube.com/@The_FirstTake)?"
+             )
+             yt = gr.Textbox(
+                 label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."
+             )
+             yt_btn = gr.Button("Use this YouTube URL")
+
+     with gr.Row():
+         model = gr.Radio(
+             label="Select a model",
+             choices=[s for s in models.keys()],
+             value="odcnn-320k-100",
+         )
+         btn = gr.Button("Infer", variant="primary")
+
+     with gr.Row():
+         with gr.Column():
+             synthesized = gr.Audio(
+                 label="Synthesized Audio",
+                 format="mp3",
+                 type="filepath",
+                 interactive=False,
+             )
+         with gr.Column():
+             tja = gr.Text(label="TJA", interactive=False)
+
+     with gr.Row():
+         zip = gr.File(label="Download ZIP", type="filepath")
+
+     with gr.Accordion("Advanced Options", open=False):
+         delta = gr.Slider(
+             label="Delta",
+             value=0.02,
+             minimum=0.01,
+             maximum=0.5,
+             step=0.01,
+             info="Threshold for note detection (Ura)",
+         )
+         trim = gr.Checkbox(
+             label="Trim silence",
+             value=True,
+             info="Trim silence from the start and end of the audio",
+         )
+
+     btn.click(
+         fn=run,
+         inputs=[audio, model, delta, trim],
+         outputs=[synthesized, tja, zip],
+         api_name="run",
+     )
+
+     yt_btn.click(
+         fn=from_youtube,
+         inputs=[yt, model, delta, trim],
+         outputs=[audio, synthesized, tja, zip],
+     )
+
+ app.queue().launch(server_name="0.0.0.0")
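Editor's note: because `btn.click` registers `api_name="run"`, the Space can also be driven programmatically. Below is a minimal sketch, not part of the commit, assuming the Space ends up at `jacoblincool/tja-generator` (per `sync.yaml`), a recent `gradio_client`, and a local placeholder file `song.mp3`.

```python
# Hypothetical remote call to the deployed Space; names of the Space and the
# local audio file are assumptions, the argument order mirrors run() in app.py.
from gradio_client import Client, handle_file

client = Client("jacoblincool/tja-generator")

synthesized_path, tja_text, zip_path = client.predict(
    handle_file("song.mp3"),  # audio file to chart (placeholder path)
    "odcnn-320k-100",         # model key defined in app.py
    0.02,                     # delta: detection threshold for the Ura course
    True,                     # trim leading/trailing silence
    api_name="/run",
)
print(tja_text[:200])
```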
asset/don.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:268b9e66095a3890652f6791529edb37630ad1a0c2ad550cddb4cb105895884d
+ size 171118
asset/ka.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89cb09280bb84a039b32e33e60a0961811dc1419d6d9ca18771f545cb7414939
+ size 52652
model.py ADDED
@@ -0,0 +1,72 @@
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from tqdm import tqdm
+
+ from preprocess import *
+
+
+ class convNet(nn.Module):
+     """
+     Implements the convolutional network from the paper
+     "Improved musical onset detection with Convolutional Neural Networks".
+     src: https://ieeexplore.ieee.org/document/6854953
+     """
+
+     def __init__(self):
+         super(convNet, self).__init__()
+         # model
+         self.conv1 = nn.Conv2d(3, 10, (3, 7))
+         self.conv2 = nn.Conv2d(10, 20, 3)
+         self.fc1 = nn.Linear(1120, 256)
+         self.fc2 = nn.Linear(256, 120)
+         self.fc3 = nn.Linear(120, 1)
+
+     def forward(self, x, istraining=False, minibatch=1):
+         x = F.max_pool2d(F.relu(self.conv1(x)), (3, 1))
+         x = F.max_pool2d(F.relu(self.conv2(x)), (3, 1))
+         x = F.dropout(x.view(minibatch, -1), training=istraining)
+         x = F.dropout(F.relu(self.fc1(x)), training=istraining)
+         x = F.dropout(F.relu(self.fc2(x)), training=istraining)
+
+         return F.sigmoid(self.fc3(x))
+
+     def infer_data_builder(self, feats, soundlen=15, minibatch=1):
+         x = []
+
+         for i in range(feats.shape[2] - soundlen):
+             x.append(feats[:, :, i : i + soundlen])
+
+             if (i + 1) % minibatch == 0:
+                 yield (torch.from_numpy(np.array(x)).float())
+                 x = []
+
+         if len(x) != 0:
+             yield (torch.from_numpy(np.array(x)).float())
+
+     def infer(self, feats, device, minibatch=1):
+         with torch.no_grad():
+             inference = None
+             for x in tqdm(
+                 self.infer_data_builder(feats, minibatch=minibatch),
+                 total=feats.shape[2] // minibatch,
+             ):
+                 output = self(x.to(device), minibatch=x.shape[0])
+                 if inference is not None:
+                     inference = np.concatenate(
+                         (inference, output.cpu().numpy().reshape(-1))
+                     )
+                 else:
+                     inference = output.cpu().numpy().reshape(-1)
+
+             return np.array(inference).reshape(-1)
+
+
+ if __name__ == "__main__":
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     net = convNet()
+     net = net.to(device)
+
+     print(net)
+     print("parameters: ", sum(p.numel() for p in net.parameters()))
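Editor's note: the network expects features shaped like the output of `fft_and_melscale` in `preprocess.py`: 3 FFT-size channels, 80 mel bands, and one column per 512-sample hop, scanned with a 15-frame window. A small illustrative sketch (random features, untrained weights; not part of the commit):

```python
# Illustration only: exercise convNet.infer on fake mel features.
import numpy as np
import torch

from model import convNet

net = convNet()
feats = np.random.rand(3, 80, 1000).astype(np.float32)  # (channels, mel bands, frames)

# infer() slides a 15-frame window over the frame axis and returns one
# onset probability per window position (here about 1000 - 15 values).
probs = net.infer(feats, torch.device("cpu"), minibatch=256)
print(probs.shape)
```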
odcnn.py ADDED
@@ -0,0 +1,87 @@
+ import tempfile
+ from typing import Tuple
+ import numpy as np
+ import soundfile as sf
+ import torch
+ from pathlib import Path
+
+ from model import convNet
+ from preprocess import Audio, fft_and_melscale
+ from synthesize import create_tja, detect, synthesize
+
+
+ def trim_silence(data: np.ndarray, sr: int):
+     start = 0
+     end = len(data) - 1
+     while start < len(data) and np.abs(data[start]) < 0.2:
+         start += 1
+     while end > 0 and np.abs(data[end]) < 0.1:
+         end -= 1
+     start = max(start - sr * 3, 0)
+     end = min(end + sr * 3, len(data))
+     print(
+         f"Trimming {start/sr} seconds from the start and {end/sr} seconds from the end"
+     )
+     data = data[start:end]
+     return data
+
+
+ class ODCNN:
+     def __init__(self, don_model: str, ka_model: str, device: torch.device = "cpu"):
+         donNet = convNet()
+         donNet = donNet.to(device)
+         donNet.load_state_dict(torch.load(don_model, map_location="cpu"))
+         self.donNet = donNet
+
+         kaNet = convNet()
+         kaNet = kaNet.to(device)
+         kaNet.load_state_dict(torch.load(ka_model, map_location="cpu"))
+         self.kaNet = kaNet
+
+         self.device = device
+
+     def run(self, file: str, delta=0.05, trim=True) -> Tuple[str, str]:
+         data, sr = sf.read(file, always_2d=True)
+         song = Audio(data, sr)
+         song.data = song.data.mean(axis=1)
+         if trim:
+             song.data = trim_silence(song.data, sr)
+
+         song.feats = fft_and_melscale(
+             song,
+             nhop=512,
+             nffts=[1024, 2048, 4096],
+             mel_nband=80,
+             mel_freqlo=27.5,
+             mel_freqhi=16000.0,
+         )
+
+         don_inference = self.donNet.infer(song.feats, self.device, minibatch=4192)
+         don_inference = np.reshape(don_inference, (-1))
+
+         ka_inference = self.kaNet.infer(song.feats, self.device, minibatch=4192)
+         ka_inference = np.reshape(ka_inference, (-1))
+
+         easy_detection = detect(don_inference, ka_inference, delta=0.25)
+         normal_detection = detect(don_inference, ka_inference, delta=0.2)
+         hard_detection = detect(don_inference, ka_inference, delta=0.15)
+         oni_detection = detect(don_inference, ka_inference, delta=0.075)
+         ura_detection = detect(don_inference, ka_inference, delta=delta)
+
+         synthesized_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
+         synthesize(*hard_detection, song, synthesized_path)
+         file = Path(file)
+         tja = create_tja(
+             song,
+             timestamps=[
+                 easy_detection,
+                 normal_detection,
+                 hard_detection,
+                 oni_detection,
+                 ura_detection,
+             ],
+             title=file.stem,
+             wave=file.name,
+         )
+
+         return synthesized_path, tja
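Editor's note: `ODCNN.run` is the whole pipeline (features, two onset networks, five detection thresholds, preview synthesis, TJA text). A minimal local sketch, assuming the two checkpoints were fetched as in the Dockerfile and that a `song.wav` exists; it mirrors what `app.py` does per request but is not itself part of the commit:

```python
# Hypothetical local usage of the committed ODCNN class.
import torch

from odcnn import ODCNN

model = ODCNN("models/don_model.pth", "models/ka_model.pth", torch.device("cpu"))

# Returns a path to a preview mix (don/ka hits overlaid on the song) and the
# generated TJA chart text covering the five courses (Easy .. Ura).
preview_path, tja_text = model.run("song.wav", delta=0.02, trim=True)

with open("song.tja", "w") as f:
    f.write(tja_text)
```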
preprocess.py ADDED
@@ -0,0 +1,163 @@
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import soundfile as sf
+ from librosa.filters import mel
+ from scipy import signal
+ from scipy.fftpack import fft
+
+
+ class Audio:
+     """
+     audio class which holds music data and timestamps for notes.
+
+     Args:
+         filename: file name.
+         stereo: True or False; whether you have a Don/Ka stereo file or not. Normally True.
+     Variables:
+
+
+     Example:
+         >>>from music_processor import *
+         >>>song = Audio(filename)
+         >>># to get audio data
+         >>>song.data
+         >>># to import .tja files:
+         >>>song.import_tja(filename)
+         >>># to get data converted
+         >>>song.data = (song.data[:,0]+song.data[:,1])/2
+         >>>fft_and_melscale(song, include_zero_cross=False)
+     """
+
+     def __init__(self, data, samplerate, stereo=True):
+         self.data = data
+         self.samplerate = samplerate
+         if stereo is False:
+             self.data = (self.data[:, 0] + self.data[:, 1]) / 2
+         self.timestamp = []
+
+     def plotaudio(self, start_t, stop_t):
+         plt.plot(
+             np.linspace(start_t, stop_t, stop_t - start_t), self.data[start_t:stop_t, 0]
+         )
+         plt.show()
+
+     def save(self, filename, start_t=0, stop_t=None):
+         if stop_t is None:
+             stop_t = self.data.shape[0]
+         sf.write(filename, self.data[start_t:stop_t], self.samplerate)
+
+     def synthesize(self, diff=True, don="./asset/don.wav", ka="./asset/ka.wav"):
+         donsound = sf.read(don)[0]
+         donsound = (donsound[:, 0] + donsound[:, 1]) / 2
+         kasound = sf.read(ka)[0]
+         kasound = (kasound[:, 0] + kasound[:, 1]) / 2
+         donlen = len(donsound)
+         kalen = len(kasound)
+
+         if diff is True:
+             for stamp in self.timestamp:
+                 timing = int(stamp[0] * self.samplerate)
+                 try:
+                     if stamp[1] in (1, 3, 5, 6, 7):
+                         self.data[timing : timing + donlen] += donsound
+                     elif stamp[1] in (2, 4):
+                         self.data[timing : timing + kalen] += kasound
+                 except ValueError:
+                     pass
+
+         elif diff == "don":
+             if isinstance(self.timestamp[0], tuple):
+                 for stamp in self.timestamp:
+                     if stamp * self.samplerate + donlen < self.data.shape[0]:
+                         self.data[
+                             int(stamp[0] * self.samplerate) : int(
+                                 stamp[0] * self.samplerate
+                             )
+                             + donlen
+                         ] += donsound
+             else:
+                 for stamp in self.timestamp:
+                     if stamp * self.samplerate + donlen < self.data.shape[0]:
+                         self.data[
+                             int(stamp * self.samplerate) : int(stamp * self.samplerate)
+                             + donlen
+                         ] += donsound
+
+         elif diff == "ka":
+             if isinstance(self.timestamp[0], tuple):
+                 for stamp in self.timestamp:
+                     if stamp * self.samplerate + kalen < self.data.shape[0]:
+                         self.data[
+                             int(stamp[0] * self.samplerate) : int(
+                                 stamp[0] * self.samplerate
+                             )
+                             + kalen
+                         ] += kasound
+             else:
+                 for stamp in self.timestamp:
+                     if stamp * self.samplerate + kalen < self.data.shape[0]:
+                         self.data[
+                             int(stamp * self.samplerate) : int(stamp * self.samplerate)
+                             + kalen
+                         ] += kasound
+
+
+ def make_frame(data, nhop, nfft):
+     """
+     helper function for fft_and_melscale.
+     Returns an array of nfft-sized windows, shifted by nhop (512) samples each step, so short time slices can be used as training data.
+     """
+
+     length = data.shape[0]
+     framedata = np.concatenate((data, np.zeros(nfft)))  # zero padding
+     return np.array(
+         [framedata[i * nhop : i * nhop + nfft] for i in range(length // nhop)]
+     )
+
+
+ # @jit
+ def fft_and_melscale(
+     song,
+     nhop=512,
+     nffts=[1024, 2048, 4096],
+     mel_nband=80,
+     mel_freqlo=27.5,
+     mel_freqhi=16000.0,
+     include_zero_cross=False,
+ ):
+     """
+     fft and melscale method.
+     fft: nfft = [1024, 2048, 4096]; extracts np.arrays from the data with several window lengths and applies the fast Fourier transform to each.
+     melscale: reduces the frequency dimension and takes log10 of the values.
+     """
+
+     feat_channels = []
+
+     for nfft in nffts:
+         feats = []
+         window = signal.windows.blackmanharris(nfft)
+         filt = mel(
+             sr=song.samplerate,
+             n_fft=nfft,
+             n_mels=mel_nband,
+             fmin=mel_freqlo,
+             fmax=mel_freqhi,
+         )
+
+         # get normal frame
+         frame = make_frame(song.data, nhop, nfft)
+         # print(frame.shape)
+
+         # melscaling
+         processedframe = fft(window * frame)[:, : nfft // 2 + 1]
+         processedframe = np.dot(filt, np.transpose(np.abs(processedframe) ** 2))
+         processedframe = 20 * np.log10(processedframe + 0.1)
+         # print(processedframe.shape)
+
+         feat_channels.append(processedframe)
+
+     if include_zero_cross:
+         song.zero_crossing = np.where(np.diff(np.sign(song.data)))[0]
+         print(song.zero_crossing)
+
+     return np.array(feat_channels)
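Editor's note: the feature extractor can be exercised on its own. A short sketch, assuming a local `song.wav` (not part of the commit); the result has shape `(len(nffts), mel_nband, n_frames)` where `n_frames = len(data) // nhop`:

```python
# Sketch of the committed feature pipeline in isolation.
import soundfile as sf

from preprocess import Audio, fft_and_melscale

data, sr = sf.read("song.wav", always_2d=True)
song = Audio(data, sr)
song.data = song.data.mean(axis=1)  # downmix to mono, as odcnn.py does

feats = fft_and_melscale(song, nhop=512, nffts=[1024, 2048, 4096], mel_nband=80)
print(feats.shape)  # e.g. (3, 80, len(song.data) // 512)
```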
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ gradio
+ chainer
+ librosa
+ matplotlib
+ numpy
+ soundfile
+ torch
+ scikit-learn
+ yt_dlp
+ accelerate
+ spaces
synthesize.py ADDED
@@ -0,0 +1,148 @@
+ from typing import List
+ import numpy as np
+ from librosa.util import peak_pick
+
+ from preprocess import *
+
+
+ def smooth(x, window_len=11, window="hanning"):
+     if x.ndim != 1:
+         raise ValueError
+
+     if x.size < window_len:
+         raise ValueError
+
+     if window_len < 3:
+         return x
+
+     if window not in ["flat", "hanning", "hamming", "bartlett", "blackman"]:
+         raise ValueError
+
+     s = np.r_[x[window_len - 1 : 0 : -1], x, x[-2 : -window_len - 1 : -1]]
+     # print(len(s))
+     if window == "flat":  # moving average
+         w = np.ones(window_len, "d")
+     else:
+         w = eval("np." + window + "(window_len)")
+
+     y = np.convolve(w / w.sum(), s, mode="valid")
+
+     return y
+
+
+ def detect(don_inference, ka_inference, delta=0.05):
+     don_inference = smooth(don_inference, 5)
+     ka_inference = smooth(ka_inference, 5)
+
+     don_timestamp = (
+         peak_pick(
+             x=don_inference,
+             pre_max=1,
+             post_max=2,
+             pre_avg=4,
+             post_avg=5,
+             delta=delta,
+             wait=3,
+         )
+         + 7
+     )  # the detected peak actually corresponds to the sound 7 frames later
+     ka_timestamp = (
+         peak_pick(
+             x=ka_inference,
+             pre_max=1,
+             post_max=2,
+             pre_avg=4,
+             post_avg=5,
+             delta=delta,
+             wait=3,
+         )
+         + 7
+     )
+
+     print(don_timestamp)
+     print(ka_timestamp)
+
+     don_timestamp = don_timestamp[
+         np.where(don_inference[don_timestamp] > ka_inference[don_timestamp])
+     ]
+
+     ka_timestamp = ka_timestamp[
+         np.where(ka_inference[ka_timestamp] > don_inference[ka_timestamp])
+     ]
+
+     return don_timestamp, ka_timestamp
+
+
+ # def note_to_drumroll(timestamp, max_gap=5, min_note=3):
+ #     drumroll = []
+ #     note = 0
+ #     for i in range(1, len(timestamp)):
+ #         if timestamp[i] - timestamp[i - 1] <= max_gap:
+ #             note += 1
+ #         else:
+ #             if note >= min_note:
+ #                 drumroll.append((timestamp[i - note - 1], timestamp[i - 1]))
+ #             note = 0
+ #     if note >= min_note:
+ #         drumroll.append((timestamp[-note - 1], timestamp[-1]))
+ #     return drumroll
+
+ def synthesize(don_timestamp, ka_timestamp, song, filepath):
+     song.don_timestamp = don_timestamp
+     song.timestamp = song.don_timestamp * 512 / song.samplerate
+     # print(len(song.timestamp))
+     song.synthesize(diff="don")
+
+     # song.ka_timestamp = song.don_timestamp
+     song.ka_timestamp = ka_timestamp
+     song.timestamp = song.ka_timestamp * 512 / song.samplerate
+     # print(len(song.timestamp))
+     song.synthesize(diff="ka")
+
+     song.save(filepath)
+
+
+ def create_tja(
+     song,
+     timestamps: List[tuple],
+     title="untitled",
+     subtitle="--",
+     wave="untitled.ogg",
+     safezone=2,
+ ):
+     tja = f"TITLE: {title}\nSUBTITLE: {subtitle}\nBPM: 240\nWAVE:{wave}\nOFFSET:0\n\n"
+
+     for i, (don, ka) in enumerate(timestamps):
+         try:
+             level = [3, 5, 7, 8, 9][i]
+             scroll = [0.6, 0.7, 0.8, 0.9, 1.0][i]
+
+             don_timestamp = np.rint(don * 512 / song.samplerate * 100).astype(np.int32)
+             ka_timestamp = np.rint(ka * 512 / song.samplerate * 100).astype(np.int32)
+             length = np.max(
+                 (
+                     don_timestamp[-1] if don_timestamp.size > 0 else 0,
+                     ka_timestamp[-1] if ka_timestamp.size > 0 else 0,
+                 )
+             )
+             safezone_keep = 0
+             tja += f"COURSE:{i}\nLEVEL:{level}\n\n#START\n#SCROLL {scroll}\n"
+             for time in range(length):
+                 if np.isin(time, don_timestamp) == True and safezone_keep <= 0:
+                     tja += "1"
+                     safezone_keep = safezone
+                 elif np.isin(time, ka_timestamp) == True and safezone_keep <= 0:
+                     tja += "2"
+                     safezone_keep = safezone
+                 else:
+                     tja += "0"
+                 safezone_keep -= 1
+                 if time % 100 == 99:
+                     tja += ",\n"
+             if length % 100 != 0:
+                 tja += "0" * (100 - (length % 100)) + ",\n"
+             tja += "#END\n\n"
+         except:
+             pass
+
+     return tja
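Editor's note: `detect` peak-picks each onset curve and keeps only the frames where that drum dominates, and `create_tja` rounds the surviving frames onto a 10 ms grid (BPM 240, 100 cells per measure). A toy sketch with synthetic curves instead of real network output, purely to show the data flow (not part of the commit):

```python
# Toy example of the detection/charting step on fake onset probabilities.
import numpy as np

from preprocess import Audio
from synthesize import create_tja, detect

sr = 44100
song = Audio(np.zeros(sr * 10), sr)  # 10 seconds of silence as a stand-in

frames = sr * 10 // 512              # frame rate is samplerate / 512, as in odcnn.py
don_curve = np.random.rand(frames)   # fake "don" onset probabilities
ka_curve = np.random.rand(frames)    # fake "ka" onset probabilities
don_curve[-20:] = 0.0                # quiet tail so the +7 frame offset stays in range
ka_curve[-20:] = 0.0

detection = detect(don_curve, ka_curve, delta=0.2)
tja_text = create_tja(song, timestamps=[detection] * 5, title="demo", wave="demo.ogg")
print(tja_text.splitlines()[:6])
```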
youtube.py ADDED
@@ -0,0 +1,43 @@
+ import hashlib
+ import os
+ import shutil
+ import tempfile
+
+ import gradio as gr
+ import yt_dlp
+ from gradio_client import Client
+
+
+ def youtube(url: str) -> str:
+     if not url:
+         raise gr.Error("Please input a YouTube URL")
+
+     hash = hashlib.md5(url.encode()).hexdigest()
+     tmp_file = os.path.join(tempfile.gettempdir(), f"{hash}")
+
+     try:
+         ydl_opts = {
+             "format": "bestaudio/best",
+             "outtmpl": tmp_file,
+             "postprocessors": [
+                 {
+                     "key": "FFmpegExtractAudio",
+                     "preferredcodec": "mp3",
+                     "preferredquality": "192",
+                 }
+             ],
+         }
+
+         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+             ydl.download([url])
+     except Exception as e:
+         print(e)
+         try:
+             ytdl = Client("JacobLinCool/yt-dlp")
+             file = ytdl.predict(api_name="/download", url=url)
+             shutil.move(file, tmp_file + ".mp3")
+         except Exception as e:
+             print(e)
+             raise gr.Error(f"Failed to download YouTube audio from {url}")
+
+     return tmp_file + ".mp3"