github-actions[bot] committed
Commit 9df2e22 (0 parents)

Sync to HuggingFace Spaces

Files changed (15)
  1. .gitattributes +3 -0
  2. .github/workflows/sync.yaml +29 -0
  3. .gitignore +163 -0
  4. Dockerfile +23 -0
  5. LICENSE +21 -0
  6. README.md +13 -0
  7. app.py +122 -0
  8. asset/don.wav +3 -0
  9. asset/ka.wav +3 -0
  10. model.py +72 -0
  11. odcnn.py +87 -0
  12. preprocess.py +163 -0
  13. requirements.txt +11 -0
  14. synthesize.py +148 -0
  15. youtube.py +43 -0
.gitattributes ADDED
@@ -0,0 +1,3 @@
+ # Auto detect text files and perform LF normalization
+ * text=auto
+ *.wav filter=lfs diff=lfs merge=lfs -text
.github/workflows/sync.yaml ADDED
@@ -0,0 +1,29 @@
+ name: Sync to Hugging Face Spaces
+
+ on:
+   push:
+     branches:
+       - main
+   workflow_dispatch:
+
+ jobs:
+   sync:
+     name: Sync
+     runs-on: ubuntu-latest
+
+     steps:
+       - name: Checkout Repository
+         uses: actions/checkout@v4
+         with:
+           lfs: true
+
+       - name: Sync to Hugging Face Spaces
+         uses: JacobLinCool/huggingface-sync@v1
+         with:
+           github: ${{ secrets.GITHUB_TOKEN }}
+           user: jacoblincool # Hugging Face username or organization name
+           space: tja-generator # Hugging Face space name
+           token: ${{ secrets.HF_TOKEN }} # Hugging Face token
+           title: "TJA Generator"
+           sdk: "gradio"
+           models: "[ JacobLinCool/odcnn-320k-100 ]"
.gitignore ADDED
@@ -0,0 +1,163 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ models/
+ flagged/
Dockerfile ADDED
@@ -0,0 +1,23 @@
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.11
+
+ RUN useradd -m -u 1000 user
+
+ WORKDIR /app
+
+ RUN apt update && apt install -y curl libsndfile1 ffmpeg
+
+ COPY --chown=user ./requirements.txt requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+
+ RUN mkdir -p /app/models && \
+     curl -L https://huggingface.co/JacobLinCool/odcnn-320k-100/resolve/main/don_model.pth -o /app/models/don_model.pth && \
+     curl -L https://huggingface.co/JacobLinCool/odcnn-320k-100/resolve/main/ka_model.pth -o /app/models/ka_model.pth && \
+     chown -R user /app/models
+
+ CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 JacobLinCool
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: TJA Generator
+ emoji: 🦄
+ colorFrom: indigo
+ colorTo: pink
+ sdk: gradio
+ short_description: HuggingFace Space for https://github.com/seiichiinoue/odcnn
+ models: [ JacobLinCool/odcnn-320k-100 ]
+ ---
+
+ # TJA Generator
+
+ Use [ODCNN](https://github.com/seiichiinoue/odcnn) to generate TJA from music.
app.py ADDED
@@ -0,0 +1,122 @@
+ import os
+ from tempfile import NamedTemporaryFile
+ from typing import Tuple
+ from zipfile import ZipFile
+
+ import gradio as gr
+ from accelerate import Accelerator
+ from huggingface_hub import hf_hub_download
+
+ from odcnn import ODCNN
+ from youtube import youtube
+
+ accelerator = Accelerator()
+ device = accelerator.device
+
+ DON_MODEL = hf_hub_download(
+     repo_id="JacobLinCool/odcnn-320k-100", filename="don_model.pth"
+ )
+ KA_MODEL = hf_hub_download(
+     repo_id="JacobLinCool/odcnn-320k-100", filename="ka_model.pth"
+ )
+
+
+ models = {"odcnn-320k-100": ODCNN(DON_MODEL, KA_MODEL, device)}
+
+
+ def run(file: str, model: str, delta: float, trim: bool) -> Tuple[str, str, str]:
+     preview, tja = models[model].run(file, delta, trim)
+
+     with NamedTemporaryFile(
+         "w", suffix=".tja", delete=True
+     ) as tjafile, NamedTemporaryFile("w", suffix=".zip", delete=False) as zfile:
+         tjafile.write(tja)
+
+         with ZipFile(zfile.name, "w") as z:
+             z.write(file, os.path.basename(file))
+             z.write(tjafile.name, f"{os.path.basename(file)}-{model}.tja")
+
+     return preview, tja, zfile.name
+
+
+ def from_youtube(
+     url: str, model: str, delta: float, trim: bool
+ ) -> Tuple[str, str, str, str]:
+     audio = youtube(url)
+     return audio, *run(audio, model, delta, trim)
+
+
+ with gr.Blocks() as app:
+     with open(os.path.join(os.path.dirname(__file__), "README.md"), "r") as f:
+         README = f.read()
+         # remove yaml front matter
+         blocks = README.split("---")
+         if len(blocks) > 1:
+             README = "---".join(blocks[2:])
+
+     gr.Markdown(README)
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown("## Upload an audio file")
+             audio = gr.Audio(label="Upload an audio file", type="filepath")
+         with gr.Column():
+             gr.Markdown(
+                 "## or use a YouTube URL\n\nTry something on [The First Take](https://www.youtube.com/@The_FirstTake)?"
+             )
+             yt = gr.Textbox(
+                 label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."
+             )
+             yt_btn = gr.Button("Use this YouTube URL")
+
+     with gr.Row():
+         model = gr.Radio(
+             label="Select a model",
+             choices=[s for s in models.keys()],
+             value="odcnn-320k-100",
+         )
+         btn = gr.Button("Infer", variant="primary")
+
+     with gr.Row():
+         with gr.Column():
+             synthesized = gr.Audio(
+                 label="Synthesized Audio",
+                 format="mp3",
+                 type="filepath",
+                 interactive=False,
+             )
+         with gr.Column():
+             tja = gr.Text(label="TJA", interactive=False)
+
+     with gr.Row():
+         zip = gr.File(label="Download ZIP", type="filepath")
+
+     with gr.Accordion("Advanced Options", open=False):
+         delta = gr.Slider(
+             label="Delta",
+             value=0.02,
+             minimum=0.01,
+             maximum=0.5,
+             step=0.01,
+             info="Threshold for note detection (Ura)",
+         )
+         trim = gr.Checkbox(
+             label="Trim silence",
+             value=True,
+             info="Trim silence from the start and end of the audio",
+         )
+
+     btn.click(
+         fn=run,
+         inputs=[audio, model, delta, trim],
+         outputs=[synthesized, tja, zip],
+         api_name="run",
+     )
+
+     yt_btn.click(
+         fn=from_youtube,
+         inputs=[yt, model, delta, trim],
+         outputs=[audio, synthesized, tja, zip],
+     )
+
+ app.queue().launch(server_name="0.0.0.0")
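Editor's note: because `btn.click` registers `api_name="run"`, the Space can also be driven programmatically. Below is a minimal sketch, not part of the commit, assuming the Space ends up at `jacoblincool/tja-generator` (per `sync.yaml`), a recent `gradio_client`, and a local placeholder file `song.mp3`.

```python
# Hypothetical remote call to the deployed Space; names of the Space and the
# local audio file are assumptions, the argument order mirrors run() in app.py.
from gradio_client import Client, handle_file

client = Client("jacoblincool/tja-generator")

synthesized_path, tja_text, zip_path = client.predict(
    handle_file("song.mp3"),  # audio file to chart (placeholder path)
    "odcnn-320k-100",         # model key defined in app.py
    0.02,                     # delta: detection threshold for the Ura course
    True,                     # trim leading/trailing silence
    api_name="/run",
)
print(tja_text[:200])
```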
asset/don.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:268b9e66095a3890652f6791529edb37630ad1a0c2ad550cddb4cb105895884d
+ size 171118
asset/ka.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:89cb09280bb84a039b32e33e60a0961811dc1419d6d9ca18771f545cb7414939
+ size 52652
model.py ADDED
@@ -0,0 +1,72 @@
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from tqdm import tqdm
+
+ from preprocess import *
+
+
+ class convNet(nn.Module):
+     """
+     Implements the convolutional network from the paper
+     "Improved musical onset detection with Convolutional Neural Networks".
+     src: https://ieeexplore.ieee.org/document/6854953
+     """
+
+     def __init__(self):
+         super(convNet, self).__init__()
+         # model
+         self.conv1 = nn.Conv2d(3, 10, (3, 7))
+         self.conv2 = nn.Conv2d(10, 20, 3)
+         self.fc1 = nn.Linear(1120, 256)
+         self.fc2 = nn.Linear(256, 120)
+         self.fc3 = nn.Linear(120, 1)
+
+     def forward(self, x, istraining=False, minibatch=1):
+         x = F.max_pool2d(F.relu(self.conv1(x)), (3, 1))
+         x = F.max_pool2d(F.relu(self.conv2(x)), (3, 1))
+         x = F.dropout(x.view(minibatch, -1), training=istraining)
+         x = F.dropout(F.relu(self.fc1(x)), training=istraining)
+         x = F.dropout(F.relu(self.fc2(x)), training=istraining)
+
+         return F.sigmoid(self.fc3(x))
+
+     def infer_data_builder(self, feats, soundlen=15, minibatch=1):
+         x = []
+
+         for i in range(feats.shape[2] - soundlen):
+             x.append(feats[:, :, i : i + soundlen])
+
+             if (i + 1) % minibatch == 0:
+                 yield (torch.from_numpy(np.array(x)).float())
+                 x = []
+
+         if len(x) != 0:
+             yield (torch.from_numpy(np.array(x)).float())
+
+     def infer(self, feats, device, minibatch=1):
+         with torch.no_grad():
+             inference = None
+             for x in tqdm(
+                 self.infer_data_builder(feats, minibatch=minibatch),
+                 total=feats.shape[2] // minibatch,
+             ):
+                 output = self(x.to(device), minibatch=x.shape[0])
+                 if inference is not None:
+                     inference = np.concatenate(
+                         (inference, output.cpu().numpy().reshape(-1))
+                     )
+                 else:
+                     inference = output.cpu().numpy().reshape(-1)
+
+             return np.array(inference).reshape(-1)
+
+
+ if __name__ == "__main__":
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     net = convNet()
+     net = net.to(device)
+
+     print(net)
+     print("parameters: ", sum(p.numel() for p in net.parameters()))
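Editor's note: the network expects features shaped like the output of `fft_and_melscale` in `preprocess.py`: 3 FFT-size channels, 80 mel bands, and one column per 512-sample hop, scanned with a 15-frame window. A small illustrative sketch (random features, untrained weights; not part of the commit):

```python
# Illustration only: exercise convNet.infer on fake mel features.
import numpy as np
import torch

from model import convNet

net = convNet()
feats = np.random.rand(3, 80, 1000).astype(np.float32)  # (channels, mel bands, frames)

# infer() slides a 15-frame window over the frame axis and returns one
# onset probability per window position (here about 1000 - 15 values).
probs = net.infer(feats, torch.device("cpu"), minibatch=256)
print(probs.shape)
```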
odcnn.py ADDED
@@ -0,0 +1,87 @@
+ import tempfile
+ from typing import Tuple
+ import numpy as np
+ import soundfile as sf
+ import torch
+ from pathlib import Path
+
+ from model import convNet
+ from preprocess import Audio, fft_and_melscale
+ from synthesize import create_tja, detect, synthesize
+
+
+ def trim_silence(data: np.ndarray, sr: int):
+     start = 0
+     end = len(data) - 1
+     while start < len(data) and np.abs(data[start]) < 0.2:
+         start += 1
+     while end > 0 and np.abs(data[end]) < 0.1:
+         end -= 1
+     start = max(start - sr * 3, 0)
+     end = min(end + sr * 3, len(data))
+     print(
+         f"Trimming {start/sr} seconds from the start and {end/sr} seconds from the end"
+     )
+     data = data[start:end]
+     return data
+
+
+ class ODCNN:
+     def __init__(self, don_model: str, ka_model: str, device: torch.device = "cpu"):
+         donNet = convNet()
+         donNet = donNet.to(device)
+         donNet.load_state_dict(torch.load(don_model, map_location="cpu"))
+         self.donNet = donNet
+
+         kaNet = convNet()
+         kaNet = kaNet.to(device)
+         kaNet.load_state_dict(torch.load(ka_model, map_location="cpu"))
+         self.kaNet = kaNet
+
+         self.device = device
+
+     def run(self, file: str, delta=0.05, trim=True) -> Tuple[str, str]:
+         data, sr = sf.read(file, always_2d=True)
+         song = Audio(data, sr)
+         song.data = song.data.mean(axis=1)
+         if trim:
+             song.data = trim_silence(song.data, sr)
+
+         song.feats = fft_and_melscale(
+             song,
+             nhop=512,
+             nffts=[1024, 2048, 4096],
+             mel_nband=80,
+             mel_freqlo=27.5,
+             mel_freqhi=16000.0,
+         )
+
+         don_inference = self.donNet.infer(song.feats, self.device, minibatch=4192)
+         don_inference = np.reshape(don_inference, (-1))
+
+         ka_inference = self.kaNet.infer(song.feats, self.device, minibatch=4192)
+         ka_inference = np.reshape(ka_inference, (-1))
+
+         easy_detection = detect(don_inference, ka_inference, delta=0.25)
+         normal_detection = detect(don_inference, ka_inference, delta=0.2)
+         hard_detection = detect(don_inference, ka_inference, delta=0.15)
+         oni_detection = detect(don_inference, ka_inference, delta=0.075)
+         ura_detection = detect(don_inference, ka_inference, delta=delta)
+
+         synthesized_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
+         synthesize(*hard_detection, song, synthesized_path)
+         file = Path(file)
+         tja = create_tja(
+             song,
+             timestamps=[
+                 easy_detection,
+                 normal_detection,
+                 hard_detection,
+                 oni_detection,
+                 ura_detection,
+             ],
+             title=file.stem,
+             wave=file.name,
+         )
+
+         return synthesized_path, tja
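Editor's note: `ODCNN.run` is the whole pipeline (features, two onset networks, five detection thresholds, preview synthesis, TJA text). A minimal local sketch, assuming the two checkpoints were fetched as in the Dockerfile and that a `song.wav` exists; it mirrors what `app.py` does per request but is not itself part of the commit:

```python
# Hypothetical local usage of the committed ODCNN class.
import torch

from odcnn import ODCNN

model = ODCNN("models/don_model.pth", "models/ka_model.pth", torch.device("cpu"))

# Returns a path to a preview mix (don/ka hits overlaid on the song) and the
# generated TJA chart text covering the five courses (Easy .. Ura).
preview_path, tja_text = model.run("song.wav", delta=0.02, trim=True)

with open("song.tja", "w") as f:
    f.write(tja_text)
```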
preprocess.py ADDED
@@ -0,0 +1,163 @@
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import soundfile as sf
+ from librosa.filters import mel
+ from scipy import signal
+ from scipy.fftpack import fft
+
+
+ class Audio:
+     """
+     audio class which holds music data and timestamps for notes.
+
+     Args:
+         filename: file name.
+         stereo: True or False; whether you have a Don/Ka stereo file or not. Normally True.
+     Variables:
+
+
+     Example:
+         >>>from music_processor import *
+         >>>song = Audio(filename)
+         >>># to get audio data
+         >>>song.data
+         >>># to import .tja files:
+         >>>song.import_tja(filename)
+         >>># to get data converted
+         >>>song.data = (song.data[:,0]+song.data[:,1])/2
+         >>>fft_and_melscale(song, include_zero_cross=False)
+     """
+
+     def __init__(self, data, samplerate, stereo=True):
+         self.data = data
+         self.samplerate = samplerate
+         if stereo is False:
+             self.data = (self.data[:, 0] + self.data[:, 1]) / 2
+         self.timestamp = []
+
+     def plotaudio(self, start_t, stop_t):
+         plt.plot(
+             np.linspace(start_t, stop_t, stop_t - start_t), self.data[start_t:stop_t, 0]
+         )
+         plt.show()
+
+     def save(self, filename, start_t=0, stop_t=None):
+         if stop_t is None:
+             stop_t = self.data.shape[0]
+         sf.write(filename, self.data[start_t:stop_t], self.samplerate)
+
+     def synthesize(self, diff=True, don="./asset/don.wav", ka="./asset/ka.wav"):
+         donsound = sf.read(don)[0]
+         donsound = (donsound[:, 0] + donsound[:, 1]) / 2
+         kasound = sf.read(ka)[0]
+         kasound = (kasound[:, 0] + kasound[:, 1]) / 2
+         donlen = len(donsound)
+         kalen = len(kasound)
+
+         if diff is True:
+             for stamp in self.timestamp:
+                 timing = int(stamp[0] * self.samplerate)
+                 try:
+                     if stamp[1] in (1, 3, 5, 6, 7):
+                         self.data[timing : timing + donlen] += donsound
+                     elif stamp[1] in (2, 4):
+                         self.data[timing : timing + kalen] += kasound
+                 except ValueError:
+                     pass
+
+         elif diff == "don":
+             if isinstance(self.timestamp[0], tuple):
+                 for stamp in self.timestamp:
+                     if stamp * self.samplerate + donlen < self.data.shape[0]:
+                         self.data[
+                             int(stamp[0] * self.samplerate) : int(
+                                 stamp[0] * self.samplerate
+                             )
+                             + donlen
+                         ] += donsound
+             else:
+                 for stamp in self.timestamp:
+                     if stamp * self.samplerate + donlen < self.data.shape[0]:
+                         self.data[
+                             int(stamp * self.samplerate) : int(stamp * self.samplerate)
+                             + donlen
+                         ] += donsound
+
+         elif diff == "ka":
+             if isinstance(self.timestamp[0], tuple):
+                 for stamp in self.timestamp:
+                     if stamp * self.samplerate + kalen < self.data.shape[0]:
+                         self.data[
+                             int(stamp[0] * self.samplerate) : int(
+                                 stamp[0] * self.samplerate
+                             )
+                             + kalen
+                         ] += kasound
+             else:
+                 for stamp in self.timestamp:
+                     if stamp * self.samplerate + kalen < self.data.shape[0]:
+                         self.data[
+                             int(stamp * self.samplerate) : int(stamp * self.samplerate)
+                             + kalen
+                         ] += kasound
+
+
+ def make_frame(data, nhop, nfft):
+     """
+     helper function for fft_and_melscale.
+     Returns an array of nfft-sized windows, shifted by nhop (512) samples each step, so short time slices can be used as training data.
+     """
+
+     length = data.shape[0]
+     framedata = np.concatenate((data, np.zeros(nfft)))  # zero padding
+     return np.array(
+         [framedata[i * nhop : i * nhop + nfft] for i in range(length // nhop)]
+     )
+
+
+ # @jit
+ def fft_and_melscale(
+     song,
+     nhop=512,
+     nffts=[1024, 2048, 4096],
+     mel_nband=80,
+     mel_freqlo=27.5,
+     mel_freqhi=16000.0,
+     include_zero_cross=False,
+ ):
+     """
+     fft and melscale method.
+     fft: nfft = [1024, 2048, 4096]; extracts np.arrays from the data with several window lengths and applies the fast Fourier transform to each.
+     melscale: reduces the frequency dimension and takes log10 of the values.
+     """
+
+     feat_channels = []
+
+     for nfft in nffts:
+         feats = []
+         window = signal.windows.blackmanharris(nfft)
+         filt = mel(
+             sr=song.samplerate,
+             n_fft=nfft,
+             n_mels=mel_nband,
+             fmin=mel_freqlo,
+             fmax=mel_freqhi,
+         )
+
+         # get normal frame
+         frame = make_frame(song.data, nhop, nfft)
+         # print(frame.shape)
+
+         # melscaling
+         processedframe = fft(window * frame)[:, : nfft // 2 + 1]
+         processedframe = np.dot(filt, np.transpose(np.abs(processedframe) ** 2))
+         processedframe = 20 * np.log10(processedframe + 0.1)
+         # print(processedframe.shape)
+
+         feat_channels.append(processedframe)
+
+     if include_zero_cross:
+         song.zero_crossing = np.where(np.diff(np.sign(song.data)))[0]
+         print(song.zero_crossing)
+
+     return np.array(feat_channels)
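Editor's note: the feature extractor can be exercised on its own. A short sketch, assuming a local `song.wav` (not part of the commit); the result has shape `(len(nffts), mel_nband, n_frames)` where `n_frames = len(data) // nhop`:

```python
# Sketch of the committed feature pipeline in isolation.
import soundfile as sf

from preprocess import Audio, fft_and_melscale

data, sr = sf.read("song.wav", always_2d=True)
song = Audio(data, sr)
song.data = song.data.mean(axis=1)  # downmix to mono, as odcnn.py does

feats = fft_and_melscale(song, nhop=512, nffts=[1024, 2048, 4096], mel_nband=80)
print(feats.shape)  # e.g. (3, 80, len(song.data) // 512)
```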
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ gradio
+ chainer
+ librosa
+ matplotlib
+ numpy
+ soundfile
+ torch
+ scikit-learn
+ yt_dlp
+ accelerate
+ spaces
synthesize.py ADDED
@@ -0,0 +1,148 @@
+ from typing import List
+ import numpy as np
+ from librosa.util import peak_pick
+
+ from preprocess import *
+
+
+ def smooth(x, window_len=11, window="hanning"):
+     if x.ndim != 1:
+         raise ValueError
+
+     if x.size < window_len:
+         raise ValueError
+
+     if window_len < 3:
+         return x
+
+     if window not in ["flat", "hanning", "hamming", "bartlett", "blackman"]:
+         raise ValueError
+
+     s = np.r_[x[window_len - 1 : 0 : -1], x, x[-2 : -window_len - 1 : -1]]
+     # print(len(s))
+     if window == "flat":  # moving average
+         w = np.ones(window_len, "d")
+     else:
+         w = eval("np." + window + "(window_len)")
+
+     y = np.convolve(w / w.sum(), s, mode="valid")
+
+     return y
+
+
+ def detect(don_inference, ka_inference, delta=0.05):
+     don_inference = smooth(don_inference, 5)
+     ka_inference = smooth(ka_inference, 5)
+
+     don_timestamp = (
+         peak_pick(
+             x=don_inference,
+             pre_max=1,
+             post_max=2,
+             pre_avg=4,
+             post_avg=5,
+             delta=delta,
+             wait=3,
+         )
+         + 7
+     )  # the detected peak actually corresponds to the sound 7 frames later
+     ka_timestamp = (
+         peak_pick(
+             x=ka_inference,
+             pre_max=1,
+             post_max=2,
+             pre_avg=4,
+             post_avg=5,
+             delta=delta,
+             wait=3,
+         )
+         + 7
+     )
+
+     print(don_timestamp)
+     print(ka_timestamp)
+
+     don_timestamp = don_timestamp[
+         np.where(don_inference[don_timestamp] > ka_inference[don_timestamp])
+     ]
+
+     ka_timestamp = ka_timestamp[
+         np.where(ka_inference[ka_timestamp] > don_inference[ka_timestamp])
+     ]
+
+     return don_timestamp, ka_timestamp
+
+
+ # def note_to_drumroll(timestamp, max_gap=5, min_note=3):
+ #     drumroll = []
+ #     note = 0
+ #     for i in range(1, len(timestamp)):
+ #         if timestamp[i] - timestamp[i - 1] <= max_gap:
+ #             note += 1
+ #         else:
+ #             if note >= min_note:
+ #                 drumroll.append((timestamp[i - note - 1], timestamp[i - 1]))
+ #             note = 0
+ #     if note >= min_note:
+ #         drumroll.append((timestamp[-note - 1], timestamp[-1]))
+ #     return drumroll
+
+ def synthesize(don_timestamp, ka_timestamp, song, filepath):
+     song.don_timestamp = don_timestamp
+     song.timestamp = song.don_timestamp * 512 / song.samplerate
+     # print(len(song.timestamp))
+     song.synthesize(diff="don")
+
+     # song.ka_timestamp = song.don_timestamp
+     song.ka_timestamp = ka_timestamp
+     song.timestamp = song.ka_timestamp * 512 / song.samplerate
+     # print(len(song.timestamp))
+     song.synthesize(diff="ka")
+
+     song.save(filepath)
+
+
+ def create_tja(
+     song,
+     timestamps: List[tuple],
+     title="untitled",
+     subtitle="--",
+     wave="untitled.ogg",
+     safezone=2,
+ ):
+     tja = f"TITLE: {title}\nSUBTITLE: {subtitle}\nBPM: 240\nWAVE:{wave}\nOFFSET:0\n\n"
+
+     for i, (don, ka) in enumerate(timestamps):
+         try:
+             level = [3, 5, 7, 8, 9][i]
+             scroll = [0.6, 0.7, 0.8, 0.9, 1.0][i]
+
+             don_timestamp = np.rint(don * 512 / song.samplerate * 100).astype(np.int32)
+             ka_timestamp = np.rint(ka * 512 / song.samplerate * 100).astype(np.int32)
+             length = np.max(
+                 (
+                     don_timestamp[-1] if don_timestamp.size > 0 else 0,
+                     ka_timestamp[-1] if ka_timestamp.size > 0 else 0,
+                 )
+             )
+             safezone_keep = 0
+             tja += f"COURSE:{i}\nLEVEL:{level}\n\n#START\n#SCROLL {scroll}\n"
+             for time in range(length):
+                 if np.isin(time, don_timestamp) == True and safezone_keep <= 0:
+                     tja += "1"
+                     safezone_keep = safezone
+                 elif np.isin(time, ka_timestamp) == True and safezone_keep <= 0:
+                     tja += "2"
+                     safezone_keep = safezone
+                 else:
+                     tja += "0"
+                 safezone_keep -= 1
+                 if time % 100 == 99:
+                     tja += ",\n"
+             if length % 100 != 0:
+                 tja += "0" * (100 - (length % 100)) + ",\n"
+             tja += "#END\n\n"
+         except:
+             pass
+
+     return tja
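Editor's note: `detect` peak-picks each onset curve and keeps only the frames where that drum dominates, and `create_tja` rounds the surviving frames onto a 10 ms grid (BPM 240, 100 cells per measure). A toy sketch with synthetic curves instead of real network output, purely to show the data flow (not part of the commit):

```python
# Toy example of the detection/charting step on fake onset probabilities.
import numpy as np

from preprocess import Audio
from synthesize import create_tja, detect

sr = 44100
song = Audio(np.zeros(sr * 10), sr)  # 10 seconds of silence as a stand-in

frames = sr * 10 // 512              # frame rate is samplerate / 512, as in odcnn.py
don_curve = np.random.rand(frames)   # fake "don" onset probabilities
ka_curve = np.random.rand(frames)    # fake "ka" onset probabilities
don_curve[-20:] = 0.0                # quiet tail so the +7 frame offset stays in range
ka_curve[-20:] = 0.0

detection = detect(don_curve, ka_curve, delta=0.2)
tja_text = create_tja(song, timestamps=[detection] * 5, title="demo", wave="demo.ogg")
print(tja_text.splitlines()[:6])
```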
youtube.py ADDED
@@ -0,0 +1,43 @@
+ import hashlib
+ import os
+ import shutil
+ import tempfile
+
+ import gradio as gr
+ import yt_dlp
+ from gradio_client import Client
+
+
+ def youtube(url: str) -> str:
+     if not url:
+         raise gr.Error("Please input a YouTube URL")
+
+     hash = hashlib.md5(url.encode()).hexdigest()
+     tmp_file = os.path.join(tempfile.gettempdir(), f"{hash}")
+
+     try:
+         ydl_opts = {
+             "format": "bestaudio/best",
+             "outtmpl": tmp_file,
+             "postprocessors": [
+                 {
+                     "key": "FFmpegExtractAudio",
+                     "preferredcodec": "mp3",
+                     "preferredquality": "192",
+                 }
+             ],
+         }
+
+         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+             ydl.download([url])
+     except Exception as e:
+         print(e)
+         try:
+             ytdl = Client("JacobLinCool/yt-dlp")
+             file = ytdl.predict(api_name="/download", url=url)
+             shutil.move(file, tmp_file + ".mp3")
+         except Exception as e:
+             print(e)
+             raise gr.Error(f"Failed to download YouTube audio from {url}")
+
+     return tmp_file + ".mp3"