github-actions[bot] committed
Commit 9df2e22

Sync to HuggingFace Spaces
- .gitattributes +3 -0
- .github/workflows/sync.yaml +29 -0
- .gitignore +163 -0
- Dockerfile +23 -0
- LICENSE +21 -0
- README.md +13 -0
- app.py +122 -0
- asset/don.wav +3 -0
- asset/ka.wav +3 -0
- model.py +72 -0
- odcnn.py +87 -0
- preprocess.py +163 -0
- requirements.txt +11 -0
- synthesize.py +148 -0
- youtube.py +43 -0
.gitattributes
ADDED
@@ -0,0 +1,3 @@
+# Auto detect text files and perform LF normalization
+* text=auto
+*.wav filter=lfs diff=lfs merge=lfs -text
.github/workflows/sync.yaml
ADDED
@@ -0,0 +1,29 @@
+name: Sync to Hugging Face Spaces
+
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  sync:
+    name: Sync
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          lfs: true
+
+      - name: Sync to Hugging Face Spaces
+        uses: JacobLinCool/huggingface-sync@v1
+        with:
+          github: ${{ secrets.GITHUB_TOKEN }}
+          user: jacoblincool # Hugging Face username or organization name
+          space: tja-generator # Hugging Face space name
+          token: ${{ secrets.HF_TOKEN }} # Hugging Face token
+          title: "TJA Generator"
+          sdk: "gradio"
+          models: "[ JacobLinCool/odcnn-320k-100 ]"
.gitignore
ADDED
@@ -0,0 +1,163 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+models/
+flagged/
Dockerfile
ADDED
@@ -0,0 +1,23 @@
+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+
+FROM python:3.11
+
+RUN useradd -m -u 1000 user
+
+WORKDIR /app
+
+RUN apt update && apt install -y curl libsndfile1 ffmpeg
+
+COPY --chown=user ./requirements.txt requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+COPY --chown=user . /app
+
+RUN mkdir -p /app/models && \
+    curl -L https://huggingface.co/JacobLinCool/odcnn-320k-100/resolve/main/don_model.pth -o /app/models/don_model.pth && \
+    curl -L https://huggingface.co/JacobLinCool/odcnn-320k-100/resolve/main/ka_model.pth -o /app/models/ka_model.pth && \
+    chown -R user /app/models
+
+CMD ["python", "app.py"]
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 JacobLinCool
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md
ADDED
@@ -0,0 +1,13 @@
+---
+title: TJA Generator
+emoji: 🦄
+colorFrom: indigo
+colorTo: pink
+sdk: gradio
+short_description: HuggingFace Space for https://github.com/seiichiinoue/odcnn
+models: [ JacobLinCool/odcnn-320k-100 ]
+---
+
+# TJA Generator
+
+Use [ODCNN](https://github.com/seiichiinoue/odcnn) to generate TJA from music.
app.py
ADDED
@@ -0,0 +1,122 @@
+import os
+from tempfile import NamedTemporaryFile
+from typing import Tuple
+from zipfile import ZipFile
+
+import gradio as gr
+from accelerate import Accelerator
+from huggingface_hub import hf_hub_download
+
+from odcnn import ODCNN
+from youtube import youtube
+
+accelerator = Accelerator()
+device = accelerator.device
+
+DON_MODEL = hf_hub_download(
+    repo_id="JacobLinCool/odcnn-320k-100", filename="don_model.pth"
+)
+KA_MODEL = hf_hub_download(
+    repo_id="JacobLinCool/odcnn-320k-100", filename="ka_model.pth"
+)
+
+
+models = {"odcnn-320k-100": ODCNN(DON_MODEL, KA_MODEL, device)}
+
+
+def run(file: str, model: str, delta: float, trim: bool) -> Tuple[str, str, str]:
+    preview, tja = models[model].run(file, delta, trim)
+
+    with NamedTemporaryFile(
+        "w", suffix=".tja", delete=True
+    ) as tjafile, NamedTemporaryFile("w", suffix=".zip", delete=False) as zfile:
+        tjafile.write(tja)
+
+        with ZipFile(zfile.name, "w") as z:
+            z.write(file, os.path.basename(file))
+            z.write(tjafile.name, f"{os.path.basename(file)}-{model}.tja")
+
+    return preview, tja, zfile.name
+
+
+def from_youtube(
+    url: str, model: str, delta: float, trim: bool
+) -> Tuple[str, str, str, str]:
+    audio = youtube(url)
+    return audio, *run(audio, model, delta, trim)
+
+
+with gr.Blocks() as app:
+    with open(os.path.join(os.path.dirname(__file__), "README.md"), "r") as f:
+        README = f.read()
+        # remove yaml front matter
+        blocks = README.split("---")
+        if len(blocks) > 1:
+            README = "---".join(blocks[2:])
+
+    gr.Markdown(README)
+
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("## Upload an audio file")
+            audio = gr.Audio(label="Upload an audio file", type="filepath")
+        with gr.Column():
+            gr.Markdown(
+                "## or use a YouTube URL\n\nTry something on [The First Take](https://www.youtube.com/@The_FirstTake)?"
+            )
+            yt = gr.Textbox(
+                label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."
+            )
+            yt_btn = gr.Button("Use this YouTube URL")
+
+    with gr.Row():
+        model = gr.Radio(
+            label="Select a model",
+            choices=[s for s in models.keys()],
+            value="odcnn-320k-100",
+        )
+        btn = gr.Button("Infer", variant="primary")
+
+    with gr.Row():
+        with gr.Column():
+            synthesized = gr.Audio(
+                label="Synthesized Audio",
+                format="mp3",
+                type="filepath",
+                interactive=False,
+            )
+        with gr.Column():
+            tja = gr.Text(label="TJA", interactive=False)
+
+    with gr.Row():
+        zip = gr.File(label="Download ZIP", type="filepath")
+
+    with gr.Accordion("Advanced Options", open=False):
+        delta = gr.Slider(
+            label="Delta",
+            value=0.02,
+            minimum=0.01,
+            maximum=0.5,
+            step=0.01,
+            info="Threshold for note detection (Ura)",
+        )
+        trim = gr.Checkbox(
+            label="Trim silence",
+            value=True,
+            info="Trim silence from the start and end of the audio",
+        )
+
+    btn.click(
+        fn=run,
+        inputs=[audio, model, delta, trim],
+        outputs=[synthesized, tja, zip],
+        api_name="run",
+    )
+
+    yt_btn.click(
+        fn=from_youtube,
+        inputs=[yt, model, delta, trim],
+        outputs=[audio, synthesized, tja, zip],
+    )
+
+app.queue().launch(server_name="0.0.0.0")
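
Note: since app.py registers the endpoint with api_name="run", the Space can also be driven programmatically. A minimal sketch, assuming a recent gradio_client (one that exposes handle_file) and the user/space pair configured in sync.yaml; song.ogg is a hypothetical local file:

from gradio_client import Client, handle_file

# Space id comes from sync.yaml (user: jacoblincool, space: tja-generator).
client = Client("jacoblincool/tja-generator")
preview, tja, zip_path = client.predict(
    handle_file("song.ogg"),  # hypothetical input audio file
    "odcnn-320k-100",         # the only model registered in app.py
    0.02,                     # delta: Ura note-detection threshold
    True,                     # trim silence
    api_name="/run",
)
print(tja[:200])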
asset/don.wav
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:268b9e66095a3890652f6791529edb37630ad1a0c2ad550cddb4cb105895884d
+size 171118
asset/ka.wav
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89cb09280bb84a039b32e33e60a0961811dc1419d6d9ca18771f545cb7414939
+size 52652
model.py
ADDED
@@ -0,0 +1,72 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from tqdm import tqdm
+
+from preprocess import *
+
+
+class convNet(nn.Module):
+    """
+    copies the neural net used in a paper.
+    "Improved musical onset detection with Convolutional Neural Networks".
+    src: https://ieeexplore.ieee.org/document/6854953
+    """
+
+    def __init__(self):
+        super(convNet, self).__init__()
+        # model
+        self.conv1 = nn.Conv2d(3, 10, (3, 7))
+        self.conv2 = nn.Conv2d(10, 20, 3)
+        self.fc1 = nn.Linear(1120, 256)
+        self.fc2 = nn.Linear(256, 120)
+        self.fc3 = nn.Linear(120, 1)
+
+    def forward(self, x, istraining=False, minibatch=1):
+        x = F.max_pool2d(F.relu(self.conv1(x)), (3, 1))
+        x = F.max_pool2d(F.relu(self.conv2(x)), (3, 1))
+        x = F.dropout(x.view(minibatch, -1), training=istraining)
+        x = F.dropout(F.relu(self.fc1(x)), training=istraining)
+        x = F.dropout(F.relu(self.fc2(x)), training=istraining)
+
+        return F.sigmoid(self.fc3(x))
+
+    def infer_data_builder(self, feats, soundlen=15, minibatch=1):
+        x = []
+
+        for i in range(feats.shape[2] - soundlen):
+            x.append(feats[:, :, i : i + soundlen])
+
+            if (i + 1) % minibatch == 0:
+                yield (torch.from_numpy(np.array(x)).float())
+                x = []
+
+        if len(x) != 0:
+            yield (torch.from_numpy(np.array(x)).float())
+
+    def infer(self, feats, device, minibatch=1):
+        with torch.no_grad():
+            inference = None
+            for x in tqdm(
+                self.infer_data_builder(feats, minibatch=minibatch),
+                total=feats.shape[2] // minibatch,
+            ):
+                output = self(x.to(device), minibatch=x.shape[0])
+                if inference is not None:
+                    inference = np.concatenate(
+                        (inference, output.cpu().numpy().reshape(-1))
+                    )
+                else:
+                    inference = output.cpu().numpy().reshape(-1)
+
+            return np.array(inference).reshape(-1)
+
+
+if __name__ == "__main__":
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    net = convNet()
+    net = net.to(device)
+
+    print(net)
+    print("parameters: ", sum(p.numel() for p in net.parameters()))
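
Note: an illustrative shape check for convNet (not part of the commit). The input layout (3, 80, 15) follows fft_and_melscale in preprocess.py: three FFT sizes as channels, 80 mel bands, and the 15-frame soundlen window used by infer_data_builder. Tracing the layers explains fc1's 1120 input features:

import torch
from model import convNet

# (1, 3, 80, 15) -> conv1 3x7 -> (1, 10, 78, 9) -> pool 3x1 -> (1, 10, 26, 9)
# -> conv2 3x3 -> (1, 20, 24, 7) -> pool 3x1 -> (1, 20, 8, 7); 20 * 8 * 7 = 1120.
net = convNet()
x = torch.randn(1, 3, 80, 15)
print(net(x).shape)  # torch.Size([1, 1]): one onset probability per window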
odcnn.py
ADDED
@@ -0,0 +1,87 @@
+import tempfile
+from typing import Tuple
+import numpy as np
+import soundfile as sf
+import torch
+from pathlib import Path
+
+from model import convNet
+from preprocess import Audio, fft_and_melscale
+from synthesize import create_tja, detect, synthesize
+
+
+def trim_silence(data: np.ndarray, sr: int):
+    start = 0
+    end = len(data) - 1
+    while start < len(data) and np.abs(data[start]) < 0.2:
+        start += 1
+    while end > 0 and np.abs(data[end]) < 0.1:
+        end -= 1
+    start = max(start - sr * 3, 0)
+    end = min(end + sr * 3, len(data))
+    print(
+        f"Trimming silence: keeping audio from {start/sr:.2f}s to {end/sr:.2f}s"
+    )
+    data = data[start:end]
+    return data
+
+
+class ODCNN:
+    def __init__(self, don_model: str, ka_model: str, device: torch.device = "cpu"):
+        donNet = convNet()
+        donNet = donNet.to(device)
+        donNet.load_state_dict(torch.load(don_model, map_location="cpu"))
+        self.donNet = donNet
+
+        kaNet = convNet()
+        kaNet = kaNet.to(device)
+        kaNet.load_state_dict(torch.load(ka_model, map_location="cpu"))
+        self.kaNet = kaNet
+
+        self.device = device
+
+    def run(self, file: str, delta=0.05, trim=True) -> Tuple[str, str]:
+        data, sr = sf.read(file, always_2d=True)
+        song = Audio(data, sr)
+        song.data = song.data.mean(axis=1)
+        if trim:
+            song.data = trim_silence(song.data, sr)
+
+        song.feats = fft_and_melscale(
+            song,
+            nhop=512,
+            nffts=[1024, 2048, 4096],
+            mel_nband=80,
+            mel_freqlo=27.5,
+            mel_freqhi=16000.0,
+        )
+
+        don_inference = self.donNet.infer(song.feats, self.device, minibatch=4192)
+        don_inference = np.reshape(don_inference, (-1))
+
+        ka_inference = self.kaNet.infer(song.feats, self.device, minibatch=4192)
+        ka_inference = np.reshape(ka_inference, (-1))
+
+        easy_detection = detect(don_inference, ka_inference, delta=0.25)
+        normal_detection = detect(don_inference, ka_inference, delta=0.2)
+        hard_detection = detect(don_inference, ka_inference, delta=0.15)
+        oni_detection = detect(don_inference, ka_inference, delta=0.075)
+        ura_detection = detect(don_inference, ka_inference, delta=delta)
+
+        synthesized_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
+        synthesize(*hard_detection, song, synthesized_path)
+        file = Path(file)
+        tja = create_tja(
+            song,
+            timestamps=[
+                easy_detection,
+                normal_detection,
+                hard_detection,
+                oni_detection,
+                ura_detection,
+            ],
+            title=file.stem,
+            wave=file.name,
+        )
+
+        return synthesized_path, tja
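
Note: a minimal sketch of driving ODCNN outside the Gradio UI, assuming the two checkpoints were fetched to models/ as in the Dockerfile; song.ogg is a hypothetical input file:

from odcnn import ODCNN

model = ODCNN("models/don_model.pth", "models/ka_model.pth", "cpu")
preview_mp3, tja_text = model.run("song.ogg", delta=0.02, trim=True)
with open("song.tja", "w") as f:
    f.write(tja_text)  # five courses: Easy, Normal, Hard, Oni, Ura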
preprocess.py
ADDED
@@ -0,0 +1,163 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import soundfile as sf
+from librosa.filters import mel
+from scipy import signal
+from scipy.fftpack import fft
+
+
+class Audio:
+    """
+    audio class which holds music data and timestamp for notes.
+
+    Args:
+        filename: file name.
+        stereo: True or False; whether you have a Don/Ka stereo file or not. Normally True.
+    Variables:
+
+
+    Example:
+        >>>from music_processor import *
+        >>>song = Audio(filename)
+        >>># to get audio data
+        >>>song.data
+        >>># to import .tja files:
+        >>>song.import_tja(filename)
+        >>># to get data converted
+        >>>song.data = (song.data[:,0]+song.data[:,1])/2
+        >>>fft_and_melscale(song, include_zero_cross=False)
+    """
+
+    def __init__(self, data, samplerate, stereo=True):
+        self.data = data
+        self.samplerate = samplerate
+        if stereo is False:
+            self.data = (self.data[:, 0] + self.data[:, 1]) / 2
+        self.timestamp = []
+
+    def plotaudio(self, start_t, stop_t):
+        plt.plot(
+            np.linspace(start_t, stop_t, stop_t - start_t), self.data[start_t:stop_t, 0]
+        )
+        plt.show()
+
+    def save(self, filename, start_t=0, stop_t=None):
+        if stop_t is None:
+            stop_t = self.data.shape[0]
+        sf.write(filename, self.data[start_t:stop_t], self.samplerate)
+
+    def synthesize(self, diff=True, don="./asset/don.wav", ka="./asset/ka.wav"):
+        donsound = sf.read(don)[0]
+        donsound = (donsound[:, 0] + donsound[:, 1]) / 2
+        kasound = sf.read(ka)[0]
+        kasound = (kasound[:, 0] + kasound[:, 1]) / 2
+        donlen = len(donsound)
+        kalen = len(kasound)
+
+        if diff is True:
+            for stamp in self.timestamp:
+                timing = int(stamp[0] * self.samplerate)
+                try:
+                    if stamp[1] in (1, 3, 5, 6, 7):
+                        self.data[timing : timing + donlen] += donsound
+                    elif stamp[1] in (2, 4):
+                        self.data[timing : timing + kalen] += kasound
+                except ValueError:
+                    pass
+
+        elif diff == "don":
+            if isinstance(self.timestamp[0], tuple):
+                for stamp in self.timestamp:
+                    if stamp * self.samplerate + donlen < self.data.shape[0]:
+                        self.data[
+                            int(stamp[0] * self.samplerate) : int(
+                                stamp[0] * self.samplerate
+                            )
+                            + donlen
+                        ] += donsound
+            else:
+                for stamp in self.timestamp:
+                    if stamp * self.samplerate + donlen < self.data.shape[0]:
+                        self.data[
+                            int(stamp * self.samplerate) : int(stamp * self.samplerate)
+                            + donlen
+                        ] += donsound
+
+        elif diff == "ka":
+            if isinstance(self.timestamp[0], tuple):
+                for stamp in self.timestamp:
+                    if stamp * self.samplerate + kalen < self.data.shape[0]:
+                        self.data[
+                            int(stamp[0] * self.samplerate) : int(
+                                stamp[0] * self.samplerate
+                            )
+                            + kalen
+                        ] += kasound
+            else:
+                for stamp in self.timestamp:
+                    if stamp * self.samplerate + kalen < self.data.shape[0]:
+                        self.data[
+                            int(stamp * self.samplerate) : int(stamp * self.samplerate)
+                            + kalen
+                        ] += kasound
+
+
+def make_frame(data, nhop, nfft):
+    """
+    helping function for fftandmelscale.
+    To use finely sliced windows as training data, returns an array of nfft-sized
+    frames extracted from the data, sliding by nhop (512) samples.
+    """
+
+    length = data.shape[0]
+    framedata = np.concatenate((data, np.zeros(nfft)))  # zero padding
+    return np.array(
+        [framedata[i * nhop : i * nhop + nfft] for i in range(length // nhop)]
+    )
+
+
+# @jit
+def fft_and_melscale(
+    song,
+    nhop=512,
+    nffts=[1024, 2048, 4096],
+    mel_nband=80,
+    mel_freqlo=27.5,
+    mel_freqhi=16000.0,
+    include_zero_cross=False,
+):
+    """
+    fft and melscale method.
+    fft: nffts = [1024, 2048, 4096]; extracts np.arrays from the data with varying
+    window lengths and applies the fast Fourier transform.
+    melscale: reduces the frequency dimension and takes log10 values.
+    """
+
+    feat_channels = []
+
+    for nfft in nffts:
+        feats = []
+        window = signal.windows.blackmanharris(nfft)
+        filt = mel(
+            sr=song.samplerate,
+            n_fft=nfft,
+            n_mels=mel_nband,
+            fmin=mel_freqlo,
+            fmax=mel_freqhi,
+        )
+
+        # get normal frame
+        frame = make_frame(song.data, nhop, nfft)
+        # print(frame.shape)
+
+        # melscaling
+        processedframe = fft(window * frame)[:, : nfft // 2 + 1]
+        processedframe = np.dot(filt, np.transpose(np.abs(processedframe) ** 2))
+        processedframe = 20 * np.log10(processedframe + 0.1)
+        # print(processedframe.shape)
+
+        feat_channels.append(processedframe)
+
+    if include_zero_cross:
+        song.zero_crossing = np.where(np.diff(np.sign(song.data)))[0]
+        print(song.zero_crossing)
+
+    return np.array(feat_channels)
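
Note: an illustrative check of the feature layout fft_and_melscale returns: one channel per FFT size, mel_nband rows, and one column per nhop-sample hop. The values below assume 2 seconds of mono audio at 44.1 kHz:

import numpy as np
from preprocess import Audio, fft_and_melscale

sr = 44100
song = Audio(np.random.randn(sr * 2), sr)  # already mono, so keep stereo=True
feats = fft_and_melscale(song)
print(feats.shape)  # (3, 80, 172): FFT sizes x mel bands x (2 * 44100 // 512) frames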
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+gradio
+chainer
+librosa
+matplotlib
+numpy
+soundfile
+torch
+scikit-learn
+yt_dlp
+accelerate
+spaces
synthesize.py
ADDED
@@ -0,0 +1,148 @@
+from typing import List
+import numpy as np
+from librosa.util import peak_pick
+
+from preprocess import *
+
+
+def smooth(x, window_len=11, window="hanning"):
+    if x.ndim != 1:
+        raise ValueError
+
+    if x.size < window_len:
+        raise ValueError
+
+    if window_len < 3:
+        return x
+
+    if not window in ["flat", "hanning", "hamming", "bartlett", "blackman"]:
+        raise ValueError
+
+    s = np.r_[x[window_len - 1 : 0 : -1], x, x[-2 : -window_len - 1 : -1]]
+    # print(len(s))
+    if window == "flat":  # moving average
+        w = np.ones(window_len, "d")
+    else:
+        w = eval("np." + window + "(window_len)")
+
+    y = np.convolve(w / w.sum(), s, mode="valid")
+
+    return y
+
+
+def detect(don_inference, ka_inference, delta=0.05):
+    don_inference = smooth(don_inference, 5)
+    ka_inference = smooth(ka_inference, 5)
+
+    don_timestamp = (
+        peak_pick(
+            x=don_inference,
+            pre_max=1,
+            post_max=2,
+            pre_avg=4,
+            post_avg=5,
+            delta=delta,
+            wait=3,
+        )
+        + 7
+    )  # the actual sound sits at the 7th frame
+    ka_timestamp = (
+        peak_pick(
+            x=ka_inference,
+            pre_max=1,
+            post_max=2,
+            pre_avg=4,
+            post_avg=5,
+            delta=delta,
+            wait=3,
+        )
+        + 7
+    )
+
+    print(don_timestamp)
+    print(ka_timestamp)
+
+    don_timestamp = don_timestamp[
+        np.where(don_inference[don_timestamp] > ka_inference[don_timestamp])
+    ]
+
+    ka_timestamp = ka_timestamp[
+        np.where(ka_inference[ka_timestamp] > don_inference[ka_timestamp])
+    ]
+
+    return don_timestamp, ka_timestamp
+
+
+# def note_to_drumroll(timestamp, max_gap=5, min_note=3):
+#     drumroll = []
+#     note = 0
+#     for i in range(1, len(timestamp)):
+#         if timestamp[i] - timestamp[i - 1] <= max_gap:
+#             note += 1
+#         else:
+#             if note >= min_note:
+#                 drumroll.append((timestamp[i - note - 1], timestamp[i - 1]))
+#             note = 0
+#     if note >= min_note:
+#         drumroll.append((timestamp[-note - 1], timestamp[-1]))
+#     return drumroll
+
+def synthesize(don_timestamp, ka_timestamp, song, filepath):
+    song.don_timestamp = don_timestamp
+    song.timestamp = song.don_timestamp * 512 / song.samplerate
+    # print(len(song.timestamp))
+    song.synthesize(diff="don")
+
+    # song.ka_timestamp = song.don_timestamp
+    song.ka_timestamp = ka_timestamp
+    song.timestamp = song.ka_timestamp * 512 / song.samplerate
+    # print(len(song.timestamp))
+    song.synthesize(diff="ka")
+
+    song.save(filepath)
+
+
+def create_tja(
+    song,
+    timestamps: List[tuple],
+    title="untitled",
+    subtitle="--",
+    wave="untitled.ogg",
+    safezone=2,
+):
+    tja = f"TITLE: {title}\nSUBTITLE: {subtitle}\nBPM: 240\nWAVE:{wave}\nOFFSET:0\n\n"
+
+    for i, (don, ka) in enumerate(timestamps):
+        try:
+            level = [3, 5, 7, 8, 9][i]
+            scroll = [0.6, 0.7, 0.8, 0.9, 1.0][i]
+
+            don_timestamp = np.rint(don * 512 / song.samplerate * 100).astype(np.int32)
+            ka_timestamp = np.rint(ka * 512 / song.samplerate * 100).astype(np.int32)
+            length = np.max(
+                (
+                    don_timestamp[-1] if don_timestamp.size > 0 else 0,
+                    ka_timestamp[-1] if ka_timestamp.size > 0 else 0,
+                )
+            )
+            safezone_keep = 0
+            tja += f"COURSE:{i}\nLEVEL:{level}\n\n#START\n#SCROLL {scroll}\n"
+            for time in range(length):
+                if np.isin(time, don_timestamp) == True and safezone_keep <= 0:
+                    tja += "1"
+                    safezone_keep = safezone
+                elif np.isin(time, ka_timestamp) == True and safezone_keep <= 0:
+                    tja += "2"
+                    safezone_keep = safezone
+                else:
+                    tja += "0"
+                safezone_keep -= 1
+                if time % 100 == 99:
+                    tja += ",\n"
+            if length % 100 != 0:
+                tja += "0" * (100 - (length % 100)) + ",\n"
+            tja += "#END\n\n"
+        except:
+            pass
+
+    return tja
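
Note on the timing constants in create_tja (illustrative arithmetic, not part of the commit): at BPM 240 one 4/4 measure lasts 60 / 240 * 4 = 1 second, and each 100-character row therefore gives 10 ms per character, which is exactly the unit produced by frame * 512 / samplerate * 100:

# Map a 512-sample frame index to a 10 ms TJA character slot (sr assumed 44100).
sr = 44100
frame = 430                           # hypothetical onset frame
char = round(frame * 512 / sr * 100)  # -> 499, i.e. about 4.99 s into the song
measure, offset = divmod(char, 100)   # row 4, character 99
print(char, measure, offset)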
youtube.py
ADDED
@@ -0,0 +1,43 @@
+import hashlib
+import os
+import shutil
+import tempfile
+
+import gradio as gr
+import yt_dlp
+from gradio_client import Client
+
+
+def youtube(url: str) -> str:
+    if not url:
+        raise gr.Error("Please input a YouTube URL")
+
+    hash = hashlib.md5(url.encode()).hexdigest()
+    tmp_file = os.path.join(tempfile.gettempdir(), f"{hash}")
+
+    try:
+        ydl_opts = {
+            "format": "bestaudio/best",
+            "outtmpl": tmp_file,
+            "postprocessors": [
+                {
+                    "key": "FFmpegExtractAudio",
+                    "preferredcodec": "mp3",
+                    "preferredquality": "192",
+                }
+            ],
+        }
+
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+    except Exception as e:
+        print(e)
+        try:
+            ytdl = Client("JacobLinCool/yt-dlp")
+            file = ytdl.predict(api_name="/download", url=url)
+            shutil.move(file, tmp_file + ".mp3")
+        except Exception as e:
+            print(e)
+            raise gr.Error(f"Failed to download YouTube audio from {url}")
+
+    return tmp_file + ".mp3"
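
Note: usage is a single call (the URL below is a placeholder); the function caches by the MD5 of the URL and falls back to the hosted JacobLinCool/yt-dlp Space when local extraction fails:

from youtube import youtube

mp3_path = youtube("https://www.youtube.com/watch?v=...")  # placeholder URL
print(mp3_path)  # e.g. <tempdir>/<md5-of-url>.mp3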