Lasha committed on
Commit
ec0660a
·
1 Parent(s): 923f86f
Files changed (4) hide show
  1. README.md +2 -2
  2. app.py +115 -5
  3. packages.txt +1 -0
  4. requirements.txt +1 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
  title: Music Flamingo
3
- emoji: 🌍
4
  colorFrom: yellow
5
  colorTo: purple
6
  sdk: gradio
7
  sdk_version: 5.49.1
8
  python_version: 3.12
9
  app_file: app.py
10
- pinned: false
11
  license: apache-2.0
12
  ---
13
 
 
1
  ---
2
  title: Music Flamingo
3
+ emoji: 🎵
4
  colorFrom: yellow
5
  colorTo: purple
6
  sdk: gradio
7
  sdk_version: 5.49.1
8
  python_version: 3.12
9
  app_file: app.py
10
+ pinned: true
11
  license: apache-2.0
12
  ---
13
 
app.py CHANGED
@@ -1,17 +1,123 @@
 
1
  import gradio as gr
2
  import yt_dlp
3
  import os
4
  import tempfile
5
  import re
 
 
 
 
6
 
7
  from transformers import AutoModel, AutoProcessor
8
 
9
- api_key = os.getenv("my_secret")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  MODEL_ID = "nvidia/music-flamingo-hf"
12
  HERO_IMAGE_URL = "https://musicflamingo.github.io/logo-no-bg.png"
13
  HERO_TITLE = "Music Flamingo: Scaling Music Understanding in Audio Language Models"
14
- HERO_SUBTITLE = "Upload audio and ask anything - genre, key, chords, timbre, lyrics, structure. Music Flamingo gives detailed, theory-aware answers."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  APP_CSS = """
16
  :root {
17
  --font-sans: ui-sans-serif, system-ui, sans-serif,
@@ -40,7 +146,7 @@ body {
40
  display: flex;
41
  flex-direction: column;
42
  align-items: center;
43
- gap: 16px;
44
  padding: 24px 24px 32px;
45
  text-align: center;
46
  }
@@ -179,8 +285,8 @@ EXAMPLE_AUDIO_PROMPTS = [
179
  "Which line directly precedes the chorus?",
180
  ],
181
  ]
182
- processor = AutoProcessor.from_pretrained(MODEL_ID, token=api_key)
183
- model = AutoModel.from_pretrained(MODEL_ID, token=api_key, device_map="auto").eval()
184
 
185
  _youtube_cache = {}
186
 
@@ -283,6 +389,8 @@ def download_youtube_audio(url, force_reload=False):
283
  ],
284
  "noplaylist": True,
285
  }
 
 
286
 
287
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
288
  info = ydl.extract_info(url, download=False)
@@ -372,6 +480,8 @@ with gr.Blocks(css=APP_CSS, theme=gr.themes.Soft(primary_hue="purple", secondary
372
  <img src="{HERO_IMAGE_URL}" alt="Music Flamingo logo" class="hero__logo" />
373
  <h1 class="hero__title">{HERO_TITLE}</h1>
374
  <p class="hero__subtitle">{HERO_SUBTITLE}</p>
 
 
375
  </div>
376
  """
377
  )
 
1
+ import shutil
2
  import gradio as gr
3
  import yt_dlp
4
  import os
5
  import tempfile
6
  import re
7
+ import subprocess
8
+ import socket
9
+ import time
10
+ import atexit
11
 
12
  from transformers import AutoModel, AutoProcessor
13
 
14
+ PROXY_URL = None
15
+ _tunnel_proc = None
16
+
17
+
18
def _write_temp_key_and_kh(key_str, kh_line):
    """Write an SSH private key and a known-hosts entry to temporary files.

    The key text has its line endings normalised to ``\\n`` and is given a
    trailing newline if it lacks one. The known-hosts entry is stripped of
    surrounding whitespace and terminated with a single newline.

    Args:
        key_str: Private key material as text.
        kh_line: A single known_hosts line for the SSH server.

    Returns:
        A ``(key_path, known_hosts_path)`` tuple. The files are NOT deleted
        automatically; the caller owns them.

    Raises:
        OSError: If a file cannot be created or written. Any file already
            created by this call is removed before the error propagates.
    """
    key_clean = key_str.replace("\r\n", "\n").replace("\r", "\n")
    if not key_clean.endswith("\n"):
        key_clean += "\n"

    written = []
    try:
        for content in (key_clean, kh_line.strip() + "\n"):
            # newline="\n" keeps the normalised line endings on every platform.
            with tempfile.NamedTemporaryFile(
                "w", delete=False, encoding="utf-8", newline="\n"
            ) as tmp:
                written.append(tmp.name)
                # Pin owner-only permissions before any secret hits the disk;
                # ssh refuses identity files that are readable by others.
                os.chmod(tmp.name, 0o600)
                tmp.write(content)
    except Exception:
        # Do not leave a half-written private key behind.
        for leftover in written:
            try:
                os.unlink(leftover)
            except OSError:
                pass
        raise

    key_path, kh_path = written
    return key_path, kh_path
32
+
33
+
34
def _validate_private_key(path):
    """Report whether ``ssh-keygen`` can read the private key at *path*.

    Runs ``ssh-keygen -y -f path`` and treats a zero exit status as success.
    When ``ssh-keygen`` is not on ``PATH`` the key cannot be checked, so it is
    optimistically reported as valid.

    Args:
        path: Filesystem path of the private key file.

    Returns:
        ``True`` if the key was accepted or could not be checked, ``False``
        if ``ssh-keygen`` rejected it, could not be executed, or timed out.
    """
    if not shutil.which("ssh-keygen"):
        return True
    try:
        subprocess.run(
            ["ssh-keygen", "-y", "-f", path],
            # Detach stdin so a passphrase prompt cannot block start-up
            # waiting on an inherited pipe.
            stdin=subprocess.DEVNULL,
            # The derived public key and any diagnostics are not needed.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
            timeout=15,
        )
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired, OSError):
        return False
    return True
42
+
43
+
44
def _ensure_local_socks_tunnel():
    """Start a local SOCKS5 proxy over SSH and publish it in ``PROXY_URL``.

    Best-effort: the function returns silently without setting ``PROXY_URL``
    when the tunnel is not configured or cannot be brought up.

    Configuration is read from the environment:
        SSH_SERVER       destination passed to ``ssh`` (required)
        SSH_PORT         SSH port, defaults to ``"22"``
        SSH_PRIVATE_KEY  private key text (required)
        SSH_HOSTKEY      known_hosts line for the server (required)

    On success ``PROXY_URL`` is set to ``socks5h://127.0.0.1:1080`` and an
    ``atexit`` hook is registered that stops ``ssh`` and deletes the
    temporary key files. On any failure the ``ssh`` process (if started) is
    stopped and the temporary files are removed immediately.
    """
    global PROXY_URL, _tunnel_proc
    if PROXY_URL:
        return
    srv = os.getenv("SSH_SERVER")
    port = os.getenv("SSH_PORT", "22")
    key = os.getenv("SSH_PRIVATE_KEY")
    hk = os.getenv("SSH_HOSTKEY")
    if not (srv and key and hk and shutil.which("ssh")):
        return

    key_path, kh_path = _write_temp_key_and_kh(key, hk)

    def _shutdown():
        """Stop the tunnel process and delete the temporary credential files."""
        global _tunnel_proc
        proc = _tunnel_proc
        if proc is not None and proc.poll() is None:
            proc.terminate()
            try:
                proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                proc.kill()
        _tunnel_proc = None
        for leftover in (key_path, kh_path):
            try:
                os.unlink(leftover)
            except OSError:
                pass

    ready = False
    try:
        if not _validate_private_key(key_path):
            return
        cmd = [
            "ssh", "-NT", "-p", port, "-i", key_path,
            "-D", "127.0.0.1:1080",
            "-o", "IdentitiesOnly=yes",
            "-o", "ExitOnForwardFailure=yes",
            "-o", "BatchMode=yes",
            "-o", "StrictHostKeyChecking=yes",
            "-o", f"UserKnownHostsFile={kh_path}",
            "-o", "GlobalKnownHostsFile=/dev/null",
            "-o", "ServerAliveInterval=30", "-o", "ServerAliveCountMax=3",
            srv,
        ]
        # The child inherits the log descriptor, so closing our handle after
        # Popen returns does not stop ssh from writing to the log.
        with open("/tmp/ssh_tunnel.log", "w") as lf:
            _tunnel_proc = subprocess.Popen(cmd, stdout=lf, stderr=lf)

        # Poll for up to ~10 s (40 probes) until the SOCKS port accepts
        # connections or ssh gives up.
        for _ in range(40):
            if _tunnel_proc.poll() is not None:
                return  # ssh exited: authentication or forwarding failed
            try:
                socket.create_connection(("127.0.0.1", 1080), 0.5).close()
            except OSError:
                time.sleep(0.25)
            else:
                PROXY_URL = "socks5h://127.0.0.1:1080"
                ready = True
                return
    finally:
        if ready:
            # Keep the tunnel for the life of the process, then tidy up.
            atexit.register(_shutdown)
        else:
            # Never leave a stray ssh process or key material behind when
            # the proxy is not going to be used.
            _shutdown()
81
+
82
+
83
+ _ensure_local_socks_tunnel()
84
+
85
 
86
  MODEL_ID = "nvidia/music-flamingo-hf"
87
  HERO_IMAGE_URL = "https://musicflamingo.github.io/logo-no-bg.png"
88
  HERO_TITLE = "Music Flamingo: Scaling Music Understanding in Audio Language Models"
89
+ HERO_SUBTITLE = "Upload a song and ask anything — including captions, lyrics, genre, key, chords, or complex questions. Music Flamingo gives detailed answers."
90
+ HERO_AUTHORS = """
91
+ <div style="margin-top: 8px; margin-bottom: 4px; padding: 8px 20px; text-align: center; max-width: 900px; margin-inline: auto;">
92
+ <p style="font-size: 0.95rem; line-height: 1.6; margin-bottom: 10px;">
93
+ <strong>Authors:</strong> Sreyan Ghosh<sup>1,2*</sup>, Arushi Goel<sup>1*</sup>, Lasha Koroshinadze<sup>2**</sup>, Sang-gil Lee<sup>1</sup>, Zhifeng Kong<sup>1</sup>, Joao Felipe Santos<sup>1</sup>,<br>Ramani Duraiswami<sup>2</sup>, Dinesh Manocha<sup>2</sup>, Wei Ping<sup>1</sup>, Mohammad Shoeybi<sup>1</sup>, Bryan Catanzaro<sup>1</sup>
94
+ </p>
95
+ <p style="font-size: 0.88rem; opacity: 0.75; margin-bottom: 8px;">
96
+ <sup>1</sup>NVIDIA, CA, USA | <sup>2</sup>University of Maryland, College Park, USA
97
+ </p>
98
+ <p style="font-size: 0.82rem; opacity: 0.65; font-style: italic; margin-bottom: 6px;">
99
+ *Equally contributed and led the project. Names randomly ordered. **Significant technical contribution.
100
+ </p>
101
+ <p style="font-size: 0.85rem; opacity: 0.7; margin-bottom: 0;">
102
+ <strong>Correspondence:</strong> <a href="mailto:sreyang@umd.edu" style="color: inherit; text-decoration: underline;">sreyang@umd.edu</a>, <a href="mailto:arushig@nvidia.com" style="color: inherit; text-decoration: underline;">arushig@nvidia.com</a>
103
+ </p>
104
+ </div>
105
+ """
106
+ HERO_BADGES = """
107
+ <div style="display: flex; justify-content: center; margin-top: 6px; align-items: center;">
108
+ <div style="display: flex; justify-content: center; flex-wrap: wrap; gap: 8px;">
109
+ <a href="https://research.nvidia.com/labs/adlr/MF/"><img src="https://img.shields.io/badge/Demo page-228B22" alt="Demo page"></a>
110
+ <a href="https://github.com/NVIDIA/audio-flamingo"><img src='https://img.shields.io/badge/Github-Audio Flamingo 3-9C276A' alt="Github"></a>
111
+ <a href="https://github.com/NVIDIA/audio-flamingo/stargazers"><img src="https://img.shields.io/github/stars/NVIDIA/audio-flamingo.svg?style=social" alt="Stars"></a>
112
+ <a href="https://huggingface.co/nvidia/music-flamingo">
113
+ <img src="https://img.shields.io/badge/🤗-Checkpoints-ED5A22.svg" alt="Checkpoints">
114
+ </a>
115
+ <a href="https://huggingface.co/datasets/nvidia/MF-Skills">
116
+ <img src="https://img.shields.io/badge/🤗-Dataset: MF--Skills-ED5A22.svg" alt="Dataset">
117
+ </a>
118
+ </div>
119
+ </div>
120
+ """
121
  APP_CSS = """
122
  :root {
123
  --font-sans: ui-sans-serif, system-ui, sans-serif,
 
146
  display: flex;
147
  flex-direction: column;
148
  align-items: center;
149
+ gap: 12px;
150
  padding: 24px 24px 32px;
151
  text-align: center;
152
  }
 
285
  "Which line directly precedes the chorus?",
286
  ],
287
  ]
288
+ processor = AutoProcessor.from_pretrained(MODEL_ID)
289
+ model = AutoModel.from_pretrained(MODEL_ID, device_map="auto").eval()
290
 
291
  _youtube_cache = {}
292
 
 
389
  ],
390
  "noplaylist": True,
391
  }
392
+ if PROXY_URL:
393
+ ydl_opts["proxy"] = PROXY_URL
394
 
395
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
396
  info = ydl.extract_info(url, download=False)
 
480
  <img src="{HERO_IMAGE_URL}" alt="Music Flamingo logo" class="hero__logo" />
481
  <h1 class="hero__title">{HERO_TITLE}</h1>
482
  <p class="hero__subtitle">{HERO_SUBTITLE}</p>
483
+ {HERO_AUTHORS}
484
+ {HERO_BADGES}
485
  </div>
486
  """
487
  )
packages.txt CHANGED
@@ -1,3 +1,4 @@
1
  ffmpeg
2
  libsndfile1
3
  git
 
 
1
  ffmpeg
2
  libsndfile1
3
  git
4
+ openssh-client
requirements.txt CHANGED
@@ -7,3 +7,4 @@ librosa
7
  soundfile
8
  yt-dlp
9
  gradio
 
 
7
  soundfile
8
  yt-dlp
9
  gradio
10
+ pysocks