Spaces:

TTS-AGI
/

Voice-Clone-Arena

Running

App Files Files Community

kemuriririn committed on Jun 3

Commit

99b1671

1 Parent(s): 367e06e

(wip)modify models

Browse files

Files changed (2) hide show

app.py +62 -9
models.py +1 -1

app.py CHANGED Viewed

@@ -129,6 +129,34 @@ CACHE_AUDIO_DIR = os.path.join(TEMP_AUDIO_DIR, CACHE_AUDIO_SUBDIR)
 os.makedirs(TEMP_AUDIO_DIR, exist_ok=True)
 os.makedirs(CACHE_AUDIO_DIR, exist_ok=True) # Ensure cache subdir exists
 # Store active TTS sessions
 app.tts_sessions = {}
@@ -382,8 +410,13 @@ def generate_and_save_tts(text, model_id, output_dir):
     temp_audio_path = None # Initialize to None
     try:
         app.logger.debug(f"[TTS Gen {model_id}] Starting generation for: '{text[:30]}...'")
         # If predict_tts saves file itself and returns path:
-        temp_audio_path = predict_tts(text, model_id)
         app.logger.debug(f"[TTS Gen {model_id}] predict_tts returned: {temp_audio_path}")
         if not temp_audio_path or not os.path.exists(temp_audio_path):
@@ -396,7 +429,7 @@ def generate_and_save_tts(text, model_id, output_dir):
         # Move the file generated by predict_tts to the target cache directory
         shutil.move(temp_audio_path, dest_path)
         app.logger.debug(f"[TTS Gen {model_id}] Move successful. Returning {dest_path}")
-        return dest_path
     except Exception as e:
         app.logger.error(f"Error generating/saving TTS for model {model_id} and text '{text[:30]}...': {str(e)}")
@@ -407,7 +440,7 @@ def generate_and_save_tts(text, model_id, output_dir):
                 os.remove(temp_audio_path)
             except OSError:
                 pass # Ignore error if file couldn't be removed
-        return None
 def _generate_cache_entry_task(sentence):
@@ -445,8 +478,8 @@ def _generate_cache_entry_task(sentence):
                 future_b = audio_executor.submit(generate_and_save_tts, sentence, model_b_id, CACHE_AUDIO_DIR)
                 timeout_seconds = 120
-                audio_a_path = future_a.result(timeout=timeout_seconds)
-                audio_b_path = future_b.result(timeout=timeout_seconds)
             if audio_a_path and audio_b_path:
                 with tts_cache_lock:
@@ -458,6 +491,8 @@ def _generate_cache_entry_task(sentence):
                             "model_b": model_b_id,
                             "audio_a": audio_a_path,
                             "audio_b": audio_b_path,
                             "created_at": datetime.utcnow(),
                         }
                         app.logger.info(f"Successfully cached entry for: '{sentence[:50]}...'")
@@ -1112,7 +1147,7 @@ def setup_periodic_tasks():
     db_path = app.config["SQLALCHEMY_DATABASE_URI"].replace("sqlite:///", "instance/") # Get relative path
     preferences_repo_id = "kemuriririn/arena-preferences"
-    database_repo_id = "kemuriririn/database-arena-v2"
     votes_dir = "./votes"
     def sync_database():
@@ -1318,10 +1353,27 @@ def toggle_leaderboard_visibility():
 @app.route("/api/tts/cached-sentences")
 def get_cached_sentences():
-    """Returns a list of sentences currently available in the TTS cache."""
     with tts_cache_lock:
-        cached_keys = list(tts_cache.keys())
-    return jsonify(cached_keys)
 def get_weighted_random_models(
@@ -1414,6 +1466,7 @@ if __name__ == "__main__":
              except Exception as e:
                  print(f"Error downloading database from HF dataset: {str(e)} ⚠️")
         db.create_all()  # Create tables if they don't exist
         insert_initial_models()

 os.makedirs(TEMP_AUDIO_DIR, exist_ok=True)
 os.makedirs(CACHE_AUDIO_DIR, exist_ok=True) # Ensure cache subdir exists
+# --- Reference voice download and management ---
+REFERENCE_AUDIO_DIR = os.path.join(TEMP_AUDIO_DIR, "reference_audios")
+REFERENCE_AUDIO_DATASET = os.getenv("REFERENCE_AUDIO_DATASET", "kemuriririn/arena-files")
+REFERENCE_AUDIO_PATTERN = os.getenv("REFERENCE_AUDIO_PATTERN", "reference_audios/")
+reference_audio_files = []
+def download_reference_audios():
+    """Download reference audio clips from a Hugging Face dataset.
+
+    Lists the files of the dataset repo named by ``REFERENCE_AUDIO_DATASET``,
+    downloads every ``.wav`` file whose repo path starts with
+    ``REFERENCE_AUDIO_PATTERN`` into ``REFERENCE_AUDIO_DIR``, and stores the
+    resulting local paths in the module-level ``reference_audio_files`` list.
+
+    The list contents are replaced as a whole, so calling this function more
+    than once does not accumulate duplicate entries. If listing or any single
+    download fails, the error is printed and the list is left empty.
+    """
+    os.makedirs(REFERENCE_AUDIO_DIR, exist_ok=True)
+    token = os.getenv("HF_TOKEN")  # read once; reused for listing and every download
+    downloaded = []
+    try:
+        api = HfApi(token=token)
+        files = api.list_repo_files(repo_id=REFERENCE_AUDIO_DATASET, repo_type="dataset")
+        # Only .wav files under the configured prefix count as reference audios.
+        wav_files = [f for f in files if f.startswith(REFERENCE_AUDIO_PATTERN) and f.endswith(".wav")]
+        for f in wav_files:
+            local_path = hf_hub_download(
+                repo_id=REFERENCE_AUDIO_DATASET,
+                filename=f,
+                repo_type="dataset",
+                local_dir=REFERENCE_AUDIO_DIR,
+                token=token,
+            )
+            downloaded.append(local_path)
+    except Exception as e:
+        print(f"Error downloading reference audios: {e}")
+        downloaded = []  # discard partial results, as before
+    else:
+        print(f"Downloaded {len(downloaded)} reference audios.")
+    # Update the existing list object in place so every holder of a reference
+    # to it sees the new contents, on success and on failure alike.
+    reference_audio_files[:] = downloaded
 # Store active TTS sessions
 app.tts_sessions = {}
     temp_audio_path = None # Initialize to None
     try:
         app.logger.debug(f"[TTS Gen {model_id}] Starting generation for: '{text[:30]}...'")
+        # Pick a random reference audio
+        reference_audio_path = None
+        if reference_audio_files:
+            reference_audio_path = random.choice(reference_audio_files)
+        app.logger.debug(f"[TTS Gen {model_id}] Using reference audio: {reference_audio_path}")
         # If predict_tts saves file itself and returns path:
+        temp_audio_path = predict_tts(text, model_id, reference_audio_path=reference_audio_path)
         app.logger.debug(f"[TTS Gen {model_id}] predict_tts returned: {temp_audio_path}")
         if not temp_audio_path or not os.path.exists(temp_audio_path):
         # Move the file generated by predict_tts to the target cache directory
         shutil.move(temp_audio_path, dest_path)
         app.logger.debug(f"[TTS Gen {model_id}] Move successful. Returning {dest_path}")
+        return dest_path, reference_audio_path
     except Exception as e:
         app.logger.error(f"Error generating/saving TTS for model {model_id} and text '{text[:30]}...': {str(e)}")
                 os.remove(temp_audio_path)
             except OSError:
                 pass # Ignore error if file couldn't be removed
+        return None, None
 def _generate_cache_entry_task(sentence):
                 future_b = audio_executor.submit(generate_and_save_tts, sentence, model_b_id, CACHE_AUDIO_DIR)
                 timeout_seconds = 120
+                audio_a_path, ref_a = future_a.result(timeout=timeout_seconds)
+                audio_b_path, ref_b = future_b.result(timeout=timeout_seconds)
             if audio_a_path and audio_b_path:
                 with tts_cache_lock:
                             "model_b": model_b_id,
                             "audio_a": audio_a_path,
                             "audio_b": audio_b_path,
+                            "ref_a": ref_a,
+                            "ref_b": ref_b,
                             "created_at": datetime.utcnow(),
                         }
                         app.logger.info(f"Successfully cached entry for: '{sentence[:50]}...'")
     db_path = app.config["SQLALCHEMY_DATABASE_URI"].replace("sqlite:///", "instance/") # Get relative path
     preferences_repo_id = "kemuriririn/arena-preferences"
+    database_repo_id = "kemuriririn/database-arena"
     votes_dir = "./votes"
     def sync_database():
 @app.route("/api/tts/cached-sentences")
 def get_cached_sentences():
+    """Return the sentences currently in the TTS cache as a JSON list.
+
+    Each item carries the sentence, the two model ids, and the reference
+    audio used for each side expressed relative to ``REFERENCE_AUDIO_DIR``
+    (``None`` when the cache entry has no reference audio recorded).
+    """
+
+    def _relative_ref(entry, key):
+        # Entries without a reference path (missing key or falsy value) map to None.
+        ref_path = entry.get(key)
+        if not ref_path:
+            return None
+        return os.path.relpath(ref_path, start=REFERENCE_AUDIO_DIR)
+
+    cached = []
+    # Hold the lock only while reading the shared cache.
+    with tts_cache_lock:
+        for sentence, entry in tts_cache.items():
+            cached.append(
+                {
+                    "sentence": sentence,
+                    "model_a": entry["model_a"],
+                    "model_b": entry["model_b"],
+                    "ref_a": _relative_ref(entry, "ref_a"),
+                    "ref_b": _relative_ref(entry, "ref_b"),
+                }
+            )
+    return jsonify(cached)
+@app.route("/api/tts/reference-audio/<path:filename>")
+def get_reference_audio(filename):
+    """Serve a downloaded reference audio clip so it can be previewed.
+
+    ``filename`` is a path relative to ``REFERENCE_AUDIO_DIR``. The ``path``
+    converter is used because it also matches names containing ``/``, which
+    the default string converter rejects.
+
+    Returns the file as ``audio/wav``, or a JSON error with status 404 when
+    the path leaves ``REFERENCE_AUDIO_DIR`` or is not an existing regular file.
+    """
+    base_dir = os.path.abspath(REFERENCE_AUDIO_DIR)
+    # abspath collapses ".." lexically without resolving symlinks, so files
+    # that are symlinked into the directory remain servable.
+    file_path = os.path.abspath(os.path.join(base_dir, filename))
+    is_inside = file_path.startswith(base_dir + os.sep)
+    # isfile (not exists) so a directory is reported as missing instead of
+    # being handed to send_file.
+    if not is_inside or not os.path.isfile(file_path):
+        return jsonify({"error": "Reference audio not found"}), 404
+    # An absolute path makes the check above and send_file refer to the same file.
+    return send_file(file_path, mimetype="audio/wav")
 def get_weighted_random_models(
              except Exception as e:
                  print(f"Error downloading database from HF dataset: {str(e)} ⚠️")
+        download_reference_audios()
         db.create_all()  # Create tables if they don't exist
         insert_initial_models()

models.py CHANGED Viewed

@@ -446,7 +446,7 @@ def insert_initial_models():
             name="Spark TTS",
             model_type=ModelType.TTS,
             is_open=False,
-            is_active=False, # API stopped working
             model_url="https://github.com/SparkAudio/Spark-TTS",
         ),
         # Model(

             name="Spark TTS",
             model_type=ModelType.TTS,
             is_open=False,
+            is_active=True, # NOTE(review): re-enabled in this commit; the earlier "API stopped working" note looks stale - confirm
             model_url="https://github.com/SparkAudio/Spark-TTS",
         ),
         # Model(