"""Gradio front end for voice conversion.

Exposes a single ``convert_tts`` endpoint through a ``gr.Interface``. Each
request is handed to ``voice_processing.parallel_tts`` and the resulting audio
is written to the ``outputs`` directory.
"""

import asyncio
import base64
import json
import logging
import os
import sys
import traceback
from datetime import datetime

import gradio as gr
import numpy as np
from scipy.io import wavfile

# Project modules, kept in the order the original file imported them.
from voice_processing import parallel_tts, get_model_names
from rvc_service import RVCService

# Directory (relative to the working directory) where converted audio is saved.
OUTPUT_DIR = "outputs"

# Log everything to both a file and stdout.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
    handlers=[
        logging.FileHandler('rvc_server.log'),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger('rvc_server')

# Instantiated at import time; not referenced elsewhere in this file.
rvc_service = RVCService()


class _RequestLoggerAdapter(logging.LoggerAdapter):
    """Prefix every message with the request id stored in ``extra``."""

    def process(self, msg, kwargs):
        return f"[{self.extra['request_id']}] {msg}", kwargs


def setup_request_logging():
    """Return a ``(request_logger, request_id)`` pair for one request.

    The id is a microsecond-resolution timestamp. The returned logger is a
    ``LoggerAdapter`` around the shared module logger rather than a freshly
    named ``Logger``: named loggers are cached by the logging module for the
    life of the process, so creating one per request would grow without bound.

    Returns:
        tuple: ``(adapter, request_id)`` where ``adapter`` offers the usual
        ``info``/``error``/``exception`` methods and ``request_id`` is a str.
    """
    request_id = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
    return _RequestLoggerAdapter(logger, {"request_id": request_id}), request_id


def _save_output(audio_output, tgt_sr, request_id, req_logger):
    """Write the converted audio under ``OUTPUT_DIR`` and return its path."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    output_path = os.path.join(OUTPUT_DIR, f"output_{request_id}.wav")

    if isinstance(audio_output, np.ndarray):
        req_logger.info("Saving numpy array output: shape=%s", audio_output.shape)
        wavfile.write(output_path, tgt_sr, audio_output)
    else:
        # Anything that is not an ndarray is written to disk as-is.
        req_logger.info("Saving raw audio output")
        with open(output_path, "wb") as f:
            f.write(audio_output)

    req_logger.info("Successfully saved to %s", output_path)
    return output_path


def _run_conversion(model_name, audio_file, slang_rate, req_logger, request_id):
    """Run one conversion and return ``(json_payload, output_path_or_None)``.

    Exceptions are allowed to propagate; ``convert_tts`` turns them into an
    error payload.
    """
    req_logger.info("Processing audio file: %s", audio_file)
    # The decoded samples are only used for the log line below, but a file
    # that scipy cannot read as WAV raises here and fails the request.
    sr, audio = wavfile.read(audio_file)
    req_logger.info("Audio loaded: sr=%sHz, shape=%s", sr, audio.shape)

    # NOTE(review): the positional layout of this tuple is defined by
    # voice_processing.parallel_tts - confirm the meaning of each field there.
    task = (model_name, None, None, slang_rate, True, audio_file)
    req_logger.info("Running parallel processing")
    result = parallel_tts([task])

    if not result or result[0] is None:
        req_logger.error("Processing failed - no result")
        return {"error": "Processing failed"}, None

    result_tuple = result[0]
    if not (isinstance(result_tuple, tuple) and len(result_tuple) == 3):
        req_logger.error("Invalid result format: %s", result_tuple)
        return {"error": "Invalid result format"}, None

    info, _, (tgt_sr, audio_output) = result_tuple
    if audio_output is None:
        req_logger.error("No audio output generated")
        return {"error": "No audio output generated"}, None

    output_path = _save_output(audio_output, tgt_sr, request_id, req_logger)
    return {"info": info}, output_path


def convert_tts(model_name, audio_file, slang_rate):
    """Voice conversion endpoint.

    Args:
        model_name: Model selected in the UI dropdown.
        audio_file: Path of the uploaded audio file, or ``None`` if nothing
            was uploaded.
        slang_rate: Value of the "Slang Rate" slider.

    Returns:
        tuple: ``(payload, output_path)``. On success ``payload`` is
        ``{"info": ...}`` and ``output_path`` is the saved file. On any
        failure ``payload`` is ``{"error": message}`` and ``output_path`` is
        ``None``; this function does not raise for processing errors.
    """
    req_logger, request_id = setup_request_logging()
    try:
        req_logger.info("New request received - ID: %s", request_id)
        req_logger.info(
            "Parameters: model=%s, slang_rate=%s", model_name, slang_rate
        )

        if audio_file is None:
            req_logger.error("No audio file provided")
            return {"error": "No audio file uploaded."}, None

        try:
            return _run_conversion(
                model_name, audio_file, slang_rate, req_logger, request_id
            )
        except Exception as e:
            # Endpoint boundary: report the failure to the client, and keep
            # the traceback in the log for diagnosis.
            req_logger.exception("Error processing audio: %s", e)
            return {"error": f"Processing error: {str(e)}"}, None
    except Exception as e:
        req_logger.exception("Unexpected error: %s", e)
        return {"error": str(e)}, None


# Create the Gradio interface with its request queue enabled.
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(choices=get_model_names(), label="Model", interactive=True),
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Converted Audio"),
    ],
    title="Voice Conversion",
).queue()

if __name__ == "__main__":
    logger.info("Starting RVC server")
    try:
        iface.launch(
            debug=True,
            show_error=True,
            max_threads=10,
        )
    except Exception as e:
        logger.error(f"Error launching server: {e}", exc_info=True)