| |
| |
| |
| |
|
|
| import os |
| import wave |
| from config import ( |
| SUPPORTED_AUDIO_EXTENSIONS, |
| AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES, |
| MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES |
| ) |
|
|
def build_format_display_names_from_supported_extensions():
    """Build the mapping from format code to human-readable display name.

    Each entry of ``SUPPORTED_AUDIO_EXTENSIONS`` contributes one key: the
    extension with its leading dot(s) stripped. The value comes from
    ``AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES`` when the code is listed there,
    otherwise it is the upper-cased code. The ``"unknown"`` key is always
    set to ``"Unknown"`` afterwards.

    Returns:
        dict: format code -> display name.
    """
    codes = (extension.lstrip(".") for extension in SUPPORTED_AUDIO_EXTENSIONS)
    display_names = {
        code: (
            AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES[code]
            if code in AUDIO_FORMAT_DISPLAY_NAME_OVERRIDES
            else code.upper()
        )
        for code in codes
    }

    # Assigned last so it wins over any "unknown" entry produced above.
    display_names["unknown"] = "Unknown"
    return display_names


# Built once at import time; get_format_display_name() looks codes up here.
FORMAT_DISPLAY_NAMES = build_format_display_names_from_supported_extensions()
|
|
def get_audio_file_extension(file_path):
    """Return the lower-cased extension of ``file_path``, dot included.

    Args:
        file_path: Path to inspect; may be ``None`` or empty.

    Returns:
        ``None`` when ``file_path`` is falsy; otherwise the suffix reported
        by ``os.path.splitext`` in lower case (an empty string when the
        path has no extension).
    """
    if file_path:
        return os.path.splitext(file_path)[1].lower()
    return None
|
|
def is_supported_audio_extension(file_path):
    """Tell whether ``file_path`` ends with a supported audio extension.

    The extension is obtained through ``get_audio_file_extension`` (lower
    case, leading dot) and tested for membership in
    ``SUPPORTED_AUDIO_EXTENSIONS``.

    Returns:
        ``False`` when no path was given; otherwise the membership result.
    """
    extension = get_audio_file_extension(file_path)
    return extension is not None and extension in SUPPORTED_AUDIO_EXTENSIONS
|
|
def format_file_size_for_display(size_bytes):
    """Format a byte count as a short human-readable string.

    Values below 1024 are shown as ``"<n> bytes"``. Larger values are scaled
    by powers of 1024 and shown as KB (one decimal), MB (two decimals) or
    GB (two decimals).

    The unit is chosen *after* rounding, so a value that would round up to
    1024 of a unit is promoted to the next unit (1048575 bytes is shown as
    ``"1.00 MB"`` rather than ``"1024.0 KB"``).

    Args:
        size_bytes: Size in bytes.

    Returns:
        str: The formatted size.
    """
    if size_bytes < 1024:
        return f"{size_bytes} bytes"

    scaled = size_bytes / 1024
    for unit, decimals in (("KB", 1), ("MB", 2)):
        rendered = f"{scaled:.{decimals}f}"
        # Compare the rounded text, not the raw value, to avoid "1024.0 KB".
        if float(rendered) < 1024:
            return f"{rendered} {unit}"
        scaled /= 1024

    # GB is the largest unit offered; anything bigger stays expressed in GB.
    return f"{scaled:.2f} GB"
|
|
def validate_file_size_for_voice_cloning(file_path):
    """Check that the file does not exceed the voice-clone size limit.

    Args:
        file_path: Path of the uploaded audio file.

    Returns:
        tuple: ``(True, None)`` when the size is within
        ``MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES``; otherwise ``(False, message)``
        where ``message`` explains the failure (no path given, size could
        not be read, or file too large).
    """
    if not file_path:
        return False, "No audio file provided."

    try:
        size_in_bytes = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {size_error}"

    if size_in_bytes > MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES:
        limit_text = format_file_size_for_display(MAXIMUM_VOICE_CLONE_FILE_SIZE_BYTES)
        actual_text = format_file_size_for_display(size_in_bytes)
        message = (
            f"Audio file size ({actual_text}) exceeds the maximum allowed size of {limit_text}. "
            "Please upload a smaller audio file."
        )
        return False, message

    return True, None
|
|
def validate_file_exists_and_readable(file_path):
    """Run basic filesystem sanity checks on an audio file path.

    Checks, in order: a path was supplied, it exists, it is a regular file,
    its size can be read, it is not empty, it is at least 44 bytes long, and
    one byte can actually be read from it.

    Args:
        file_path: Path of the audio file to check.

    Returns:
        tuple: ``(True, None)`` when every check passes, otherwise
        ``(False, message)`` for the first check that fails.
    """
    if not file_path:
        return False, "No audio file provided."

    path_checks = (
        (os.path.exists, "Audio file does not exist."),
        (os.path.isfile, "The provided path is not a valid file."),
    )
    for passes, failure_message in path_checks:
        if not passes(file_path):
            return False, failure_message

    try:
        byte_count = os.path.getsize(file_path)
    except OSError as size_error:
        return False, f"Cannot read file size: {size_error}"

    # NOTE(review): 44 presumably matches the canonical PCM WAV header size;
    # confirm before changing.
    if byte_count < 44:
        if byte_count == 0:
            return False, "Audio file is empty (0 bytes)."
        return False, "Audio file is too small to be a valid audio file."

    # Reading a single byte proves the file can be opened with current permissions.
    try:
        with open(file_path, "rb") as probe:
            probe.read(1)
    except IOError as read_error:
        return False, f"Audio file is not readable: {read_error}"

    return True, None
|
|
def detect_audio_format_from_header(file_path):
    """Guess the audio container/codec from the first bytes of a file.

    Up to 32 header bytes are read and compared against known signatures in
    a fixed order (WAV, MP3, FLAC, Ogg, AIFF, ``ftyp`` box, EBML/WebM, other
    MP4-style boxes). If none match, the file extension is used as a
    fallback when it is listed in ``SUPPORTED_AUDIO_EXTENSIONS``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        tuple: ``(format_code, None)`` on success; ``("unknown", message)``
        when neither header nor extension identifies the format;
        ``(None, message)`` when the file is shorter than 4 bytes or an
        error occurs while reading it.
    """
    mpeg_sync_words = (
        b"\xff\xfb",
        b"\xff\xfa",
        b"\xff\xf3",
        b"\xff\xf2",
        b"\xff\xe0",
        b"\xff\xe2",
        b"\xff\xe3",
    )
    mp4_style_boxes = (b"mdat", b"moov", b"free", b"skip", b"wide")

    try:
        with open(file_path, "rb") as stream:
            header = stream.read(32)

        if len(header) < 4:
            return None, "File is too small to determine audio format."

        # Slices past the end are simply shorter, so they can never equal a
        # 4-byte signature; no explicit length guards are needed below.
        magic = header[:4]
        box_type = header[4:8]
        form_type = header[8:12]

        if magic == b"RIFF" and form_type == b"WAVE":
            detected = "wav"
        elif header[:3] == b"ID3" or header[:2] in mpeg_sync_words:
            detected = "mp3"
        elif magic == b"fLaC":
            detected = "flac"
        elif magic == b"OggS":
            detected = "ogg"
        elif magic == b"FORM" and form_type in (b"AIFF", b"AIFC"):
            detected = "aiff"
        elif box_type == b"ftyp":
            detected = "m4a"
        elif magic == b"\x1aE\xdf\xa3":
            detected = "webm"
        elif box_type in mp4_style_boxes:
            detected = "m4a"
        else:
            detected = None

        if detected is not None:
            return detected, None

        # No signature matched: trust the extension if it is a supported one.
        extension = get_audio_file_extension(file_path)
        if extension and extension in SUPPORTED_AUDIO_EXTENSIONS:
            return extension.lstrip("."), None

        return "unknown", "Could not determine audio format from file header. The file may be corrupted or in an unsupported format."

    except IOError as io_error:
        return None, f"Error reading file header: {io_error}"

    except Exception as detection_error:
        return None, f"Unexpected error detecting audio format: {detection_error}"
|
|
def _describe_wav_parameter_problem(channels, sample_width, frame_rate, frame_count):
    """Return a message for the first out-of-range WAV parameter, or None."""
    if channels < 1:
        return "WAV file has no audio channels."
    if channels > 16:
        return f"WAV file has too many channels ({channels}). Maximum supported is 16."

    if sample_width < 1:
        return "WAV file has invalid sample width (less than 1 byte)."
    if sample_width > 4:
        return f"WAV file has unsupported sample width ({sample_width} bytes). Maximum supported is 4 bytes (32-bit)."

    if frame_rate < 100:
        return f"WAV file has invalid sample rate ({frame_rate} Hz). Minimum supported is 100 Hz."
    if frame_rate > 384000:
        return f"WAV file has unsupported sample rate ({frame_rate} Hz). Maximum supported is 384000 Hz."

    if frame_count < 1:
        return "WAV file contains no audio frames."

    # frame_rate is at least 100 here, so the division is safe.
    duration_seconds = frame_count / frame_rate
    if duration_seconds < 0.1:
        return f"Audio is too short ({duration_seconds:.2f} seconds). Minimum duration is 0.1 seconds."
    if duration_seconds > 60:
        return f"Audio is too long ({duration_seconds:.0f} seconds). Maximum duration is 1 minute."

    return None


def validate_wav_file_structure(file_path):
    """Open a WAV file with the ``wave`` module and sanity-check its parameters.

    The channel count (1-16), sample width (1-4 bytes), sample rate
    (100-384000 Hz), frame count (at least 1) and resulting duration
    (0.1-60 seconds) are checked in that order.

    Args:
        file_path: Path of the WAV file.

    Returns:
        tuple: ``(True, None)`` when the file parses and every parameter is
        in range; otherwise ``(False, message)`` describing the first
        problem found or the parsing error.
    """
    try:
        with wave.open(file_path, "rb") as reader:
            problem = _describe_wav_parameter_problem(
                reader.getnchannels(),
                reader.getsampwidth(),
                reader.getframerate(),
                reader.getnframes(),
            )

    except wave.Error as wav_error:
        detail = str(wav_error)

        # Translate the wave module's known messages into friendlier wording.
        if "file does not start with RIFF id" in detail:
            return False, "File has .wav extension but is not a valid WAV file. It may be a different audio format renamed to .wav."
        if "unknown format" in detail.lower():
            return False, "WAV file uses an unsupported audio encoding format."
        return False, f"Invalid WAV file structure: {detail}"

    except EOFError:
        return False, "WAV file is truncated or corrupted (unexpected end of file)."

    except Exception as validation_error:
        return False, f"Error validating WAV file: {validation_error}"

    if problem is None:
        return True, None
    return False, problem
|
|
def perform_comprehensive_audio_validation(file_path):
    """Run the full validation pipeline on an audio file.

    Steps: existence/readability check, supported-extension check, header
    based format detection, and - only when the detected format is WAV - a
    structural check of the WAV file.

    Args:
        file_path: Path of the audio file.

    Returns:
        tuple: ``(is_valid, is_wav, detected_format, error_message)``.
        ``error_message`` is ``None`` when ``is_valid`` is ``True``.
    """
    readable, readable_error = validate_file_exists_and_readable(file_path)
    if not readable:
        return False, False, None, readable_error

    if not is_supported_audio_extension(file_path):
        extension = get_audio_file_extension(file_path)
        supported = ", ".join(SUPPORTED_AUDIO_EXTENSIONS)
        return False, False, None, f"Unsupported file format '{extension}'. Supported formats are: {supported}"

    detected_format, detection_error = detect_audio_format_from_header(file_path)
    if detected_format is None:
        return False, False, None, detection_error

    # Only WAV files get a structural check; other formats pass as detected.
    if detected_format != "wav":
        return True, False, detected_format, None

    wav_ok, wav_error = validate_wav_file_structure(file_path)
    if not wav_ok:
        return False, True, "wav", wav_error

    return True, True, detected_format, None
|
|
def perform_voice_clone_file_validation(file_path):
    """Validate an audio file intended for voice cloning.

    The size limit is enforced first; only files that pass it go through
    ``perform_comprehensive_audio_validation``.

    Args:
        file_path: Path of the audio file.

    Returns:
        tuple: ``(is_valid, is_wav, detected_format, error_message)``, in
        the same shape as ``perform_comprehensive_audio_validation``.
    """
    size_ok, size_error = validate_file_size_for_voice_cloning(file_path)
    if size_ok:
        return perform_comprehensive_audio_validation(file_path)
    return False, False, None, size_error
|
|
def get_format_display_name(format_code):
    """Return the display name for a format code.

    Args:
        format_code: Format code such as ``"wav"``, or ``None``.

    Returns:
        ``"Unknown"`` for ``None``; the ``FORMAT_DISPLAY_NAMES`` entry when
        the code is registered; otherwise the upper-cased code.
    """
    if format_code is None:
        return "Unknown"

    try:
        return FORMAT_DISPLAY_NAMES[format_code]
    except KeyError:
        # Unregistered code: fall back to a plain upper-case rendering.
        return format_code.upper()