|
print("starting...")

import argparse

# Language codes offered by --language and by the Gradio language dropdown.
language_options = [
    "en", "es", "fr", "de",
    "it", "pt", "pl", "tr",
    "ru", "nl", "cs", "ar",
    "zh-cn", "ja", "hu", "ko",
]

# Per-language character budget; split_long_sentence() looks a language up
# here and falls back to 250 for codes that are not listed.
char_limits = {
    "en": 250, "es": 239, "fr": 273, "de": 253,
    "it": 213, "pt": 203, "pl": 224, "tr": 226,
    "ru": 182, "nl": 251, "cs": 186, "ar": 166,
    "zh-cn": 82, "ja": 71, "hu": 224, "ko": 95,
}

# Language code -> language name handed to nltk's sent_tokenize(language=...).
# Codes without an entry are not sentence-tokenized by the converters.
language_mapping = {
    "en": "english", "de": "german", "fr": "french", "es": "spanish",
    "it": "italian", "pt": "portuguese", "nl": "dutch", "pl": "polish",
    "cs": "czech", "ru": "russian", "tr": "turkish", "el": "greek",
    "et": "estonian", "no": "norwegian", "ml": "malayalam", "sl": "slovene",
    "da": "danish", "fi": "finnish", "sv": "swedish",
}

# Human-readable list of the language codes, embedded in the --language help.
language_options_str = ", ".join(language_options)
|
|
|
|
|
def _str_to_bool(value):
    """Convert a command-line token such as "True", "false", "1" or "no" to a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy, so the flag could never be
    switched off explicitly.

    :param value: The raw token from the command line (or an actual bool).
    :return: The parsed boolean.
    :raises argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1", "on"):
        return True
    if normalized in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value (True/False), got {value!r}")


parser = argparse.ArgumentParser(
    description="Convert eBooks to Audiobooks using a Text-to-Speech model. You can either launch the Gradio interface or run the script in headless mode for direct conversion.",
    epilog="Example: python script.py --headless --ebook path_to_ebook --voice path_to_voice --language en --use_custom_model True --custom_model model.pth --custom_config config.json --custom_vocab vocab.json"
)

# Boolean switches accept both "--flag True/False" and a bare "--flag"
# (nargs="?" + const=True), so the epilog example above parses as written.
parser.add_argument("--share", type=_str_to_bool, nargs="?", const=True, default=False,
                    help="Set to True to enable a public shareable Gradio link. Defaults to False.")
parser.add_argument("--headless", type=_str_to_bool, nargs="?", const=True, default=False,
                    help="Set to True to run in headless mode without the Gradio interface. Defaults to False.")
parser.add_argument("--ebook", type=str, help="Path to the ebook file for conversion. Required in headless mode.")
parser.add_argument("--voice", type=str, help="Path to the target voice file for TTS. Optional, uses a default voice if not provided.")
parser.add_argument("--language", type=str, default="en",
                    help=f"Language for the audiobook conversion. Options: {language_options_str}. Defaults to English (en).")
parser.add_argument("--use_custom_model", type=_str_to_bool, nargs="?", const=True, default=False,
                    help="Set to True to use a custom TTS model. Defaults to False. Must be True to use custom models, otherwise you'll get an error.")
parser.add_argument("--custom_model", type=str, help="Path to the custom model file (.pth). Required if using a custom model.")
parser.add_argument("--custom_config", type=str, help="Path to the custom config file (config.json). Required if using a custom model.")
parser.add_argument("--custom_vocab", type=str, help="Path to the custom vocab file (vocab.json). Required if using a custom model.")
parser.add_argument("--custom_model_url", type=str,
                    help=("URL to download the custom model as a zip file. Optional, but will be used if provided. "
                          "Examples include David Attenborough's model: "
                          "'https://huggingface.co/drewThomasson/xtts_David_Attenborough_fine_tune/resolve/main/Finished_model_files.zip?download=true'. "
                          "More XTTS fine-tunes can be found on my Hugging Face at 'https://huggingface.co/drewThomasson'."))

# Parsed once at import time; the rest of the script reads this module-level namespace.
args = parser.parse_args()
|
|
|
|
|
|
|
import os |
|
import shutil |
|
import subprocess |
|
import re |
|
from pydub import AudioSegment |
|
import tempfile |
|
from pydub import AudioSegment |
|
import nltk |
|
from nltk.tokenize import sent_tokenize |
|
import sys |
|
import torch |
|
from TTS.api import TTS |
|
from TTS.tts.configs.xtts_config import XttsConfig |
|
from TTS.tts.models.xtts import Xtts |
|
from tqdm import tqdm |
|
import gradio as gr |
|
from gradio import Progress |
|
import urllib.request |
|
import zipfile |
|
import socket |
|
|
|
|
|
|
|
|
|
|
|
|
|
import import_locally_stored_tts_model_files |
|
|
|
|
|
# Register an extra directory in which NLTK looks for its data files.
nltk.data.path.append('/home/user/app/nltk_data')

# Run on CUDA when torch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(f"Device selected is: {device}")
|
|
|
|
|
|
|
|
|
def download_and_extract_zip(url, extract_to='.'):
    """Download a zip archive and unpack its files flat into a directory.

    The archive is saved as ``model.zip`` inside ``extract_to``, every file
    entry is extracted and moved directly into ``extract_to`` (sub-folder
    structure is dropped), the archive and the left-over sub-directories are
    removed, and finally the presence of the expected model files is reported.
    Any exception is caught and printed; nothing is returned.

    :param url: Location of the zip archive.
    :param extract_to: Destination directory (created when missing).
    """
    try:
        os.makedirs(extract_to, exist_ok=True)
        archive_path = os.path.join(extract_to, 'model.zip')

        # Download with a byte-based progress bar.
        with tqdm(unit='B', unit_scale=True, miniters=1, desc="Downloading Model") as bar:
            def on_block(block_index, block_size, total_size):
                # urlretrieve reports absolute progress; advance the bar by the delta.
                bar.total = total_size
                bar.update(block_index * block_size - bar.n)

            urllib.request.urlretrieve(url, archive_path, reporthook=on_block)
        print(f"Downloaded zip file to {archive_path}")

        # Extract with a per-entry progress bar, flattening paths as we go.
        with zipfile.ZipFile(archive_path, 'r') as archive:
            members = archive.namelist()
            with tqdm(total=len(members), unit="file", desc="Extracting Files") as bar:
                for member in members:
                    is_directory_entry = member.endswith('/')
                    if not is_directory_entry:
                        unpacked_path = archive.extract(member, extract_to)
                        flat_path = os.path.join(extract_to, os.path.basename(member))
                        os.rename(unpacked_path, flat_path)
                    bar.update(1)

        # Drop the archive and the sub-directories emptied by the flattening.
        os.remove(archive_path)
        for parent, subdirs, _ in os.walk(extract_to, topdown=False):
            for subdir in subdirs:
                os.rmdir(os.path.join(parent, subdir))
        print(f"Extracted files to {extract_to}")

        # Report whether the expected model files are now present.
        expected = ['model.pth', 'config.json', 'vocab.json_']
        absent = [name for name in expected if not os.path.exists(os.path.join(extract_to, name))]
        if absent:
            print(f"Missing files: {', '.join(absent)}")
        else:
            print("All required files (model.pth, config.json, vocab.json_) found.")

    except Exception as e:
        print(f"Failed to download or extract zip file: {e}")
|
|
|
|
|
|
|
def is_folder_empty(folder_path):
    """Tell whether a directory has no entries.

    :param folder_path: Path to inspect.
    :return: True when the directory is empty, False when it has entries,
             None (after printing a message) when the path is not an existing directory.
    """
    is_directory = os.path.exists(folder_path) and os.path.isdir(folder_path)
    if not is_directory:
        print(f"The path {folder_path} is not a valid folder.")
        return None
    return len(os.listdir(folder_path)) == 0
|
|
|
def remove_folder_with_contents(folder_path):
    """Delete a directory tree, printing the outcome instead of raising.

    :param folder_path: Directory to remove together with everything inside it.
    """
    try:
        shutil.rmtree(folder_path)
    except Exception as error:
        print(f"Error removing {folder_path}: {error}")
    else:
        print(f"Successfully removed {folder_path} and all of its contents.")
|
|
|
|
|
|
|
|
|
def wipe_folder(folder_path):
    """Remove everything inside a directory while keeping the directory itself.

    Prints one line per removed entry and a final summary line. When the
    directory does not exist a message is printed and nothing else happens.

    :param folder_path: Directory whose contents are deleted.
    """
    if not os.path.exists(folder_path):
        print(f"The folder {folder_path} does not exist.")
        return

    for entry in os.listdir(folder_path):
        target = os.path.join(folder_path, entry)
        if os.path.isfile(target):
            os.remove(target)
            print(f"Removed file: {target}")
            continue
        if os.path.isdir(target):
            shutil.rmtree(target)
            print(f"Removed directory and its contents: {target}")

    print(f"All contents wiped from {folder_path}.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_m4b_from_chapters(input_dir, ebook_file, output_dir):
    """Assemble the chapter WAV files of a book into one chaptered ``.m4b`` file.

    The ``.wav`` files in ``input_dir`` are ordered by the first number in
    their file name, concatenated into a temporary WAV, described by an
    FFMETADATA chapter file and encoded with ffmpeg (AAC, 192k). The cover is
    pulled from the eBook with Calibre's ``ebook-meta`` when possible.
    Temporary files are removed even when encoding fails.

    :param input_dir: Directory containing the per-chapter ``.wav`` files.
    :param ebook_file: Path of the source eBook (used for cover and output name).
    :param output_dir: Directory in which ``<ebook name>.m4b`` is written.
    :raises subprocess.CalledProcessError: If ffmpeg exits with an error.
    """

    def sort_key(chapter_file):
        # Look at the file name only: digits in a parent directory name must
        # not decide the chapter order.
        numbers = re.findall(r'\d+', os.path.basename(chapter_file))
        return int(numbers[0]) if numbers else 0

    def extract_metadata_and_cover(ebook_path):
        """Ask ebook-meta to write the cover next to the eBook; return its path or None."""
        try:
            # splitext only strips a real extension of the last path component
            # (rsplit('.') would cut "./dir/book" at the leading "./").
            cover_path = os.path.splitext(ebook_path)[0] + '.jpg'
            subprocess.run(['ebook-meta', ebook_path, '--get-cover', cover_path], check=True)
            if os.path.exists(cover_path):
                return cover_path
        except Exception as e:
            print(f"Error extracting eBook metadata or cover: {e}")
        return None

    def combine_wav_files(chapter_files, output_path, batch_size=256):
        """Concatenate the WAV files, batch by batch, and export one WAV."""
        combined_audio = AudioSegment.empty()
        for i in range(0, len(chapter_files), batch_size):
            batch_audio = AudioSegment.empty()
            for chapter_file in chapter_files[i:i + batch_size]:
                batch_audio += AudioSegment.from_wav(chapter_file)
            combined_audio += batch_audio
        combined_audio.export(output_path, format='wav')
        print(f"Combined audio saved to {output_path}")

    def generate_ffmpeg_metadata(chapter_files, metadata_file):
        """Write an FFMETADATA1 file with one millisecond-based [CHAPTER] per WAV."""
        with open(metadata_file, 'w', encoding='utf-8') as file:
            file.write(';FFMETADATA1\n')
            start_time = 0
            for index, chapter_file in enumerate(chapter_files):
                # len() of an AudioSegment is used as its duration in ms (TIMEBASE=1/1000).
                duration_ms = len(AudioSegment.from_wav(chapter_file))
                file.write(f'[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_time}\n')
                file.write(f'END={start_time + duration_ms}\ntitle=Chapter {index + 1}\n')
                start_time += duration_ms

    def create_m4b(combined_wav, metadata_file, cover_image, output_m4b):
        """Run ffmpeg to mux audio, chapter metadata and optional cover into the m4b."""
        os.makedirs(os.path.dirname(output_m4b) or '.', exist_ok=True)

        # -y: overwrite an existing output instead of blocking on ffmpeg's
        # interactive "Overwrite? [y/N]" prompt.
        ffmpeg_cmd = ['ffmpeg', '-y', '-i', combined_wav, '-i', metadata_file]
        if cover_image:
            ffmpeg_cmd += ['-i', cover_image, '-map', '0:a', '-map', '2:v']
        else:
            ffmpeg_cmd += ['-map', '0:a']

        ffmpeg_cmd += ['-map_metadata', '1', '-c:a', 'aac', '-b:a', '192k']
        if cover_image:
            ffmpeg_cmd += ['-c:v', 'png', '-disposition:v', 'attached_pic']
        ffmpeg_cmd += [output_m4b]

        subprocess.run(ffmpeg_cmd, check=True)

    chapter_files = sorted(
        [os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith('.wav')],
        key=sort_key,
    )
    temp_dir = tempfile.gettempdir()
    temp_combined_wav = os.path.join(temp_dir, 'combined.wav')
    metadata_file = os.path.join(temp_dir, 'metadata.txt')
    cover_image = extract_metadata_and_cover(ebook_file)
    output_m4b = os.path.join(output_dir, os.path.splitext(os.path.basename(ebook_file))[0] + '.m4b')

    try:
        combine_wav_files(chapter_files, temp_combined_wav)
        generate_ffmpeg_metadata(chapter_files, metadata_file)
        create_m4b(temp_combined_wav, metadata_file, cover_image, output_m4b)
    finally:
        # Clean up the intermediates whether or not encoding succeeded.
        for leftover in (temp_combined_wav, metadata_file, cover_image):
            if leftover and os.path.exists(leftover):
                os.remove(leftover)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import subprocess |
|
import ebooklib |
|
from ebooklib import epub |
|
from bs4 import BeautifulSoup |
|
import re |
|
import csv |
|
import nltk |
|
|
|
|
|
def create_chapter_labeled_book(ebook_file_path):
    """Convert an eBook to EPUB and write its chapters as text, CSV and one combined file.

    All output goes under ``./Working_files``:

    * ``temp.epub`` - the eBook converted with Calibre's ``ebook-convert``;
    * ``temp_ebook/chapter_N.txt`` - one text file per document item of the
      EPUB (an item shorter than 2300 characters is appended to the previous file);
    * ``Book/Other_book.csv`` - one row per sentence with its location;
    * ``Book/Chapter_Book.txt`` - all chapters joined by ``NEWCHAPTERABC`` lines.

    :param ebook_file_path: Path of the eBook to process.
    :raises RuntimeError: If ``ebook-convert`` reports a failure.
    """
    book_dir = os.path.join(".", "Working_files", "Book")
    chapters_dir = os.path.join(".", "Working_files", "temp_ebook")

    def ensure_directory(directory_path):
        if not os.path.exists(directory_path):
            os.makedirs(directory_path)
            print(f"Created directory: {directory_path}")

    def convert_to_epub(input_path, output_path):
        """Run ebook-convert; return True on success, False when it exits non-zero."""
        try:
            subprocess.run(['ebook-convert', input_path, output_path], check=True)
        except subprocess.CalledProcessError as e:
            print(f"An error occurred while converting the eBook: {e}")
            return False
        return True

    def save_chapters_as_text(epub_path):
        """Write the text of every document item of the EPUB to chapter_N.txt files."""
        ensure_directory(chapters_dir)
        book = epub.read_epub(epub_path)

        previous_filename = ''
        chapter_counter = 0

        for item in book.get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue
            text = BeautifulSoup(item.get_content(), 'html.parser').get_text()
            if not text.strip():
                continue

            if len(text) < 2300 and previous_filename:
                # Short item: fold it into the previous chapter file.
                with open(previous_filename, 'a', encoding='utf-8') as file:
                    file.write('\n' + text)
            else:
                previous_filename = os.path.join(chapters_dir, f"chapter_{chapter_counter}.txt")
                chapter_counter += 1
                with open(previous_filename, 'w', encoding='utf-8') as file:
                    file.write(text)
                print(f"Saved chapter: {previous_filename}")

    def process_chapter_files(folder_path, output_csv):
        """Write one CSV row per sentence of every chapter_N.txt in folder_path."""
        # Filter BEFORE sorting so an unrelated file in the folder cannot
        # break the numeric sort.
        numbered_files = []
        for filename in os.listdir(folder_path):
            match = re.fullmatch(r'chapter_(\d+)\.txt', filename)
            if match:
                numbered_files.append((int(match.group(1)), filename))
        numbered_files.sort()

        with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['Text', 'Start Location', 'End Location', 'Is Quote', 'Speaker', 'Chapter'])

            for chapter_number, filename in numbered_files:
                file_path = os.path.join(folder_path, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as file:
                        text = file.read()
                    if not text:
                        continue

                    text = "NEWCHAPTERABC" + text
                    search_from = 0
                    for sentence in nltk.tokenize.sent_tokenize(text):
                        # Search forward from the previous sentence so a
                        # repeated sentence gets its own location rather
                        # than that of its first occurrence.
                        start_location = text.find(sentence, search_from)
                        if start_location == -1:
                            start_location = text.find(sentence)
                        else:
                            search_from = start_location + len(sentence)
                        end_location = start_location + len(sentence)
                        writer.writerow([sentence, start_location, end_location, 'True', 'Narrator', chapter_number])
                except Exception as e:
                    print(f"Error processing file {filename}: {e}")

    def sort_key(filename):
        """Extract chapter number for sorting."""
        match = re.search(r'chapter_(\d+)\.txt', filename)
        return int(match.group(1)) if match else 0

    def combine_chapters(input_folder, output_file):
        """Join all .txt files of input_folder, in chapter order, into output_file."""
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        sorted_files = sorted(
            (f for f in os.listdir(input_folder) if f.endswith('.txt')),
            key=sort_key,
        )

        with open(output_file, 'w', encoding='utf-8') as outfile:
            for i, filename in enumerate(sorted_files):
                with open(os.path.join(input_folder, filename), 'r', encoding='utf-8') as infile:
                    outfile.write(infile.read())
                # Separator between chapters, but not after the last one.
                if i < len(sorted_files) - 1:
                    outfile.write("\nNEWCHAPTERABC\n")

    ensure_directory(book_dir)

    output_epub = os.path.join(".", "Working_files", "temp.epub")
    if os.path.exists(output_epub):
        os.remove(output_epub)
        print(f"File {output_epub} has been removed.")
    else:
        print(f"The file {output_epub} does not exist.")

    if not convert_to_epub(ebook_file_path, output_epub):
        # Without the EPUB there are no chapter files; fail here with a clear
        # message instead of a FileNotFoundError from a later directory listing.
        raise RuntimeError(f"Could not convert {ebook_file_path} to EPUB")
    save_chapters_as_text(output_epub)

    process_chapter_files(chapters_dir, os.path.join(book_dir, "Other_book.csv"))
    combine_chapters(chapters_dir, os.path.join(book_dir, "Chapter_Book.txt"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import subprocess |
|
import sys |
|
import torchaudio |
|
|
|
|
|
def calibre_installed():
    """Check that Calibre's ``ebook-convert`` executable can be launched.

    :return: True when the command could be started (its exit status is not
             inspected), False after printing a hint when it cannot be found.
    """
    probe_command = ['ebook-convert', '--version']
    try:
        # capture_output=True pipes stdout and stderr, keeping the probe silent.
        subprocess.run(probe_command, capture_output=True)
    except FileNotFoundError:
        print("Calibre is not installed. Please install Calibre for this functionality.")
        return False
    return True
|
|
|
|
|
import os |
|
import torch |
|
from TTS.api import TTS |
|
from nltk.tokenize import sent_tokenize |
|
from pydub import AudioSegment |
|
|
|
# Fallbacks used by the converters when no voice sample / language is supplied.
default_target_voice_path, default_language_code = "default_voice.wav", "en"
|
|
|
|
|
|
|
def rename_vocab_file_if_exists(directory):
    """Rename ``vocab.json`` to ``vocab.json_`` inside a directory when it is present.

    :param directory: Directory that may contain ``vocab.json``.
    :return: True when the file was renamed, False when there was nothing to rename.
    """
    vocab_path = os.path.join(directory, 'vocab.json')
    new_vocab_path = os.path.join(directory, 'vocab.json_')

    if not os.path.exists(vocab_path):
        return False

    # os.replace overwrites an existing target on every platform, whereas
    # os.rename raises on Windows when vocab.json_ is already there.
    os.replace(vocab_path, new_vocab_path)
    print(f"Renamed {vocab_path} to {new_vocab_path}")
    return True
|
|
|
|
|
def combine_wav_files(input_directory, output_directory, file_name):
    """Concatenate every ``.wav`` file of a directory into a single WAV file.

    Files are joined in the numeric order given by the digits of their file
    name (``2.wav`` before ``10.wav``); a name without digits sorts as 0.

    :param input_directory: Directory holding the ``.wav`` fragments.
    :param output_directory: Directory for the combined file (created when missing).
    :param file_name: File name of the combined WAV inside ``output_directory``.
    """
    os.makedirs(output_directory, exist_ok=True)
    output_file_path = os.path.join(output_directory, file_name)

    def numeric_order(path):
        # Use the file name only, so digits in the directory path play no
        # role, and tolerate names without digits instead of failing on int('').
        digits = ''.join(filter(str.isdigit, os.path.basename(path)))
        return int(digits) if digits else 0

    input_file_paths = sorted(
        [os.path.join(input_directory, f) for f in os.listdir(input_directory) if f.endswith(".wav")],
        key=numeric_order,
    )

    combined_audio = AudioSegment.empty()
    for input_file_path in input_file_paths:
        combined_audio += AudioSegment.from_wav(input_file_path)

    combined_audio.export(output_file_path, format='wav')
    print(f"Combined audio saved to {output_file_path}")
|
|
|
|
|
|
|
def split_long_sentence(sentence, language='en', max_pauses=10):
    """
    Splits a sentence into parts based on length or number of pauses without recursion.

    Every part except possibly a hard-cut one ends right after a pause
    punctuation mark. A part is at most ``char_limits[language] - 2``
    characters long (248 for unknown languages) and contains at most
    ``max_pauses`` pause marks.

    :param sentence: The sentence to split.
    :param language: The language of the sentence (default is English).
    :param max_pauses: Maximum allowed number of pauses in a sentence.
    :return: A list of sentence parts that meet the criteria. The last element
             is whatever remains after the final split and may be an empty string.
    """
    max_length = max(1, char_limits.get(language, 250) - 2)
    pause_budget = max(max_pauses, 0)

    # Pause punctuation per language. Written as escapes so the characters
    # survive any re-encoding of this file.
    if language == 'zh-cn':
        # fullwidth comma, ideographic full stop, fullwidth ; ? !
        punctuation = ['\uff0c', '\u3002', '\uff1b', '\uff1f', '\uff01']
    elif language == 'ja':
        # ideographic comma, ideographic full stop, fullwidth ; ? !
        punctuation = ['\u3001', '\u3002', '\uff1b', '\uff1f', '\uff01']
    elif language == 'ko':
        # fullwidth comma, ideographic full stop, fullwidth ; ? !
        punctuation = ['\uff0c', '\u3002', '\uff1b', '\uff1f', '\uff01']
    elif language == 'ar':
        # Arabic comma, Arabic semicolon, Arabic question mark, !, middle dot, .
        punctuation = ['\u060c', '\u061b', '\u061f', '!', '\u00b7', '.']
    elif language == 'en':
        punctuation = [',', ';', '.']
    else:
        punctuation = [',', '.', ';', ':', '?', '!']
    marks = frozenset(punctuation)

    parts = []
    while sentence:
        pause_positions = [i for i, char in enumerate(sentence) if char in marks]
        if len(sentence) <= max_length and len(pause_positions) <= pause_budget:
            break

        # Split after the last pause that keeps the part within BOTH limits:
        # no more than pause_budget marks and no more than max_length characters.
        candidates = [i for i in pause_positions[:pause_budget] if i < max_length]
        if candidates:
            split_at = candidates[-1] + 1
        else:
            # No usable pause mark: cut hard at the length limit.
            split_at = max_length

        parts.append(sentence[:split_at].strip())
        sentence = sentence[split_at:].strip()

    parts.append(sentence)
    return parts
|
|
|
""" |
|
if 'tts' not in locals(): |
|
tts = TTS(selected_tts_model, progress_bar=True).to(device) |
|
""" |
|
from tqdm import tqdm |
|
|
|
|
|
|
|
def _synthesize_chapters(chapters_dir, output_audio_dir, language, synthesize_fragment):
    """Turn every ``chapter_N.txt`` of a directory into ``audio_chapter_N.wav``.

    Shared chapter loop of both public converters. Each chapter is split into
    sentences (with nltk when ``language`` has an entry in ``language_mapping``,
    otherwise the whole chapter is one "sentence"), each sentence into
    fragments via ``split_long_sentence``, and every non-empty fragment is
    rendered to ``./Working_files/temp/<k>.wav`` by ``synthesize_fragment``.
    The fragments are then combined into the chapter WAV and the temp folder
    is wiped.

    :param chapters_dir: Directory containing the chapter text files.
    :param output_audio_dir: Directory receiving the chapter WAV files.
    :param language: Language code used for tokenizing and splitting.
    :param synthesize_fragment: Callable ``(text, wav_path)`` that writes one WAV.
    """
    os.makedirs(output_audio_dir, exist_ok=True)
    temp_audio_directory = os.path.join(".", "Working_files", "temp")
    nltk_language = language_mapping.get(language)

    for chapter_file in sorted(os.listdir(chapters_dir)):
        if not chapter_file.endswith('.txt'):
            continue
        match = re.search(r"chapter_(\d+)\.txt", chapter_file)
        if not match:
            print(f"Skipping file {chapter_file} as it does not match the expected format.")
            continue

        chapter_num = int(match.group(1))
        output_file_name = f"audio_chapter_{chapter_num}.wav"
        os.makedirs(temp_audio_directory, exist_ok=True)

        with open(os.path.join(chapters_dir, chapter_file), 'r', encoding='utf-8') as file:
            chapter_text = file.read()

        if nltk_language:
            sentences = sent_tokenize(chapter_text, language=nltk_language)
        else:
            # No tokenizer language known for this code: leave the splitting
            # entirely to split_long_sentence.
            sentences = [chapter_text]

        temp_count = 0
        for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
            for fragment in split_long_sentence(sentence, language=language):
                if fragment == "":
                    continue
                print(f"Generating fragment: {fragment}...")
                fragment_file_path = os.path.join(temp_audio_directory, f"{temp_count}.wav")
                synthesize_fragment(fragment, fragment_file_path)
                temp_count += 1

        combine_wav_files(temp_audio_directory, output_audio_dir, output_file_name)
        wipe_folder(temp_audio_directory)
        print(f"Converted chapter {chapter_num} to audio.")


def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, target_voice_path=None, language=None, custom_model=None):
    """Convert chapter text files to WAV using a custom XTTS checkpoint.

    When ``custom_model`` is falsy the stock ``xtts_v2`` model is used instead.

    :param chapters_dir: Directory containing ``chapter_N.txt`` files.
    :param output_audio_dir: Directory receiving ``audio_chapter_N.wav`` files.
    :param target_voice_path: Reference voice WAV; ``default_target_voice_path`` when empty.
    :param language: Language code; ``default_language_code`` when empty.
    :param custom_model: Dict with the ``'model'``, ``'config'`` and ``'vocab'`` file paths.
    """
    speaker_wav_path = target_voice_path if target_voice_path else default_target_voice_path
    # Resolve the fallback once so tokenizing, splitting and inference agree.
    language_code = language if language else default_language_code

    if custom_model:
        print("Loading custom model...")
        config = XttsConfig()
        config.load_json(custom_model['config'])
        model = Xtts.init_from_config(config)
        model.load_checkpoint(config, checkpoint_path=custom_model['model'], vocab_path=custom_model['vocab'], use_deepspeed=False)
        model.to(device)
        print("Computing speaker latents...")
        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[speaker_wav_path])

        def synthesize_fragment(fragment, fragment_file_path):
            out = model.inference(fragment, language_code, gpt_cond_latent, speaker_embedding, temperature=0.7)
            # The waveform is saved as a single-channel file at 24000 Hz.
            torchaudio.save(fragment_file_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
    else:
        selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
        tts = TTS(selected_tts_model, progress_bar=False).to(device)

        def synthesize_fragment(fragment, fragment_file_path):
            tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=speaker_wav_path, language=language_code)

    _synthesize_chapters(chapters_dir, output_audio_dir, language_code, synthesize_fragment)


def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, target_voice_path=None, language="en"):
    """Convert chapter text files to WAV using the stock ``xtts_v2`` model.

    :param chapters_dir: Directory containing ``chapter_N.txt`` files.
    :param output_audio_dir: Directory receiving ``audio_chapter_N.wav`` files.
    :param target_voice_path: Reference voice WAV; ``default_target_voice_path`` when empty.
    :param language: Language code; ``default_language_code`` when empty.
    """
    selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
    tts = TTS(selected_tts_model, progress_bar=False).to(device)

    speaker_wav_path = target_voice_path if target_voice_path else default_target_voice_path
    language_code = language if language else default_language_code

    def synthesize_fragment(fragment, fragment_file_path):
        tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=speaker_wav_path, language=language_code)

    _synthesize_chapters(chapters_dir, output_audio_dir, language_code, synthesize_fragment)
|
|
|
|
|
|
|
|
|
def convert_ebook_to_audio(ebook_file, target_voice_file, language, use_custom_model, custom_model_file, custom_config_file, custom_vocab_file, custom_model_url=None, progress=gr.Progress()):
    """Run the whole eBook -> m4b pipeline; handler of the Gradio button and of headless mode.

    Command-line values (``args``) take precedence over the corresponding
    parameters. File parameters may be upload objects exposing ``.name``,
    plain path strings, or None.

    :param ebook_file: The eBook to convert.
    :param target_voice_file: Optional reference voice sample.
    :param language: Language code of the book.
    :param use_custom_model: Whether a custom XTTS model should be used.
    :param custom_model_file: Custom model checkpoint.
    :param custom_config_file: Custom model config.
    :param custom_vocab_file: Custom model vocab.
    :param custom_model_url: Optional URL of a zip containing the custom model files.
    :param progress: Gradio progress tracker.
    :return: ``(status message, path of the m4b or None)`` - always two values,
             matching the two Gradio outputs wired to this handler.
    """

    def path_of(upload):
        # Accept an upload object (.name), a plain path string, or None.
        if upload is None:
            return None
        return getattr(upload, "name", upload) or None

    def report(fraction, desc):
        # Progress reporting is best-effort: it must never abort a conversion.
        try:
            progress(fraction, desc=desc)
        except Exception as e:
            print(f"Error updating progress: {e}")

    ebook_file_path = args.ebook if args.ebook else path_of(ebook_file)
    if not ebook_file_path:
        message = "Error: no eBook file was provided."
        print(message)
        return message, None
    target_voice = args.voice if args.voice else path_of(target_voice_file)
    custom_model = None

    # Start from a clean slate: drop the results of any previous run.
    full_folder_working_files = os.path.join(".", "Working_files")
    chapters_directory = os.path.join(".", "Working_files", "temp_ebook")
    output_audio_directory = os.path.join(".", 'Chapter_wav_files')
    remove_folder_with_contents(full_folder_working_files)
    remove_folder_with_contents(output_audio_directory)

    if args.headless and args.language:
        language = args.language

    # Custom model files: command-line paths win over the uploaded files.
    if args.use_custom_model and args.custom_model and args.custom_config and args.custom_vocab:
        custom_model = {
            'model': args.custom_model,
            'config': args.custom_config,
            'vocab': args.custom_vocab
        }
    elif use_custom_model and custom_model_file and custom_config_file and custom_vocab_file:
        custom_model = {
            'model': path_of(custom_model_file),
            'config': path_of(custom_config_file),
            'vocab': path_of(custom_vocab_file)
        }

    # A model URL, when given, replaces whatever files were selected above.
    if custom_model_url and (use_custom_model or args.use_custom_model):
        print(f"Received custom model URL: {custom_model_url}")
        download_dir = os.path.join(".", "Working_files", "custom_model")
        download_and_extract_zip(custom_model_url, download_dir)

        if rename_vocab_file_if_exists(download_dir):
            print("vocab.json file was found and renamed.")

        custom_model = {
            'model': os.path.join(download_dir, 'model.pth'),
            'config': os.path.join(download_dir, 'config.json'),
            'vocab': os.path.join(download_dir, 'vocab.json_')
        }

    report(0, "Starting conversion")

    if not calibre_installed():
        # Two values, like the success path: the Gradio handler has two outputs.
        return "Calibre is not installed.", None

    report(0.1, "Creating chapter-labeled book")
    create_chapter_labeled_book(ebook_file_path)
    audiobook_output_path = os.path.join(".", "Audiobooks")

    report(0.3, "Converting chapters to audio")
    if use_custom_model:
        convert_chapters_to_audio_custom_model(chapters_directory, output_audio_directory, target_voice, language, custom_model)
    else:
        convert_chapters_to_audio_standard_model(chapters_directory, output_audio_directory, target_voice, language)

    report(0.9, "Creating M4B from chapters")
    create_m4b_from_chapters(output_audio_directory, ebook_file_path, audiobook_output_path)

    m4b_filename = os.path.splitext(os.path.basename(ebook_file_path))[0] + '.m4b'
    m4b_filepath = os.path.join(audiobook_output_path, m4b_filename)

    report(1.0, "Conversion complete")
    print(f"Audiobook created at {m4b_filepath}")
    return f"Audiobook created at {m4b_filepath}", m4b_filepath
|
|
|
|
|
def list_audiobook_files(audiobook_folder):
    """List the ``.m4b`` files of a folder.

    :param audiobook_folder: Directory to scan (sub-directories are not searched).
    :return: Paths (folder joined with file name) of the ``.m4b`` files, sorted
             by name; an empty list when the folder does not exist yet.
    """
    # Nothing has been converted yet when the folder is missing - that is an
    # empty result, not an error.
    if not os.path.isdir(audiobook_folder):
        return []
    return [
        os.path.join(audiobook_folder, filename)
        for filename in sorted(os.listdir(audiobook_folder))
        if filename.endswith('.m4b')
    ]
|
|
|
def download_audiobooks():
    """Return the audiobook files found in ``./Audiobooks`` (via ``list_audiobook_files``)."""
    return list_audiobook_files(os.path.join(".", "Audiobooks"))
|
|
|
|
|
|
|
def run_gradio_interface():
    """Build the Gradio UI and serve it on all interfaces, port 7860.

    The language dropdown offers the module-level ``language_options``. The
    custom-model inputs are hidden until "Use Custom Model" is ticked.
    ``args.share`` controls whether Gradio creates a public link.
    """
    theme = gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="blue",
        neutral_hue="blue",
        text_size=gr.themes.sizes.text_md,
    )

    with gr.Blocks(theme=theme) as demo:
        gr.Markdown(
            "# eBook to Audiobook Converter\n"
            "\n"
            "Transform your eBooks into immersive audiobooks with optional custom TTS models.\n"
            "\n"
            "This interface is based on [Ebook2AudioBookXTTS](https://github.com/DrewThomasson/ebook2audiobookXTTS)."
        )

        with gr.Row():
            with gr.Column(scale=3):
                ebook_file = gr.File(label="eBook File")
                target_voice_file = gr.File(label="Target Voice File (Optional)")
                language = gr.Dropdown(label="Language", choices=language_options, value="en")

            with gr.Column(scale=3):
                use_custom_model = gr.Checkbox(label="Use Custom Model")
                custom_model_file = gr.File(label="Custom Model File (Optional)", visible=False)
                custom_config_file = gr.File(label="Custom Config File (Optional)", visible=False)
                custom_vocab_file = gr.File(label="Custom Vocab File (Optional)", visible=False)
                custom_model_url = gr.Textbox(label="Custom Model Zip URL (Optional)", visible=False)

        convert_btn = gr.Button("Convert to Audiobook", variant="primary")
        output = gr.Textbox(label="Conversion Status")
        audio_player = gr.Audio(label="Audiobook Player", type="filepath")
        download_btn = gr.Button("Download Audiobook Files")
        download_files = gr.File(label="Download Files", interactive=False)

        convert_btn.click(
            convert_ebook_to_audio,
            inputs=[ebook_file, target_voice_file, language, use_custom_model, custom_model_file, custom_config_file, custom_vocab_file, custom_model_url],
            outputs=[output, audio_player]
        )

        # Show or hide the four custom-model inputs together with the checkbox.
        use_custom_model.change(
            lambda x: [gr.update(visible=x)] * 4,
            inputs=[use_custom_model],
            outputs=[custom_model_file, custom_config_file, custom_vocab_file, custom_model_url]
        )

        download_btn.click(
            download_audiobooks,
            outputs=[download_files]
        )

    # The address is only printed as a hint; a host name that does not
    # resolve must not keep the server from starting.
    try:
        local_ip = socket.gethostbyname(socket.gethostname())
    except OSError:
        local_ip = "127.0.0.1"

    print(f"Running on local URL: http://{local_ip}:7860")
    print("Running on local URL: http://localhost:7860")

    demo.launch(server_name="0.0.0.0", server_port=7860, share=args.share)
|
|
|
|
|
|
|
|
|
# Entry point: direct conversion in headless mode, Gradio UI otherwise.
if args.headless:
    # sys.exit() rather than exit(): the latter is an interactive-shell
    # helper installed by the site module and is not guaranteed to exist.
    if not args.ebook:
        print("Error: In headless mode, you must specify an ebook file using --ebook.")
        sys.exit(1)

    # A custom model needs either a download URL or all three local files.
    if args.use_custom_model and not args.custom_model_url:
        if not args.custom_model or not args.custom_config or not args.custom_vocab:
            print("Error: You must provide either a --custom_model_url or all of the following arguments:")
            print("--custom_model, --custom_config, and --custom_vocab")
            sys.exit(1)

    ebook_file_path = args.ebook
    target_voice = args.voice if args.voice else None
    custom_model_url = args.custom_model_url if args.custom_model_url else None

    # convert_ebook_to_audio assembles the custom-model file set itself from
    # the arguments passed here.
    convert_ebook_to_audio(ebook_file_path, target_voice, args.language, args.use_custom_model, args.custom_model, args.custom_config, args.custom_vocab, custom_model_url)
else:
    run_gradio_interface()