# Start-up marker, printed before the remaining imports are executed.
print("starting...")
import os |
import shutil |
import subprocess |
import re |
from pydub import AudioSegment |
import tempfile |
from pydub import AudioSegment |
import os |
import nltk |
from nltk.tokenize import sent_tokenize |
import sys |
import torch |
from TTS.api import TTS |
from TTS.tts.configs.xtts_config import XttsConfig |
from TTS.tts.models.xtts import Xtts |
from tqdm import tqdm |
import gradio as gr |
from gradio import Progress |
import urllib.request |
import zipfile |
# Fetch the NLTK "punkt_tab" data package at start-up.
nltk.download('punkt_tab')

# Use CUDA when torch reports it as available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Device selected is: {device}")
def download_and_extract_zip(url, extract_to='.'):
    """Download a model zip from ``url`` and unpack it flat into ``extract_to``.

    Every file in the archive is moved directly into ``extract_to`` (the
    archive's internal folder layout is discarded), the downloaded zip is
    deleted and the presence of ``model.pth``, ``config.json`` and
    ``vocab.json_`` is reported on stdout.

    This is best effort: any failure is printed, not raised.

    :param url: Location of the zip archive to download.
    :param extract_to: Directory that receives the extracted files.
    """
    try:
        os.makedirs(extract_to, exist_ok=True)
        zip_path = os.path.join(extract_to, 'model.zip')

        with tqdm(unit='B', unit_scale=True, miniters=1, desc="Downloading Model") as t:
            def reporthook(blocknum, blocksize, totalsize):
                downloaded = blocknum * blocksize
                # urlretrieve reports -1 when the size is unknown; only a
                # real size may become the bar's total.
                if totalsize > 0:
                    t.total = totalsize
                    # The last block is usually partial; do not overshoot.
                    downloaded = min(downloaded, totalsize)
                t.update(downloaded - t.n)

            urllib.request.urlretrieve(url, zip_path, reporthook=reporthook)
        print(f"Downloaded zip file to {zip_path}")

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # Directory entries end with '/'; only real files are extracted,
            # so only they count towards the progress total.
            members = [name for name in zip_ref.namelist() if not name.endswith('/')]
            with tqdm(total=len(members), unit="file", desc="Extracting Files") as t:
                for member in members:
                    extracted_path = zip_ref.extract(member, extract_to)
                    base_file_path = os.path.join(extract_to, os.path.basename(member))
                    if os.path.abspath(extracted_path) != os.path.abspath(base_file_path):
                        # os.replace overwrites an existing destination on
                        # every platform (os.rename does not on Windows).
                        os.replace(extracted_path, base_file_path)
                    t.update(1)

        os.remove(zip_path)

        # Remove the now-empty folders left over from the archive layout,
        # deepest first.
        for root, dirs, _ in os.walk(extract_to, topdown=False):
            for name in dirs:
                os.rmdir(os.path.join(root, name))
        print(f"Extracted files to {extract_to}")

        required_files = ['model.pth', 'config.json', 'vocab.json_']
        missing_files = [
            name for name in required_files
            if not os.path.exists(os.path.join(extract_to, name))
        ]
        if not missing_files:
            print("All required files (model.pth, config.json, vocab.json_) found.")
        else:
            print(f"Missing files: {', '.join(missing_files)}")
    except Exception as e:
        print(f"Failed to download or extract zip file: {e}")
def is_folder_empty(folder_path):
    """Tell whether ``folder_path`` is a directory without entries.

    :param folder_path: Path to inspect.
    :return: ``True`` for an empty directory, ``False`` for a non-empty one,
        ``None`` (after printing a notice) when the path is not a directory.
    """
    if not os.path.isdir(folder_path):
        print(f"The path {folder_path} is not a valid folder.")
        return None
    return len(os.listdir(folder_path)) == 0
def remove_folder_with_contents(folder_path):
    """Delete ``folder_path`` recursively, printing the outcome.

    Errors (for example a missing folder) are printed rather than raised.

    :param folder_path: Directory to delete.
    """
    try:
        shutil.rmtree(folder_path)
    except Exception as e:
        print(f"Error removing {folder_path}: {e}")
    else:
        print(f"Successfully removed {folder_path} and all of its contents.")
def wipe_folder(folder_path):
    """Delete everything inside ``folder_path`` but keep the folder itself.

    Symbolic links are unlinked; their targets are left untouched.

    :param folder_path: Directory to empty. A notice is printed and nothing
        else happens when it does not exist.
    """
    if not os.path.exists(folder_path):
        print(f"The folder {folder_path} does not exist.")
        return

    for item in os.listdir(folder_path):
        item_path = os.path.join(folder_path, item)
        # Check for links first: shutil.rmtree refuses to operate on a
        # symlink, and a broken link is neither a file nor a directory.
        if os.path.islink(item_path) or os.path.isfile(item_path):
            os.remove(item_path)
            print(f"Removed file: {item_path}")
        elif os.path.isdir(item_path):
            shutil.rmtree(item_path)
            print(f"Removed directory and its contents: {item_path}")

    print(f"All contents wiped from {folder_path}.")
def create_m4b_from_chapters(input_dir, ebook_file, output_dir):
    """Merge the chapter WAV files in ``input_dir`` into one chaptered ``.m4b``.

    The WAV files are ordered by the first number in their file name, joined
    into a single track and encoded with ffmpeg. One chapter marker is
    written per input file, and the eBook's cover (extracted with Calibre's
    ``ebook-meta``) is attached when available. The result is written to
    ``<output_dir>/<ebook base name>.m4b``; an existing file is overwritten.

    :param input_dir: Directory containing the per-chapter ``.wav`` files.
    :param ebook_file: Path of the source eBook (used for cover and name).
    :param output_dir: Directory that receives the ``.m4b`` file.
    :raises subprocess.CalledProcessError: If ffmpeg exits with an error.
    """
    def sort_key(chapter_file):
        # Look at the file name only: a digit anywhere in the directory part
        # would otherwise give every chapter the same key.
        numbers = re.findall(r'\d+', os.path.basename(chapter_file))
        return int(numbers[0]) if numbers else 0

    def extract_metadata_and_cover(ebook_path):
        # Best effort: a missing cover must not stop the audiobook build.
        try:
            cover_path = ebook_path.rsplit('.', 1)[0] + '.jpg'
            subprocess.run(['ebook-meta', ebook_path, '--get-cover', cover_path], check=True)
            if os.path.exists(cover_path):
                return cover_path
        except Exception as e:
            print(f"Error extracting eBook metadata or cover: {e}")
        return None

    def combine_wav_files(chapter_files, output_path):
        combined_audio = AudioSegment.empty()
        for chapter_file in chapter_files:
            combined_audio += AudioSegment.from_wav(chapter_file)
        combined_audio.export(output_path, format='wav')
        print(f"Combined audio saved to {output_path}")

    def generate_ffmpeg_metadata(chapter_files, metadata_file):
        with open(metadata_file, 'w', encoding='utf-8') as file:
            file.write(';FFMETADATA1\n')
            start_time = 0
            for index, chapter_file in enumerate(chapter_files):
                # len() of an AudioSegment is its duration in milliseconds,
                # which matches the 1/1000 timebase written below.
                duration_ms = len(AudioSegment.from_wav(chapter_file))
                file.write(f'[CHAPTER]\nTIMEBASE=1/1000\nSTART={start_time}\n')
                file.write(f'END={start_time + duration_ms}\ntitle=Chapter {index + 1}\n')
                start_time += duration_ms

    def create_m4b(combined_wav, metadata_file, cover_image, output_m4b):
        target_dir = os.path.dirname(output_m4b)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        # -y: overwrite an existing audiobook instead of asking on stdin,
        # which cannot be answered when running behind the web UI.
        ffmpeg_cmd = ['ffmpeg', '-y', '-i', combined_wav, '-i', metadata_file]
        if cover_image:
            ffmpeg_cmd += ['-i', cover_image, '-map', '0:a', '-map', '2:v']
        else:
            ffmpeg_cmd += ['-map', '0:a']
        ffmpeg_cmd += ['-map_metadata', '1', '-c:a', 'aac', '-b:a', '192k']
        if cover_image:
            ffmpeg_cmd += ['-c:v', 'png', '-disposition:v', 'attached_pic']
        ffmpeg_cmd += [output_m4b]
        subprocess.run(ffmpeg_cmd, check=True)

    chapter_files = sorted(
        (os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith('.wav')),
        key=sort_key,
    )
    output_m4b = os.path.join(output_dir, os.path.splitext(os.path.basename(ebook_file))[0] + '.m4b')
    cover_image = extract_metadata_and_cover(ebook_file)

    try:
        # A private scratch directory avoids clashes with other runs that
        # would otherwise share fixed names in the system temp folder, and
        # it is removed even when one of the steps fails.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_combined_wav = os.path.join(temp_dir, 'combined.wav')
            metadata_file = os.path.join(temp_dir, 'metadata.txt')
            combine_wav_files(chapter_files, temp_combined_wav)
            generate_ffmpeg_metadata(chapter_files, metadata_file)
            create_m4b(temp_combined_wav, metadata_file, cover_image, output_m4b)
    finally:
        if cover_image and os.path.exists(cover_image):
            os.remove(cover_image)
import os |
import subprocess |
import ebooklib |
from ebooklib import epub |
from bs4 import BeautifulSoup |
import re |
import csv |
import nltk |
def create_chapter_labeled_book(ebook_file_path):
    """Convert an eBook into per-chapter text files plus two derived files.

    All output goes below ``./Working_files``:

    1. the eBook is converted to ``temp.epub`` with Calibre's
       ``ebook-convert``;
    2. every non-empty EPUB document is written to
       ``temp_ebook/chapter_<n>.txt`` (a document shorter than 2300
       characters is appended to the previous chapter file instead);
    3. one CSV row per sentence is written to ``Book/Other_book.csv``;
    4. all chapters are concatenated into ``Book/Chapter_Book.txt``,
       separated by ``NEWCHAPTERABC`` marker lines.

    :param ebook_file_path: Path of the eBook to convert.
    :raises FileNotFoundError: If ``temp_ebook`` does not exist when the
        chapter files are listed.
    """
    def ensure_directory(directory_path):
        if not os.path.exists(directory_path):
            os.makedirs(directory_path)
            print(f"Created directory: {directory_path}")

    ensure_directory(os.path.join(".", 'Working_files', 'Book'))

    def convert_to_epub(input_path, output_path):
        try:
            subprocess.run(['ebook-convert', input_path, output_path], check=True)
        except subprocess.CalledProcessError as e:
            print(f"An error occurred while converting the eBook: {e}")
            return False
        return True

    def save_chapters_as_text(epub_path):
        directory = os.path.join(".", "Working_files", "temp_ebook")
        ensure_directory(directory)
        book = epub.read_epub(epub_path)

        previous_filename = ''
        chapter_counter = 0
        for item in book.get_items():
            if item.get_type() != ebooklib.ITEM_DOCUMENT:
                continue
            soup = BeautifulSoup(item.get_content(), 'html.parser')
            text = soup.get_text()
            if not text.strip():
                continue
            if len(text) < 2300 and previous_filename:
                # Short document: fold it into the previous chapter file.
                with open(previous_filename, 'a', encoding='utf-8') as file:
                    file.write('\n' + text)
            else:
                previous_filename = os.path.join(directory, f"chapter_{chapter_counter}.txt")
                chapter_counter += 1
                with open(previous_filename, 'w', encoding='utf-8') as file:
                    file.write(text)
                print(f"Saved chapter: {previous_filename}")

    input_ebook = ebook_file_path
    output_epub = os.path.join(".", "Working_files", "temp.epub")
    if os.path.exists(output_epub):
        os.remove(output_epub)
        print(f"File {output_epub} has been removed.")
    else:
        print(f"The file {output_epub} does not exist.")

    if convert_to_epub(input_ebook, output_epub):
        save_chapters_as_text(output_epub)

    def process_chapter_files(folder_path, output_csv):
        chapter_pattern = re.compile(r'chapter_(\d+)\.txt')
        # Filter before sorting, so a stray file in the folder cannot crash
        # the numeric sort key.
        numbered_files = []
        for filename in os.listdir(folder_path):
            match = chapter_pattern.fullmatch(filename)
            if match:
                numbered_files.append((int(match.group(1)), filename))

        with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['Text', 'Start Location', 'End Location', 'Is Quote', 'Speaker', 'Chapter'])
            for chapter_number, filename in sorted(numbered_files):
                file_path = os.path.join(folder_path, filename)
                try:
                    with open(file_path, 'r', encoding='utf-8') as file:
                        text = file.read()
                    if text:
                        text = "NEWCHAPTERABC" + text
                    sentences = nltk.tokenize.sent_tokenize(text)
                    # Search forward from the end of the previous sentence,
                    # so a repeated sentence gets its own offsets rather
                    # than those of its first occurrence.
                    cursor = 0
                    for sentence in sentences:
                        start_location = text.find(sentence, cursor)
                        if start_location == -1:
                            start_location = text.find(sentence)
                        end_location = start_location + len(sentence)
                        if start_location != -1:
                            cursor = end_location
                        writer.writerow([sentence, start_location, end_location, 'True', 'Narrator', chapter_number])
                except Exception as e:
                    print(f"Error processing file {filename}: {e}")

    folder_path = os.path.join(".", "Working_files", "temp_ebook")
    output_csv = os.path.join(".", "Working_files", "Book", "Other_book.csv")
    process_chapter_files(folder_path, output_csv)

    def sort_key(filename):
        """Extract chapter number for sorting."""
        match = re.search(r'chapter_(\d+)\.txt', filename)
        return int(match.group(1)) if match else 0

    def combine_chapters(input_folder, output_file):
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        files = [f for f in os.listdir(input_folder) if f.endswith('.txt')]
        sorted_files = sorted(files, key=sort_key)
        with open(output_file, 'w', encoding='utf-8') as outfile:
            for i, filename in enumerate(sorted_files):
                with open(os.path.join(input_folder, filename), 'r', encoding='utf-8') as infile:
                    outfile.write(infile.read())
                # The marker separates chapters; none after the last one.
                if i < len(sorted_files) - 1:
                    outfile.write("\nNEWCHAPTERABC\n")

    input_folder = os.path.join(".", 'Working_files', 'temp_ebook')
    output_file = os.path.join(".", 'Working_files', 'Book', 'Chapter_Book.txt')
    combine_chapters(input_folder, output_file)

    ensure_directory(os.path.join(".", "Working_files", "Book"))
import os |
import subprocess |
import sys |
import torchaudio |
def calibre_installed():
    """Report whether Calibre's ``ebook-convert`` command can be started.

    :return: ``True`` when the command could be launched, ``False`` (after
        printing a hint) when the executable was not found.
    """
    probe_command = ['ebook-convert', '--version']
    try:
        subprocess.run(probe_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except FileNotFoundError:
        print("Calibre is not installed. Please install Calibre for this functionality.")
        return False
    return True
import os |
import torch |
from TTS.api import TTS |
from nltk.tokenize import sent_tokenize |
from pydub import AudioSegment |
# Reference voice sample used when the caller supplies no target voice.
default_target_voice_path = "default_voice.wav"
# Language code used when the caller supplies no language.
default_language_code = "en"
def combine_wav_files(input_directory, output_directory, file_name):
    """Concatenate every ``.wav`` file of a directory into one WAV file.

    The inputs are ordered by the number formed from the digits in their
    file names; a name without digits sorts as 0.

    :param input_directory: Directory holding the ``.wav`` fragments.
    :param output_directory: Directory for the result (created if missing).
    :param file_name: File name of the combined WAV.
    """
    def numeric_key(path):
        # Use the file name only, so digits in the directory path cannot
        # leak into the key; fall back to 0 rather than failing on int('').
        digits = ''.join(filter(str.isdigit, os.path.basename(path)))
        return int(digits) if digits else 0

    os.makedirs(output_directory, exist_ok=True)
    output_file_path = os.path.join(output_directory, file_name)

    input_file_paths = sorted(
        (os.path.join(input_directory, f) for f in os.listdir(input_directory) if f.endswith(".wav")),
        key=numeric_key,
    )

    combined_audio = AudioSegment.empty()
    for input_file_path in input_file_paths:
        combined_audio += AudioSegment.from_wav(input_file_path)

    combined_audio.export(output_file_path, format='wav')
    print(f"Combined audio saved to {output_file_path}")
def split_long_sentence(sentence, max_length=249, max_pauses=10): |
""" |
Splits a sentence into parts based on length or number of pauses without recursion. |
:param sentence: The sentence to split. |
:param max_length: Maximum allowed length of a sentence. |
:param max_pauses: Maximum allowed number of pauses in a sentence. |
:return: A list of sentence parts that meet the criteria. |
""" |
parts = [] |
while len(sentence) > max_length or sentence.count(',') + sentence.count(';') + sentence.count('.') > max_pauses: |
possible_splits = [i for i, char in enumerate(sentence) if char in ',;.' and i < max_length] |
if possible_splits: |
split_at = possible_splits[-1] + 1 |
else: |
split_at = max_length |
parts.append(sentence[:split_at].strip()) |
sentence = sentence[split_at:].strip() |
parts.append(sentence) |
return parts |
""" |
if 'tts' not in locals(): |
tts = TTS(selected_tts_model, progress_bar=True).to(device) |
""" |
from tqdm import tqdm |
def convert_chapters_to_audio_custom_model(chapters_dir, output_audio_dir, target_voice_path=None, language=None, custom_model=None):
    """Synthesize one WAV file per ``chapter_<n>.txt`` file with XTTS.

    With ``custom_model`` an XTTS checkpoint is loaded from the given paths;
    without it the stock ``xtts_v2`` model is used. Each chapter is split
    into sentences, long sentences are broken into fragments, every fragment
    is rendered into ``./Working_files/temp`` and the fragments are merged
    into ``audio_chapter_<n>.wav`` inside ``output_audio_dir``. The temp
    folder is emptied after every chapter.

    :param chapters_dir: Directory holding the ``chapter_<n>.txt`` files.
    :param output_audio_dir: Directory for the chapter WAV files (created
        if missing).
    :param target_voice_path: Reference voice sample;
        ``default_target_voice_path`` is used when omitted.
    :param language: Language code; ``default_language_code`` is used when
        omitted.
    :param custom_model: Optional dict with the keys ``'model'``,
        ``'config'`` and ``'vocab'`` holding file paths.
    """
    if not target_voice_path:
        target_voice_path = default_target_voice_path
    # Resolve the language once, so tokenizer, fragment length and both
    # synthesis paths agree (a missing language used to reach
    # model.inference unchanged).
    language_code = language if language else default_language_code
    tokenizer_language = 'italian' if language_code == 'it' else 'english'
    max_fragment_length = 249 if language_code == "en" else 213

    if custom_model:
        print("Loading custom model...")
        config = XttsConfig()
        config.load_json(custom_model['config'])
        model = Xtts.init_from_config(config)
        model.load_checkpoint(config, checkpoint_path=custom_model['model'], vocab_path=custom_model['vocab'], use_deepspeed=False)
        # This was a bare `model.device` expression, which does nothing;
        # move the model to the selected device like the stock-model path.
        model.to(device)
        print("Computing speaker latents...")
        gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[target_voice_path])
    else:
        selected_tts_model = "tts_models/multilingual/multi-dataset/xtts_v2"
        tts = TTS(selected_tts_model, progress_bar=False).to(device)

    os.makedirs(output_audio_dir, exist_ok=True)
    temp_audio_directory = os.path.join(".", "Working_files", "temp")
    chapter_pattern = re.compile(r"chapter_(\d+)\.txt")

    # Collect the chapters first so they can be processed in numeric order
    # (a plain name sort puts chapter_10 before chapter_2).
    chapters = []
    for chapter_file in sorted(os.listdir(chapters_dir)):
        if not chapter_file.endswith('.txt'):
            continue
        match = chapter_pattern.search(chapter_file)
        if not match:
            print(f"Skipping file {chapter_file} as it does not match the expected format.")
            continue
        chapters.append((int(match.group(1)), chapter_file))

    for chapter_num, chapter_file in sorted(chapters):
        chapter_path = os.path.join(chapters_dir, chapter_file)
        output_file_name = f"audio_chapter_{chapter_num}.wav"
        os.makedirs(temp_audio_directory, exist_ok=True)
        temp_count = 0

        with open(chapter_path, 'r', encoding='utf-8') as file:
            chapter_text = file.read()

        sentences = sent_tokenize(chapter_text, language=tokenizer_language)
        for sentence in tqdm(sentences, desc=f"Chapter {chapter_num}"):
            fragments = split_long_sentence(sentence, max_length=max_fragment_length, max_pauses=10)
            for fragment in fragments:
                if fragment == "":
                    continue
                print(f"Generating fragment: {fragment}...")
                # Fragments are numbered so combine_wav_files can restore
                # their order.
                fragment_file_path = os.path.join(temp_audio_directory, f"{temp_count}.wav")
                if custom_model:
                    out = model.inference(fragment, language_code, gpt_cond_latent, speaker_embedding, temperature=0.7)
                    torchaudio.save(fragment_file_path, torch.tensor(out["wav"]).unsqueeze(0), 24000)
                else:
                    tts.tts_to_file(text=fragment, file_path=fragment_file_path, speaker_wav=target_voice_path, language=language_code)
                temp_count += 1

        combine_wav_files(temp_audio_directory, output_audio_dir, output_file_name)
        wipe_folder(temp_audio_directory)
        print(f"Converted chapter {chapter_num} to audio.")
def convert_chapters_to_audio_standard_model(chapters_dir, output_audio_dir, target_voice_path=None, language=None):
    """Synthesize one WAV file per chapter with the stock ``xtts_v2`` model.

    This used to be a line-for-line copy of the stock-model path of
    ``convert_chapters_to_audio_custom_model``; it now delegates to that
    function with ``custom_model=None`` so there is one implementation.

    :param chapters_dir: Directory holding the ``chapter_<n>.txt`` files.
    :param output_audio_dir: Directory that receives the chapter WAV files.
    :param target_voice_path: Reference voice sample;
        ``default_target_voice_path`` is used when omitted.
    :param language: Language code; ``default_language_code`` is used when
        omitted.
    """
    convert_chapters_to_audio_custom_model(
        chapters_dir,
        output_audio_dir,
        target_voice_path=target_voice_path,
        language=language,
        custom_model=None,
    )
def convert_ebook_to_audio(ebook_file, target_voice_file, language, use_custom_model, custom_model_file, custom_config_file, custom_vocab_file, custom_model_url=None, progress=gr.Progress()):
    """Gradio callback: convert an uploaded eBook into an ``.m4b`` audiobook.

    The previous ``./Working_files`` and ``./Chapter_wav_files`` folders are
    removed, the eBook is split into chapter text files, every chapter is
    synthesized to audio and the chapters are packed into
    ``./Audiobooks/<ebook base name>.m4b``.

    :param ebook_file: Uploaded eBook (object with a ``name`` path attribute).
    :param target_voice_file: Optional uploaded reference voice sample.
    :param language: Language code for synthesis.
    :param use_custom_model: Whether a custom XTTS model should be used.
    :param custom_model_file: Optional uploaded model checkpoint.
    :param custom_config_file: Optional uploaded model config.
    :param custom_vocab_file: Optional uploaded model vocabulary.
    :param custom_model_url: Optional URL of a zip holding the three model
        files; takes precedence over the uploaded files.
    :param progress: Gradio progress tracker.
    :return: ``(status_message, m4b_path)``; ``m4b_path`` is ``None`` when
        the conversion could not be carried out. Two values are always
        returned because the click handler is wired to two outputs.
    """
    def report(fraction, desc):
        # Progress reporting is cosmetic: never let it abort a conversion.
        try:
            progress(fraction, desc=desc)
        except Exception as e:
            print(f"Error updating progress: {e}")

    if ebook_file is None:
        return "No eBook file was provided.", None

    report(0, "Starting conversion")
    # Check the prerequisite before wiping folders or downloading a model.
    if not calibre_installed():
        return "Calibre is not installed.", None

    ebook_file_path = ebook_file.name
    target_voice = target_voice_file.name if target_voice_file else None
    custom_model = None

    full_folder_working_files = os.path.join(".", "Working_files")
    chapters_directory = os.path.join(".", "Working_files", "temp_ebook")
    output_audio_directory = os.path.join(".", 'Chapter_wav_files')
    audiobook_output_path = os.path.join(".", "Audiobooks")

    remove_folder_with_contents(full_folder_working_files)
    remove_folder_with_contents(output_audio_directory)

    if use_custom_model and custom_model_file and custom_config_file and custom_vocab_file:
        custom_model = {
            'model': custom_model_file.name,
            'config': custom_config_file.name,
            'vocab': custom_vocab_file.name
        }
    if use_custom_model and custom_model_url:
        print(f"Received custom model URL: {custom_model_url}")
        download_dir = os.path.join(".", "Working_files", "custom_model")
        download_and_extract_zip(custom_model_url, download_dir)
        custom_model = {
            'model': os.path.join(download_dir, 'model.pth'),
            'config': os.path.join(download_dir, 'config.json'),
            'vocab': os.path.join(download_dir, 'vocab.json_')
        }
        # download_and_extract_zip only prints on failure, so verify the
        # result here instead of failing later while loading the model.
        missing = [path for path in custom_model.values() if not os.path.exists(path)]
        if missing:
            return f"Custom model download failed; missing files: {', '.join(missing)}", None

    report(0.1, "Creating chapter-labeled book")
    create_chapter_labeled_book(ebook_file_path)

    report(0.3, "Converting chapters to audio")
    if use_custom_model:
        convert_chapters_to_audio_custom_model(chapters_directory, output_audio_directory, target_voice, language, custom_model)
    else:
        convert_chapters_to_audio_standard_model(chapters_directory, output_audio_directory, target_voice, language)

    report(0.9, "Creating M4B from chapters")
    create_m4b_from_chapters(output_audio_directory, ebook_file_path, audiobook_output_path)

    m4b_filename = os.path.splitext(os.path.basename(ebook_file_path))[0] + '.m4b'
    m4b_filepath = os.path.join(audiobook_output_path, m4b_filename)

    report(1.0, "Conversion complete")
    print(f"Audiobook created at {m4b_filepath}")
    return f"Audiobook created at {m4b_filepath}", m4b_filepath
def list_audiobook_files(audiobook_folder):
    """Return the paths of all ``.m4b`` files directly inside a folder.

    :param audiobook_folder: Directory to scan.
    :return: Sorted list of ``.m4b`` file paths; empty when the folder does
        not exist (for example before the first conversion).
    """
    if not os.path.isdir(audiobook_folder):
        return []
    return sorted(
        os.path.join(audiobook_folder, filename)
        for filename in os.listdir(audiobook_folder)
        if filename.endswith('.m4b')
    )
def download_audiobooks():
    """Return the ``.m4b`` files found in ``./Audiobooks``."""
    return list_audiobook_files(os.path.join(".", "Audiobooks"))
# Language codes offered in the "Language" dropdown of the UI.
language_options = [
    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko"
]
# Gradio "Soft" theme with blue primary, secondary and neutral hues and
# medium text size.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="blue",
    neutral_hue="blue",
    text_size=gr.themes.sizes.text_md,
)
def _toggle_custom_model_inputs(enabled):
    """Show or hide the four custom-model inputs together."""
    return [gr.update(visible=enabled)] * 4


with gr.Blocks(theme=theme) as demo:
    # Page header.
    gr.Markdown(
        "\n"
        "# eBook to Audiobook Converter\n"
        "Transform your eBooks into immersive audiobooks with optional custom TTS models.\n"
    )

    with gr.Row():
        # Left column: the book, the voice sample and the language.
        with gr.Column(scale=3):
            ebook_file = gr.File(label="eBook File")
            target_voice_file = gr.File(label="Target Voice File (Optional)")
            language = gr.Dropdown(label="Language", choices=language_options, value="en")

        # Right column: custom-model inputs, hidden until the box is ticked.
        with gr.Column(scale=3):
            use_custom_model = gr.Checkbox(label="Use Custom Model")
            custom_model_file = gr.File(label="Custom Model File (Optional)", visible=False)
            custom_config_file = gr.File(label="Custom Config File (Optional)", visible=False)
            custom_vocab_file = gr.File(label="Custom Vocab File (Optional)", visible=False)
            custom_model_url = gr.Textbox(label="Custom Model Zip URL (Optional)", visible=False)

    convert_btn = gr.Button("Convert to Audiobook", variant="primary")
    output = gr.Textbox(label="Conversion Status")
    audio_player = gr.Audio(label="Audiobook Player", type="filepath")
    download_btn = gr.Button("Download Audiobook Files")
    download_files = gr.File(label="Download Files", interactive=False)

    conversion_inputs = [
        ebook_file,
        target_voice_file,
        language,
        use_custom_model,
        custom_model_file,
        custom_config_file,
        custom_vocab_file,
        custom_model_url,
    ]
    custom_model_widgets = [custom_model_file, custom_config_file, custom_vocab_file, custom_model_url]

    convert_btn.click(
        convert_ebook_to_audio,
        inputs=conversion_inputs,
        outputs=[output, audio_player],
    )
    use_custom_model.change(
        _toggle_custom_model_inputs,
        inputs=[use_custom_model],
        outputs=custom_model_widgets,
    )
    download_btn.click(
        download_audiobooks,
        outputs=[download_files],
    )

demo.launch(share=True)