e_book_reading / main.py
HoneyTian's picture
update
880d02b
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import asyncio
from functools import partial
import json
import logging
from pathlib import Path
import platform
from typing import List
import uuid
from project_settings import project_path, log_directory, temp_directory, edge_tts_temp_directory
import log
log.setup(log_directory=log_directory)
import aiofiles
import anyio
import edge_tts
import gradio as gr
import librosa
from scipy.io import wavfile
import spacy
from toolbox.os.command import Command
# Module-level logger; edge_tts_text_to_speech logs every synthesis request through it.
main_logger = logging.getLogger("main")
def get_args():
    """Parse the command-line options of the demo.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``. Its only
        option, ``example_wav_dir`` (str), defaults to
        ``project_path / "data/examples"`` rendered as a POSIX path.
    """
    default_example_dir = project_path / "data/examples"

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--example_wav_dir",
        type=str,
        default=default_example_dir.as_posix(),
    )
    return arg_parser.parse_args()
async def edge_tts_get_speakers() -> List[str]:
    """Return the ``ShortName`` of every Edge TTS voice whose locale is ``zh-CN``.

    Returns:
        Voice short names, in the order ``edge_tts.list_voices()`` reports them.
    """
    all_voices = await edge_tts.list_voices()

    # Read both fields of every voice up front so that a voice entry missing
    # either key fails the same way regardless of its locale.
    name_locale_pairs = [(item["ShortName"], item["Locale"]) for item in all_voices]

    return [name for name, locale in name_locale_pairs if locale == "zh-CN"]
async def edge_tts_text_to_speech(text: str, speaker: str, audio_dir: Path = edge_tts_temp_directory):
    """Synthesize ``text`` with Edge TTS and append the request to a JSONL record.

    Args:
        text: Text to synthesize.
        speaker: Edge TTS voice name passed to ``edge_tts.Communicate``.
        audio_dir: Directory that receives the audio file and the
            ``edge_tts.jsonl`` record; created (with parents) if missing.

    Returns:
        The POSIX path (str) of the saved audio file, or ``None`` when Edge TTS
        raised ``NoAudioReceived``. A record line is written in both cases,
        with ``"filename"`` set to the returned value.
    """
    main_logger.info(f"edge tts; speaker: {speaker}; text: {text}")
    tts_request = edge_tts.Communicate(text, speaker)

    # Each request gets its own uniquely named file inside audio_dir.
    audio_dir.mkdir(parents=True, exist_ok=True)
    record_path = audio_dir / "edge_tts.jsonl"
    output_path = (audio_dir / f"{uuid.uuid4()}.wav").as_posix()

    try:
        await tts_request.save(output_path)
    except edge_tts.exceptions.NoAudioReceived:
        # Signal the failure to the caller; the attempt is still recorded below.
        output_path = None

    record = {
        "text": text,
        "speaker": speaker,
        "filename": output_path,
    }
    line = json.dumps(record, ensure_ascii=False) + "\n"
    async with aiofiles.open(record_path.as_posix(), "a+", encoding="utf-8") as record_file:
        await record_file.write(line)

    return output_path
# spaCy "zh_core_web_sm" pipeline, loaded once at import time; e_book_reading
# runs it over the book text and iterates the resulting sentences.
spacy_model = spacy.load("zh_core_web_sm")
async def e_book_reading(txt_file: str, speaker: str):
    """Read a text file aloud sentence by sentence, restarting when it ends.

    This is an async generator. On every pass it re-reads ``txt_file``, splits
    it into sentences with ``spacy_model``, synthesizes each non-empty sentence
    with ``edge_tts_text_to_speech`` and yields the resulting audio path. After
    each yield it sleeps for the duration of that clip before continuing.

    Args:
        txt_file: Path of a UTF-8 text file.
        speaker: Edge TTS voice name; also used as the last component of the
            audio output directory.

    Yields:
        The path (str) of the audio file synthesized for each sentence.
        Sentences for which no audio could be produced are skipped.
    """
    book_path = Path(txt_file)
    audio_dir = temp_directory / "e_book_reading" / book_path.stem / speaker

    while True:
        async with aiofiles.open(book_path.as_posix(), "r", encoding="utf-8") as f:
            data = await f.read()

        doc = spacy_model(data)
        produced_audio = False
        for sentence in doc.sents:
            text = sentence.text.strip()
            if not text:
                continue

            filename = await edge_tts_text_to_speech(text=text, speaker=speaker, audio_dir=audio_dir)
            if filename is None:
                # edge_tts_text_to_speech returns None when no audio was
                # received; loading None would crash the whole stream, so
                # skip this sentence instead.
                main_logger.warning("e_book_reading; no audio received, skipped text: %s", text)
                continue

            # Decode the clip only to learn how long it plays.
            signal, sample_rate = librosa.load(filename)
            duration = len(signal) / sample_rate

            produced_audio = True
            yield filename
            await asyncio.sleep(duration)

        if not produced_audio:
            # Nothing was yielded in a full pass (empty file, or every
            # sentence failed): stop rather than spin in a tight loop forever.
            main_logger.warning("e_book_reading; no audio produced for file: %s", book_path.as_posix())
            return
def shell(cmd: str):
    """Run ``cmd`` through ``Command.popen`` and return its result unchanged.

    NOTE(review): in this file ``cmd`` comes straight from a Gradio textbox and
    the server binds to 0.0.0.0 on non-Windows hosts, so anyone who can reach
    the UI can presumably execute arbitrary commands -- confirm this is
    intended before exposing the app.
    """
    result = Command.popen(cmd)
    return result
def main():
    """Build the Gradio demo and serve it on port 7860.

    The UI has three tabs:

    * "Edge TTS": synthesize the text box content with the selected voice.
    * "Ebook Reading": stream a text file sentence by sentence.
    * "shell": run a command via ``shell`` and show its output.

    The server binds to 127.0.0.1 on Windows and to 0.0.0.0 elsewhere; sharing
    is always disabled.
    """
    # Parse the command line for its side effects (``--help``, rejection of
    # unknown options); none of the parsed values are used below.
    get_args()

    title = "## 电子书阅读."

    # asyncio.run() creates and closes its own event loop. Calling
    # asyncio.get_event_loop() without a running loop is deprecated.
    edge_tts_speakers_choices = asyncio.run(edge_tts_get_speakers())

    is_windows = platform.system() == "Windows"

    # blocks
    with gr.Blocks() as blocks:
        gr.Markdown(value=title)

        with gr.Tabs():
            with gr.TabItem("Edge TTS"):
                edge_tts_text = gr.Textbox(value="学而时习之,不亦悦乎。", lines=4, max_lines=50, label="text")
                edge_tts_speaker = gr.Dropdown(choices=edge_tts_speakers_choices, value="zh-CN-XiaoxiaoNeural", label="speakers")
                edge_tts_audio = gr.Audio(type="filepath", label="audio", autoplay=True)
                edge_tts_button = gr.Button(value="edge_tts", variant="primary")
                # Only text and speaker are wired in, so audio_dir keeps its default.
                edge_tts_button.click(
                    edge_tts_text_to_speech,
                    inputs=[
                        edge_tts_text,
                        edge_tts_speaker,
                    ],
                    outputs=[
                        edge_tts_audio
                    ],
                )

            with gr.TabItem("Ebook Reading"):
                e_book_reading_file = gr.File(
                    value=(project_path / "data/e_book/confucianism/the_analects/the_analects.txt").as_posix(),
                    label="txt"
                )
                e_book_reading_speaker = gr.Dropdown(choices=edge_tts_speakers_choices, value="zh-CN-XiaoxiaoNeural", label="speakers")
                e_book_reading_audio = gr.Audio(type="filepath", label="audio", streaming=True)
                e_book_reading_button = gr.Button(value="e_book_reading", variant="primary")
                e_book_reading_button.click(
                    e_book_reading,
                    inputs=[
                        e_book_reading_file,
                        e_book_reading_speaker,
                    ],
                    outputs=[
                        e_book_reading_audio
                    ],
                )

            with gr.TabItem("shell"):
                shell_text = gr.Textbox(label="cmd")
                shell_button = gr.Button("run")
                shell_output = gr.Textbox(label="output")
                shell_button.click(
                    shell,
                    inputs=[
                        shell_text,
                    ],
                    outputs=[
                        shell_output
                    ],
                )

    launch = partial(
        blocks.queue().launch,
        # The original expression evaluated to False on every platform.
        share=False,
        server_name="127.0.0.1" if is_windows else "0.0.0.0",
        server_port=7860,
    )
    # NOTE(review): anyio.run() is documented to take a coroutine function,
    # while ``launch`` looks like a plain callable -- kept as in the original,
    # confirm this is intended.
    anyio.run(
        launch,
        backend="asyncio"
    )
    return
# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()