Spaces:

intelli-zen
/

e_book_reading

Sleeping

File size: 5,812 Bytes

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import asyncio
from functools import partial
import json
import logging
from pathlib import Path
import platform
from typing import List
import uuid

from project_settings import project_path, log_directory, temp_directory, edge_tts_temp_directory
import log

log.setup(log_directory=log_directory)

import aiofiles
import anyio
import edge_tts
import gradio as gr
import librosa
from scipy.io import wavfile
import spacy

from toolbox.os.command import Command

# Logger named "main"; the TTS helper below writes its request log lines through it.
main_logger = logging.getLogger("main")


def get_args():
    """Parse the command-line options of this script.

    Returns:
        The ``argparse.Namespace`` produced by the parser. Its only option is
        ``--example_wav_dir`` (a string), which defaults to the POSIX form of
        ``project_path / "data/examples"``.
    """
    default_example_dir = (project_path / "data/examples").as_posix()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--example_wav_dir", default=default_example_dir, type=str)
    return arg_parser.parse_args()


async def edge_tts_get_speakers() -> List[str]:
    """Return the short names of the Edge TTS voices whose locale is ``zh-CN``.

    The voices are fetched with ``edge_tts.list_voices()`` and the result keeps
    the order in which that call returned them.
    """
    all_voices = await edge_tts.list_voices()

    # Read both keys for every voice first, then filter on the locale.
    name_and_locale = [(entry["ShortName"], entry["Locale"]) for entry in all_voices]
    return [name for name, voice_locale in name_and_locale if voice_locale == "zh-CN"]


async def edge_tts_text_to_speech(text: str, speaker: str, audio_dir: Path = edge_tts_temp_directory):
    """Synthesize ``text`` with Edge TTS and record the request in a JSONL file.

    The audio is written to ``audio_dir`` under a fresh UUID-based name, and one
    JSON line (``text``, ``speaker``, ``filename``) is appended to
    ``audio_dir / "edge_tts.jsonl"`` whether or not synthesis succeeded.

    Args:
        text: The text to speak.
        speaker: Edge TTS voice name passed to ``edge_tts.Communicate``.
        audio_dir: Directory for the audio file and the record file; it is
            created if missing.

    Returns:
        The POSIX path of the saved audio file as a string, or ``None`` when
        Edge TTS raised ``NoAudioReceived``.
    """
    # tts
    main_logger.info(f"edge tts; speaker: {speaker}; text: {text}")
    communicate = edge_tts.Communicate(text, speaker)

    # save audio
    # NOTE(review): the file is named ".wav"; confirm the container Edge TTS
    # actually writes matches what downstream consumers expect.
    audio_dir.mkdir(parents=True, exist_ok=True)
    audio_path = audio_dir / "{}.wav".format(uuid.uuid4())
    audio_file = audio_path.as_posix()
    record_file = audio_dir / "edge_tts.jsonl"
    try:
        await communicate.save(audio_file)
    except edge_tts.exceptions.NoAudioReceived:
        # Keep the best-effort behaviour (return None) but make the failure
        # visible in the log instead of swallowing it silently.
        main_logger.warning(f"edge tts; no audio received; speaker: {speaker}; text: {text}")
        # save() may already have created the target file before failing;
        # do not leave an unusable file behind.
        if audio_path.exists():
            audio_path.unlink()
        audio_file = None

    # save record
    async with aiofiles.open(record_file.as_posix(), "a+", encoding="utf-8") as f:
        row = json.dumps({
                "text": text,
                "speaker": speaker,
                "filename": audio_file,
            },
            ensure_ascii=False)
        await f.write("{}\n".format(row))
    return audio_file


# Load the spaCy "zh_core_web_sm" pipeline once at import time; e_book_reading
# runs it over the book text and iterates the resulting sentences.
spacy_model = spacy.load("zh_core_web_sm")


async def e_book_reading(txt_file: str, speaker: str):
    """Read a text file aloud sentence by sentence, repeating the book in a loop.

    This is an async generator. On every pass the file is re-read, split into
    sentences with ``spacy_model``, and each non-empty sentence is synthesized
    via ``edge_tts_text_to_speech``. After yielding an audio file the generator
    sleeps for that clip's duration before producing the next one.

    Args:
        txt_file: Path of the UTF-8 text file to read.
        speaker: Edge TTS voice name; also used as a sub-directory of the
            output location ``temp_directory / "e_book_reading" / <file stem>``.

    Yields:
        The path (string) of the audio file for each synthesized sentence.
        Sentences for which synthesis produced no audio are skipped. The
        generator ends if a complete pass over the file yields nothing.
    """
    txt_file = Path(txt_file)

    audio_dir = temp_directory / "e_book_reading" / txt_file.stem / speaker

    while True:
        async with aiofiles.open(txt_file.as_posix(), "r", encoding="utf-8") as f:
            data = await f.read()
        doc = spacy_model(data)

        produced_audio = False
        for sentence in doc.sents:
            text = sentence.text.strip()
            if not text:
                continue
            filename = await edge_tts_text_to_speech(text=text, speaker=speaker, audio_dir=audio_dir)
            if filename is None:
                # Synthesis failed for this sentence (the helper returns None);
                # loading a None path would crash the whole stream, so skip it.
                continue

            # Decode the clip only to learn how long it plays.
            signal, sample_rate = librosa.load(filename)
            duration = len(signal) / sample_rate

            produced_audio = True
            yield filename
            # Wait for the clip to finish before producing the next one.
            await asyncio.sleep(duration)

        if not produced_audio:
            # Empty file, or every sentence failed: looping again would spin
            # forever without ever yielding anything.
            main_logger.warning(f"e book reading; no audio produced; txt_file: {txt_file.as_posix()}")
            return


def shell(cmd: str):
    """Run ``cmd`` through ``Command.popen`` and return its result unchanged.

    NOTE(review): in this file the command text comes straight from the
    "shell" tab of the web UI, so anyone who can reach the page can execute
    commands on the host. Confirm that this exposure is intended.
    """
    output = Command.popen(cmd)
    return output


def main():
    """Build the Gradio UI and launch the web server.

    The UI has three tabs: single-shot Edge TTS, continuous e-book reading
    (streamed audio), and a shell command runner. The server listens on port
    7860, bound to 127.0.0.1 on Windows and 0.0.0.0 elsewhere, with sharing
    disabled.
    """
    # Parsed for CLI validation / --help; no option is consumed below.
    args = get_args()

    title = "## 电子书阅读."

    # asyncio.get_event_loop() is deprecated when no loop is current, so create
    # one explicitly and install it as the current loop (the same state the old
    # call left behind).
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    edge_tts_speakers_choices = loop.run_until_complete(edge_tts_get_speakers())

    # blocks
    with gr.Blocks() as blocks:
        gr.Markdown(value=title)

        with gr.Tabs():
            with gr.TabItem("Edge TTS"):
                edge_tts_text = gr.Textbox(value="学而时习之，不亦悦乎。", lines=4, max_lines=50, label="text")
                edge_tts_speaker = gr.Dropdown(choices=edge_tts_speakers_choices, value="zh-CN-XiaoxiaoNeural", label="speakers")

                edge_tts_audio = gr.Audio(type="filepath", label="audio", autoplay=True)

                edge_tts_button = gr.Button(value="edge_tts", variant="primary")
                edge_tts_button.click(
                    edge_tts_text_to_speech,
                    inputs=[
                        edge_tts_text,
                        edge_tts_speaker,
                    ],
                    outputs=[
                        edge_tts_audio
                    ],
                )

            with gr.TabItem("Ebook Reading"):
                e_book_reading_file = gr.File(
                    value=(project_path / "data/e_book/confucianism/the_analects/the_analects.txt").as_posix(),
                    label="txt"
                )
                e_book_reading_speaker = gr.Dropdown(choices=edge_tts_speakers_choices, value="zh-CN-XiaoxiaoNeural", label="speakers")

                e_book_reading_audio = gr.Audio(type="filepath", label="audio", streaming=True)
                e_book_reading_button = gr.Button(value="e_book_reading", variant="primary")

                # e_book_reading is an async generator: each yielded file is
                # pushed to the streaming audio component.
                e_book_reading_button.click(
                    e_book_reading,
                    inputs=[
                        e_book_reading_file,
                        e_book_reading_speaker,
                    ],
                    outputs=[
                        e_book_reading_audio
                    ],
                )

            with gr.TabItem("shell"):
                shell_text = gr.Textbox(label="cmd")
                shell_button = gr.Button("run")
                shell_output = gr.Textbox(label="output")

                shell_button.click(
                    shell,
                    inputs=[
                        shell_text,
                    ],
                    outputs=[
                        shell_output
                    ],
                )

    on_windows = platform.system() == "Windows"
    launch = partial(
        blocks.queue().launch,
        # The original conditional evaluated to False on every platform.
        share=False,
        server_name="127.0.0.1" if on_windows else "0.0.0.0",
        server_port=7860,
    )
    anyio.run(
        launch,
        backend="asyncio"
    )
    return


if __name__ == "__main__":
    main()