File size: 4,824 Bytes
60b21a4
dc049ca
60b21a4
2ff9b99
758024b
dc049ca
60b21a4
 
 
758024b
 
 
66f6922
 
 
 
 
bc7b359
 
60b21a4
 
758024b
 
dc049ca
f499833
 
 
 
 
dc049ca
 
758024b
dc049ca
758024b
dc049ca
758024b
 
dc049ca
758024b
 
 
 
 
f499833
758024b
 
dc049ca
758024b
 
dc049ca
 
758024b
 
 
 
 
 
 
 
 
 
 
 
 
dc049ca
758024b
dc049ca
758024b
dc049ca
758024b
dc049ca
758024b
 
dc049ca
 
758024b
dc049ca
60b21a4
758024b
 
dc049ca
f499833
 
758024b
 
 
f499833
758024b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc7b359
758024b
 
 
 
 
 
 
 
dc049ca
758024b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import tempfile
import os
import numpy as np

# Voice presets shown in the template dropdown.
# Each row is (display name, rate, volume); "rate" is a percentage-style value
# (the synthesis code divides it by 100) and "volume" is a linear multiplier.
_TEMPLATE_ROWS = (
    ("パラオ高め(ポーランドボール風)", 180, 1.0),
    ("低めのナレーター", 120, 0.8),
    ("普通の話し方", 150, 1.0),
    ("元気な女の子", 180, 1.2),
    ("落ち着いた男性", 130, 0.9),
    ("ロボット風(機械的)", 140, 1.0),
    ("さっぱりした女性", 160, 1.1),
    ("しっとりした声", 140, 0.9),
    ("おじさん風", 60, 0.75),
    ("怒った声", 45, 0.9),
)
TEMPLATES = {
    label: {"rate": preset_rate, "volume": preset_volume}
    for label, preset_rate, preset_volume in _TEMPLATE_ROWS
}

# Post-processing effects offered in the UI; the first entry means "no effect".
EFFECTS = [
    "なし",
    "ふわふわ化",
    "かちかち化",
    "減衰",
    "リバーブ",
    "音揺れ",
]

def generate_tts(text, template_name, pitch_factor=1.0, speed_factor=1.0, effect_type="なし", effect_strength=1.0):
    """Synthesize Japanese speech for ``text`` and return the processed MP3 path.

    The text is rendered with gTTS, then pitch, speed, the selected effect and
    the template volume are applied with pydub before the result is exported.

    Args:
        text: Text to read aloud. Must contain at least one non-space character.
        template_name: Key into ``TEMPLATES``; unknown names fall back to
            ``{"rate": 150, "volume": 1.0}``.
        pitch_factor: Multiplier passed to ``change_pitch``.
        speed_factor: Multiplier applied to the template's ``rate``.
        effect_type: One of the ``EFFECTS`` labels.
        effect_strength: Intensity passed to ``apply_effect``.

    Returns:
        Path of a newly created temporary MP3 file holding the final audio.

    Raises:
        gr.Error: If ``text`` is empty or whitespace only.
    """
    if not text or not text.strip():
        # Fail early with a message the UI can show instead of letting the
        # synthesis backend reject the empty request.
        raise gr.Error("読み上げるテキストを入力してください。")

    # Resolve the preset; "rate" is a percentage-style value, "volume" linear.
    template = TEMPLATES.get(template_name, {"rate": 150, "volume": 1.0})
    rate = template["rate"] * speed_factor
    volume = template["volume"]

    tts = gTTS(text=text, lang='ja')

    # delete=False so the path can be reopened by gTTS/pydub after closing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        tts_path = f.name
    try:
        tts.save(tts_path)
        sound = AudioSegment.from_mp3(tts_path)
    finally:
        # The raw synthesis output is only an intermediate; remove it so that
        # repeated requests do not accumulate files in the temp directory.
        try:
            os.remove(tts_path)
        except OSError:
            pass

    sound = change_pitch(sound, pitch_factor)

    # The rate is expressed like a percentage, so 100 maps to a factor of 1.0.
    sound = change_speed(sound, rate / 100)

    sound = apply_effect(sound, effect_type, effect_strength)

    # Apply the template volume (linear multiplier) as a gain in decibels.
    if volume > 0 and volume != 1.0:
        sound = sound.apply_gain(float(20 * np.log10(volume)))

    # Export to a dedicated temp file; the caller (Gradio) serves it by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix="_modified.mp3") as out:
        output_path = out.name
    sound.export(output_path, format="mp3")

    return output_path

def change_pitch(sound, factor):
    """Shift the pitch of ``sound`` by reinterpreting its frame rate.

    The raw samples are kept as-is but tagged with ``frame_rate * factor``
    (truncated to an int), then the result is resampled to 44100 Hz. Because
    the samples themselves are untouched, the duration changes with the pitch.

    Args:
        sound: Segment exposing ``frame_rate``, ``raw_data``, ``_spawn``.
        factor: Multiplier for the frame rate (>1 raises, <1 lowers).

    Returns:
        The re-tagged segment converted to a 44100 Hz frame rate.
    """
    rate_override = {"frame_rate": int(sound.frame_rate * factor)}
    return sound._spawn(sound.raw_data, overrides=rate_override).set_frame_rate(44100)

def change_speed(sound, speed=1.0):
    """Change the playback speed of ``sound`` by re-tagging its frame rate.

    Args:
        sound: Segment exposing ``frame_rate``, ``raw_data``, ``_spawn``.
        speed: Frame-rate multiplier; 1.0 leaves the rate unchanged.

    Returns:
        A segment built from the same raw data at ``frame_rate * speed``
        (truncated to an int), converted to a 44100 Hz frame rate.
    """
    scaled_rate = int(sound.frame_rate * speed)
    # Same samples, different declared rate: playback gets faster or slower.
    retimed = sound._spawn(
        sound.raw_data,
        overrides={"frame_rate": scaled_rate},
    )
    normalized = retimed.set_frame_rate(44100)
    return normalized

def apply_effect(sound, effect_type, effect_strength):
    """Apply the selected post-processing effect to ``sound``.

    Args:
        sound: Audio segment to process.
        effect_type: One of the ``EFFECTS`` labels; any other value (including
            "なし") returns ``sound`` unchanged.
        effect_strength: Scales the effect: filter cutoff, fade length, gain
            reduction, or wobble depth depending on the effect.

    Returns:
        The processed segment, or ``sound`` itself when no effect applies.
    """
    if effect_type == "ふわふわ化":
        # Softer sound: keep frequencies below a cutoff scaled by strength.
        return sound.low_pass_filter(1000 * effect_strength)
    if effect_type == "かちかち化":
        # Harder sound: keep frequencies above a cutoff scaled by strength.
        return sound.high_pass_filter(3000 * effect_strength)
    if effect_type == "減衰":
        # Fade durations must be whole milliseconds and cannot sensibly
        # exceed the clip, so truncate to int and clamp to the clip length.
        fade_ms = min(len(sound), int(len(sound) * effect_strength))
        if fade_ms <= 0:
            return sound
        return sound.fade_out(fade_ms)
    if effect_type == "リバーブ":
        # Integer milliseconds: a float duration breaks pydub's fade loop.
        fade_ms = min(len(sound), int(200 * effect_strength))
        tail = sound.reverse()
        if fade_ms > 0:
            tail = tail.fade_in(fade_ms).fade_out(fade_ms)
        # NOTE(review): `+` between two segments appends, so the faded copy
        # plays after the original rather than being mixed underneath it.
        return (sound + tail.reverse()) - (10 * effect_strength)
    if effect_type == "音揺れ":
        return wobble(sound, effect_strength)
    return sound

def wobble(sound, strength):
    """Randomly detune ``sound`` in 100 ms slices to create a wavering effect.

    Each slice gets its own pitch factor drawn uniformly from
    ``[1 - 0.05 * strength, 1 + 0.05 * strength]`` and is passed through
    ``change_pitch``; the slices are then joined back in order.

    Args:
        sound: Audio segment to process.
        strength: Scales the width of the random pitch range.

    Returns:
        A new segment built from the detuned slices.
    """
    slice_ms = 100
    offsets = range(0, len(sound), slice_ms)
    pieces = [sound[offset:offset + slice_ms] for offset in offsets]

    result = AudioSegment.empty()
    for piece in pieces:
        # Draw a fresh factor per slice so the pitch drifts over time.
        factor = np.random.uniform(1 - 0.05 * strength, 1 + 0.05 * strength)
        result += change_pitch(piece, factor)
    return result

# Build the Gradio interface. Components are created top to bottom inside the
# Blocks context, one row per group of controls.
with gr.Blocks() as app:
    gr.Markdown("# オリジナル声読み上げ機")
    
    # Text to be read aloud.
    with gr.Row():
        text_input = gr.Textbox(label="読み上げるテキスト", lines=2, placeholder="ここに入力...")
    
    # Voice preset; the choices are the keys of TEMPLATES.
    with gr.Row():
        template_dropdown = gr.Dropdown(choices=list(TEMPLATES.keys()), value="パラオ高め(ポーランドボール風)", label="テンプレートを選ぶ")
    
    # Pitch and speed multipliers, both adjustable from 0.1 to 5.0.
    with gr.Row():
        pitch_slider = gr.Slider(0.1, 5.0, value=1.0, step=0.05, label="ピッチ倍率(高く・低く)")
        speed_slider = gr.Slider(0.1, 5.0, value=1.0, step=0.05, label="速度倍率(速く・遅く)")
    
    # Effect selection and its strength (0.1 to 10.0).
    with gr.Row():
        effect_dropdown = gr.Dropdown(choices=EFFECTS, value="なし", label="エフェクトを選ぶ")
        effect_strength_slider = gr.Slider(0.1, 10.0, value=1.0, step=0.05, label="エフェクト強さ")
    
    with gr.Row():
        submit_btn = gr.Button("生成する")
    
    # Receives the file path returned by generate_tts.
    audio_output = gr.Audio(label="出力音声", type="filepath")
    
    # The inputs list follows the positional parameter order of generate_tts:
    # text, template, pitch, speed, effect type, effect strength.
    submit_btn.click(
        fn=generate_tts,
        inputs=[text_input, template_dropdown, pitch_slider, speed_slider, effect_dropdown, effect_strength_slider],
        outputs=audio_output
    )

# Runs at module level, so the app starts whenever this file is executed or imported.
app.launch()