File size: 3,555 Bytes
6e78f43
 
 
 
 
 
c4effd2
6e78f43
 
 
 
 
 
 
 
 
2d0e2b6
6e78f43
 
c4effd2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e78f43
 
 
 
 
c4effd2
 
 
2d0e2b6
 
 
6e78f43
2d0e2b6
 
 
 
 
 
 
6e78f43
2d0e2b6
 
6e78f43
c4effd2
 
 
 
 
 
 
 
 
 
 
 
6e78f43
c4effd2
6e78f43
2d0e2b6
6e78f43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4effd2
2d0e2b6
6e78f43
2d0e2b6
6e78f43
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133

# -*- coding: utf-8 -*-
import numpy as np
import soundfile
import audresample
import text_utils

import re
import srt
import subprocess
import markdown
import json
from pathlib import Path
from types import SimpleNamespace
from flask import Flask, request, send_from_directory
from flask_cors import CORS
from audiocraft.audiogen import AudioGen #, audio_write

# Load the pretrained AudioGen checkpoint once, at import time, so every
# request reuses the same model object.
sound_generator = AudioGen.get_pretrained('facebook/audiogen-medium')
# Fix the length of each generated clip (presumably seconds - confirm against
# the audiocraft documentation).
sound_generator.set_generation_params(duration=4)

    
# ====STYLE VECTOR====



# AFFECTIVE = True
# VOICE = 'en_UK/apope_low'  #  	en_US/m-ailabs_low#mary_ann

# _dir = '/' if AFFECTIVE else '_v2/'
# precomputed_style_vector = msinference.compute_style(
#     'assets/wavs/style_vector' + _dir + VOICE.replace(
#         '/', '_').replace(
#         '#', '_').replace(
#         'cmu-arctic', 'cmu_arctic').replace(
#         '_low', '') + '.wav')
# print('\n  STYLE VECTOR \n', precomputed_style_vector.shape)


# ==== STYLE VECTOR 

# Directory (relative to the working directory) where generated wav files are
# written before being sent back to the client.
CACHE_DIR = 'flask_cache/'
# Create it at import time; no error if it already exists.
Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)




def tts_multi_sentence(scene=None):
    """Generate a peak-normalised waveform for a textual scene description.

    Args:
        scene: Text prompt handed to the module-level ``sound_generator``.
            ``None`` or a prompt shorter than 4 characters is dropped.

    Returns:
        A 1-D numpy array. For an accepted prompt this is the first row of
        the first generated item, divided by its peak absolute value; for a
        dropped prompt it is 400 zeros.
    """
    # Guard clause: nothing usable to synthesise, hand back a short silent buffer.
    if scene is None or len(scene) < 4:
        print(scene, '\nDrop\n')
        return np.zeros(400)

    print(f'Processing: {scene} ..')
    generated = sound_generator.generate([scene])
    waveform = generated[0].detach().cpu().numpy()[0, :]

    # Scale in place to roughly [-1, 1]; the epsilon avoids dividing by zero
    # when the generated clip is all zeros.
    peak = np.abs(waveform).max()
    waveform /= peak + 1e-7
    print(f'Craft Finished for: {scene}\n\n\n\n____{waveform.shape}')

    # NOTE: earlier revisions kept a disabled StyleTTS2 speech path and an
    # audio_write loudness step here; only the AudioGen path is active.
    return waveform
    
    




# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)
# Wrap the app with flask_cors using its default options (no per-route or
# per-origin configuration is passed here).
cors = CORS(app)


@app.route("/")
def index():
    """Render ``README.md`` as HTML for the landing page.

    The path is relative, so the file is looked up in the process's current
    working directory.

    Returns:
        The HTML string produced by ``markdown.markdown`` from the README text.

    Raises:
        FileNotFoundError: If ``README.md`` is not present.
    """
    # Decode explicitly as UTF-8: without an encoding, open() falls back to the
    # locale's preferred encoding, so a README with non-ASCII characters could
    # fail or be garbled depending on the host configuration.
    with open('README.md', 'r', encoding='utf-8') as f:
        return markdown.markdown(f.read())


@app.route("/", methods=['GET', 'POST', 'PUT'])
def serve_wav():
    """Generate audio for the submitted ``scene`` and send it back as a wav file.

    Reads the form field ``scene`` (first value when the field is repeated),
    passes it to ``tts_multi_sentence``, writes the returned samples to
    ``CACHE_DIR + 'tmp.wav'`` at 16000 Hz and returns that file.

    A request that carries no ``scene`` field is forwarded as ``scene=None``
    (for which ``tts_multi_sentence`` takes its fallback branch) instead of
    failing with ``TypeError`` on ``None[0]``.

    Returns:
        The response built by ``send_from_directory``, with an additional
        ``suffix-file-type`` header holding the output file name.
    """
    # NOTE(review): index() is also registered on "/" for GET, so plain GET
    # requests appear to be answered by index() rather than here - confirm.

    # https://stackoverflow.com/questions/13522137/in-flask-convert-form-post-
    #                      object-into-a-representation-suitable-for-mongodb
    # flat=False keeps every form field as a list of values.
    form = request.form.to_dict(flat=False)

    scene_values = form.get('scene')
    args = SimpleNamespace(
        text=form.get('text'),  # list of values or None; currently unused
        # Missing or empty field -> None, so the generator decides what to do.
        scene=scene_values[0] if scene_values else None,
    )

    x = tts_multi_sentence(args.scene)

    # NOTE(review): every request writes the same file name, so concurrent
    # requests could overwrite each other's output - confirm whether the
    # server is ever run with more than one worker/thread.
    OUT_FILE = 'tmp.wav'
    # 16000 is the sample rate written to the wav header; presumably it matches
    # the generator's output rate - TODO confirm.
    soundfile.write(CACHE_DIR + OUT_FILE, x, 16000)

    # send server's output as default file -> srv_result.xx
    print(f'\n=SERVER saved as {OUT_FILE=}\n')
    response = send_from_directory(CACHE_DIR, path=OUT_FILE)
    response.headers['suffix-file-type'] = OUT_FILE
    return response


# Start Flask's built-in server only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    # 0.0.0.0 makes the server listen on all network interfaces; no port is
    # passed, so Flask's default port is used.
    app.run(host="0.0.0.0")