File size: 2,108 Bytes
d587895
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0bac575
 
 
d587895
 
9b1ff5b
 
d587895
e0671b1
d587895
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e0671b1
d587895
 
 
 
 
 
 
e0671b1
d587895
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# import gradio as gr

# def greet(name):
#     return "Hello " + name + "!!"

# iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# iface.launch(share=True)

# -*- coding: utf-8 -*-
"""gradio.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1_GWRGRVb3znpFoAUO_7jj36vZG2FseJi
"""
# import os
# os.system('pip install wit')
# os.system('pip install scipy')

# !pip install gradio
# !pip install wit
# !pip install scipy

import gradio as gr
from wit import Wit
import numpy as np
import io
from scipy.io.wavfile import write

def is_enrolled_voice(name, wave_file):
    """Report whether the recording belongs to the enrolled speaker ``name``.

    Placeholder implementation: neither argument is inspected and every
    caller is accepted, so the result is always ``True``.
    """
    # No speaker-verification logic is present; both arguments are ignored.
    return True

def STT(wave_file):
    """Transcribe a recorded clip to text through the Wit.ai speech API.

    Args:
        wave_file: A ``(sample_rate, data)`` pair, or ``None`` when no audio
            was supplied. ``data`` is treated as an array of samples
            (presumably what the Gradio audio input delivers -- TODO confirm).

    Returns:
        The ``'text'`` entry of the Wit.ai response when it has one;
        otherwise a fixed "no text extracted" message. The same message is
        returned without contacting the API when there is no audio, the clip
        is empty, or the clip is completely silent.
    """
    import os  # function-scope import so the block stays self-contained

    no_text_message = "μΆ”μΆœλœ ν…μŠ€νŠΈ μ—†μŒ!"

    # Debug trace of the incoming payload.
    print("STT μˆ˜ν–‰ μ „")
    print("wave_file 의 νƒ€μž… :", type(wave_file))
    print(wave_file)

    # Nothing was recorded/uploaded: there is nothing to unpack.
    if wave_file is None:
        return no_text_message

    sample_rate, data = wave_file

    # Work in float64 so np.abs cannot overflow on int16 -32768.
    samples = np.asarray(data, dtype=np.float64)
    peak = np.max(np.abs(samples)) if samples.size else 0.0

    # Empty or all-zero audio cannot be normalized (division by zero) and
    # contains no speech, so skip the API call entirely.
    if not (np.isfinite(peak) and peak > 0):
        return no_text_message

    # Normalize to full scale and convert to 16-bit PCM.
    scaled_data = (samples / peak * 32767).astype(np.int16)

    # Keep the encoded .wav in memory instead of writing a temporary file.
    wav_data = io.BytesIO()
    write(wav_data, sample_rate, scaled_data)
    # Rewind explicitly so the upload starts at the first byte no matter
    # where the writer left the cursor.
    wav_data.seek(0)

    # Create the Wit.ai client. WIT_API_KEY overrides the built-in token.
    # NOTE(review): a token committed to source control should be treated as
    # leaked; rotate it and supply the new one via the environment.
    wit_api_key = os.environ.get("WIT_API_KEY", "HBAX6TJEDWRKUIUA62MFQZCXYG4BKQBQ")
    wit_client = Wit(wit_api_key)

    response = wit_client.speech(wav_data, headers={'Content-Type': 'audio/wav'})
    print("STT μˆ˜ν–‰ ν›„")
    print(response)
    print(type(response))

    # Return the transcription if the response carries one.
    if 'text' in response:
        return response['text']
    return no_text_message

def function(name, wave_file):
    """Gradio handler: verify the speaker, then transcribe the recording.

    Args:
        name: Speaker name passed on to ``is_enrolled_voice``.
        wave_file: Audio payload passed to ``is_enrolled_voice`` and ``STT``.

    Returns:
        The string produced by ``STT`` when the voice is accepted, otherwise
        a fixed rejection message.
    """
    # Guard clause: reject voices that are not enrolled.
    if not is_enrolled_voice(name, wave_file):
        return "λ“±λ‘λ˜μ§€ μ•Šμ€ λͺ©μ†Œλ¦¬μž…λ‹ˆλ‹€!"

    # Enrolled voice: log it and run speech-to-text.
    print("λ“±λ‘λœ λͺ©μ†Œλ¦¬ μž…λ‹ˆλ‹€!")
    return STT(wave_file)

def main():
    """Build the Gradio interface around ``function`` and launch it.

    The interface takes a text input and an audio input and shows a single
    text output. It is launched with ``debug=True`` and ``share=True``.
    """
    input_components = ["text", "audio"]
    output_components = ["text"]
    app = gr.Interface(
        function,
        inputs=input_components,
        outputs=output_components,
    )
    app.launch(debug=True, share=True)

# Start the demo only when this file is run as a script, not when imported.
if __name__ == '__main__':
    main()