from transformers import pipeline
import gradio as gr

# Create pipelines for text-to-speech and speech-to-text
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")
stt = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
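
# Both audio pipelines return plain dicts -- a quick sketch of the shapes involved
# (assumes default pipeline behavior):
#
#   stt("clip.wav")  # -> {"text": "transcribed speech"}
#   tts("Hello!")    # -> {"audio": np.ndarray, "sampling_rate": 16000}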

# Create a pipeline for text generation, using a small chat-tuned model;
# any Hub model that ships a chat template is a good fit for dialogue
chat = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
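
# With chat-style input (a list of {"role": ..., "content": ...} dicts), recent
# transformers text-generation pipelines return the whole conversation under
# "generated_text". A minimal sketch (assumes transformers >= 4.38):
#
#   out = chat([{"role": "user", "content": "Hi!"}], max_new_tokens=20)
#   out[0]["generated_text"][-1]  # -> {"role": "assistant", "content": "..."}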

def handle_user_input(user_text, user_voice):
    if user_text:
        user_text = user_text.strip()  # Remove leading/trailing whitespace
        input_type = "text"
    else:
        input_type = "voice"
        try:
            user_text = stt(user_voice)["text"] if user_voice else ""
        except Exception:
            user_text = ""

    if not user_text:
        return "Please type a message or record some audio first.", None

    # Generate a response; with chat-style input the pipeline returns the whole
    # conversation, so the new assistant turn is the last message
    messages = [
        {"role": "system", "content": "You are a kind, helpful assistant."},
        {"role": "user", "content": user_text},
    ]
    outputs = chat(messages, max_new_tokens=100, do_sample=True, top_p=0.95, temperature=0.7)
    chat_reply = outputs[0]["generated_text"][-1]["content"]

    # Generate audio output (only if the input was text)
    audio = None
    if input_type == "text":
        speech = tts(chat_reply)
        # gr.Audio expects a (sampling_rate, waveform) tuple
        audio = (speech["sampling_rate"], speech["audio"].squeeze())

    return chat_reply, audio
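
# Example round trip with text-only input (a sketch; all three models download on
# first use, so the first call is slow):
#
#   reply, audio = handle_user_input("Tell me a joke.", None)
#   print(reply)  # assistant text
#   # audio is a (sampling_rate, waveform) tuple, ready for gr.Audio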

# Define input components
text_input = gr.Textbox(label="Enter your text (optional)")
voice_input = gr.Audio(sources=["microphone"], type="filepath")

# Create and launch the Gradio interface
iface = gr.Interface(
    fn=handle_user_input,
    inputs=[text_input, voice_input],
    outputs=[gr.Textbox(label="Assistant Text"), gr.Audio(label="Assistant Voice (if text input)")],
    live=True,
    title="AI Voice Assistant",
)
iface.launch(debug=True)


"""
from transformers import pipeline
import gradio as gr
from IPython.display import Audio

# Create a pipeline for text-to-speech
tts = pipeline("text-to-speech", model="facebook/mms-tts-eng")

# Create a pipeline for speech-to-text
stt = pipeline("automatic-speech-recognition", model="openai/whisper-medium")

# Create a pipeline for text generation
chat = pipeline("text-generation", model="openai-community/gpt2-xl")

def voice_chat(user_voice):
    user_text = stt(user_voice)["text"]
    messages = [{"role": "system", "content": "You are a kind helpful assistant."}]
    messages.append({"role": "user", "content": user_text})
    chat_reply = chat(messages=messages, max_length=100, top_p=0.95, temperature=0.7)[0]["generated_text"]
    messages.append({"role": "assistant", "content": chat_reply})
    audio = tts(chat_reply)["audio"]
    return chat_reply, audio

text_reply = gr.Textbox(label="ChatGPT Text") 
voice_reply = gr.Audio(type="filepath")

iface = gr.Interface(
    fn=voice_chat,
    inputs=[gr.Textbox(label="Enter your text"), gr.Audio(sources=["microphone"], type="filepath")],
    outputs=[gr.Textbox(label="ChatGPT Text") , gr.Audio(label = "ChatGPT Voice")],
    live=True,
    title="AI Voice Assistant with ChatGPT AI",
)

iface.launch(debug=True)
"""