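# app.py — voice-to-voice chatbot for a Hugging Face Space:
# Whisper transcribes the user's speech, an LLM generates a reply,
# and gTTS speaks the reply back. The commented-out block below is the
# earlier Groq-based version, kept for reference.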
# import os
# import gradio as gr
# import whisper
# from gtts import gTTS
# import io
# from groq import Groq
#
# # Initialize the Groq client
# groq_api_key = os.getenv('GROQ_API_KEY')
# client = Groq(api_key=groq_api_key)
#
# # Load the Whisper model
# model = whisper.load_model("base")  # You can choose other models like "small", "medium", "large"
#
# def process_audio(file_path):
#     try:
#         # Load the audio file
#         audio = whisper.load_audio(file_path)
#
#         # Transcribe the audio using Whisper
#         result = model.transcribe(audio)
#         text = result["text"]
#
#         # Generate a response using Groq
#         chat_completion = client.chat.completions.create(
#             messages=[{"role": "user", "content": text}],
#             model="llama3-8b-8192",  # Replace with the correct model if necessary
#         )
#
#         # Access the response using dot notation
#         response_message = chat_completion.choices[0].message.content.strip()
#
#         # Convert the response text to speech
#         tts = gTTS(response_message)
#         response_audio_io = io.BytesIO()
#         tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
#         response_audio_io.seek(0)
#
#         # Save the audio to a file to ensure it's generated correctly
#         with open("response.mp3", "wb") as audio_file:
#             audio_file.write(response_audio_io.getvalue())
#
#         # Return the response text and the path to the saved audio file
#         return response_message, "response.mp3"
#     except Exception as e:
#         return f"An error occurred: {e}", None
#
# iface = gr.Interface(
#     fn=process_audio,
#     inputs=gr.Audio(type="filepath"),  # Use type="filepath"
#     outputs=[gr.Textbox(label="Response Text"), gr.Audio(label="Response Audio")],
#     live=True
# )
# iface.launch()
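
# ---- Current version: Whisper (speech-to-text) + Anthropic Claude (response) + gTTS (text-to-speech) ----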
import os
import io  # BytesIO for holding the generated speech in memory
import gradio as gr
import whisper
from gtts import gTTS
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT  # Client plus the legacy Completions prompt markers

# Get the Anthropic API key from the environment (e.g., set it as a Spaces secret)
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
if not ANTHROPIC_API_KEY:
    raise ValueError("ANTHROPIC_API_KEY environment variable is not set.")

client = Anthropic(api_key=ANTHROPIC_API_KEY)  # Initialize the Anthropic client

# Load the Whisper model ("base" trades accuracy for speed; "small", "medium", or "large" also work)
model = whisper.load_model("base")
def chatbot(audio=None):
    try:
        if audio is None:
            return "No input detected. Please provide an audio input.", None

        # Transcribe the audio input using Whisper
        transcription = model.transcribe(audio)
        user_input = transcription.get("text", "")

        # Generate a response with the Anthropic legacy Completions API, which
        # requires the "\n\nHuman: ... \n\nAssistant:" prompt format
        chat_completion = client.completions.create(
            model="claude-v1",  # Model name from the original; newer SDKs may require a current model id
            prompt=f"{HUMAN_PROMPT} {user_input}{AI_PROMPT}",
            max_tokens_to_sample=100,  # Cap the length of the sampled response
        )
        response_text = chat_completion.completion.strip()  # Response is an object, so use attribute access

        # Convert the response text to speech using gTTS
        tts = gTTS(text=response_text, lang='en')
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)  # gTTS.save() expects a filename, so write to the buffer instead
        response_audio_io.seek(0)

        # Persist the audio so the Gradio Audio component can play it from a file path
        with open("response.mp3", "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())
        return response_text, "response.mp3"
    except Exception as e:
        return f"An error occurred: {e}", None
def clear_inputs():
    return None, None, None
# Create a custom interface
def build_interface():
    with gr.Blocks(css="""
        .block-title {
            text-align: center;
            color: white;
            background-color: #4CAF50;
            padding: 10px;
            border-radius: 8px;
        }
        .gradio-row {
            background-color: #f9f9f9;
            border-radius: 8px;
            padding: 20px;
            margin: 10px;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.1);
        }
        .gradio-column {
            padding: 10px;
        }
        .gradio-button {
            background-color: #ff6347 !important;
            color: white !important;
            border-radius: 8px !important;
            padding: 10px 20px !important;
            font-size: 16px !important;
            border: none !important;
            cursor: pointer !important;
            box-shadow: 0px 4px 12px rgba(0, 0, 0, 0.2) !important;
            transition: background-color 0.3s ease !important;
        }
        .gradio-button:hover {
            background-color: #e5533d !important;
        }
    """) as demo:
        gr.Markdown(
            """
            <h1 class="block-title">Voice-to-Voice AI Chatbot</h1>
            """
        )
        with gr.Row(elem_classes="gradio-row"):
            with gr.Column(elem_classes="gradio-column", scale=1):
                audio_input = gr.Audio(type="filepath", label="Record Your Voice")
            with gr.Column(elem_classes="gradio-column", scale=2):
                chatbot_output_text = gr.Textbox(label="Chatbot Response")
                chatbot_output_audio = gr.Audio(label="Audio Response")
                clear_button = gr.Button("Clear", elem_classes="gradio-button")

        # Reset all three components when the Clear button is pressed
        clear_button.click(
            fn=clear_inputs,
            outputs=[audio_input, chatbot_output_text, chatbot_output_audio]
        )
        # Run the chatbot whenever the recorded audio changes
        audio_input.change(
            fn=chatbot,
            inputs=[audio_input],
            outputs=[chatbot_output_text, chatbot_output_audio]
        )
    return demo

# Launch the interface
if __name__ == "__main__":
    interface = build_interface()
    interface.launch()
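
# Assumed runtime dependencies (a sketch of requirements.txt; exact pins may differ):
#   gradio
#   openai-whisper   # provides the `whisper` import
#   gTTS
#   anthropic
# Whisper also needs the ffmpeg binary available on the system PATH.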