from botocore.exceptions import BotoCoreError, ClientError
import json
import os
import time
import urllib.request

import boto3
import gradio as gr
from dotenv import load_dotenv
from openai import OpenAI


# Load environment variables and initialize API clients
load_dotenv(".env")
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
access_key = os.getenv('access_key')
secret_key = os.getenv('secret_key')
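# Expected .env contents (illustrative values; the key names match the
# os.getenv calls above):
#   OPENAI_API_KEY=sk-...
#   access_key=AKIA...
#   secret_key=...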


def recognize_from_microphone(file_info):
    """Transcribes an audio file with Amazon Transcribe; returns (transcript, error)."""
    if not file_info:
        return "", "No audio file received."
    file_path = file_info
    print(f"File path received: {file_path}")

    # Check file existence
    if not os.path.exists(file_path):
        return f"File not found: {file_path}", ""

    # Configure Amazon Transcribe and S3 in the same region; Transcribe
    # expects the media bucket to live in the region the job runs in
    transcribe_client = boto3.client('transcribe', region_name='us-east-1',
                                     aws_access_key_id=access_key,
                                     aws_secret_access_key=secret_key)
    s3_client = boto3.client('s3', region_name='us-east-1',
                             aws_access_key_id=access_key,
                             aws_secret_access_key=secret_key)
    bucket_name = 'nutrition-bot'  # Specify your S3 bucket name
    object_name = os.path.basename(file_path)

    # Upload file to S3; surface AWS errors to the caller instead of crashing
    try:
        s3_client.upload_file(file_path, bucket_name, object_name)
    except (BotoCoreError, ClientError) as e:
        return "", f"Failed to upload audio to S3: {e}"
    job_name = f"TranscriptionJob-{int(time.time())}"
    job_uri = f"s3://{bucket_name}/{object_name}"

    # Start the transcription job; derive the media format from the file
    # extension (Gradio microphone recordings are typically .wav, not .mp3)
    media_format = os.path.splitext(object_name)[1].lstrip('.').lower() or 'mp3'
    transcribe_client.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={'MediaFileUri': job_uri},
        MediaFormat=media_format,
        LanguageCode='en-US'
    )

    # Poll the job status, giving up after ~5 minutes instead of looping forever
    for _ in range(60):
        status = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
        if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
            break
        time.sleep(5)

    # Process the transcription result
    if status['TranscriptionJob']['TranscriptionJobStatus'] == 'COMPLETED':
        transcript_uri = status['TranscriptionJob']['Transcript']['TranscriptFileUri']
        transcript_response = urllib.request.urlopen(transcript_uri)
        transcript_data = json.load(transcript_response)
        transcript_text = transcript_data['results']['transcripts'][0]['transcript']
        return transcript_text, ""
    return "Failed to transcribe audio.", ""

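# Example usage (hypothetical file path):
#   text, err = recognize_from_microphone("question.mp3")
#   err is "" on success; otherwise it holds a human-readable error message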

def synthesize_speech(text, filename="output.mp3"):
    """Converts text to speech using Amazon Polly and saves it to an MP3 file."""
    # Create a Polly client
    polly_client = boto3.client('polly', region_name='us-east-1', aws_access_key_id=access_key, aws_secret_access_key=secret_key)

    # Synthesize speech
    response = polly_client.synthesize_speech(
        Text=text,
        OutputFormat='mp3',  # MP3 output format
        VoiceId='Salli'     # Salli voice; you can choose another Polly voice
    )

    # Accessing the audio stream from the response
    if "AudioStream" in response:
        with open(filename, 'wb') as file:
            file.write(response['AudioStream'].read())
        print(f"Speech synthesized for text [{text}] and saved to {filename}")
    else:
        print(f"Failed to synthesize speech for text [{text}]")

    return filename

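# Example usage (illustrative; the file name is arbitrary):
#   audio_file = synthesize_speech("Drink plenty of water.", "advice.mp3")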


def chatbot_response(user_input="", gender=None, plan_type=None, weight=None, height=None, audio_input=None):
    transcription, response = "", ""  # Initialize variables for transcription and response
    error_message = ""  # Initialize error_message at the start of the function

    if audio_input:
        transcription, error = recognize_from_microphone(audio_input)
        if error:
            error_message = error  # Capture the error to return it properly
        else:
            user_input = transcription  # Use the transcription if there's no error

    # Check if there's user input or transcription, and there's no error message
    if not user_input.strip() and not transcription.strip() and not error_message:
        error_message = "Please provide audio input or type your question."

    if error_message:
        return transcription, error_message, None  # Match the interface's three outputs

    # Build a detailed prompt from the user's profile and question
    detailed_input = f"User details - Gender: {gender}, Plan Type: {plan_type}, Weight: {weight} kg, Height: {height} cm. Question: {user_input}"
    try:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a nutrition consultant AI, capable of providing natural diet plans and emergency assistance based on user inputs."},
                {"role": "user", "content": detailed_input},
            ]
        )
        response = completion.choices[0].message.content
        audio_path = synthesize_speech(response) if response else None
        return transcription, response, audio_path  # Return transcription, response text and audio path
    except Exception as e:
        return transcription, f"An error occurred during response generation: {str(e)}", None  # Three outputs even on failure


def emergency_assistance(query):
    if not query.strip():
        return "Please provide a query for emergency assistance."
    try:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "As an AI serving as an emergency nutrition advisor, your objective is to provide prompt and accurate nutritional guidance in urgent situations. When users present their concerns, you should deliver tailored advice that addresses the critical aspects of their nutritional needs quickly and effectively. Focus on offering clear, practical, and context-specific solutions to ensure their immediate dietary requirements are met."},
                {"role": "user", "content": query},
            ]
        )
        response = completion.choices[0].message.content
    except Exception as e:
        return f"An error occurred: {str(e)}", None
    # After generating the response, synthesize it to speech as well
    audio_path = synthesize_speech(response) if response else None
    return response, audio_path  # Return both response text and audio path


# Adjust Gradio interfaces to include audio output
interface1 = gr.Interface(
    fn=chatbot_response,
    inputs=[
        gr.Textbox(lines=5, label="Input Here", placeholder="Type or say your question here..."),
        gr.Radio(choices=["Male", "Female", "Other"], label="Gender"),
        gr.Radio(choices=["Weight Gain", "Weight Loss"], label="Plan Type"),
        gr.Number(label="Weight (kg)", info="Enter your weight in kg"),
        gr.Number(label="Height (cm)", info="Enter your height in cm"),
        gr.Audio(type="filepath", label="Record your question")
    ],
    outputs=[
        gr.Text(label="Transcription"),
        gr.Text(lines=10, label="Response"),
        gr.Audio(label="Listen to Response")  # New audio output for the synthesized speech
    ],
    title="Personalized Nutrition AI Advisor",
    description="Ask me anything about nutrition. Provide your Gender, Plan Type, Weight and Height for personalized advice."
)

interface2 = gr.Interface(
    fn=emergency_assistance,
    inputs=[gr.Textbox(lines=10, label="Query", placeholder="Enter your emergency nutrition query here...")],
    outputs=[
        gr.Text(lines=10, label="Response"),
        gr.Audio(label="Listen to Response")  # New audio output for the synthesized speech
    ],
    title="Emergency Assistance",
    description="To better assist you, could you explain what led to this emergency?"
)


# Combined interface with tabs
app = gr.TabbedInterface([interface1, interface2], ["Nutrition Consultant", "Emergency Assistance"], title="HealthyBytes: Your AI Nutrition Consultant")

if __name__ == "__main__":
    app.launch(share=False)