import os

import gradio as gr
import keras_nlp

# Set Kaggle API credentials using values from environment variables
os.environ["KAGGLE_USERNAME"] = os.environ.get("KAGGLE_USERNAME")
os.environ["KAGGLE_KEY"] = os.environ.get("KAGGLE_KEY")

# Path to the LoRA weights produced by fine-tuning (if you have them)
LoRA_weights_path = "fined-tuned-model.lora.h5"

# Load the base Gemma model and apply the LoRA weights
gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")
gemma_lm.backbone.enable_lora(rank=4)  # Enable LoRA with rank 4
gemma_lm.preprocessor.sequence_length = 512  # Limit sequence length
gemma_lm.backbone.load_lora_weights(LoRA_weights_path)  # Load LoRA weights


# Define the response generation function
def generate_response(message):
    # Prompt template matching the instruction/response fine-tuning format
    template = "Instruction:\n{instruction}\n\nResponse:\n{response}"

    # Create the prompt with the current message
    prompt = template.format(instruction=message, response="")
    print("Prompt:\n", prompt)

    # Generate a response from the model
    response = gemma_lm.generate(prompt, max_length=256)

    # Keep only the generated response text
    response = response.split("Response:")[-1].strip()
    print("Generated Response:\n", response)

    # Return the generated response; adjust this if your model's output structure differs
    return response


# Create the Gradio interface
interface = gr.Interface(
    fn=generate_response,  # Function that generates responses
    inputs=gr.Textbox(
        placeholder="Hello, I am Sage, your mental health advisor",
        lines=2,
        scale=7,
    ),
    outputs=gr.Textbox(),
    title="Sage, your Mental Health Advisor",
    # description="Chat with Sage, your mental health advisor.",
    # live=True
)

proxy_prefix = os.environ.get("PROXY_PREFIX")

# Launch the Gradio app
interface.launch(
    server_name="0.0.0.0",
    server_port=8080,
    root_path=proxy_prefix,
    share=True,
)
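
# For reference, a minimal sketch of how a LoRA weights file like
# "fined-tuned-model.lora.h5" could be produced with the same KerasNLP preset.
# The dataset `train_data` and the optimizer settings are placeholders (not part
# of this script); uncomment and adapt before running.
#
# import keras
#
# tuner = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")
# tuner.backbone.enable_lora(rank=4)
# tuner.preprocessor.sequence_length = 512
# tuner.compile(
#     loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#     optimizer=keras.optimizers.Adam(learning_rate=5e-5),
#     weighted_metrics=[keras.metrics.SparseCategoricalAccuracy()],
# )
# tuner.fit(train_data, epochs=1, batch_size=1)  # train_data: formatted prompt strings
# tuner.backbone.save_lora_weights("fined-tuned-model.lora.h5")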