"""Gradio app that turns counseling notes into an Assessment and a Plan.

Backed by the Intel/neural-chat-7b-v3-1 causal LM via Hugging Face
transformers. The model and tokenizer are loaded once at import time.
"""

from transformers import AutoTokenizer, AutoModelForCausalLM

import gradio as gr

# Loaded once at module import; shared by both generation helpers below.
tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-1")
model = AutoModelForCausalLM.from_pretrained("Intel/neural-chat-7b-v3-1")

# System instruction for the counseling-assistant use case. The model is told
# to mark its two sections with the literal keywords "Assessment:" and
# "Plan:", which generate_response1() relies on when splitting the output.
COUNSELOR_SYSTEM_PROMPT = (
    "Please act as a psychology counselor assistant. I will provide you "
    "counseling notes and you need to provide me with exactly two things in "
    "return, assessment and plan for the notes. Please note that the "
    "assessment should begin with keyword 'Assessment:', and plan should "
    "begin with keyword 'Plan:'. Please make it logical, simple, concise, "
    "and clear."
)


def generate_response(system_input, user_input):
    """Run one system+user turn through the model and return the reply text.

    Parameters
    ----------
    system_input : str
        Instruction placed in the "### System:" slot of the chat template.
    user_input : str
        Message placed in the "### User:" slot.

    Returns
    -------
    str
        The text generated after the final "### Assistant:" marker.
    """
    # Format the input using the model's expected prompt template.
    prompt = (
        f"### System:\n{system_input}\n"
        f"### User:\n{user_input}\n"
        f"### Assistant:\n"
    )
    # Tokenize and encode the prompt.
    inputs = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False)
    # max_length bounds prompt + generated tokens; pad_token_id=eos silences
    # the "no pad token" warning for this decoder-only model.
    outputs = model.generate(
        inputs,
        max_length=1000,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the assistant's portion of the decoded sequence.
    return response.split("### Assistant:\n")[-1]


def generate_response1(user_input):
    """Generate (assessment, plan) for the given counseling notes.

    Returns a 2-tuple of strings so the Gradio interface can fill its two
    output boxes. If the model omits the "Plan:" keyword, the plan is
    returned empty instead of raising IndexError (which the previous
    split()[1] indexing did).
    """
    response = generate_response(COUNSELOR_SYSTEM_PROMPT, user_input)
    # Split the response into the Assessment and Plan sections.
    assessment, sep, plan_body = response.partition("Plan:")
    plan = (sep + plan_body.strip()) if sep else ""
    return assessment.strip(), plan


# Gradio interface: one notes textbox in, two labelled boxes out.
iface = gr.Interface(
    fn=generate_response1,
    inputs=gr.Textbox(
        lines=2,
        label="Please Upload Notes here",
        placeholder="Enter your notes here...",
    ),
    outputs=[gr.Textbox(label="Assessment"), gr.Textbox(label="Plan")],
    examples=[
        ["Can't sleep well?"],
        ["Feeling anxious all the time."],
    ],
)

if __name__ == "__main__":
    # share=True exposes a temporary public Gradio URL in addition to localhost.
    iface.launch(share=True)