"""Gradio app that turns counseling notes into an Assessment and a Plan.

Backed by the Intel/neural-chat-7b-v3-1 causal LM via Hugging Face
transformers. The model and tokenizer are loaded once at import time.
"""

from transformers import AutoTokenizer, AutoModelForCausalLM

import gradio as gr

# Loaded once at module import; shared by both generation helpers below.
tokenizer = AutoTokenizer.from_pretrained("Intel/neural-chat-7b-v3-1")
model = AutoModelForCausalLM.from_pretrained("Intel/neural-chat-7b-v3-1")

# System instruction for the counseling-assistant use case. The model is told
# to mark its two sections with the literal keywords "Assessment:" and
# "Plan:", which generate_response1() relies on when splitting the output.
COUNSELOR_SYSTEM_PROMPT = (
    "Please act as a psychology counselor assistant. I will provide you "
    "counseling notes and you need to provide me with exactly two things in "
    "return, assessment and plan for the notes. Please note that the "
    "assessment should begin with keyword 'Assessment:', and plan should "
    "begin with keyword 'Plan:'. Please make it logical, simple, concise, "
    "and clear."
)


def generate_response(system_input, user_input):
    """Run one system+user turn through the model and return the reply text.

    Parameters
    ----------
    system_input : str
        Instruction placed in the "### System:" slot of the chat template.
    user_input : str
        Message placed in the "### User:" slot.

    Returns
    -------
    str
        The text generated after the final "### Assistant:" marker.
    """
    # Format the input using the model's expected prompt template.
    prompt = (
        f"### System:\n{system_input}\n"
        f"### User:\n{user_input}\n"
        f"### Assistant:\n"
    )
    # Tokenize and encode the prompt.
    inputs = tokenizer.encode(prompt, return_tensors="pt", add_special_tokens=False)
    # max_length bounds prompt + generated tokens; pad_token_id=eos silences
    # the "no pad token" warning for this decoder-only model.
    outputs = model.generate(
        inputs,
        max_length=1000,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the assistant's portion of the decoded sequence.
    return response.split("### Assistant:\n")[-1]


def generate_response1(user_input):
    """Generate (assessment, plan) for the given counseling notes.

    Returns a 2-tuple of strings so the Gradio interface can fill its two
    output boxes. If the model omits the "Plan:" keyword, the plan is
    returned empty instead of raising IndexError (which the previous
    split()[1] indexing did).
    """
    response = generate_response(COUNSELOR_SYSTEM_PROMPT, user_input)
    # Split the response into the Assessment and Plan sections.
    assessment, sep, plan_body = response.partition("Plan:")
    plan = (sep + plan_body.strip()) if sep else ""
    return assessment.strip(), plan


# Gradio interface: one notes textbox in, two labelled boxes out.
iface = gr.Interface(
    fn=generate_response1,
    inputs=gr.Textbox(
        lines=2,
        label="Please Upload Notes here",
        placeholder="Enter your notes here...",
    ),
    outputs=[gr.Textbox(label="Assessment"), gr.Textbox(label="Plan")],
    examples=[
        ["Can't sleep well?"],
        ["Feeling anxious all the time."],
    ],
)

if __name__ == "__main__":
    # share=True exposes a temporary public Gradio URL in addition to localhost.
    iface.launch(share=True)