import gradio as gr
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, GenerationConfig
import spaces 

# Checkpoint identifier handed to from_pretrained for both the model and the tokenizer
model_name = "nafisneehal/Llama-3.2-3B-bnb-4bit-finetuned-TrialBrain-BaselineFeatures-it"
load_in_4bit = True  # forwarded unchanged to the model's from_pretrained call below

# Load the PEFT model first, then the tokenizer from the same checkpoint
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=load_in_4bit,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Use CUDA whenever torch reports it as available; otherwise fall back to CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)

# Default system instruction (used as the initial value of the instruction textbox).
# Written as explicit pieces so the leading and trailing newlines are visible.
test_instruction_string = (
    "\n"
    "You're a clinical trial expert. For each trial query, list probable baseline features (each in backticks and comma-separated).\n"
    "Baseline features are demographic characteristics used in primary outcome analysis, often shown by group in clinical publications.\n"
)

# Default user input (used as the initial value of the message textbox).
# Raw string: the text contains literal backslashes ("\<", "\>") that are not
# valid Python escape sequences; a raw literal keeps them byte-for-byte without
# triggering an invalid-escape warning.
test_input_string = r"""
<Title:>Vinorelbine in Treating Patients With Advanced Solid Tumors That Have Not Responded to Treatment and Liver Dysfunction <BriefSummary:>RATIONALE: Drugs used in chemotherapy, such as vinorelbine, work in different ways to stop the growth of tumor cells, either by killing the cells or by stopping them from dividing.

PURPOSE: This pilot trial is studying the side effects and best dose of vinorelbine in treating patients with advanced solid tumors that have not responded to treatment and liver dysfunction. <EligibilityCriteria:>DISEASE CHARACTERISTICS:

* Histologically confirmed advanced solid tumor

  * Any histology allowed
* Refractory to standard therapy OR no standard therapy exists

  * Previously untreated non-small cell lung cancer allowed, provided abnormal liver function is present, defined as moderate (group 3) or severe (group 4)
* Measurable disease not required

  * Present measurable disease requires baseline measurements within 4 weeks of study entry
* Patients with acute hepatitis from viral or drug etiologies should recover to a stable baseline prior to study therapy
* History of brain metastasis allowed, provided the following criteria are met:

  * Metastasis has been controlled by radiotherapy or surgery
  * Patient is not currently on corticosteroids
  * Neurologic status is stable

PATIENT CHARACTERISTICS:

* Karnofsky performance status 70-100%
* Life expectancy ≥ 2 months
* ANC = 1,500/mm³
* Platelet count = 100,000/mm³
* Hemoglobin = 10 g/dL (transfusion to this level allowed)
* Creatinine \< 1.5 mg/dL OR creatinine clearance \> 60 mL/ min
* Patients with EKG evidence of first- or second-degree AV block or left or right bundle branch block are ineligible for the lidocaine bolus, but may otherwise be treated on this protocol
* Not pregnant or nursing
* Negative pregnancy test
* Fertile patients must use effective contraception
* No concurrent illness (e.g., cardiovascular, pulmonary, or central nervous system) that is poorly controlled or of such severity that the investigator deems unwise to enter the patient on protocol
* Must have ability to comply with study treatment and required tests
* Obstructive jaundice requires a drainage procedure prior to study treatment

PRIOR CONCURRENT THERAPY:

* See Disease Characteristics
* Recovered from prior therapy
* At least 3 weeks since prior chemotherapy (6 weeks for mitomycin or nitrosourea therapy)
* No prior radiotherapy to \> 30% of the bone marrow or more than standard adjuvant pelvic radiotherapy for rectal cancer <Conditions:>Lung Cancer, Unspecified Adult Solid Tumor, Protocol Specific,  <Interventions:>indocyanine green, lidocaine, vinorelbine ditartrate, high performance liquid chromatography, intracellular fluorescence polarization analysis, liquid chromatography, mass spectrometry, pharmacological study <StudyType:>INTERVENTIONAL <PrimaryOutcomes:>Area Under the Curve, Number of Participants With Grade 3 and 4 Toxicities <OverallStatus:>COMPLETED
"""

# Function to generate response
@spaces.GPU
def generate_response(system_instruction, user_input):
    """Generate the model's answer for one instruction/input pair.

    The two texts are placed into the "### Instruction / ### Input /
    ### Response" prompt template, tokenized, and passed to ``model.generate``
    with greedy decoding.

    Args:
        system_instruction: Text inserted under the "### Instruction:" header.
        user_input: Text inserted under the "### Input:" header.

    Returns:
        The decoded text of the newly generated tokens only (prompt excluded),
        with special tokens skipped and surrounding whitespace stripped.
    """
    prompt = (
        f"### Instruction:\n{system_instruction}\n"
        f"### Input:\n{user_input}\n"
        "### Response:\n"
    )

    # Tokenize as a batch of one and move the tensors to the model's device
    inputs = tokenizer([prompt], return_tensors="pt").to(device)

    # Greedy decoding. No temperature is set: it only applies when sampling,
    # and passing temperature=0.0 with do_sample=False triggers a
    # generation-config validation warning without affecting the output.
    generation_config = GenerationConfig(
        do_sample=False,
        max_new_tokens=256,
        repetition_penalty=1.2,
        use_cache=True,
    )

    with torch.no_grad():
        outputs = model.generate(**inputs, generation_config=generation_config)

    # The returned sequence starts with the prompt tokens; drop them by length
    # rather than splitting the decoded text on "### Response:", which would
    # cut the answer short if the model itself emitted that marker.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_length:]
    assistant_response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return assistant_response.strip()

# Gradio interface setup: two side-by-side columns inside a single row
with gr.Blocks() as demo:
    gr.Markdown("# Clinical Trial Chatbot")

    with gr.Row():
        # Input column: both textboxes start out filled with the default strings
        with gr.Column():
            system_instruction = gr.Textbox(
                label="System Instruction",
                placeholder="Enter system instruction here...",
                value=test_instruction_string,
            )
            user_input = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here...",
                value=test_input_string,
            )
            submit_btn = gr.Button("Submit")

        # Output column: read-only textbox that receives the generated text
        with gr.Column():
            response_display = gr.Textbox(
                label="Bot Response",
                placeholder="Response will appear here.",
                interactive=False,
            )

    # Clicking Submit sends both textbox values to generate_response and
    # writes its return value into the response textbox
    submit_btn.click(
        fn=generate_response,
        inputs=[system_instruction, user_input],
        outputs=response_display,
    )

# Start the app with Gradio's default launch options (no share link is requested)
demo.launch()