import streamlit as st
import requests


# Function to call the Hugging Face model
def query_huggingface_model(prompt):
    API_TOKEN = "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"  # Replace with your Hugging Face API token
    API_URL = "https://api-inference.huggingface.co/models/MariamAde/Mistral_finetuned_Base2"  # Replace with your model's API URL
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
    if response.status_code == 200:
        return response.json()
    else:
        return {"error": response.text}


# Streamlit interface
def main():
    st.title("My Fine-tuned Model Demo")

    # User input
    user_input = st.text_area("Enter your text here", "")

    # Button to make the prediction
    if st.button("Predict"):
        with st.spinner("Predicting..."):
            response = query_huggingface_model(user_input)
            if "error" in response:
                st.error(response["error"])
            else:
                st.success("Prediction Success")
                st.write(response)  # Modify this based on how your model's response is structured


if __name__ == "__main__":
    main()
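
# Hedged sketch (not wired into the app above): hosted text-generation endpoints on the
# Inference API usually return a list of dicts such as [{"generated_text": "..."}], but the
# exact payload for this model has not been verified here. If it does match, st.write(response)
# in main() could be swapped for something like this commented-out helper:
#
# def extract_generated_text(response):
#     # Assumes the standard text-generation payload; falls back to the raw response
#     # if the expected keys are missing.
#     if isinstance(response, list) and response and "generated_text" in response[0]:
#         return response[0]["generated_text"]
#     return response
#
# # Usage inside main(), instead of st.write(response):
# # st.write(extract_generated_text(response))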

# Earlier, commented-out variant that loads the model locally with transformers/PEFT
# instead of calling the hosted Inference API:

# #pip install transformers
# from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging, TextStreamer, MistralForCausalLM
# from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model, AutoPeftModelForCausalLM
# import os, torch, platform, warnings
# from datasets import load_dataset
# from trl import SFTTrainer
# from huggingface_hub import notebook_login
# import fire
# import streamlit as st

# #git clone https://huggingface.co/spaces/J4Lee/RadiantScriptor AutoModelForSequenceClassification

# st.set_page_config(page_title="Reports generation from Radiological Image")

# @st.cache(allow_output_mutation=True)
# def get_model():
#     #device = "cuda"  # the device to load the model onto
#     model = AutoModelForCausalLM.from_pretrained("MariamAde/Mistral_finetuned_Base2")
#     tokenizer = AutoTokenizer.from_pretrained("MariamAde/Mistral_finetuned_Base2")
#     return tokenizer, model

# tokenizer, model = get_model()

# def generate_report(labels):  #,model,tokenizer):
#     # Tokenize the input labels
#     inputs = tokenizer(labels, return_tensors="pt")  #.to(device)
#     #model.to(device)
#     # Generate output using the model
#     output = model.generate(**inputs)
#     # Decode the output sentences
#     sentences = tokenizer.decode(output[0], skip_special_tokens=True)
#     return sentences

# # Streamlit interface
# st.title("Radiology Report Generator")

# # User input for finding labels
# labels = st.text_input("Enter Finding Labels:")

# if st.button("Generate Report"):
#     # Generate the radiology report
#     report = generate_report(labels)  #,model,tokenizer)
#     # Display the report
#     st.text_area("Generated Report:", value=report, height=300)

# option 1) Mistral Usage tip
# @st.cache(allow_output_mutation=True)
# def get_model():
#     #device = "cuda"  # the device to load the model onto
#     model = AutoModelForCausalLM.from_pretrained("MariamAde/Mistral_finetuned_v2")
#     tokenizer = AutoTokenizer.from_pretrained("MariamAde/Mistral_finetuned_v2")
#     return tokenizer, model

# option 2)
# @st.cache(allow_output_mutation=True)
# def get_model():
#     tokenizer = LlamaTokenizer.from_pretrained("J4Lee/Medalpaca_finetuned_test")
#     model = MistralForCausalLM.from_pretrained("J4Lee/Medalpaca_finetuned_test")
#     return tokenizer, model

# option 3)
# @st.cache(allow_output_mutation=True)
# def get_model():
#     base_model, new_model = "mistralai/Mistral-7B-v0.1", "inferenceanalytics/radmistral_7b"
#     base_model_reload = AutoModelForCausalLM.from_pretrained(
#         base_model,
#         low_cpu_mem_usage=True,
#         return_dict=True,
#         torch_dtype=torch.bfloat16,
#         device_map="auto",
#     )
#     model = PeftModel.from_pretrained(base_model_reload, new_model)
#     model = model.merge_and_unload()  # was merged_model.merge_and_unload(), but merged_model is never defined
#     # Reload tokenizer
#     tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
#     tokenizer.pad_token = tokenizer.eos_token
#     tokenizer.padding_side = "right"
#     return tokenizer, model

# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# DEVICE
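
# Note (hedged): st.cache(allow_output_mutation=True), used in the options above, is deprecated
# in recent Streamlit releases. If one of the local-model variants is revived, st.cache_resource
# is the current API for caching a loaded model. A minimal sketch, assuming transformers is
# installed and reusing the option 1 checkpoint:
#
# import streamlit as st
# from transformers import AutoModelForCausalLM, AutoTokenizer
#
# @st.cache_resource
# def get_model():
#     # Loaded once per process and shared across reruns of the app.
#     model = AutoModelForCausalLM.from_pretrained("MariamAde/Mistral_finetuned_Base2")
#     tokenizer = AutoTokenizer.from_pretrained("MariamAde/Mistral_finetuned_Base2")
#     return tokenizer, model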