import gradio as gr
from transformers import LlamaTokenizer, LlamaForCausalLM
import torch
import os
from huggingface_hub import login

# Log in to the Hugging Face Hub using the HF_TOKEN value from the
# environment (on a Space this is configured under "secrets").
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    # Missing or empty token: skip the login and only report the problem.
    print("HF_TOKEN not found in environment variables. Please set it in Space secrets.")
else:
    login(token=hf_token)
    print("Authenticated with Hugging Face token.")

# Local directory holding the fine-tuned model weights and tokenizer files.
_MODEL_DIR = "./fine_tuned_llama2"

# Filled on the first successful load so that later requests reuse the same
# tokenizer/model objects instead of re-reading the weights from disk.
_MODEL_CACHE = {}


def _get_tokenizer_and_model():
    """Return the ``(tokenizer, model)`` pair, loading it on first use.

    The pair is stored only after both objects loaded successfully, so a
    failed load leaves the cache empty and the next call retries.

    Raises:
        Whatever ``from_pretrained`` raises when the files in ``_MODEL_DIR``
        are missing or unreadable.
    """
    if "pair" not in _MODEL_CACHE:
        tokenizer = LlamaTokenizer.from_pretrained(_MODEL_DIR)
        model = LlamaForCausalLM.from_pretrained(_MODEL_DIR)
        model.eval()
        _MODEL_CACHE["pair"] = (tokenizer, model)
    return _MODEL_CACHE["pair"]


# Function to predict fraud based on text input
def predict(input_text):
    """Run the fine-tuned model on ``input_text`` and return its decoded output.

    Args:
        input_text: Text typed into the UI. ``None``, an empty string, or a
            whitespace-only string is rejected without touching the model.

    Returns:
        The decoded text of the first generated sequence, a prompt asking
        for input when nothing usable was entered, or an
        ``"Error during prediction: ..."`` message if loading or generation
        raised.
    """
    # Whitespace-only text carries nothing to analyze; treat it like empty input.
    if not input_text or (isinstance(input_text, str) and not input_text.strip()):
        return "Please enter some text to analyze."

    try:
        # Reuse the already-loaded model; loading it per request is very slow.
        tokenizer, model = _get_tokenizer_and_model()

        # A single prompt needs no padding: padding it to max_length only adds
        # wasted positions and fails if the tokenizer defines no pad token.
        # Truncation still caps the prompt at 512 tokens.
        inputs = tokenizer(
            input_text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )

        # Inference only, so gradient tracking is disabled.
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=50)

        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure in the output box instead of crashing.
        return f"Error during prediction: {e}"

# Build the UI pieces first: a two-line textbox for the description to
# analyze and a textbox that shows whatever predict() returns.
_input_box = gr.Textbox(
    label="Input Text",
    placeholder="Enter text to analyze (e.g., 'Facility backdates policies. Is this fraudulent?')",
    lines=2,
)
_output_box = gr.Textbox(label="Prediction")

# Wire the components to predict() in a single Gradio interface.
interface = gr.Interface(
    title="Fine-Tune LLaMA 2 for Healthcare Fraud Analysis",
    description="Test the fine-tuned LLaMA 2 model to detect healthcare fraud. Enter a description of a facility's behavior to analyze.",
    fn=predict,
    inputs=_input_box,
    outputs=_output_box,
)

# Start serving the app.
interface.launch()