import os

import pandas as pd
import gradio as gr
from llama_cpp import Llama

# Define the model path
MODEL_PATH = "models/mistral-7b-instruct-v0.1.Q2_K.gguf"

# Create the models directory if it does not exist
os.makedirs("models", exist_ok=True)

# If the model file doesn't exist, you can download it.
# (This download should ideally be done manually or during local testing,
# not in the Hugging Face Space runtime.)
if not os.path.exists(MODEL_PATH):
    print("Downloading GGUF model...")
    # Use subprocess or any method to download the model file here if needed

# Load the quantized model (CPU)
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=8,  # Adjust this to the number of CPU cores available
    verbose=False,
)


def build_prompt(source, translation):
    """Build a chat-style prompt asking the model to score a translation."""
    return (
        "<|system|>\n"
        "You are a helpful assistant that evaluates translation quality. "
        "Score the quality from 0 (worst) to 1 (best).\n"
        "<|user|>\n"
        f"Original: {source}\nTranslation: {translation}\n"
        "How good is the translation?\n"
        "<|assistant|>"
    )


def estimate_score(source, translation):
    """Query the model and extract a numeric quality score from its reply."""
    prompt = build_prompt(source, translation)
    output = llm(prompt, max_tokens=10, stop=["</s>", "\n"])  # stop at end-of-sequence or newline
    text = output["choices"][0]["text"].strip()
    try:
        # Take the last numeric token in the response as the score
        score = float([s for s in text.split() if s.replace(".", "", 1).isdigit()][-1])
        return round(score, 3)
    except (ValueError, IndexError):
        return "N/A"


def process_file(file):
    """Score every row of an uploaded TSV with 'original' and 'translation' columns."""
    df = pd.read_csv(file.name, sep="\t")
    df["mistral_7B_predicted_score"] = df.apply(
        lambda row: estimate_score(row["original"], row["translation"]),
        axis=1,
    )
    return df


demo = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload dev.tsv with 'original' and 'translation' columns"),
    outputs=gr.Dataframe(),
    title="Mistral-7B Q2_K MT QE (Quantized, CPU)",
    description="Translation Quality Estimation using quantized Mistral-7B via llama-cpp-python (CPU)",
)

if __name__ == "__main__":
    demo.launch()
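
# Note: the model-download step near the top of this script only prints a message
# when the GGUF file is missing. A minimal sketch of an actual download, assuming
# the file is hosted on the Hugging Face Hub (the repo_id and filename below are
# assumptions, not confirmed by this script), could look like this commented-out
# example placed in that if-block:
#
#     from huggingface_hub import hf_hub_download
#
#     if not os.path.exists(MODEL_PATH):
#         # Downloads the quantized model into the local "models" directory
#         hf_hub_download(
#             repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",  # assumed repo
#             filename="mistral-7b-instruct-v0.1.Q2_K.gguf",     # assumed filename
#             local_dir="models",
#         )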