import os

import pandas as pd
import gradio as gr
from llama_cpp import Llama

# Define the model path
MODEL_PATH = "models/mistral-7b-instruct-v0.1.Q2_K.gguf"

# Create the models directory if it does not exist
os.makedirs("models", exist_ok=True)

# If the model file doesn't exist, you can download it.
# (This download should ideally be done manually or during local testing,
# not in the Hugging Face Space runtime.)
if not os.path.exists(MODEL_PATH):
    print("Downloading GGUF model...")
    # Use subprocess or any method to download the model file here if needed

# Load the quantized model (CPU)
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=8,  # Adjust this to the number of CPU cores available
    verbose=False,
)


def build_prompt(source, translation):
    """Build a chat-style prompt asking the model to score a translation."""
    return (
        "<|system|>\n"
        "You are a helpful assistant that evaluates translation quality. "
        "Score the quality from 0 (worst) to 1 (best).\n"
        "<|user|>\n"
        f"Original: {source}\nTranslation: {translation}\n"
        "How good is the translation?\n"
        "<|assistant|>"
    )


def estimate_score(source, translation):
    """Query the model and extract a numeric quality score from its reply."""
    prompt = build_prompt(source, translation)
    output = llm(prompt, max_tokens=10, stop=["</s>", "\n"])  # stop at end-of-sequence or newline
    text = output["choices"][0]["text"].strip()
    try:
        # Take the last numeric token in the response as the score
        score = float([s for s in text.split() if s.replace(".", "", 1).isdigit()][-1])
        return round(score, 3)
    except (ValueError, IndexError):
        return "N/A"


def process_file(file):
    """Score every row of an uploaded TSV with 'original' and 'translation' columns."""
    df = pd.read_csv(file.name, sep="\t")
    df["mistral_7B_predicted_score"] = df.apply(
        lambda row: estimate_score(row["original"], row["translation"]),
        axis=1,
    )
    return df


demo = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload dev.tsv with 'original' and 'translation' columns"),
    outputs=gr.Dataframe(),
    title="Mistral-7B Q2_K MT QE (Quantized, CPU)",
    description="Translation Quality Estimation using quantized Mistral-7B via llama-cpp-python (CPU)",
)

if __name__ == "__main__":
    demo.launch()
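
# Note: the model-download step near the top of this script only prints a message
# when the GGUF file is missing. A minimal sketch of an actual download, assuming
# the file is hosted on the Hugging Face Hub (the repo_id and filename below are
# assumptions, not confirmed by this script), could look like this commented-out
# example placed in that if-block:
#
#     from huggingface_hub import hf_hub_download
#
#     if not os.path.exists(MODEL_PATH):
#         # Downloads the quantized model into the local "models" directory
#         hf_hub_download(
#             repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",  # assumed repo
#             filename="mistral-7b-instruct-v0.1.Q2_K.gguf",     # assumed filename
#             local_dir="models",
#         )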