import os
import pandas as pd
import gradio as gr
from llama_cpp import Llama
# Define the model path
MODEL_PATH = "models/mistral-7b-instruct-v0.1.Q2_K.gguf"
# Create models directory if not exists
os.makedirs("models", exist_ok=True)
# The GGUF file is expected to be in place before the app starts.
# (Downloading it should ideally happen manually or during local testing,
# not in the Hugging Face Space runtime.)
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(
        f"GGUF model not found at {MODEL_PATH}. "
        "Download it into the models/ directory before launching the app."
    )
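# A minimal sketch of how the file could be fetched automatically instead,
# assuming the quant is hosted in TheBloke's GGUF repo on the Hugging Face Hub
# (repo_id/filename are assumptions; adjust them to wherever the model lives):
#
# from huggingface_hub import hf_hub_download
# hf_hub_download(
#     repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
#     filename="mistral-7b-instruct-v0.1.Q2_K.gguf",
#     local_dir="models",
# )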
# Load the quantized model on CPU.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=8,  # adjust to the number of CPU cores available
    verbose=False,
)
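# If the available core count is not known ahead of time, the thread count can
# be derived at runtime instead of hard-coded (a small sketch; os.cpu_count()
# can return None, hence the fallback):
#
# n_threads = os.cpu_count() or 4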
def build_prompt(source, translation):
    return (
        "<|system|>\n"
        "You are a helpful assistant that evaluates translation quality. "
        "Score the quality from 0 (worst) to 1 (best).\n"
        "<|user|>\n"
        f"Original: {source}\nTranslation: {translation}\n"
        "How good is the translation?\n<|assistant|>"
    )
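# For illustration, build_prompt("Hello.", "Bonjour.") (hypothetical inputs) yields:
#
# <|system|>
# You are a helpful assistant that evaluates translation quality. Score the quality from 0 (worst) to 1 (best).
# <|user|>
# Original: Hello.
# Translation: Bonjour.
# How good is the translation?
# <|assistant|>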
def estimate_score(source, translation):
    prompt = build_prompt(source, translation)
    output = llm(prompt, max_tokens=10, stop=["</s>", "\n"])
    text = output["choices"][0]["text"].strip()
    try:
        # Take the last numeric token in the model's reply as the score.
        score = float([s for s in text.split() if s.replace(".", "", 1).isdigit()][-1])
        return round(score, 3)
    except (IndexError, ValueError):
        # No parseable number in the reply.
        return "N/A"
def process_file(file):
    # Read the uploaded TSV and append a column with the predicted scores.
    df = pd.read_csv(file.name, sep="\t")
    df["mistral_7B_predicted_score"] = df.apply(
        lambda row: estimate_score(row["original"], row["translation"]), axis=1
    )
    return df
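# The uploaded file is expected to be tab-separated with at least these two
# columns (hypothetical example rows):
#
#   original            translation
#   The cat sat.        Le chat s'est assis.
#   Good morning.       Bonjour.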
demo = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload dev.tsv with 'original' and 'translation' columns"),
    outputs=gr.Dataframe(),
    title="Mistral-7B Q2_K MT QE (Quantized, CPU)",
    description="Translation Quality Estimation using quantized Mistral-7B via llama-cpp-python (CPU)",
)
if __name__ == "__main__":
    demo.launch()