# NOTE: Hugging Face Space source file (~2,160 bytes). The web-view header
# (commit hashes and line-number gutter) was captured during extraction and has
# been commented out so the module parses; the actual source begins below.
import pandas as pd
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import os
import re
# Load the model and tokenizer for translation quality estimation.
model_name = "google/flan-t5-base"
# HF_TOKEN is set as a secret in the Hugging Face Space settings; None is
# acceptable here because this checkpoint is public.
hf_token = os.environ.get("HF_TOKEN")
# `token=` replaces the deprecated `use_auth_token=` argument (removed in
# recent transformers releases).
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, token=hf_token)
# Move the model to GPU if available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Build the instruction prompt sent to the model for one sentence pair.
def generate_prompt(original, translation):
    """Return the scoring prompt for a (source, translation) pair."""
    parts = [
        "Rate the quality of this translation from 0 (poor) to 1 (excellent). "
        "Only respond with a number.",
        "",
        f"Source: {original}",
        f"Translation: {translation}",
        "Score:",
    ]
    return "\n".join(parts)
# Main prediction function
def predict_scores(file):
df = pd.read_csv(file.name, sep="\t")
scores = []
for _, row in df.iterrows():
prompt = generate_prompt(row["original"], row["translation"])
# Tokenize and send to model
inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=10)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Debug print (optional)
print("Response:", response)
# Extract numeric score using regex
match = re.search(r"\b([01](?:\.\d+)?)\b", response)
if match:
score_val = float(match.group(1))
score_val = max(0, min(score_val, 1)) # Clamp between 0 and 1
else:
score_val = -1 # fallback if model output is invalid
scores.append(score_val)
df["predicted_score"] = scores
return df
# Gradio UI: single file upload in, scored table out, wired to predict_scores.
iface = gr.Interface(
    fn=predict_scores,
    inputs=gr.File(label="Upload dev.tsv"),
    outputs=gr.Dataframe(label="QE Output with Predicted Score"),
    title="MT QE with FLAN-T5-Base",
    description="Upload a dev.tsv file with columns: 'original' and 'translation'."
)
# Launch app (blocks; serves the interface on the Space's default port)
iface.launch()