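"""Zeno functions for a machine translation comparison Space.

Registers model loaders that read pre-computed translations and reference
embeddings from TSV files, Critique-backed distill columns (BERTScore, BLEU,
chrF, length ratio), and the corresponding averaged metrics.
"""
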
import os

import numpy as np
import pandas as pd
from inspiredco.critique import Critique
from zeno import ZenoOptions, distill, metric, model

# Critique client shared by all of the @distill metric functions below.
client = Critique(api_key=os.environ["INSPIREDCO_API_KEY"])


@model
def pred_fns(name):
    """Return a prediction function that loads cached outputs for model `name`."""

    def pred(df, ops):
        # Pre-computed translations for this model (one TSV per model name).
        model_df = pd.read_csv(
            os.path.join(ops.label_path, "{}.tsv".format(name)),
            sep="\t",
            quoting=3,
            keep_default_na=False,
        )
        # Pre-computed reference embeddings, shared across all models.
        embed_df = pd.read_csv(
            os.path.join(ops.label_path, "ref_embed.tsv"),
            sep="\t",
            quoting=3,
            keep_default_na=False,
        )
        # Attach translations and embeddings to the input rows by source text;
        # left joins keep every input row even when a model output is missing.
        df_join = df[["text"]].merge(
            model_df[["text", "translation"]], on="text", how="left"
        )
        df_join = df_join.merge(embed_df, on="text", how="left")
        # Embeddings are stored as bracketed comma-separated strings such as
        # "[0.1,0.2,...]"; strip the brackets and parse into float arrays.
        return df_join["translation"].fillna(""), [
            np.fromstring(d[1:-1], sep=",") for d in df_join["embed"]
        ]

    return pred
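
# Zeno calls the @model function once per model name; the returned pred()
# yields one output string per input row plus one embedding vector per row
# (which Zeno can use, e.g., for its embedding projection view).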


@distill
def bert_score(df, ops):
    # Reshape into Critique's record format: each example carries a "target"
    # (the system output) and a list of "references".
    eval_dict = df[["source", ops.output_column, "reference"]].to_dict("records")
    for d in eval_dict:
        d["references"] = [d.pop("reference")]
        d["target"] = d.pop(ops.output_column)
    result = client.evaluate(
        metric="bert_score", config={"model": "bert-base-uncased"}, dataset=eval_dict
    )
    return [round(r["value"], 6) for r in result["examples"]]


@distill
def bleu(df, ops):
    eval_dict = df[[ops.output_column, "reference"]].to_dict("records")
    for d in eval_dict:
        d["references"] = [d.pop("reference")]
        d["target"] = d.pop(ops.output_column)
    result = client.evaluate(
        metric="bleu",
        config={"smooth_method": "add_k", "smooth_value": 1.0},
        dataset=eval_dict,
    )
    return [round(r["value"], 6) for r in result["examples"]]


@distill
def chrf(df, ops):
    eval_dict = df[[ops.output_column, "reference"]].to_dict("records")
    for d in eval_dict:
        d["references"] = [d.pop("reference")]
        d["target"] = d.pop(ops.output_column)
    result = client.evaluate(
        metric="chrf",
        config={},
        dataset=eval_dict,
    )
    return [round(r["value"], 6) for r in result["examples"]]


@distill
def length_ratio(df, ops):
    eval_dict = df[[ops.output_column, "reference"]].to_dict("records")
    for d in eval_dict:
        d["references"] = [d.pop("reference")]
        d["target"] = d.pop(ops.output_column)
    result = client.evaluate(
        metric="length_ratio",
        config={},
        dataset=eval_dict,
    )
    return [round(r["value"], 6) for r in result["examples"]]
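
# Each distill function above sends Critique a list of records shaped like
# the sketch below (bert_score additionally includes a "source" field):
#
#   {"target": "<system output>", "references": ["<reference translation>"]}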


@metric
def avg_bert_score(df, ops: ZenoOptions):
    return df[ops.distill_columns["bert_score"]].mean()


@metric
def avg_bleu(df, ops: ZenoOptions):
    return df[ops.distill_columns["bleu"]].mean()


@metric
def avg_chrf(df, ops: ZenoOptions):
    return df[ops.distill_columns["chrf"]].mean()


@metric
def avg_length_ratio(df, ops: ZenoOptions):
    return df[ops.distill_columns["length_ratio"]].mean()


@distill
def length(df, ops):
    return df[ops.data_column].str.len()
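

if __name__ == "__main__":
    # Minimal self-contained check of the embedding parsing relied on in
    # pred_fns above, assuming ref_embed.tsv stores each vector as a
    # bracketed, comma-separated string such as "[0.1,0.2,0.3]".
    sample = "[0.1,0.2,0.3]"
    vec = np.fromstring(sample[1:-1], sep=",")
    assert vec.shape == (3,) and vec[1] == 0.2
    print("parsed embedding:", vec)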