Alex Cabrera committed
Commit 4a5b13d • 1 Parent(s): 802dbc1

more metrics

Browse files
.zeno_cache/POSTDISTILLbert_scorehuman.pickle CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3874299934f57785c20ef632025c3caceb8cffc9b9029a3c026c7366be7fac4
+oid sha256:1e7695f9c6114d019cda0942d59536b9666acb7c77b0ab76064ac3413844d401
 size 275525
.zeno_cache/{OUTPUThuman-with-embeddings.pickle → POSTDISTILLbleuhuman.pickle} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f92db696d6c39e571601552125d0b4dd2a6382071394ce0693f71fafbdab5da
-size 280865
+oid sha256:a452a66042652393d61b1c46a83395d98d543498fb6e5825d5c5c52df57da4f3
+size 275519
.zeno_cache/{POSTDISTILLbert_scorehuman-with-embeddings.pickle → POSTDISTILLchrfhuman.pickle} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:374317448ac7ec37fa499adfa775c4370a7cf3feca2854bc861810f075a398d5
-size 25744
+oid sha256:014caba05b6980c1c71d9602952474737446c8ffee54f49f12bd4bd9b9987375
+size 275519
.zeno_cache/{EMBEDDINGhuman-with-embeddings.pickle → POSTDISTILLlength_ratiohuman.pickle} RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e2ddbd958723a349787a13b792e697688b04c1b1c057137db818af26c1936c9
-size 3477209
+oid sha256:1205b1c225a4fa82c63138540230f1f03bb0f19292dd98fc22284512d437ee37
+size 275527
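
All four `.zeno_cache` entries above are Git LFS pointer files rather than the pickles themselves: the repository stores a three-line stub (spec version, SHA-256 object id, byte size) while the actual blob lives in LFS storage, which is why each diff touches only the `oid` and `size` lines. A minimal sketch of reading such a pointer, assuming only the three-line format shown above (`parse_lfs_pointer` is a hypothetical helper, not part of this commit):

```python
from pathlib import Path


def parse_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields.

    Expects the three-line format shown in the diffs above:
        version https://git-lfs.github.com/spec/v1
        oid sha256:<hex digest>
        size <bytes>
    """
    fields = {}
    for line in Path(path).read_text().splitlines():
        if line.strip():
            # Each pointer line is "<key> <value>"; split on the first space.
            key, _, value = line.partition(" ")
            fields[key] = value
    return fields


# e.g. fields["oid"] == "sha256:a452a660..." and fields["size"] == "275519"
pointer = parse_lfs_pointer(".zeno_cache/POSTDISTILLbleuhuman.pickle")
```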
model.py CHANGED
@@ -3,7 +3,7 @@ from inspiredco.critique import Critique
 import os
 from sentence_transformers import SentenceTransformer
 
-# client = Critique(api_key=os.environ["INSPIREDCO_API_KEY"])
+client = Critique(api_key=os.environ["INSPIREDCO_API_KEY"])
 
 
 @model
@@ -19,15 +19,62 @@ def pred_fns(name):
 
 @distill
 def bert_score(df, ops):
-    eval_dict = df[["source", ops.output_column, "label"]].to_dict("records")
+    eval_dict = df[["source", ops.output_column, "reference"]].to_dict("records")
     for d in eval_dict:
-        d["references"] = [d.pop("label")]
+        d["references"] = [d.pop("reference")]
         d["target"] = d.pop(ops.output_column)
 
-    # result = client.evaluate(
-    #     metric="bert_score", config={"model": "bert-base-uncased"}, dataset=eval_dict
-    # )
-    result = {"examples": [{"value": 0.5} for _ in range(len(eval_dict))]}
+    result = client.evaluate(
+        metric="bert_score", config={"model": "bert-base-uncased"}, dataset=eval_dict
+    )
+
+    return [round(r["value"], 6) for r in result["examples"]]
+
+
+@distill
+def bleu(df, ops):
+    eval_dict = df[[ops.output_column, "reference"]].to_dict("records")
+    for d in eval_dict:
+        d["references"] = [d.pop("reference")]
+        d["target"] = d.pop(ops.output_column)
+
+    result = client.evaluate(
+        metric="bleu",
+        config={"smooth_method": "add_k", "smooth_value": 1.0},
+        dataset=eval_dict,
+    )
+
+    return [round(r["value"], 6) for r in result["examples"]]
+
+
+@distill
+def chrf(df, ops):
+    eval_dict = df[[ops.output_column, "reference"]].to_dict("records")
+    for d in eval_dict:
+        d["references"] = [d.pop("reference")]
+        d["target"] = d.pop(ops.output_column)
+
+    result = client.evaluate(
+        metric="chrf",
+        config={},
+        dataset=eval_dict,
+    )
+
+    return [round(r["value"], 6) for r in result["examples"]]
+
+
+@distill
+def length_ratio(df, ops):
+    eval_dict = df[[ops.output_column, "reference"]].to_dict("records")
+    for d in eval_dict:
+        d["references"] = [d.pop("reference")]
+        d["target"] = d.pop(ops.output_column)
+
+    result = client.evaluate(
+        metric="length_ratio",
+        config={},
+        dataset=eval_dict,
+    )
 
     return [round(r["value"], 6) for r in result["examples"]]
 
@@ -37,6 +84,21 @@ def avg_bert_score(df, ops: ZenoOptions):
     return df[ops.distill_columns["bert_score"]].mean()
 
 
+@metric
+def avg_bleu(df, ops: ZenoOptions):
+    return df[ops.distill_columns["bleu"]].mean()
+
+
+@metric
+def avg_chrf(df, ops: ZenoOptions):
+    return df[ops.distill_columns["chrf"]].mean()
+
+
+@metric
+def avg_length_ratio(df, ops: ZenoOptions):
+    return df[ops.distill_columns["length_ratio"]].mean()
+
+
 @distill
 def length(df, ops):
     return df[ops.data_column].str.len()
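
The four new `@distill` functions differ only in the metric name, the config dict, and whether the `source` column is included, so they could be factored through a single helper. A hedged sketch of that refactor, not part of this commit (`critique_scores` is a hypothetical name, and it assumes `client.evaluate(metric=..., config=..., dataset=...)` behaves exactly as called in the diff):

```python
def critique_scores(df, ops, metric, config=None, extra_columns=()):
    # Build the same record shape the diff builds by hand:
    # {"target": <model output>, "references": [<reference>], ...extras}
    columns = [*extra_columns, ops.output_column, "reference"]
    eval_dict = df[columns].to_dict("records")
    for d in eval_dict:
        d["references"] = [d.pop("reference")]
        d["target"] = d.pop(ops.output_column)

    result = client.evaluate(metric=metric, config=config or {}, dataset=eval_dict)
    return [round(r["value"], 6) for r in result["examples"]]


@distill
def bleu(df, ops):
    return critique_scores(
        df, ops, "bleu", {"smooth_method": "add_k", "smooth_value": 1.0}
    )


@distill
def bert_score(df, ops):
    return critique_scores(
        df, ops, "bert_score", {"model": "bert-base-uncased"}, extra_columns=("source",)
    )
```

Each paired `@metric` would stay as in the commit, since averaging a distilled column is already a one-liner.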