Spaces:
Runtime error
Runtime error
Alex Cabrera
committed on
Commit
•
b67dd08
1
Parent(s):
7bcbbe7
embedding
Browse files
- .zeno_cache/EMBEDDINGhuman-with-embeddings.pickle +3 -0
- .zeno_cache/EMBEDDINGhuman.pickle +3 -0
- .zeno_cache/OUTPUThuman-with-embeddings.pickle +3 -0
- .zeno_cache/OUTPUThuman.pickle +2 -2
- .zeno_cache/POSTDISTILLbert_scorehuman-with-embeddings.pickle +3 -0
- .zeno_cache/POSTDISTILLbert_scorehuman.pickle +2 -2
- .zeno_cache/PREDISTILLlength.pickle +2 -2
- config.toml +1 -1
- model.py +10 -12
- requirements.txt +3 -2
- wmt20-de-en.tsv +0 -0
.zeno_cache/EMBEDDINGhuman-with-embeddings.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e2ddbd958723a349787a13b792e697688b04c1b1c057137db818af26c1936c9
|
3 |
+
size 3477209
|
.zeno_cache/EMBEDDINGhuman.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca384c44b4e2b31f7912cadc6ebfcaf1b10c8f49f12bdf23b4d1412cac3686eb
|
3 |
+
size 76193865
|
.zeno_cache/OUTPUThuman-with-embeddings.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f92db696d6c39e571601552125d0b4dd2a6382071394ce0693f71fafbdab5da
|
3 |
+
size 280865
|
.zeno_cache/OUTPUThuman.pickle
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca48e73a22a1d00d38d1cea8115828aedf8907b22cc27e956005a83e028a94ea
|
3 |
+
size 2965059
|
.zeno_cache/POSTDISTILLbert_scorehuman-with-embeddings.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:374317448ac7ec37fa499adfa775c4370a7cf3feca2854bc861810f075a398d5
|
3 |
+
size 25744
|
.zeno_cache/POSTDISTILLbert_scorehuman.pickle
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3874299934f57785c20ef632025c3caceb8cffc9b9029a3c026c7366be7fac4
|
3 |
+
size 275525
|
.zeno_cache/PREDISTILLlength.pickle
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a32550e54669e452e59e07de2437b1e7af9a6aa544d731eda77fccb6fdd45b2
|
3 |
+
size 204736
|
config.toml
CHANGED
@@ -4,6 +4,6 @@ models = ["human"]
|
|
4 |
metadata = "wmt20-de-en.tsv"
|
5 |
data_column = "text"
|
6 |
label_column = "label"
|
7 |
-
batch_size =
|
8 |
port = 7860
|
9 |
host = "0.0.0.0"
|
|
|
4 |
metadata = "wmt20-de-en.tsv"
|
5 |
data_column = "text"
|
6 |
label_column = "label"
|
7 |
+
batch_size = 2000
|
8 |
port = 7860
|
9 |
host = "0.0.0.0"
|
model.py
CHANGED
@@ -1,18 +1,18 @@
|
|
1 |
from zeno import distill, model, metric, ZenoOptions
|
2 |
from inspiredco.critique import Critique
|
3 |
import os
|
|
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
# sentence_embed = SentenceTransformer("paraphrase-multilingual-mpnet-base-v2")
|
8 |
-
# client = Critique(api_key=os.environ["INSPIREDCO_API_KEY"])
|
9 |
|
10 |
|
11 |
@model
|
12 |
def pred_fns(name):
|
|
|
|
|
13 |
def pred(df, ops):
|
14 |
-
|
15 |
-
|
16 |
|
17 |
return pred
|
18 |
|
@@ -24,13 +24,11 @@ def bert_score(df, ops):
|
|
24 |
d["references"] = [d.pop("label")]
|
25 |
d["target"] = d.pop(ops.output_column)
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
# metric="bert_score", config={"model": "bert-base-uncased"}, dataset=eval_dict
|
31 |
-
# )
|
32 |
|
33 |
-
|
34 |
|
35 |
|
36 |
@metric
|
|
|
1 |
from zeno import distill, model, metric, ZenoOptions
|
2 |
from inspiredco.critique import Critique
|
3 |
import os
|
4 |
+
from sentence_transformers import SentenceTransformer
|
5 |
|
6 |
+
client = Critique(api_key=os.environ["INSPIREDCO_API_KEY"])
|
|
|
|
|
|
|
7 |
|
8 |
|
9 |
@model
|
10 |
def pred_fns(name):
|
11 |
+
sentence_embed = SentenceTransformer("paraphrase-multilingual-mpnet-base-v2")
|
12 |
+
|
13 |
def pred(df, ops):
|
14 |
+
embed = sentence_embed.encode(df[ops.data_column].tolist()).tolist()
|
15 |
+
return df["translation"], embed
|
16 |
|
17 |
return pred
|
18 |
|
|
|
24 |
d["references"] = [d.pop("label")]
|
25 |
d["target"] = d.pop(ops.output_column)
|
26 |
|
27 |
+
result = client.evaluate(
|
28 |
+
metric="bert_score", config={"model": "bert-base-uncased"}, dataset=eval_dict
|
29 |
+
)
|
|
|
|
|
30 |
|
31 |
+
return [round(r["value"], 6) for r in result["examples"]]
|
32 |
|
33 |
|
34 |
@metric
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
-
zenoml>=0.3.
|
2 |
-
inspiredco
|
|
|
|
1 |
+
zenoml>=0.3.14
|
2 |
+
inspiredco
|
3 |
+
sentence_transformers
|
wmt20-de-en.tsv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|