xu1998hz committed
Commit bfa7fa8
1 Parent(s): f93a3d8

add all modules

Files changed (4)
  1. README.md +2 -2
  2. __init__.py +38 -0
  3. requirements.txt +2 -1
  4. sescore.py +20 -12
README.md CHANGED
@@ -5,9 +5,9 @@ datasets:
 tags:
 - evaluate
 - metric
-description: "TODO: add a description here"
+description: "SEScore: a text generation evaluation metric"
 sdk: gradio
-sdk_version: 3.0.2
+sdk_version: 0.0.1
 app_file: app.py
 pinned: false
 ---
__init__.py ADDED
@@ -0,0 +1,38 @@
+import comet
+from typing import Dict
+import torch
+from comet.encoders.base import Encoder
+from comet.encoders.bert import BERTEncoder
+from transformers import AutoModel, AutoTokenizer
+
+class robertaEncoder(BERTEncoder):
+    def __init__(self, pretrained_model: str) -> None:
+        super(Encoder, self).__init__()
+        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model)
+        self.model = AutoModel.from_pretrained(
+            pretrained_model, add_pooling_layer=False
+        )
+        self.model.encoder.output_hidden_states = True
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model: str) -> Encoder:
+        return robertaEncoder(pretrained_model)
+
+    def forward(
+        self, input_ids: torch.Tensor, attention_mask: torch.Tensor, **kwargs
+    ) -> Dict[str, torch.Tensor]:
+        last_hidden_states, _, all_layers = self.model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            output_hidden_states=True,
+            return_dict=False,
+        )
+        return {
+            "sentemb": last_hidden_states[:, 0, :],
+            "wordemb": last_hidden_states,
+            "all_layers": all_layers,
+            "attention_mask": attention_mask,
+        }
+
+# initialize roberta into str2encoder
+comet.encoders.str2encoder['RoBERTa'] = robertaEncoder
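
The new __init__.py registers a RoBERTa wrapper in COMET's encoder registry (comet.encoders.str2encoder) so that SEScore checkpoints built on a RoBERTa backbone can be loaded. As a rough sanity check (a sketch, not part of this commit), the encoder can be exercised directly once the module above has been imported; "roberta-base" is an assumed backbone name used purely for illustration:

import comet
import torch

# Assumes robertaEncoder from the __init__.py above is already in scope
# (e.g. the module has been imported) and that comet/transformers are installed.
encoder = robertaEncoder.from_pretrained("roberta-base")  # hypothetical backbone choice

batch = encoder.tokenizer(["SEScore evaluates generated text."], return_tensors="pt")
with torch.no_grad():
    out = encoder.forward(batch["input_ids"], batch["attention_mask"])

print(out["sentemb"].shape)    # CLS embedding: (batch_size, hidden_size)
print(len(out["all_layers"]))  # hidden states from every layer (plus embeddings)

# The registry entry is what lets COMET resolve a "RoBERTa" encoder name at load time.
assert comet.encoders.str2encoder["RoBERTa"] is robertaEncoder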
requirements.txt CHANGED
@@ -1 +1,2 @@
-git+https://github.com/huggingface/evaluate@main
+git+https://github.com/huggingface/evaluate@main
+gdown
sescore.py CHANGED
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""TODO: Add a description here."""
+"""SEScore: a text generation evaluation metric"""
 
 import evaluate
 import datasets
@@ -28,7 +28,7 @@ year={2020}
 
 # TODO: Add description of the module here
 _DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
+SEScore is an evaluation metric that tries to compute an overall score to measure text generation quality.
 """
 
 
@@ -82,14 +82,22 @@ class SEScore(evaluate.Metric):
         )
 
     def _download_and_prepare(self, dl_manager):
-        """Optional: download external resources useful to compute the scores"""
-        # TODO: Download external resources if needed
-        pass
+        """Download SEScore checkpoints used to compute the scores."""
+        # Download and unpack the SEScore checkpoint
+        from comet import load_from_checkpoint
+        import gdown
+        import os
+        url = "https://drive.google.com/uc?id=1QgMP_Y4QCbvDMTeVacYt0J76OYvwWK9V&export=download&confirm=true"
+        output = 'sescore_download.gz'
+        gdown.download(url, output, quiet=False)
+        cmd = 'tar -xvf sescore_download.gz'
+        os.system(cmd)
+        self.scorer = load_from_checkpoint('sescore_download/zh_en/checkpoint/sescore_english.ckpt')
 
-    def _compute(self, predictions, references):
-        """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
-        return {
-            "accuracy": accuracy,
-        }
+    def _compute(self, sources, predictions, references, gpus=None, progress_bar=False):
+        if gpus is None:
+            gpus = 1 if torch.cuda.is_available() else 0
+        data = {"src": references, "mt": predictions}
+        data = [dict(zip(data, t)) for t in zip(*data.values())]
+        scores, mean_score = self.scorer.predict(data, gpus=gpus, progress_bar=progress_bar)
+        return {"mean_score": mean_score, "scores": scores}
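
With _download_and_prepare fetching the checkpoint and _compute delegating to the COMET scorer's predict(), the metric can be driven through the standard evaluate API. Below is a minimal usage sketch, assuming the module is published under the Space path "xu1998hz/sescore" and that sources must be supplied alongside predictions and references (both assumptions, not confirmed by this commit). Note that in this revision sources is accepted but not used when building the scorer's input batch, which pairs references as "src" with predictions as "mt".

import evaluate

# Hypothetical load path; adjust to wherever this metric module is actually hosted.
sescore = evaluate.load("xu1998hz/sescore")

result = sescore.compute(
    sources=["source sentence"],            # accepted by _compute, currently unused
    predictions=["Hello there, world"],     # system outputs ("mt")
    references=["Hello, world"],            # human references ("src")
)
print(result["mean_score"])  # corpus-level average score
print(result["scores"])      # one score per prediction/reference pair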