Commit 3c397fd
Parent(s): ee7f8eb

went back to internal running comet

Browse files:
- Dockerfile +8 -2
- evaluator/comet.py +24 -21
- evaluator/{comet_internal.py → comet_hf.py} +18 -12
- requirements.txt +1 -1
Dockerfile
CHANGED
@@ -4,12 +4,16 @@ FROM python:3.10-slim
 # Set working directory
 WORKDIR /app
 
+# Create and set permissions for COMET cache directory
+RUN mkdir -p /tmp && chmod 777 /tmp
+
 # Copy the project files
 COPY . .
 
 # Install system dependencies (optional - useful for some Python packages)
 RUN apt-get update && apt-get install -y \
     git \
+    build-essential \
     && rm -rf /var/lib/apt/lists/*
 
 # Install Python dependencies
@@ -19,8 +23,10 @@ RUN pip install -r requirements.txt
 # Expose port for Gradio
 EXPOSE 7860
 
-# Set environment
+# Set environment variables
 ENV OPENAI_API_KEY=${OPENAI_API_KEY}
+ENV HF_API_TOKEN=${HF_API_TOKEN}
+ENV COMET_CACHE="/tmp"
 
 # Run the Gradio interface
-CMD ["python", "interface.py"]
+CMD ["python", "interface.py"]
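The new ENV lines only take effect insofar as the application reads them at runtime. A minimal sketch of how the Python side is expected to pick them up (variable names follow the Dockerfile; the cache-path default is illustrative):

import os

# Values come from the ENV declarations above, or from `docker run -e ...`
# overrides when the container starts.
openai_key = os.getenv("OPENAI_API_KEY")        # consumed by the OpenAI-backed parts
hf_token = os.getenv("HF_API_TOKEN")            # consumed by evaluator/comet_hf.py
comet_cache = os.getenv("COMET_CACHE", "/tmp")  # where COMET checkpoints are cached

One caveat: `ENV OPENAI_API_KEY=${OPENAI_API_KEY}` only substitutes a build-time value if a matching `ARG` is declared; without one it sets an empty variable, so in practice these are expected to be injected as runtime secrets.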
evaluator/comet.py
CHANGED
@@ -1,34 +1,37 @@
 import os
-import requests
+import torch
+from comet import download_model, load_from_checkpoint
 
-# Set the Hugging Face Inference API URL and token
-HF_API_URL = "https://api-inference.huggingface.co/models/Unbabel/wmt20-comet-da"
-HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Ensure this is set in your environment
+# Set a custom cache directory for COMET
+os.environ["COMET_CACHE"] = "/tmp"
 
 def calculate_comet(source_sentences, translations, references):
     """
-    Calculate COMET scores
+    Calculate COMET scores for a list of translations.
     :param source_sentences: List of source sentences.
     :param translations: List of translated sentences (hypotheses).
     :param references: List of reference translations.
     :return: List of COMET scores (one score per sentence pair).
     """
-    headers = {
-        "Authorization": f"Bearer {HF_API_TOKEN}",
-        "Content-Type": "application/json"
-    }
-
-    # Prepare data for the API
-    data = [
-        {"source": src, "translation": mt, "reference": ref}
-        for src, mt, ref in zip(source_sentences, translations, references)
-    ]
-
-    # Make the API call
-    response = requests.post(HF_API_URL, headers=headers, json={"inputs": data})
-    response.raise_for_status()  # Raise an error for bad responses
-
-    # Parse the response
-    results = response.json()
-    scores = [item["score"] for item in results]  # Extract scores from the response
-    return scores
+    try:
+        # Download and load the COMET model
+        model_path = download_model("Unbabel/wmt22-comet-da")
+        model = load_from_checkpoint(model_path)
+
+        # Force CPU usage if GPU is not available
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        model.to(device)
+
+        # Prepare data for COMET
+        data = [
+            {"src": src, "mt": mt, "ref": ref}
+            for src, mt, ref in zip(source_sentences, translations, references)
+        ]
+
+        # Compute COMET scores
+        results = model.predict(data, batch_size=8, gpus=0)
+        scores = results["scores"]
+        return scores
+    except Exception as e:
+        print(f"COMET calculation error: {str(e)}")
+        return [0.0] * len(source_sentences)  # Return default scores on error
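For context, a minimal usage sketch of the restored internal scorer (assuming `evaluator` is importable as a package; the sentences and the printed scores are made up for illustration):

from evaluator.comet import calculate_comet

# Hypothetical inputs: three parallel lists of equal length.
sources = ["Der Hund bellt.", "Ich trinke Kaffee."]
translations = ["The dog barks.", "I drink coffee."]
references = ["The dog is barking.", "I am drinking coffee."]

scores = calculate_comet(sources, translations, references)
print(scores)  # one float per sentence pair, e.g. [0.87, 0.91]

Note that the first call downloads the wmt22-comet-da checkpoint, so it is slow; subsequent calls reuse the COMET_CACHE directory.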
evaluator/{comet_internal.py → comet_hf.py}
RENAMED
@@ -1,28 +1,34 @@
 import os
-from comet import download_model, load_from_checkpoint
+import requests
 
-# Set a custom cache directory for COMET
-os.environ["COMET_CACHE"] = "/tmp"
+# Set the Hugging Face Inference API URL and token
+HF_API_URL = "https://api-inference.huggingface.co/models/Unbabel/wmt20-comet-da"
+HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Ensure this is set in your environment
 
 def calculate_comet(source_sentences, translations, references):
     """
-    Calculate COMET scores
+    Calculate COMET scores using the Hugging Face Inference API.
     :param source_sentences: List of source sentences.
     :param translations: List of translated sentences (hypotheses).
     :param references: List of reference translations.
     :return: List of COMET scores (one score per sentence pair).
     """
-    # Download and load the COMET model
-    model_path = download_model("Unbabel/wmt22-comet-da")
-    model = load_from_checkpoint(model_path)
+    headers = {
+        "Authorization": f"Bearer {HF_API_TOKEN}",
+        "Content-Type": "application/json"
+    }
 
-    # Prepare data for COMET
+    # Prepare data for the API
     data = [
-        {"src": src, "mt": mt, "ref": ref}
+        {"source": src, "translation": mt, "reference": ref}
         for src, mt, ref in zip(source_sentences, translations, references)
     ]
 
-    # Compute COMET scores
-    results = model.predict(data, batch_size=8, gpus=0)
-    scores = results["scores"]
+    # Make the API call
+    response = requests.post(HF_API_URL, headers=headers, json={"inputs": data})
+    response.raise_for_status()  # Raise an error for bad responses
+
+    # Parse the response
+    results = response.json()
+    scores = [item["score"] for item in results]  # Extract scores from the response
     return scores
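As a reading aid, the API-backed variant assumes the endpoint answers with a list of objects that each carry a "score" field; the actual Inference API contract for this model is not shown in the commit. A small sketch of the round trip it performs, with made-up values:

# Hypothetical payload/response shapes assumed by comet_hf.py.
payload = {
    "inputs": [
        {"source": "Der Hund bellt.",
         "translation": "The dog barks.",
         "reference": "The dog is barking."}
    ]
}
# The parsing step expects the API to answer with something like:
mock_response = [{"score": 0.87}]
scores = [item["score"] for item in mock_response]
assert scores == [0.87]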
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-torch
+torch>=2.0.0
 transformers
 numpy
 pytest
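Worth noting: the internal scorer imports the `comet` module, which is provided by the `unbabel-comet` pip package and does not appear in requirements.txt, so it presumably enters the image some other way. A quick import check for the environment:

# Fails with ImportError if unbabel-comet (the package behind
# `from comet import ...`) is missing from the image.
import torch
from comet import download_model, load_from_checkpoint

print(f"torch {torch.__version__}; CUDA available: {torch.cuda.is_available()}")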