morgankavanagh committed on
Commit
3c397fd
·
1 Parent(s): ee7f8eb

went back to internal running comet

Browse files
Dockerfile CHANGED
@@ -4,12 +4,16 @@ FROM python:3.10-slim
4
  # Set working directory
5
  WORKDIR /app
6
 
 
 
 
7
  # Copy the project files
8
  COPY . .
9
 
10
  # Install system dependencies (optional – useful for some Python packages)
11
  RUN apt-get update && apt-get install -y \
12
  git \
 
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
  # Install Python dependencies
@@ -19,8 +23,10 @@ RUN pip install -r requirements.txt
19
  # Expose port for Gradio
20
  EXPOSE 7860
21
 
22
- # Set environment variable for Hugging Face secret (set on HF settings)
23
  ENV OPENAI_API_KEY=${OPENAI_API_KEY}
 
 
24
 
25
  # Run the Gradio interface
26
- CMD ["python", "interface.py"]
 
4
  # Set working directory
5
  WORKDIR /app
6
 
7
+ # Create and set permissions for COMET cache directory
8
+ RUN mkdir -p /tmp && chmod 777 /tmp
9
+
10
  # Copy the project files
11
  COPY . .
12
 
13
  # Install system dependencies (optional – useful for some Python packages)
14
  RUN apt-get update && apt-get install -y \
15
  git \
16
+ build-essential \
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
  # Install Python dependencies
 
23
  # Expose port for Gradio
24
  EXPOSE 7860
25
 
26
+ # Set environment variables
27
  ENV OPENAI_API_KEY=${OPENAI_API_KEY}
28
+ ENV HF_API_TOKEN=${HF_API_TOKEN}
29
+ ENV COMET_CACHE="/tmp"
30
 
31
  # Run the Gradio interface
32
+ CMD ["python", "interface.py"]
evaluator/comet.py CHANGED
@@ -1,34 +1,37 @@
1
  import os
2
- import requests
 
3
 
4
- # Set the Hugging Face Inference API URL and token
5
- HF_API_URL = "https://api-inference.huggingface.co/models/Unbabel/wmt20-comet-da"
6
- HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Ensure this is set in your environment
7
 
8
  def calculate_comet(source_sentences, translations, references):
9
  """
10
- Calculate COMET scores using the Hugging Face Inference API.
11
  :param source_sentences: List of source sentences.
12
  :param translations: List of translated sentences (hypotheses).
13
  :param references: List of reference translations.
14
  :return: List of COMET scores (one score per sentence pair).
15
  """
16
- headers = {
17
- "Authorization": f"Bearer {HF_API_TOKEN}",
18
- "Content-Type": "application/json"
19
- }
20
 
21
- # Prepare data for the API
22
- data = [
23
- {"source": src, "translation": mt, "reference": ref}
24
- for src, mt, ref in zip(source_sentences, translations, references)
25
- ]
26
 
27
- # Make the API call
28
- response = requests.post(HF_API_URL, headers=headers, json={"inputs": data})
29
- response.raise_for_status() # Raise an error for bad responses
 
 
30
 
31
- # Parse the response
32
- results = response.json()
33
- scores = [item["score"] for item in results] # Extract scores from the response
34
- return scores
 
 
 
 
1
  import os
2
+ import torch
3
+ from comet import download_model, load_from_checkpoint
4
 
5
+ # Set a custom cache directory for COMET
6
+ os.environ["COMET_CACHE"] = "/tmp"
 
7
 
8
def calculate_comet(source_sentences, translations, references):
    """
    Calculate COMET scores for a list of translations using a locally
    loaded Unbabel COMET model.

    :param source_sentences: List of source sentences.
    :param translations: List of translated sentences (hypotheses).
    :param references: List of reference translations.
    :return: List of COMET scores (one score per sentence pair);
             a list of 0.0 fallbacks if scoring fails.
    """
    # Nothing to score: skip the expensive model download/load entirely.
    if not source_sentences:
        return []

    try:
        # Download (cached under COMET_CACHE) and load the COMET model.
        model_path = download_model("Unbabel/wmt22-comet-da")
        model = load_from_checkpoint(model_path)

        # Keep the device and the `gpus` argument in agreement: the original
        # moved the model to CUDA but always passed gpus=0, which forces
        # predict() back onto the CPU and makes the device logic dead code.
        use_cuda = torch.cuda.is_available()
        model.to("cuda" if use_cuda else "cpu")

        # COMET expects a list of {"src", "mt", "ref"} dicts.
        data = [
            {"src": src, "mt": mt, "ref": ref}
            for src, mt, ref in zip(source_sentences, translations, references)
        ]

        # Compute COMET scores (batched; one GPU when available).
        results = model.predict(data, batch_size=8, gpus=1 if use_cuda else 0)
        return results["scores"]
    except Exception as e:
        # Best-effort: scoring must not crash the app, so log and fall back
        # to neutral 0.0 scores for every sentence pair.
        print(f"COMET calculation error: {str(e)}")
        return [0.0] * len(source_sentences)
evaluator/{comet_internal.py β†’ comet_hf.py} RENAMED
@@ -1,28 +1,34 @@
1
  import os
2
- from comet import download_model, load_from_checkpoint
3
 
4
- # Set a custom cache directory for COMET
5
- os.environ["COMET_CACHE"] = "/tmp"
 
6
 
7
  def calculate_comet(source_sentences, translations, references):
8
  """
9
- Calculate COMET scores for a list of translations.
10
  :param source_sentences: List of source sentences.
11
  :param translations: List of translated sentences (hypotheses).
12
  :param references: List of reference translations.
13
  :return: List of COMET scores (one score per sentence pair).
14
  """
15
- # Download and load the COMET model
16
- model_path = download_model("Unbabel/wmt22-comet-da") # Use a supported model
17
- model = load_from_checkpoint(model_path)
 
18
 
19
- # Prepare data for COMET
20
  data = [
21
- {"src": src, "mt": mt, "ref": ref}
22
  for src, mt, ref in zip(source_sentences, translations, references)
23
  ]
24
 
25
- # Compute COMET scores
26
- results = model.predict(data, batch_size=8, gpus=0)
27
- scores = results["scores"] # Extract the scores from the results
 
 
 
 
28
  return scores
 
1
  import os
2
+ import requests
3
 
4
+ # Set the Hugging Face Inference API URL and token
5
+ HF_API_URL = "https://api-inference.huggingface.co/models/Unbabel/wmt20-comet-da"
6
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Ensure this is set in your environment
7
 
8
def calculate_comet(source_sentences, translations, references):
    """
    Calculate COMET scores using the Hugging Face Inference API.

    :param source_sentences: List of source sentences.
    :param translations: List of translated sentences (hypotheses).
    :param references: List of reference translations.
    :return: List of COMET scores (one score per sentence pair).
    :raises requests.HTTPError: If the API responds with an error status.
    """
    # No sentence pairs -> no network call needed.
    if not source_sentences:
        return []

    headers = {
        "Authorization": f"Bearer {HF_API_TOKEN}",
        "Content-Type": "application/json"
    }

    # Prepare data for the API
    data = [
        {"source": src, "translation": mt, "reference": ref}
        for src, mt, ref in zip(source_sentences, translations, references)
    ]

    # Make the API call. A request without a timeout can hang the app
    # indefinitely if the inference endpoint stalls.
    response = requests.post(
        HF_API_URL, headers=headers, json={"inputs": data}, timeout=60
    )
    response.raise_for_status()  # Raise an error for bad responses

    # Parse the response and extract one score per sentence pair.
    results = response.json()
    return [item["score"] for item in results]
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- torch
2
  transformers
3
  numpy
4
  pytest
 
1
+ torch>=2.0.0
2
  transformers
3
  numpy
4
  pytest