Commit fb8f2dc · Parent: d1d0c5f
chore: try to move hf model loading to dockerfile
cloudbuild-model.yaml → cloudbuild.yaml
RENAMED
@@ -4,10 +4,13 @@ steps:
     args:
       [
         "build",
+        "--build-arg",
+        "HF_TOKEN=${_HF_TOKEN}",
         "-t",
         "us-central1-docker.pkg.dev/${PROJECT_ID}/interview-ai-detector/model-prediction:latest",
         ".",
       ]
+    secretEnv: ["HF_TOKEN"]
 
   - name: "gcr.io/cloud-builders/docker"
     args:
@@ -18,3 +21,8 @@ steps:
 
 images:
   - "us-central1-docker.pkg.dev/${PROJECT_ID}/interview-ai-detector/model-prediction:latest"
+
+availableSecrets:
+  secretManager:
+    - versionName: "projects/${PROJECT_ID}/secrets/HF_TOKEN/versions/1"
+      env: "HF_TOKEN"
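Note on the secret wiring: availableSecrets/secretEnv exposes HF_TOKEN to the step only as an environment variable, while ${_HF_TOKEN} in the args is a separate user-defined substitution that has to be supplied at build-submit time. A minimal sketch of the pattern the Cloud Build docs describe for consuming the Secret Manager value directly in the docker step (same secret and image name assumed; not part of this commit):

steps:
  - name: "gcr.io/cloud-builders/docker"
    entrypoint: "bash"
    args:
      - "-c"
      - "docker build --build-arg HF_TOKEN=$$HF_TOKEN -t us-central1-docker.pkg.dev/${PROJECT_ID}/interview-ai-detector/model-prediction:latest ."
    secretEnv: ["HF_TOKEN"]

availableSecrets:
  secretManager:
    - versionName: "projects/${PROJECT_ID}/secrets/HF_TOKEN/versions/1"
      env: "HF_TOKEN"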
core-model-prediction/Dockerfile
CHANGED
@@ -1,3 +1,6 @@
+# HF Token args
+ARG HF_TOKEN
+
 # Use an official Python runtime as a base image
 FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime
 
@@ -17,6 +20,16 @@ RUN python -m nltk.downloader punkt wordnet averaged_perceptron_tagger
 # Unzip wordnet
 RUN unzip /root/nltk_data/corpora/wordnet.zip -d /root/nltk_data/corpora/
 
+# Download HuggingFace model
+RUN python -c "from transformers import AutoTokenizer, AutoModelForCausalLM; \
+    tokenizer = AutoTokenizer.from_pretrained('google/gemma-2b', token='$HF_TOKEN'); \
+    model = AutoModelForCausalLM.from_pretrained('google/gemma-2b', token='$HF_TOKEN'); \
+    tokenizer.save_pretrained('/app/gemma-2b'); \
+    model.save_pretrained('/app/gemma-2b')"
+
+# Model env
+ENV MODEL_DIR=/app/gemma-2b
+
 # Make port 8080 available to the world outside this container
 EXPOSE 8080
 
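One detail worth noting: ARG HF_TOKEN is declared before FROM, and a build argument declared outside a stage is only visible to FROM instructions, so for $HF_TOKEN to expand inside the RUN step it generally has to be re-declared after FROM. A short sketch of that arrangement (fragment only; assumes transformers is already installed at this point in the build):

FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime

# Re-declare the build arg inside the stage so $HF_TOKEN expands in RUN
ARG HF_TOKEN

# Download the model at build time and bake it into the image
RUN python -c "from transformers import AutoTokenizer, AutoModelForCausalLM; \
    AutoTokenizer.from_pretrained('google/gemma-2b', token='$HF_TOKEN').save_pretrained('/app/gemma-2b'); \
    AutoModelForCausalLM.from_pretrained('google/gemma-2b', token='$HF_TOKEN').save_pretrained('/app/gemma-2b')"

ENV MODEL_DIR=/app/gemma-2b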
core-model-prediction/gemma2b_dependencies.py
CHANGED
@@ -1,10 +1,10 @@
+import os
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 from torch.nn.functional import cosine_similarity
 from collections import Counter
 import numpy as np
 from device_manager import DeviceManager
-from google.cloud import secretmanager
 
 
 class Gemma2BDependencies:
@@ -13,21 +13,13 @@ class Gemma2BDependencies:
     def __new__(cls):
         if cls._instance is None:
             cls._instance = super(Gemma2BDependencies, cls).__new__(cls)
-            token = cls._instance.access_hf_token_secret()
-            cls._instance.tokenizer = AutoTokenizer.from_pretrained(
-                "google/gemma-2b", token=token)
-            cls._instance.model = AutoModelForCausalLM.from_pretrained(
-                "google/gemma-2b", token=token)
+            model_dir = os.getenv("MODEL_DIR", "/app/gemma-2b")
+            cls._instance.tokenizer = AutoTokenizer.from_pretrained(model_dir)
+            cls._instance.model = AutoModelForCausalLM.from_pretrained(model_dir)
             cls._instance.device = DeviceManager()
             cls._instance.model.to(cls._instance.device)
             return cls._instance
 
-    def access_hf_token_secret(self):
-        client = secretmanager.SecretManagerServiceClient()
-        name = "projects/steady-climate-416810/secrets/HF_TOKEN/versions/1"
-        response = client.access_secret_version(request={"name": name})
-        return response.payload.data.decode('UTF-8')
-
     def calculate_perplexity(self, text: str):
         inputs = self.tokenizer(text, return_tensors="pt",
                                 truncation=True, max_length=1024)
@@ -42,7 +34,6 @@ class Gemma2BDependencies:
         return perplexity.item()
 
     def calculate_burstiness(self, text: str):
-        # Tokenize the text using GPT-2 tokenizer
        tokens = self.tokenizer.encode(text, add_special_tokens=False)
 
         # Count token frequencies
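With the model baked into the image, the singleton now loads from a local directory instead of pulling from the Hub with a Secret Manager token. A minimal usage sketch of the new loading path (assumes the image sets MODEL_DIR=/app/gemma-2b as in the Dockerfile above):

import os
from transformers import AutoTokenizer, AutoModelForCausalLM

# MODEL_DIR is set in the Dockerfile; fall back to the same path it uses.
model_dir = os.getenv("MODEL_DIR", "/app/gemma-2b")

# Loading from a local directory needs no HF token and makes no network calls.
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)

print(tokenizer("hello world", return_tensors="pt")["input_ids"].shape)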
public-prediction/kafka_consumer.py
CHANGED
@@ -52,7 +52,6 @@ def send_results_back(full_results: dict[str, any], job_application_id: str):
 
     response = requests.patch(url, json=body, headers=headers)
     print(f"Data sent with status code {response.status_code}")
-    print(response.content)
 
 
 def consume_messages():
@@ -72,7 +71,7 @@ def consume_messages():
 
     for message in consumer:
         try:
-            incoming_message = json.loads(message.value.decode("utf-8"))
+            incoming_message = json.loads(json.loads(message.value.decode("utf-8")))
             full_batch = incoming_message["data"]
         except json.JSONDecodeError:
             print("Failed to decode JSON from message:", message.value)
@@ -84,6 +83,7 @@ def consume_messages():
 
     full_results = []
     for i in range(0, len(full_batch), BATCH_SIZE):
+        print(f"Processing batch {i} to {i+BATCH_SIZE}")
         batch = full_batch[i:i+BATCH_SIZE]
         batch_results = process_batch(batch, BATCH_SIZE, gpt_helper)
         full_results.extend(batch_results)
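The extra json.loads suggests the producer double-encodes the payload: the message value is a JSON string whose content is itself JSON. A small illustration with a hypothetical message value, showing why a single pass is not enough in that case:

import json

# Hypothetical doubly-encoded Kafka message value, as bytes
raw = json.dumps(json.dumps({"data": [{"answer": "..."}]})).encode("utf-8")

outer = json.loads(raw.decode("utf-8"))   # first pass still yields a str
assert isinstance(outer, str)

incoming_message = json.loads(outer)      # second pass yields the dict
assert incoming_message["data"][0]["answer"] == "..."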
public-prediction/predict_custom_model.py
CHANGED
@@ -20,13 +20,9 @@ def predict_custom_trained_model(
     # The AI Platform services require regional API endpoints.
     client_options = {"api_endpoint": api_endpoint}
 
-    credentials = service_account.Credentials.from_service_account_file(
-        os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
     # Initialize client that will be used to create and send requests.
     # This client only needs to be created once, and can be reused for multiple requests.
-    client = aiplatform.gapic.PredictionServiceClient(
-        credentials=credentials,
-        client_options=client_options)
+    client = aiplatform.gapic.PredictionServiceClient(client_options=client_options)
     # The format of each instance should conform to the deployed model's prediction input schema.
     instances = instances if isinstance(instances, list) else [instances]
     instances = [