model not generating text

#97
by airedwin
# Assumed imports for this excerpt; MODEL_NAME and DEFAULT_MAX_LENGTH are
# constants defined elsewhere in my code.
import torch
from typing import Dict
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

def load(self):
    # Load the tokenizer and the 8-bit quantized model, then wrap them
    # in a text-generation pipeline.
    self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model_8bit = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto",
        load_in_8bit=True,
        trust_remote_code=True,
    )

    self.pipeline = pipeline(
        "text-generation",
        model=model_8bit,
        tokenizer=self.tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
    )

def predict(self, request: Dict) -> Dict:
    with torch.no_grad():
        try:
            # "prompt" is required; any remaining keys in the request are
            # forwarded to the pipeline as generation kwargs.
            prompt = request.pop("prompt")
            data = self.pipeline(
                prompt,
                eos_token_id=self.tokenizer.eos_token_id,
                max_length=DEFAULT_MAX_LENGTH,
                **request,
            )[0]
            return {"data": data}
        except Exception as e:  # the excerpt cut off here; minimal handler so it parses
            return {"error": str(e)}

I am using this code to generate an answer from a prompt. It works locally when I run the model in a Docker container on an NVIDIA RTX GPU. When I deploy the same container to an AWS p3.2xlarge instance, which uses an NVIDIA Tesla V100 GPU, the generated_text it returns is empty. I am also using a local copy of the falcon-7b-instruct snapshot with TRANSFORMERS_OFFLINE=1.

Any ideas why this works locally but not on the EC2 instance?
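
One thing I haven't ruled out: the pipeline is built with torch_dtype=torch.bfloat16, and as far as I know the Tesla V100 (compute capability 7.0) has no native bfloat16 support, unlike newer RTX cards (Ampere and later, 8.x). A quick check I could run on both machines:

import torch

# Compare GPU capability on the RTX box vs. the V100 instance.
# bfloat16 needs compute capability >= 8.0 (Ampere); the V100 is (7, 0).
print(torch.cuda.get_device_name(0))
print(torch.cuda.get_device_capability(0))
print(torch.cuda.is_bf16_supported())  # expected: False on the V100

If that returns False on the p3 instance, switching to torch_dtype=torch.float16 might be worth trying.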
