hdnh2006 committed on
Commit
4f267c9
1 Parent(s): eb033fd

1st version handler

Browse files
Files changed (1) hide show
  1. handler.py +92 -0
handler.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Custom handler for Llama 2 text-generation model.
6
+
7
+ Author: Henry
8
+ Created on: Mon Nov 20, 2023
9
+
10
+ This module defines a custom handler for the Llama 2 text-generation model,
11
+ utilizing Hugging Face's transformers pipeline. It's designed to process requests
12
+ for text generation, leveraging the capabilities of the Llama 2 model.
13
+ """
14
+
15
+ import torch
16
+ from transformers import pipeline, BitsAndBytesConfig
17
+ from typing import Dict, List, Any
18
+ import logging
19
+ import sys
20
+
21
# Send every record at INFO level and above to stdout so the hosting
# platform's log collector can capture the handler's messages.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(levelname)s - %(asctime)s - %(message)s',
    level=logging.INFO,
)
28
+
29
+
30
class EndpointHandler:
    """
    Handler class for Llama 2 text-generation model inference.

    This class initializes the model pipeline and processes incoming requests
    for text generation using the Llama 2 model.
    """

    def __init__(self, path: str = ""):
        """
        Initialize the pipeline for the Llama 2 text-generation model.

        Args:
            path (str): Path to the model, defaults to an empty string.
        """
        # 4-bit NF4 quantization with nested (double) quantization;
        # computation runs in bfloat16.
        self.bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )

        # Route the quantization config through `model_kwargs`: pipeline()
        # forwards model_kwargs to the model's from_pretrained call, which is
        # the documented way to apply a BitsAndBytesConfig when building a
        # pipeline (passing it as a bare pipeline kwarg is not guaranteed to
        # reach model loading).
        self.pipeline = pipeline(
            'text-generation',
            model=path,
            model_kwargs={"quantization_config": self.bnb_config},
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Process a request for text generation.

        Args:
            data (Dict[str, Any]): Request payload. Expected to carry an
                "inputs" entry (the prompt) and an optional "parameters"
                entry (a mapping of generation kwargs). If "inputs" is
                missing, the remaining payload itself is used as the prompt.

        Returns:
            List[Dict[str, Any]]: The generated text as a list of dictionaries.

        Raises:
            ValueError: If no (truthy) inputs are provided.
        """
        # Log the received data
        logging.info(f"Received data: {data}")

        # Extract the prompt and the optional generation parameters.
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # Validate the input data
        if not inputs:
            raise ValueError(f'inputs are required and \'{inputs}\' is gotten.')

        # Log the extracted inputs and parameters for debugging
        logging.info(f"Extracted inputs: {inputs}")
        logging.info(f"Extracted parameters: {parameters}")

        # BUG FIX: the original always unpacked `parameters`, which raised
        # "TypeError: argument after ** must be a mapping" whenever the
        # request carried no "parameters" entry (parameters defaults to
        # None). Only unpack generation kwargs when they were supplied.
        if parameters:
            output = self.pipeline(inputs, **parameters)
        else:
            output = self.pipeline(inputs)

        return output