hdnh2006
committed on
Commit
4f267c9
Parent(s):
eb033fd
1st version handler
handler.py
ADDED
@@ -0,0 +1,92 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Custom handler for Llama 2 text-generation model.

Author: Henry
Created on: Mon Nov 20, 2023

This module defines a custom handler for the Llama 2 text-generation model,
using Hugging Face's transformers pipeline. It is designed to process requests
for text generation, leveraging the capabilities of the Llama 2 model.
"""

import logging
import sys
from typing import Any, Dict, List

import torch
from transformers import BitsAndBytesConfig, pipeline

logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s - %(asctime)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)]
)


class EndpointHandler:
    """
    Handler class for Llama 2 text-generation model inference.

    This class initializes the model pipeline and processes incoming requests
    for text generation using the Llama 2 model.
    """

    def __init__(self, path: str = ""):
        """
        Initialize the pipeline for the Llama 2 text-generation model.

        Args:
            path (str): Path to the model, defaults to an empty string.
        """
        # Quantize the model to 4-bit NF4 with bfloat16 compute to reduce memory usage
        self.bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16
        )

        # The quantization config must reach the model loader via model_kwargs;
        # device_map="auto" places the quantized weights on the available GPU(s)
        self.pipeline = pipeline(
            'text-generation',
            model=path,
            device_map="auto",
            model_kwargs={"quantization_config": self.bnb_config}
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Process a request for text generation.

        Args:
            data (Dict[str, Any]): A dictionary containing inputs for text generation.

        Returns:
            List[Dict[str, Any]]: The generated text as a list of dictionaries.
        """
        # Log the received data
        logging.info(f"Received data: {data}")

        # Extract the prompt and optional generation parameters;
        # fall back to the raw payload if no "inputs" key is present
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # Validate the input data
        if not inputs:
            raise ValueError(f"'inputs' is required, but received {inputs!r}.")

        # Log the extracted inputs and parameters for debugging
        logging.info(f"Extracted inputs: {inputs}")
        logging.info(f"Extracted parameters: {parameters}")

        # Run text generation, forwarding extra kwargs only when provided
        if parameters is not None:
            output = self.pipeline(inputs, **parameters)
        else:
            output = self.pipeline(inputs)

        return output
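For reference, a handler like this can be smoke-tested locally before deploying it as an Inference Endpoint. The sketch below is not part of the commit: the model id and the generation parameters are illustrative assumptions, and running it requires a CUDA GPU with the transformers, accelerate, and bitsandbytes packages installed. The payload shape follows the handler's own contract: "inputs" is required, "parameters" is optional.

# Hypothetical local smoke test -- not part of this commit.
from handler import EndpointHandler

# Illustrative model id (assumption); any Llama 2 checkpoint path works here.
handler = EndpointHandler(path="meta-llama/Llama-2-7b-chat-hf")

payload = {
    "inputs": "Explain 4-bit NF4 quantization in one sentence.",
    # Optional; forwarded to the pipeline as generate kwargs.
    "parameters": {"max_new_tokens": 64, "do_sample": True, "temperature": 0.7},
}

result = handler(payload)          # returns List[Dict[str, Any]]
print(result[0]["generated_text"])

Note that __call__ pops keys out of the payload dict in place, so each request should pass a fresh dict rather than reusing one across calls.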