File size: 1,138 Bytes
5f6dd19
 
 
 
 
 
 
d217cb5
5f6dd19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import Dict, List, Any


class EndpointHandler:
    """Callable text-generation handler around a causal language model.

    The constructor loads a tokenizer and a causal LM from ``path``; calling
    the instance with a request dictionary tokenizes the prompt, runs
    ``generate`` on it and returns the decoded first sequence.
    """

    def __init__(self, path: str = "TangrisJones/vicuna-13b-GPTQ-4bit-128g"):
        """Load the tokenizer and model identified by ``path``.

        Args:
            path: Identifier handed unchanged to
                ``AutoTokenizer.from_pretrained`` and
                ``AutoModelForCausalLM.from_pretrained``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForCausalLM.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate text for a single request.

        Args:
            data: Request payload. ``data["inputs"]`` is the prompt passed to
                the tokenizer. The optional ``data["kwargs"]`` mapping is
                forwarded as keyword arguments to ``self.model.generate``;
                a missing, ``None`` or empty value means "no extra arguments".

        Returns:
            A one-element list ``[{"output": text}]`` where ``text`` is the
            first generated sequence decoded with the tokenizer's default
            decode settings.

        Raises:
            KeyError: If ``data`` has no ``"inputs"`` entry.
        """
        input_text = data["inputs"]
        # `or {}` also covers an explicit `"kwargs": null` in the request,
        # which would otherwise fail when unpacked with `**`.
        kwargs = data.get("kwargs") or {}

        # Tokenize input text
        input_tokens = self.tokenizer.encode(input_text, return_tensors="pt")

        # Keep the prompt on the same device as the model so generation works
        # when the weights were loaded onto an accelerator.
        device = getattr(self.model, "device", None)
        if device is not None:
            input_tokens = input_tokens.to(device)

        # Generate output tokens (inference only, so no gradient tracking)
        with torch.no_grad():
            output_tokens = self.model.generate(input_tokens, **kwargs)

        # Decode only the first sequence of the generated batch
        output_text = self.tokenizer.decode(output_tokens[0])

        return [{"output": output_text}]


# Manual smoke test: executed only when this file is run as a script.
if __name__ == "__main__":
    # Build a request in the shape the handler's __call__ reads
    # (an "inputs" prompt and no extra generation kwargs).
    demo_request = {"inputs": "Once upon a time in a small village, "}

    # Constructing the handler loads the default tokenizer and model, then
    # the instance is invoked directly on the request and the result printed.
    print(EndpointHandler()(demo_request))