from typing import Dict, List, Any
from llama_cpp import Llama

class EndpointHandler():
    def __init__(self, path="", vision_model="obsidian3b"):
        # vision_model is reserved for the commented-out Obsidian 3b image
        # path below; only the Gemma text model is loaded for now.
        self.model = Llama.from_pretrained(
            "MrOvkill/gemma-2-inference-endpoint-GGUF",
            filename="gemma-2b.q8_0.gguf",
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        data args:
            inputs (:obj:`str`)
            image (:obj:`Image`)
        Return:
            A :obj:`list` | :obj:`dict`: will be serialized and returned
        """
        # Get the prompt and sampling parameters, falling back to defaults.
        inputs = data.pop("inputs", "")
        temperature = data.pop("temperature", None)
        if temperature is None:
            # "temp" is accepted as an alias for "temperature".
            temperature = data.pop("temp", 0.33)
        if temperature > 3 or temperature < 0:
            # Errors are returned as plain dicts; the endpoint serializes them.
            return {
                "status": "error",
                "reason": "invalid temperature ( 0.0 - 3.0 )"
            }
        top_p = data.pop("top-p", 0.85)
        if top_p > 1 or top_p < 0:
            return {
                "status": "error",
                "reason": "invalid top-p ( 0.01 - 1.00 )"
            }
        top_k = data.pop("top-k", 42)
        if top_k > 100 or top_k < 0:
            return {
                "status": "error",
                "reason": "invalid top-k ( 0 - 100 )"
            }
        #image = data.pop("image", None)

        res = self.model(inputs, temperature=temperature, top_p=top_p, top_k=top_k)

        return res
        
        # --- Planned multimodal path (unreachable after the return above;
        # kept for reference) ---
        #inputs = self.processor(inputs, image, return_tensors="pt")
        #res = self.model.generate(**inputs, do_sample=False, max_new_tokens=4096)
        #return self.processor.decode(res[0], skip_special_tokens=True)

        #if image:
            # perform image classification using Obsidian 3b vision
            #image_features = self.vision.encode_image(image)
            #image_embedding = self.vision.extract_feature(image_features)
            #image_caption = self.vision.generate_caption(image_embedding)

            # combine text and image captions
            #combined_captions = [inputs, image_caption]

            # run text classification on combined captions
            #prediction = self.pipeline(combined_captions, temperature=0.33, num_beams=5, stop=[], do_sample=True)

            #return prediction
        

        #else:
            # run text classification on plain text input
        #    prediction = self.pipeline(inputs, temperature=0.33, num_beams=5, stop=[], do_sample=True)

        #    return prediction
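

# A minimal local smoke test, a sketch assuming the GGUF download in
# __init__ succeeds and the payload shape matches what the endpoint sends
# (the prompt text below is only an illustration).
if __name__ == "__main__":
    handler = EndpointHandler()
    payload = {
        "inputs": "Why is the sky blue?",
        "temperature": 0.33,
        "top-p": 0.85,
        "top-k": 42,
    }
    result = handler(payload)
    # llama-cpp-python returns an OpenAI-style completion dict.
    print(result["choices"][0]["text"])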