File size: 3,464 Bytes
6df828c
 
 
 
d018247
6df828c
 
 
 
d018247
 
 
6df828c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import json

import requests
import streamlit as st
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

from embeddings import logger

# Hugging Face Inference API token, read from Streamlit's secrets store.
HF_TOKEN = st.secrets["hf_api.key"]
# Disabled local-file fallback: read the token from a plain "hf_api.key" file.
#with open("hf_api.key") as f:
#    HF_TOKEN = f.read().strip()


class HuggingFaceHosted:
    """Thin client for the Hugging Face hosted Inference API.

    Wraps the ``api-inference.huggingface.co`` REST endpoint for a single
    model and exposes task-specific helpers (fill-mask, text generation,
    summarization, question answering).
    """

    def __init__(self, model_id, api_token, verbose=False):
        """
        Args:
            model_id: Hub model identifier, e.g. ``"EleutherAI/gpt-j-6B"``.
            api_token: Hugging Face API token, sent as a Bearer credential.
            verbose: When True, log outgoing payloads (text generation only).
        """
        self.model_id = model_id
        self.api_token = api_token
        self.verbose = verbose

    def query(self, data, timeout=60):
        """POST a JSON request body to the model's inference endpoint.

        Args:
            data: JSON-encoded request body (string).
            timeout: Seconds to wait for the request; without one, a stalled
                connection would block the caller forever.

        Returns:
            The decoded JSON response (dict or list, depending on the task).
        """
        headers = {"Authorization": f"Bearer {self.api_token}"}
        api_url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        # requests.post + Response.json() replaces the previous
        # requests.request("POST", ...) / json.loads(content.decode()) pair.
        response = requests.post(api_url, headers=headers, data=data, timeout=timeout)
        return response.json()

    def fill_mask(self, text):
        """Run the fill-mask task on ``text`` (must contain a mask token)."""
        data = json.dumps({"inputs": text})
        return self.query(data)

    def text_generation(self, text, **parameters):
        """Generate a continuation of ``text``.

        Extra keyword arguments are forwarded verbatim as the API's
        ``parameters`` object (e.g. ``max_new_tokens``, ``temperature``).
        """
        payload = {
            "inputs": text,
            "parameters": parameters,
        }
        if self.verbose:
            logger.info(payload)
        data = json.dumps(payload)
        return self.query(data)

    def summarization(self, text, do_sample=False):
        """Summarize ``text``; ``do_sample`` toggles sampled decoding."""
        data = json.dumps({"inputs": text, "parameters": {"do_sample": do_sample}})
        return self.query(data)

    def question_answering(self, question, context):
        """Answer ``question`` using the supplied ``context`` passage."""
        data = json.dumps(
            {
                "inputs": {
                    "question": question,
                    "context": context,
                }
            }
        )
        return self.query(data)


class CLIP:
    """Local CLIP wrapper that produces image/text embeddings as NumPy arrays."""

    def __init__(self, model_id="openai/clip-vit-large-patch14"):
        """Load the CLIP model and processor from the Hugging Face Hub."""
        self.model_id = model_id
        self.model = CLIPModel.from_pretrained(model_id)
        self.processor = CLIPProcessor.from_pretrained(model_id)

    def get_image_emb(self, image):
        """Embed an image through CLIP's vision tower.

        Args:
            image: A PIL image, or a path (str) to an image file.

        Returns:
            numpy.ndarray of image features (one row per input image).
        """
        if isinstance(image, str):
            image = Image.open(image)
        image_inputs = self.processor(images=image, return_tensors="pt", padding=True)
        # Inference only: no_grad avoids building an autograd graph,
        # cutting memory use without changing the returned values.
        with torch.no_grad():
            out = self.model.get_image_features(**image_inputs)
        return out.numpy()

    def get_text_emb(self, text):
        """Embed text (a string or list of strings) through CLIP's text tower.

        Returns:
            numpy.ndarray of text features (one row per input string).
        """
        text_inputs = self.processor(text=text, return_tensors="pt", padding=True)
        with torch.no_grad():
            out = self.model.get_text_features(**text_inputs)
        return out.numpy()

    def __repr__(self):
        return f"CLIP Local <{self.model_id}>"


class GPTJ(HuggingFaceHosted):
    """Hosted GPT-J-6B text-generation client; instances are callable."""

    def __init__(self, model_id="EleutherAI/gpt-j-6B", api_token=HF_TOKEN, verbose=False):
        super().__init__(model_id, api_token, verbose=verbose)

    def __call__(self, text, **parameters):
        # Calling the instance is shorthand for text_generation().
        return self.text_generation(text, **parameters)

    def __repr__(self):
        return "GPTJ Hosted <{}>".format(self.model_id)


class MaskEncoder(HuggingFaceHosted):
    """Hosted RoBERTa-large fill-mask client; instances are callable."""

    def __init__(self, model_id="roberta-large", api_token=HF_TOKEN, verbose=False):
        super().__init__(model_id, api_token, verbose=verbose)

    def __call__(self, text):
        # Calling the instance is shorthand for fill_mask().
        return self.fill_mask(text)

    def __repr__(self):
        return "MaskEncoder Hosted <{}>".format(self.model_id)


class T2T(HuggingFaceHosted):
    """Hosted T0pp text-to-text client; instances are callable."""

    def __init__(self, model_id="bigscience/T0pp", api_token=HF_TOKEN, verbose=False):
        super().__init__(model_id, api_token, verbose=verbose)

    def __call__(self, text, **parameters):
        # Calling the instance is shorthand for text_generation().
        return self.text_generation(text, **parameters)

    def __repr__(self):
        return "T2T Hosted <{}>".format(self.model_id)