Балаганский Никита Николаевич
committed on
Commit 030a0f8
Parent(s): 8c1b530
add app.py
- app.py +73 -0
- generator.py +221 -0
- requirements.txt +3 -0
- sampling.py +143 -0
app.py
ADDED
@@ -0,0 +1,73 @@
import os
from typing import Tuple

import streamlit as st

import torch

import transformers
import tokenizers
from torch import autocast

from sampling import CAIFSampler, TopKWithTemperatureSampler
from generator import Generator

device = "cuda:0" if torch.cuda.is_available() else "cpu"


def main():
    st.subheader(
        'Эта демонстрация позволяет поэкспериментировать с моделями, которые оценивают, '
        'насколько предлагаемый ответ подходит к контексту диалога.'
    )
    cls_model_name = st.selectbox(
        'Выберите модель классификации',
        ('tinkoff-ai/response-quality-classifier-tiny',
         'tinkoff-ai/response-quality-classifier-base',
         'tinkoff-ai/response-quality-classifier-large')
    )
    lm_model_name = st.selectbox(
        'Выберите языковую модель',
        ('sberbank-ai/rugpt3small_based_on_gpt2',)
    )
    prompt = st.text_input("Как дела в качалке?")
    auth_token = os.environ.get('TOKEN') or True
    with st.spinner('Running inference...'):
        # Pass the user prompt through to the generation pipeline.
        text = inference(lm_model_name=lm_model_name, cls_model_name=cls_model_name, prompt=prompt)
    st.text_area(text)


@st.cache(hash_funcs={tokenizers.Tokenizer: lambda tokenizer: hash(tokenizer.to_str())}, allow_output_mutation=True)
def load_generator(lm_model_name: str) -> Generator:
    with st.spinner('Loading language model...'):
        generator = Generator(lm_model_name=lm_model_name, device=device)
        return generator


@st.cache(hash_funcs={tokenizers.Tokenizer: lambda tokenizer: hash(tokenizer.to_str())}, allow_output_mutation=True)
def load_sampler(cls_model_name, lm_tokenizer):
    with st.spinner('Loading classifier model...'):
        # CAIFSampler also needs the target device for the classifier model.
        sampler = CAIFSampler(classifier_name=cls_model_name, lm_tokenizer=lm_tokenizer, device=device)
        return sampler


@st.cache
def inference(lm_model_name: str, cls_model_name: str, prompt: str, fp16: bool = True) -> str:
    generator = load_generator(lm_model_name=lm_model_name)
    lm_tokenizer = transformers.AutoTokenizer.from_pretrained(lm_model_name)
    caif_sampler = load_sampler(cls_model_name=cls_model_name, lm_tokenizer=lm_tokenizer)
    generator.set_caif_sampler(caif_sampler)
    ordinary_sampler = TopKWithTemperatureSampler()
    generator.set_ordinary_sampler(ordinary_sampler)
    # torch.autocast expects a device type string; fp16 only toggles mixed precision.
    with autocast(device_type="cuda" if torch.cuda.is_available() else "cpu", enabled=fp16):
        sequences, tokens = generator.sample_sequences(
            num_samples=1,
            input_prompt=prompt,
            max_length=20,
            caif_period=1,
            caif_tokens_num=100,
            entropy=3.2
        )
    return sequences[0]


if __name__ == "__main__":
    main()
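For reference, the flow that inference implements can also be run without the Streamlit layer. The sketch below is an illustration only, not part of this commit: the model names mirror the select boxes above, and the sampler keyword arguments (top_k, temperature, top_k_classifier, classifier_weight) are assumed values that sample_sequences simply forwards to the samplers via **sampler_kwargs.

# Hypothetical standalone usage of Generator + samplers (values are illustrative).
import torch
import transformers

from generator import Generator
from sampling import CAIFSampler, TopKWithTemperatureSampler

device = "cuda:0" if torch.cuda.is_available() else "cpu"
lm_model_name = "sberbank-ai/rugpt3small_based_on_gpt2"
cls_model_name = "tinkoff-ai/response-quality-classifier-tiny"

generator = Generator(lm_model_name=lm_model_name, device=device)
lm_tokenizer = transformers.AutoTokenizer.from_pretrained(lm_model_name)
generator.set_caif_sampler(
    CAIFSampler(classifier_name=cls_model_name, lm_tokenizer=lm_tokenizer, device=device)
)
generator.set_ordinary_sampler(TopKWithTemperatureSampler())

sequences, tokens = generator.sample_sequences(
    num_samples=1,
    input_prompt="Как дела в качалке?",
    max_length=20,
    caif_period=1,
    caif_tokens_num=100,
    entropy=3.2,
    # Assumed sampler kwargs, forwarded via **sampler_kwargs:
    top_k=20,
    temperature=1.0,
    top_k_classifier=10,
    classifier_weight=1.0,
)
print(sequences[0])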
generator.py
ADDED
@@ -0,0 +1,221 @@
from typing import Optional, Union

import torch
import transformers


class Generator:
    def __init__(self, lm_model_name, device, entropy=None):

        self.device = device

        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
            lm_model_name
        )
        self.lm = transformers.AutoModelForCausalLM.from_pretrained(
            lm_model_name
        ).to(device)
        self.lm.eval()

        self.lm.config.pad_token_id = self.lm.config.eos_token_id
        self.tokenizer.add_special_tokens(
            {"pad_token": self.tokenizer.decode(self.lm.config.eos_token_id)}
        )
        self.caif_sampler = None
        self.ordinary_sampler = None
        self.entropy_based_stats = {
            "skips": 0,
            "avg_entropy": 0,
            "count": 0,
        }
        self.entropy = entropy

    def set_caif_sampler(self, sampler):
        self.caif_sampler = sampler

    def set_ordinary_sampler(self, sampler):
        self.ordinary_sampler = sampler

    def sample_sequences(
        self,
        num_samples: int,
        input_prompt: Optional[str],
        max_length: int,
        caif_period: int,
        caif_tokens_num: Union[int, None] = None,
        entropy: float = None,
        **sampler_kwargs
    ):
        self.entropy = entropy

        input_ids, past, ended_sequences = self.get_input_ids(
            input_prompt,
            num_samples,
        )

        for i in range(max_length):
            is_caif_step = (
                i % caif_period == 0 and self.caif_sampler is not None
            )
            input_ids, past, ended_sequences = self.generation_step(
                input_ids,
                past,
                ended_sequences,
                is_caif_step,
                caif_tokens_num=caif_tokens_num,
                **sampler_kwargs
            )
            if ended_sequences.all():
                break

        # Return the decoded continuations (prompt stripped) and the raw token ids.
        return (
            [
                self.tokenizer.decode(sequence, skip_special_tokens=True)[
                    len(input_prompt):
                ]
                for sequence in input_ids
            ],
            input_ids,
        )

    def generation_step(
        self,
        input_ids,
        past,
        ended_sequences,
        is_caif_step: bool,
        caif_tokens_num=None,
        **sampler_kwargs
    ):
        prepared_inputs = self.lm.prepare_inputs_for_generation(
            input_ids, past, use_cache=True
        )
        outputs = self.lm(
            **prepared_inputs,
            output_attentions=False,
            output_hidden_states=False,
            return_dict=True
        )

        past = outputs.past_key_values
        if self.entropy is not None:
            # Entropy-gated mode: route each sequence to the CAIF sampler only when
            # the next-token distribution is uncertain enough (entropy >= threshold).
            normalized = torch.nn.functional.log_softmax(
                outputs.logits, dim=-1
            )
            p = torch.exp(normalized)
            output_probs = p
            output_information = -normalized
            output_entropy = (output_probs * output_information).sum(-1)[:, -1]
            batch_size = output_entropy.shape[0]
            caif_mask = torch.ge(output_entropy, self.entropy)
            ordinary_mask = ~caif_mask
            self.entropy_based_stats["skips"] += caif_mask.sum() / batch_size
            self.entropy_based_stats["count"] += 1
            self.entropy_based_stats["avg_entropy"] += (
                output_entropy.sum() / batch_size
            )
            flatten_entropy = output_entropy.view(-1).cpu().tolist()
            if "entropy" not in self.entropy_based_stats.keys():
                self.entropy_based_stats["entropy"] = flatten_entropy
            else:
                self.entropy_based_stats["entropy"] += flatten_entropy

            if caif_mask.sum() == 0:
                # No sequence crossed the threshold: plain top-k sampling for all.
                next_tokens_sampler = self.ordinary_sampler
                next_tokens = next_tokens_sampler(
                    input_ids,
                    outputs.logits,
                    caif_tokens_num=caif_tokens_num,
                    **sampler_kwargs
                )
                next_tokens = (
                    next_tokens * (1 - ended_sequences.long())
                    + self.lm.config.eos_token_id * ended_sequences.long()
                ).long()

            elif caif_mask.sum() == batch_size:
                # Every sequence crossed the threshold: CAIF sampling for all.
                next_tokens_sampler = self.caif_sampler
                next_tokens = next_tokens_sampler(
                    input_ids,
                    outputs.logits,
                    caif_tokens_num=caif_tokens_num,
                    **sampler_kwargs
                )
                next_tokens = (
                    next_tokens * (1 - ended_sequences.long())
                    + self.lm.config.eos_token_id * ended_sequences.long()
                ).long()

            else:
                # Mixed batch: CAIF-sample the high-entropy sequences, top-k the rest.
                next_tokens_caif = self.caif_sampler(
                    input_ids[caif_mask],
                    outputs.logits[caif_mask],
                    caif_tokens_num=caif_tokens_num,
                    **sampler_kwargs
                )
                next_tokens_ordinary = self.ordinary_sampler(
                    input_ids[ordinary_mask],
                    outputs.logits[ordinary_mask],
                    caif_tokens_num=caif_tokens_num,
                    **sampler_kwargs
                )
                next_tokens_caif = (
                    next_tokens_caif * (1 - ended_sequences[caif_mask].long())
                    + self.lm.config.eos_token_id
                    * ended_sequences[caif_mask].long()
                ).long()
                next_tokens_ordinary = (
                    next_tokens_ordinary
                    * (1 - ended_sequences[ordinary_mask].long())
                    + self.lm.config.eos_token_id
                    * ended_sequences[ordinary_mask].long()
                ).long()

                next_tokens = torch.ones(batch_size).long().to(self.device)
                next_tokens[caif_mask] = next_tokens_caif
                next_tokens[ordinary_mask] = next_tokens_ordinary
        else:
            # Scheduled mode: apply the CAIF sampler every caif_period steps,
            # otherwise the ordinary top-k sampler.
            if is_caif_step:
                next_tokens_sampler = self.caif_sampler
            else:
                next_tokens_sampler = self.ordinary_sampler

            next_tokens = next_tokens_sampler(
                input_ids,
                outputs.logits,
                caif_tokens_num=caif_tokens_num,
                **sampler_kwargs
            )

            next_tokens = (
                next_tokens * (1 - ended_sequences.long())
                + self.lm.config.eos_token_id * ended_sequences.long()
            ).long()

        input_ids = torch.cat(
            [input_ids, next_tokens[:, None].to(self.device)], dim=-1
        )

        # Mark sequences that just produced EOS as finished.
        ended_sequences += next_tokens == self.lm.config.eos_token_id

        return input_ids, past, ended_sequences

    def get_input_ids(self, input_prompt, num_samples):
        # Prepend BOS, append the tokenized prompt and replicate it num_samples times.
        input_ids = torch.tensor([[self.lm.config.bos_token_id]])
        if input_prompt is not None:
            input_prompt = self.tokenizer(
                input_prompt, return_tensors="pt"
            ).input_ids
            input_ids = torch.cat([input_ids, input_prompt], 1)
        input_ids = input_ids.repeat(num_samples, 1).to(self.device)
        past = None
        ended_sequences = torch.zeros(
            input_ids.shape[0], device=self.device
        ).bool()

        return input_ids, past, ended_sequences

    @staticmethod
    def sample(unscaled_probs, values):
        samples = torch.multinomial(unscaled_probs, 1)
        return torch.take_along_dim(values, samples, dim=1)
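The core of generation_step is the entropy gate: when entropy is set, the per-sequence entropy of the next-token distribution decides which sampler handles which sequence. A minimal self-contained sketch of that computation on random logits (shapes are dummy values; the 3.2 threshold is just the value app.py happens to pass):

import torch
import torch.nn.functional as F

# Dummy logits: batch of 2 sequences, 1 position, vocabulary of 100 tokens.
logits = torch.randn(2, 1, 100)

log_p = F.log_softmax(logits, dim=-1)
p = log_p.exp()
# Shannon entropy of the last position's distribution, one value per sequence.
entropy = (p * -log_p).sum(-1)[:, -1]

threshold = 3.2                      # assumed threshold, as in app.py
caif_mask = entropy >= threshold     # routed to the CAIF sampler
ordinary_mask = ~caif_mask           # routed to the ordinary top-k sampler
print(entropy, caif_mask, ordinary_mask)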
requirements.txt
ADDED
@@ -0,0 +1,3 @@
streamlit
transformers
torch
sampling.py
ADDED
@@ -0,0 +1,143 @@
import torch
from torch.nn import functional as F

import transformers


def sample_from_values(unscaled_probs, values):
    # Draw one index per row from the unnormalized probabilities and map it
    # back to the corresponding token ids in `values`.
    samples = torch.multinomial(unscaled_probs, 1)
    return torch.take_along_dim(values, samples, dim=1)


class TopKWithTemperatureSampler:
    def __call__(self, input_ids, output_logits, top_k, temperature, **kwargs):

        next_token_logits = output_logits[:, -1]
        next_token_log_probs = F.log_softmax(
            next_token_logits, dim=-1
        )

        topk_log_probs = next_token_log_probs.topk(top_k, -1)
        next_tokens = sample_from_values(
            torch.exp(topk_log_probs[0] / temperature), topk_log_probs[1]
        ).squeeze(1)

        return next_tokens


class CAIFSampler:
    # Reweights the language model's top next-token candidates with an external
    # attribute classifier (classifier-guided sampling).
    def __init__(self, classifier_name, lm_tokenizer, device, invert_cls_probs: bool = False):
        self.device = device
        self.classifier_tokenizer = transformers.AutoTokenizer.from_pretrained(
            classifier_name
        )
        self.classifier_model = (
            transformers.AutoModelForSequenceClassification.from_pretrained(
                classifier_name
            ).to(device)
        )
        self.classifier_model.eval()
        self.lm_tokenizer = lm_tokenizer
        self.invert_cls_probs = invert_cls_probs

    def __call__(
        self,
        input_ids,
        output_logits,
        top_k,
        temperature,
        top_k_classifier,
        classifier_weight,
        caif_tokens_num=None,
        **kwargs
    ):
        next_token_logits = output_logits[:, -1]

        next_token_log_probs = F.log_softmax(
            next_token_logits, dim=-1
        )

        (next_token_unnormalized_probs, topk_indices,) = self.get_unnormalized_probs(
            input_ids,
            next_token_log_probs,
            temperature,
            top_k_classifier,
            classifier_weight,
            caif_tokens_num=caif_tokens_num
        )
        topk_probs = next_token_unnormalized_probs.topk(top_k, -1)
        next_tokens = sample_from_values(
            topk_probs[0],
            torch.take_along_dim(topk_indices, topk_probs[1], dim=1),
        ).squeeze(1)

        return next_tokens

    def get_unnormalized_probs(
        self,
        input_ids,
        next_token_log_probs,
        temperature,
        top_k_classifier,
        classifier_weight,
        caif_tokens_num=None
    ):
        # Combine the LM log-probs of the top_k_classifier candidates with the
        # weighted classifier log-probs, then apply temperature scaling.

        if classifier_weight == 0.0:
            raise ValueError(
                "classifier weight equal to 0 is not supported for CAIF Sampling"
            )

        top_next_token_log_probs = next_token_log_probs.topk(top_k_classifier, -1)
        classifier_input = torch.cat(
            [
                input_ids.unsqueeze(1).repeat(1, top_k_classifier, 1).flatten(0, 1),
                top_next_token_log_probs[1].view(-1).unsqueeze(-1),
            ],
            -1,
        )
        classifier_input = [
            self.lm_tokenizer.decode(sequence, skip_special_tokens=True)
            for sequence in classifier_input
        ]

        if self.invert_cls_probs:
            classifier_log_probs = torch.log(
                1 - self.get_classifier_probs(
                    classifier_input, caif_tokens_num=caif_tokens_num
                ).view(-1, top_k_classifier)
            )
        else:
            classifier_log_probs = self.get_classifier_log_probs(
                classifier_input, caif_tokens_num=caif_tokens_num
            ).view(-1, top_k_classifier)

        next_token_probs = torch.exp(
            (top_next_token_log_probs[0] + classifier_weight * classifier_log_probs)
            / temperature
        )
        return next_token_probs, top_next_token_log_probs[1]

    def get_classifier_log_probs(self, input, caif_tokens_num=None):
        input_ids = self.classifier_tokenizer(
            input, padding=True, return_tensors="pt"
        ).to(self.device)
        if caif_tokens_num is not None:
            # Keep only the last caif_tokens_num tokens of each sequence.
            input_ids["input_ids"] = input_ids["input_ids"][:, -caif_tokens_num:]
            if "attention_mask" in input_ids.keys():
                input_ids["attention_mask"] = input_ids["attention_mask"][:, -caif_tokens_num:]
            if "token_type_ids" in input_ids.keys():
                input_ids["token_type_ids"] = input_ids["token_type_ids"][:, -caif_tokens_num:]
        logits = self.classifier_model(**input_ids).logits[:, 0].squeeze(-1)
        return torch.log(torch.sigmoid(logits))

    def get_classifier_probs(self, input, caif_tokens_num=None):
        input_ids = self.classifier_tokenizer(
            input, padding=True, return_tensors="pt"
        ).to(self.device)
        if caif_tokens_num is not None:
            # Truncate along the token dimension, matching get_classifier_log_probs.
            input_ids["input_ids"] = input_ids["input_ids"][:, -caif_tokens_num:]
            if "attention_mask" in input_ids.keys():
                input_ids["attention_mask"] = input_ids["attention_mask"][:, -caif_tokens_num:]
        logits = self.classifier_model(**input_ids).logits[:, 0].squeeze(-1)
        return torch.sigmoid(logits)
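CAIFSampler adds classifier_weight times the classifier's log-probability for each of the top_k_classifier candidate continuations to the LM log-probabilities, applies the temperature, and samples from the top_k of the reweighted distribution. TopKWithTemperatureSampler has no external dependencies, so it can be sanity-checked on dummy tensors; a small sketch (shapes and values are illustrative only):

import torch
from sampling import TopKWithTemperatureSampler

sampler = TopKWithTemperatureSampler()

batch_size, seq_len, vocab_size = 2, 5, 50
input_ids = torch.randint(0, vocab_size, (batch_size, seq_len))
logits = torch.randn(batch_size, seq_len, vocab_size)

# One next token per sequence, drawn from the top-10 candidates at temperature 0.9.
next_tokens = sampler(input_ids, logits, top_k=10, temperature=0.9)
print(next_tokens.shape)  # torch.Size([2])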