File size: 1,090 Bytes
10b912d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from selective_context_source import SelectiveContext
from abs_compressor import AbstractCompressor


class SCCompressor(AbstractCompressor):
    base_model = 'gpt2'

    def __init__(self, lang: str = 'en', model: str = 'gpt2', device: str = 'cpu'):
        self.sc = SelectiveContext(model_type=model, lang=lang, device=device)

    def compress(self, original_prompt: str, ratio: float = 0.7, level: str = 'phrase') -> dict:

        # count tokens of original prompt
        original_tokens = len(self.gpt_tokenizer.encode(original_prompt))

        compressed_prompt, reduced_content = self.sc(original_prompt, reduce_ratio=ratio, reduce_level=level)

        # count tokens of compressed prompt
        compressed_tokens = len(self.gpt_tokenizer.encode(compressed_prompt))

        result = {
            'compressed_prompt': compressed_prompt,
            'ratio': compressed_tokens / original_tokens,
            'original_tokens': original_tokens,
            'compressed_tokens': compressed_tokens,
            'reduced_content': reduced_content,
        }
        return result