|
from selective_context_source import SelectiveContext |
|
from abs_compressor import AbstractCompressor |
|
|
|
|
|
class SCCompressor(AbstractCompressor): |
|
base_model = 'gpt2' |
|
|
|
def __init__(self, lang: str = 'en', model: str = 'gpt2', device: str = 'cpu'): |
|
self.sc = SelectiveContext(model_type=model, lang=lang, device=device) |
|
|
|
def compress(self, original_prompt: str, ratio: float = 0.7, level: str = 'phrase') -> dict: |
|
|
|
|
|
original_tokens = len(self.gpt_tokenizer.encode(original_prompt)) |
|
|
|
compressed_prompt, reduced_content = self.sc(original_prompt, reduce_ratio=ratio, reduce_level=level) |
|
|
|
|
|
compressed_tokens = len(self.gpt_tokenizer.encode(compressed_prompt)) |
|
|
|
result = { |
|
'compressed_prompt': compressed_prompt, |
|
'ratio': compressed_tokens / original_tokens, |
|
'original_tokens': original_tokens, |
|
'compressed_tokens': compressed_tokens, |
|
'reduced_content': reduced_content, |
|
} |
|
return result |
|
|
|
|