|
from typing import List, Any |
|
import tiktoken |
|
|
|
|
|
class AbstractCompressor:
    """
    Common interface for prompt compressors.

    ``compress`` and ``fit`` are stubs that raise ``NotImplementedError``
    here, so a usable compressor has to override them. ``set_model`` is an
    optional hook whose base implementation does nothing.
    """

    # Model and tokenizer slots; the base class leaves both unset.
    base_model = None
    tokenizer = None

    # tiktoken encoding for "gpt-3.5-turbo-16k". Created once, when this
    # class body is executed, and stored as a class attribute.
    gpt_tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo-16k")

    def compress(self, original_prompt: str, ratio: float) -> dict:
        """
        Take an original prompt/sentence and a compression ratio and return
        the compressed prompt/sentence.

        :param original_prompt: the prompt or sentence to compress
        :param ratio: the compression ratio (its exact meaning is not
            defined by this base class)
        :return: dict object (its keys are not defined by this base class)
        :raises NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError

    def fit(self, datas: List[dict], valid_size: int) -> None:
        """
        Train the parameters of a trainable compression method.

        Needs the training data and the size of the validation set.

        :param datas: training records, one dict per record (the original
            note refers to LongBench data; the record layout is not
            defined here)
        :param valid_size: size of the validation set
        :return: None
        :raises NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError

    def set_model(self, model: Any, **kwargs):
        """
        Specify a trained or a pre-trained model.

        The base implementation ignores its arguments and does nothing.

        :param model: the model to use
        :param kwargs: extra options (unused by the base implementation)
        :return: None
        """
        return None
|
|