Spaces:
Sleeping
Sleeping
Remove env setup. Update Readme.
Browse files- Makefile +0 -16
- README.md +38 -1
- app.py +8 -17
- images/LLMLingua_logo.png +0 -0
- llmlingua/__init__.py +0 -4
- llmlingua/prompt_compressor.py +0 -0
- llmlingua/utils.py +0 -98
- llmlingua/version.py +0 -14
- setup.cfg +0 -28
- setup.py +0 -70
Makefile
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
.PHONY: install style test
|
2 |
-
|
3 |
-
PYTHON := python
|
4 |
-
CHECK_DIRS := llmlingua tests
|
5 |
-
|
6 |
-
install:
|
7 |
-
@${PYTHON} setup.py bdist_wheel
|
8 |
-
@${PYTHON} -m pip install dist/sdtools*
|
9 |
-
|
10 |
-
style:
|
11 |
-
black $(CHECK_DIRS)
|
12 |
-
isort -rc $(CHECK_DIRS)
|
13 |
-
flake8 $(CHECK_DIRS)
|
14 |
-
|
15 |
-
test:
|
16 |
-
@${PYTHON} -m pytest -n auto --dist=loadfile -s -v ./tests/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -9,5 +9,42 @@ app_file: app.py
|
|
9 |
pinned: false
|
10 |
license: cc-by-nc-sa-4.0
|
11 |
---
|
12 |
-
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
pinned: false
|
10 |
license: cc-by-nc-sa-4.0
|
11 |
---
|
12 |
+
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
14 |
+
|
15 |
+
LLMLingua-2 is one branch of the [LLMLingua Series](https://llmlingua.com/). Please check the links below for more information.
|
16 |
+
<div style="display: flex; align-items: center;">
|
17 |
+
<div style="width: 100px; margin-right: 10px; height:auto;" align="left">
|
18 |
+
<img src="images/LLMLingua_logo.png" alt="LLMLingua" width="100" align="left">
|
19 |
+
</div>
|
20 |
+
<div style="flex-grow: 1;" align="center">
|
21 |
+
<h2 align="center">LLMLingua Series | Effectively Deliver Information to LLMs via Prompt Compression</h2>
|
22 |
+
</div>
|
23 |
+
</div>
|
24 |
+
|
25 |
+
<p align="center">
|
26 |
+
| <a href="https://llmlingua.com/"><b>Project Page</b></a> |
|
27 |
+
<a href="https://aclanthology.org/2023.emnlp-main.825/"><b>LLMLingua</b></a> |
|
28 |
+
<a href="https://arxiv.org/abs/2310.06839"><b>LongLLMLingua</b></a> |
|
29 |
+
<a href="https://arxiv.org/abs/2403.12968"><b>LLMLingua-2</b></a> |
|
30 |
+
<a href="https://huggingface.co/spaces/microsoft/LLMLingua"><b>LLMLingua Demo</b></a> |
|
31 |
+
<a href="https://huggingface.co/spaces/microsoft/LLMLingua-2"><b>LLMLingua-2 Demo</b></a> |
|
32 |
+
</p>
|
33 |
+
|
34 |
+
|
35 |
+
## Brief Introduction
|
36 |
+
|
37 |
+
**LLMLingua** utilizes a compact, well-trained language model (e.g., GPT2-small, LLaMA-7B) to identify and remove non-essential tokens in prompts. This approach enables efficient inference with large language models (LLMs), achieving up to 20x compression with minimal performance loss.
|
38 |
+
|
39 |
+
- [LLMLingua: Compressing Prompts for Accelerated Inference of Large Language Models](https://aclanthology.org/2023.emnlp-main.825/) (EMNLP 2023)<br>
|
40 |
+
_Huiqiang Jiang, Qianhui Wu, Chin-Yew Lin, Yuqing Yang and Lili Qiu_
|
41 |
+
|
42 |
+
**LongLLMLingua** mitigates the 'lost in the middle' issue in LLMs, enhancing long-context information processing. It reduces costs and boosts efficiency with prompt compression, improving RAG performance by up to 21.4% using only 1/4 of the tokens.
|
43 |
+
|
44 |
+
- [LongLLMLingua: Accelerating and Enhancing LLMs in Long Context Scenarios via Prompt Compression](https://arxiv.org/abs/2310.06839) (ICLR ME-FoMo 2024)<br>
|
45 |
+
_Huiqiang Jiang, Qianhui Wu, Xufang Luo, Dongsheng Li, Chin-Yew Lin, Yuqing Yang and Lili Qiu_
|
46 |
+
|
47 |
+
**LLMLingua-2**, a small-size yet powerful prompt compression method trained via data distillation from GPT-4 for token classification with a BERT-level encoder, excels in task-agnostic compression. It surpasses LLMLingua in handling out-of-domain data, offering 3x-6x faster performance.
|
48 |
+
|
49 |
+
- [LLMLingua-2: Context-Aware Data Distillation for Efficient and Faithful Task-Agnostic Prompt Compression](https://arxiv.org/abs/2403.12968) (Under Review)<br>
|
50 |
+
_Zhuoshi Pan, Qianhui Wu, Huiqiang Jiang, Menglin Xia, Xufang Luo, Jue Zhang, Qingwei Lin, Victor Ruehle, Yuqing Yang, Chin-Yew Lin, H. Vicky Zhao, Lili Qiu, Dongmei Zhang_
|
app.py
CHANGED
@@ -1,9 +1,3 @@
|
|
1 |
-
|
2 |
-
# build the environment
|
3 |
-
import sys
|
4 |
-
import subprocess
|
5 |
-
subprocess.run([sys.executable, "-m", "pip", "install", "-e", "."])
|
6 |
-
|
7 |
# import the required libraries
|
8 |
import gradio as gr
|
9 |
import json
|
@@ -60,15 +54,12 @@ def compress(original_prompt, compression_rate, base_model="xlm-roberta-large",
|
|
60 |
|
61 |
|
62 |
title = "LLMLingua-2"
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
</div>
|
70 |
-
"""
|
71 |
-
)
|
72 |
theme = "soft"
|
73 |
css = """#anno-img .mask {opacity: 0.5; transition: all 0.2s ease-in-out;}
|
74 |
#anno-img .mask.active {opacity: 0.7}"""
|
@@ -76,8 +67,8 @@ css = """#anno-img .mask {opacity: 0.5; transition: all 0.2s ease-in-out;}
|
|
76 |
original_prompt_text = """John: So, um, I've been thinking about the project, you know, and I believe we need to, uh, make some changes. I mean, we want the project to succeed, right? So, like, I think we should consider maybe revising the timeline.
|
77 |
Sarah: I totally agree, John. I mean, we have to be realistic, you know. The timeline is, like, too tight. You know what I mean? We should definitely extend it.
|
78 |
"""
|
79 |
-
|
80 |
-
with gr.Blocks(title=title, css=css) as app:
|
81 |
gr.Markdown(header)
|
82 |
with gr.Row():
|
83 |
with gr.Column(scale=3):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# import the required libraries
|
2 |
import gradio as gr
|
3 |
import json
|
|
|
54 |
|
55 |
|
56 |
title = "LLMLingua-2"
|
57 |
+
|
58 |
+
header = """# LLMLingua-2: Efficient and Faithful Task-Agnostic Prompt Compression via Data Distillation
|
59 |
+
_Zhuoshi Pan, Qianhui Wu, Huiqiang Jiang, Menglin Xia, Xufang Luo, Jue Zhang, Qingwei Lin, Victor Ruehle, Yuqing Yang, Chin-Yew Lin, H. Vicky Zhao, Lili Qiu, Dongmei Zhang_<br/>
|
60 |
+
[[project page]](https://llmlingua.com/llmlingua2.html) [[paper]](https://arxiv.org/abs/2403.12968) [[code]](https://github.com/microsoft/LLMLingua)
|
61 |
+
"""
|
62 |
+
|
|
|
|
|
|
|
63 |
theme = "soft"
|
64 |
css = """#anno-img .mask {opacity: 0.5; transition: all 0.2s ease-in-out;}
|
65 |
#anno-img .mask.active {opacity: 0.7}"""
|
|
|
67 |
original_prompt_text = """John: So, um, I've been thinking about the project, you know, and I believe we need to, uh, make some changes. I mean, we want the project to succeed, right? So, like, I think we should consider maybe revising the timeline.
|
68 |
Sarah: I totally agree, John. I mean, we have to be realistic, you know. The timeline is, like, too tight. You know what I mean? We should definitely extend it.
|
69 |
"""
|
70 |
+
|
71 |
+
with gr.Blocks(title=title, css=css) as app:
|
72 |
gr.Markdown(header)
|
73 |
with gr.Row():
|
74 |
with gr.Column(scale=3):
|
images/LLMLingua_logo.png
ADDED
llmlingua/__init__.py
DELETED
@@ -1,4 +0,0 @@
|
|
1 |
-
# Copyright (c) 2024 Microsoft
|
2 |
-
# Licensed under The cc-by-nc-sa-4.0 License [see LICENSE for details]
|
3 |
-
# flake8: noqa
|
4 |
-
from .prompt_compressor import PromptCompressor
|
|
|
|
|
|
|
|
|
|
llmlingua/prompt_compressor.py
DELETED
The diff for this file is too large to render.
See raw diff
|
|
llmlingua/utils.py
DELETED
@@ -1,98 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
from torch.utils.data import Dataset
|
3 |
-
import random, os
|
4 |
-
import numpy as np
|
5 |
-
import torch
|
6 |
-
import string
|
7 |
-
|
8 |
-
class TokenClfDataset(Dataset):
|
9 |
-
def __init__(
|
10 |
-
self,
|
11 |
-
texts,
|
12 |
-
max_len=512,
|
13 |
-
tokenizer=None,
|
14 |
-
model_name="bert-base-multilingual-cased",
|
15 |
-
):
|
16 |
-
self.len = len(texts)
|
17 |
-
self.texts = texts
|
18 |
-
self.tokenizer = tokenizer
|
19 |
-
self.max_len = max_len
|
20 |
-
self.model_name = model_name
|
21 |
-
if "bert-base-multilingual-cased" in model_name:
|
22 |
-
self.cls_token = "[CLS]"
|
23 |
-
self.sep_token = "[SEP]"
|
24 |
-
self.unk_token = "[UNK]"
|
25 |
-
self.pad_token = "[PAD]"
|
26 |
-
self.mask_token = "[MASK]"
|
27 |
-
elif "xlm-roberta-large" in model_name:
|
28 |
-
self.bos_token = "<s>"
|
29 |
-
self.eos_token = "</s>"
|
30 |
-
self.sep_token = "</s>"
|
31 |
-
self.cls_token = "<s>"
|
32 |
-
self.unk_token = "<unk>"
|
33 |
-
self.pad_token = "<pad>"
|
34 |
-
self.mask_token = "<mask>"
|
35 |
-
else:
|
36 |
-
raise NotImplementedError()
|
37 |
-
|
38 |
-
def __getitem__(self, index):
|
39 |
-
text = self.texts[index]
|
40 |
-
tokenized_text = self.tokenizer.tokenize(text)
|
41 |
-
|
42 |
-
tokenized_text = (
|
43 |
-
[self.cls_token] + tokenized_text + [self.sep_token]
|
44 |
-
) # add special tokens
|
45 |
-
|
46 |
-
if len(tokenized_text) > self.max_len:
|
47 |
-
tokenized_text = tokenized_text[: self.max_len]
|
48 |
-
else:
|
49 |
-
tokenized_text = tokenized_text + [
|
50 |
-
self.pad_token for _ in range(self.max_len - len(tokenized_text))
|
51 |
-
]
|
52 |
-
|
53 |
-
attn_mask = [1 if tok != self.pad_token else 0 for tok in tokenized_text]
|
54 |
-
|
55 |
-
ids = self.tokenizer.convert_tokens_to_ids(tokenized_text)
|
56 |
-
|
57 |
-
return {
|
58 |
-
"ids": torch.tensor(ids, dtype=torch.long),
|
59 |
-
"mask": torch.tensor(attn_mask, dtype=torch.long),
|
60 |
-
}
|
61 |
-
|
62 |
-
def __len__(self):
|
63 |
-
return self.len
|
64 |
-
|
65 |
-
|
66 |
-
def seed_everything(seed: int):
|
67 |
-
random.seed(seed)
|
68 |
-
os.environ["PYTHONHASHSEED"] = str(seed)
|
69 |
-
np.random.seed(seed)
|
70 |
-
torch.manual_seed(seed)
|
71 |
-
torch.cuda.manual_seed(seed)
|
72 |
-
torch.backends.cudnn.deterministic = True
|
73 |
-
torch.backends.cudnn.benchmark = False
|
74 |
-
|
75 |
-
def is_begin_of_new_word(token, model_name, force_tokens, token_map):
|
76 |
-
if "bert-base-multilingual-cased" in model_name:
|
77 |
-
if token.lstrip("##") in force_tokens or token.lstrip("##") in set(token_map.values()):
|
78 |
-
return True
|
79 |
-
return not token.startswith("##")
|
80 |
-
elif "xlm-roberta-large" in model_name:
|
81 |
-
if token in string.punctuation or token in force_tokens or token in set(token_map.values()):
|
82 |
-
return True
|
83 |
-
return token.startswith("▁")
|
84 |
-
else:
|
85 |
-
raise NotImplementedError()
|
86 |
-
|
87 |
-
def replace_added_token(token, token_map):
|
88 |
-
for ori_token, new_token in token_map.items():
|
89 |
-
token = token.replace(new_token, ori_token)
|
90 |
-
return token
|
91 |
-
|
92 |
-
def get_pure_token(token, model_name):
|
93 |
-
if "bert-base-multilingual-cased" in model_name:
|
94 |
-
return token.lstrip("##")
|
95 |
-
elif "xlm-roberta-large" in model_name:
|
96 |
-
return token.lstrip("▁")
|
97 |
-
else:
|
98 |
-
raise NotImplementedError()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llmlingua/version.py
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
# Copyright (c) 2023 Microsoft
|
2 |
-
# Licensed under The MIT License [see LICENSE for details]
|
3 |
-
|
4 |
-
_MAJOR = "0"
|
5 |
-
_MINOR = "1"
|
6 |
-
# On master and in a nightly release the patch should be one ahead of the last
|
7 |
-
# released build.
|
8 |
-
_PATCH = "6"
|
9 |
-
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
|
10 |
-
# https://semver.org/#is-v123-a-semantic-version for the semantics.
|
11 |
-
_SUFFIX = ""
|
12 |
-
|
13 |
-
VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)
|
14 |
-
VERSION = "{0}.{1}.{2}{3}".format(_MAJOR, _MINOR, _PATCH, _SUFFIX)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
setup.cfg
DELETED
@@ -1,28 +0,0 @@
|
|
1 |
-
[isort]
|
2 |
-
default_section = FIRSTPARTY
|
3 |
-
ensure_newline_before_comments = True
|
4 |
-
force_grid_wrap = 0
|
5 |
-
include_trailing_comma = True
|
6 |
-
known_first_party = sdtools
|
7 |
-
known_third_party =
|
8 |
-
imblearn
|
9 |
-
numpy
|
10 |
-
pandas
|
11 |
-
pytorch-tabnet
|
12 |
-
scipy
|
13 |
-
sklearn
|
14 |
-
torch
|
15 |
-
torchaudio
|
16 |
-
torchvision
|
17 |
-
torch_xla
|
18 |
-
tqdm
|
19 |
-
xgboost
|
20 |
-
|
21 |
-
line_length = 119
|
22 |
-
lines_after_imports = 2
|
23 |
-
multi_line_output = 3
|
24 |
-
use_parentheses = True
|
25 |
-
|
26 |
-
[flake8]
|
27 |
-
ignore = E203, E501, E741, W503, W605
|
28 |
-
max-line-length = 119
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
setup.py
DELETED
@@ -1,70 +0,0 @@
|
|
1 |
-
# Copyright (c) 2023 Microsoft
|
2 |
-
# Licensed under The MIT License [see LICENSE for details]
|
3 |
-
|
4 |
-
from setuptools import find_packages, setup
|
5 |
-
|
6 |
-
# PEP0440 compatible formatted version, see:
|
7 |
-
# https://www.python.org/dev/peps/pep-0440/
|
8 |
-
#
|
9 |
-
# release markers:
|
10 |
-
# X.Y
|
11 |
-
# X.Y.Z # For bugfix releases
|
12 |
-
#
|
13 |
-
# pre-release markers:
|
14 |
-
# X.YaN # Alpha release
|
15 |
-
# X.YbN # Beta release
|
16 |
-
# X.YrcN # Release Candidate
|
17 |
-
# X.Y # Final release
|
18 |
-
|
19 |
-
# version.py defines the VERSION and VERSION_SHORT variables.
|
20 |
-
# We use exec here so we don't import allennlp whilst setting up.
|
21 |
-
VERSION = {} # type: ignore
|
22 |
-
with open("llmlingua/version.py", "r") as version_file:
|
23 |
-
exec(version_file.read(), VERSION)
|
24 |
-
|
25 |
-
INSTALL_REQUIRES = [
|
26 |
-
"transformers>=4.26.0",
|
27 |
-
"accelerate",
|
28 |
-
"torch",
|
29 |
-
"tiktoken",
|
30 |
-
"nltk",
|
31 |
-
"numpy",
|
32 |
-
]
|
33 |
-
QUANLITY_REQUIRES = [
|
34 |
-
"black==21.4b0",
|
35 |
-
"flake8>=3.8.3",
|
36 |
-
"isort>=5.5.4",
|
37 |
-
"pre-commit",
|
38 |
-
"pytest",
|
39 |
-
"pytest-xdist",
|
40 |
-
]
|
41 |
-
DEV_REQUIRES = INSTALL_REQUIRES + QUANLITY_REQUIRES
|
42 |
-
|
43 |
-
setup(
|
44 |
-
name="llmlingua",
|
45 |
-
version=VERSION["VERSION"],
|
46 |
-
author="The LLMLingua team",
|
47 |
-
author_email="hjiang@microsoft.com",
|
48 |
-
description="To speed up LLMs' inference and enhance LLM's perceive of key information, compress the prompt and KV-Cache, which achieves up to 20x compression with minimal performance loss.",
|
49 |
-
long_description=open("README.md", encoding="utf8").read(),
|
50 |
-
long_description_content_type="text/markdown",
|
51 |
-
keywords="Prompt Compression, LLMs, Inference Acceleration, Black-box LLMs, Efficient LLMs",
|
52 |
-
license="MIT License",
|
53 |
-
url="https://github.com/microsoft/LLMLingua",
|
54 |
-
classifiers=[
|
55 |
-
"Intended Audience :: Science/Research",
|
56 |
-
"Development Status :: 3 - Alpha",
|
57 |
-
"Programming Language :: Python :: 3",
|
58 |
-
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
59 |
-
],
|
60 |
-
package_dir={"": "."},
|
61 |
-
packages=find_packages("."),
|
62 |
-
extras_require={
|
63 |
-
"dev": DEV_REQUIRES,
|
64 |
-
"quality": QUANLITY_REQUIRES,
|
65 |
-
},
|
66 |
-
install_requires=INSTALL_REQUIRES,
|
67 |
-
include_package_data=True,
|
68 |
-
python_requires=">=3.8.0",
|
69 |
-
zip_safe=False,
|
70 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|