namespace-Pt
committed on
Commit
•
39a72f9
1
Parent(s):
992b551
Upload folder using huggingface_hub
Browse files- README.md +10 -5
- modeling_utils.py +0 -4
README.md
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
<div align="center">
|
2 |
<h1>Activation Beacon for Mistral</h1>
|
3 |
|
@@ -34,7 +39,7 @@ We evaluate the model on LongBench using 32K context length.
|
|
34 |
|:-:|:-:|:-:|:-:|
|
35 |
|[Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)|32.70|25.87|27.42|
|
36 |
|[Yarn-Mistral-128K](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k)|33.71|36.08|23.47|
|
37 |
-
|Activation-Beacon-Mistral|39.14|43.27|29.52|
|
38 |
|
39 |
## [InfiniteBench](https://arxiv.org/pdf/2402.13718.pdf)
|
40 |
We evaluate the model on InfiniteBench using 128K context length. The results of Yarn-Mistral-128K are copied from the [paper](https://arxiv.org/pdf/2402.13718.pdf). For [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2), we use 32K context length.
|
@@ -43,7 +48,7 @@ We evaluate the model on InfiniteBench using 128K context length. The results of
|
|
43 |
|:-:|:-:|:-:|
|
44 |
|[Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)|13.14||
|
45 |
|[Yarn-Mistral-128K](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k)|9.55|9.09|
|
46 |
-
|Activation-Beacon-Mistral|26.81|12.49|
|
47 |
|
48 |
## [Topic Retrieval](https://lmsys.org/blog/2023-06-29-longchat/)
|
49 |
We evaluate the model on Topic Retrieval task with `[5,10,20,30,40,50,60,70]` topics.
|
@@ -52,13 +57,13 @@ We evaluate the model on Topic Retrieval task with `[5,10,20,30,40,50,60,70]` to
|
|
52 |
|
53 |
|
54 |
## [PG19 Perplexity](https://arxiv.org/abs/2309.12307)
|
55 |
-
We evaluate the sliding window perplexity on the PG19 test set with window size 100K and stride 32K. We also report the latency and the GPU memory usage. For full-attention models, we enable flash-attention-2 and [tensor parallel](https://github.com/BlackSamorez/tensor_parallel). The evaluation is run on an 8xA800 machine.
|
56 |
|
57 |
|Model|Perplexity|Latency (s)|Memory (GB)|
|
58 |
|:-:|:-:|:-:|:-:|
|
59 |
|[Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)|8.83|14.02|525.6 (cannot run on a single GPU)|
|
60 |
-
|[Yarn-Mistral-128K](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k)
|
61 |
-
|Activation-Beacon-Mistral|8.16|3.06|27.4|
|
62 |
|
63 |
|
64 |
## [Passkey Retrieval](https://arxiv.org/abs/2309.12307)
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
pipeline_tag: text-generation
|
4 |
+
---
|
5 |
+
|
6 |
<div align="center">
|
7 |
<h1>Activation Beacon for Mistral</h1>
|
8 |
|
|
|
39 |
|:-:|:-:|:-:|:-:|
|
40 |
|[Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)|32.70|25.87|27.42|
|
41 |
|[Yarn-Mistral-128K](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k)|33.71|36.08|23.47|
|
42 |
+
|Activation-Beacon-Mistral-7B|39.14|43.27|29.52|
|
43 |
|
44 |
## [InfiniteBench](https://arxiv.org/pdf/2402.13718.pdf)
|
45 |
We evaluate the model on InfiniteBench using 128K context length. The results of Yarn-Mistral-128K are copied from the [paper](https://arxiv.org/pdf/2402.13718.pdf). For [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2), we use 32K context length.
|
|
|
48 |
|:-:|:-:|:-:|
|
49 |
|[Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)|13.14||
|
50 |
|[Yarn-Mistral-128K](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k)|9.55|9.09|
|
51 |
+
|Activation-Beacon-Mistral-7B|26.81|12.49|
|
52 |
|
53 |
## [Topic Retrieval](https://lmsys.org/blog/2023-06-29-longchat/)
|
54 |
We evaluate the model on Topic Retrieval task with `[5,10,20,30,40,50,60,70]` topics.
|
|
|
57 |
|
58 |
|
59 |
## [PG19 Perplexity](https://arxiv.org/abs/2309.12307)
|
60 |
+
We evaluate the sliding window perplexity on the PG19 test set with window size 100K and stride 32K. We also report the latency and the GPU memory usage. For full-attention models, we enable [flash-attention-2](https://github.com/Dao-AILab/flash-attention) and [tensor parallel](https://github.com/BlackSamorez/tensor_parallel). The evaluation is run on an 8xA800 machine.
|
61 |
|
62 |
|Model|Perplexity|Latency (s)|Memory (GB)|
|
63 |
|:-:|:-:|:-:|:-:|
|
64 |
|[Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)|8.83|14.02|525.6 (cannot run on a single GPU)|
|
65 |
+
|[Yarn-Mistral-128K](https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k)|7.66|14.56|525.6 (cannot run on a single GPU)|
|
66 |
+
|Activation-Beacon-Mistral-7B|8.16|3.06|27.4|
|
67 |
|
68 |
|
69 |
## [Passkey Retrieval](https://arxiv.org/abs/2309.12307)
|
modeling_utils.py
CHANGED
@@ -70,10 +70,6 @@ def evaluate_perplexity(model, dataloader, accelerator:Optional[Accelerator]=Non
|
|
70 |
# if the dataloader has been prepared, we shall not prepare it twice, especially in case of deepspeed
|
71 |
dataloader = accelerator.prepare(dataloader)
|
72 |
|
73 |
-
# if accelerator.process_index == 0:
|
74 |
-
# for name, x in model.named_parameters():
|
75 |
-
# print(f"{name: ^80} {x.dtype}")
|
76 |
-
|
77 |
all_loss = defaultdict(list)
|
78 |
for i, x in enumerate(tqdm(dataloader, desc="Computing Perplexity")):
|
79 |
# NOTE: important to reset memory for every batch
|
|
|
70 |
# if the dataloader has been prepared, we shall not prepare it twice, especially in case of deepspeed
|
71 |
dataloader = accelerator.prepare(dataloader)
|
72 |
|
|
|
|
|
|
|
|
|
73 |
all_loss = defaultdict(list)
|
74 |
for i, x in enumerate(tqdm(dataloader, desc="Computing Perplexity")):
|
75 |
# NOTE: important to reset memory for every batch
|