Upload 14 files

Files changed:
- 5.png (added)
- clip-baseline.png (added)
- clip-index.png (added)
- clip-online.png (added)
- index.html (+474 -236)
- sam-baseline.png (added)
- sam-index.png (added)
- sam-online.png (added)
- style.css (+1 -0)
index.html
CHANGED
@@ -1,255 +1,493 @@
(The previous 255-line version of index.html is replaced in full; the new 493-line version follows.)
<!DOCTYPE html>
<html>

<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width" />
  <title>EdgeFM</title>
  <link rel="stylesheet" href="style.css" />
</head>

<body>
  <div class="card">
    <h1>EdgeFM: Unlocking Scaling Potential of Foundation Models<br> for Evolving Data Streams at Edge</h1>

    <h2>Table of Contents</h2>
    <ul>
      <li><a href="#abstract">1. Abstract</a></li>
      <li><a href="#installation">2. Installation</a></li>
      <li><a href="#vit">3. Running Example 1: Supporting a Hugging Face FM Vision Transformer</a></li>
      <li><a href="#clip">4. Running Example 2: Supporting a Hugging Face FM CLIP</a></li>
      <li><a href="#sam">5. Running Example 3: Supporting a user-specified FM SAM</a></li>
      <li>
        <a href="#implementation">6. Implementation (Development API Documentation)</a>
        <ul>
          <li><a href="#hugging-face-model">6.1 Supporting a Hugging Face FM</a></li>
          <li><a href="#user-specified-model">6.2 Supporting a user-specified FM</a></li>
        </ul>
      </li>
    </ul>

    <h2 id="abstract">1. Abstract</h2>
    <p>Foundation models (FMs) such as large language models are the driving force of next-generation artificial
      intelligence systems. The trend of deploying FMs at the edge challenges their scaling potential when encountering
      massive new input data with compressed model sizes and constrained device resources. The prior art sheds light on
      learning new tasks and domains (data feature shifts) based on deployed networks. However, such learning approaches
      exacerbate the existing limitations: (i) predetermined network architectures lower model accuracy, and (ii) fixed
      model sizes hinder resource allocation optimization at a finer granularity.</p>
    <p>In this paper, we propose EdgeFM, a lightweight, neuron-grained scaling solution to unlock FMs' scaling potency
      in edge intelligence systems. EdgeFM achieves high accuracy and low model-retraining overheads by adaptively
      transforming an FM into a compact model that retains the neurons most important to the current input data. At
      run-time, EdgeFM determines optimal model sizes and assigned resources for multiple applications to maximize their
      overall accuracy. We implement EdgeFM on prevalent FMs for natural language processing, computer vision and
      multimodal applications and compare it against state-of-the-art techniques. Evaluation results show that our
      approach improves accuracy by 21.88% while reducing memory footprint and energy consumption by 27.14% and 65.65%,
      and further achieves a 15.96% overall accuracy improvement via neuron-grained resource scheduling.</p>

    <h2 id="installation">2. Installation</h2>
    <h3>2.1 Requirements</h3>
    <ul>
      <li>Linux and Windows</li>
      <li>Python 3.8+</li>
      <li>CUDA 10.2+</li>
    </ul>

    <h3>2.2 Preparing Environment</h3>
    <p>First, create a conda virtual environment and activate it:</p>
    <code>
conda create -n EdgeFM python=3.8<br>
conda activate EdgeFM
    </code>
    <p>Second, install torch and torchvision according to the <a href="https://pytorch.org/get-started/locally/">official
      site</a>.</p>
    <img src="https://user-images.githubusercontent.com/73862727/146364503-5664de5b-24b1-4a85-b342-3d061cd7563f.png" />
    <p>Get the installation command according to your selection on the official site, and copy it into the terminal.</p>
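    <p>For example, on a machine with a CUDA 11.8 driver the selector produces a command similar to the one below. This
      is only an illustration; the command shown on the official site for your OS, package manager and CUDA version is
      authoritative.</p>
    <code>
pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu118
    </code>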
    <p>Finally, install the required dependencies via pip:</p>
    <code>
pip install -r requirements.txt
    </code>

    <h2 id="vit">3. Running Example 1: Supporting a Hugging Face FM Vision Transformer</h2>

    <h3>3.1 Settings</h3>
    <p><b>Models.</b> We use a semantic segmentation model based on Vision Transformer from Hugging Face as an example
      to explain how to connect a Hugging Face FM to EdgeFM.</p>
    <p><b>Datasets.</b> We use the datasets <a href="https://link.springer.com/chapter/10.1007/978-3-319-46475-6_7">GTA5</a>
      and <a href="https://supervise.ly">SuperviselyPerson</a> as the source domain, and the datasets <a
        href="https://openaccess.thecvf.com/content_cvpr_2016/html/Cordts_The_Cityscapes_Dataset_CVPR_2016_paper.html">Cityscapes</a>
      and <a href="https://ieeexplore.ieee.org/abstract/document/6976983">BaiduPerson</a> as the target domain.</p>

    <h3>3.2 Offline Elastic Proxy Construction</h3>
    <p>Run the following commands sequentially to pre-train the knowledge base and index:</p>
    <code>
python experiments/elasticdnn/vit_b_16/offline/fm_lora/cls/cls.py<br>
python experiments/elasticdnn/vit_b_16/offline/fm_to_md/cls_md_wo_fbs.py<br>
python experiments/elasticdnn/vit_b_16/offline/fm_to_md/cls_md_index.py<br>
    </code>
    <p>Note that the file path of the model checkpoint in the last two scripts should be modified manually.</p>
    <p>Run the following command to open TensorBoard and watch the metrics (e.g. losses and accuracy) during the
      training process:</p>
    <code>
tensorboard --logdir &lt;the file path of the TensorBoard logs printed in the terminal&gt;
    </code>
    <p>Here are three TensorBoard screenshots taken while the three commands above are running:</p>
    <img src="1.png">
    <img src="2.png">
    <img src="3.png">

    <h3>3.3 Online Evolving Input Data Adaptation</h3>
    <p>Run the following command to evaluate EdgeFM over evolving data:</p>
    <code>
python experiments/elasticdnn/vit_b_16/online_new/cls/cls.py
    </code>
    <p>You can also launch TensorBoard to watch the retraining accuracy and time during the retraining process. Here is
      a screenshot:</p>
    <img src="4.png">

    <h3>3.4 Tuning the Hyperparameters (Optional)</h3>
    <p>Most hyperparameters are common and easy to understand (e.g. batch size, learning rate, and optimizer
      arguments). We introduce the hyperparameters unique to EdgeFM below.</p>
    <p>For <code class="inline-code">python experiments/elasticdnn/vit_b_16/offline/fm_lora/cls/cls.py</code>:</p>
    <ul>
      <li><b>ab_r</b>: the value of r in LoRA.</li>
    </ul>
    <p>For <code class="inline-code">python experiments/elasticdnn/vit_b_16/offline/fm_to_md/cls_md_wo_fbs.py</code>:</p>
    <ul>
      <li><b>sample_size</b>: the size of an input sample. For typical image workloads, the size is (1, 3, 224, 224).
        For language workloads, you can directly pass in a tokenized sample dict instead of a size (see the short
        sketch after this list).</li>
      <li><b>generate_md_width_ratio</b>: the ratio of the original FM's width to the knowledge base's width. We
        recommend 4 or 8, which means that the knowledge base is 1/4 or 1/8 the size of the original FM.</li>
      <li><b>distill_loss_weight</b>: controls the strength of distilling the original FM's features into the knowledge
        base's features using feature-based knowledge distillation. This helps improve the accuracy of the knowledge
        base.</li>
    </ul>
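    <p>The short sketch below illustrates the two forms a sample specification can take, and how a weight such as
      distill_loss_weight typically enters a feature-distillation loss. It is only an illustration with assumed names
      (the tokenizer, features and task loss are placeholders); the actual argument handling lives in the scripts
      above.</p>
    <code>
import torch<br>
import torch.nn.functional as F<br>
from transformers import AutoTokenizer<br>
<br>
# image workloads: a plain shape tuple<br>
sample_size = (1, 3, 224, 224)<br>
image_sample = torch.rand(sample_size)<br>
<br>
# language workloads: a tokenized sample dict instead of a size<br>
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')<br>
text_sample = tokenizer('an example sentence', return_tensors='pt')<br>
<br>
# feature-based distillation: the weight scales how strongly the knowledge<br>
# base's features are pulled towards the original FM's features<br>
distill_loss_weight = 1.0<br>
fm_feature, kb_feature = torch.rand(1, 768), torch.rand(1, 768)  # placeholder features<br>
task_loss = torch.tensor(0.5)                                    # placeholder task loss<br>
loss = task_loss + distill_loss_weight * F.mse_loss(kb_feature, fm_feature)
    </code>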
    <p>For <code class="inline-code">python experiments/elasticdnn/vit_b_16/offline/fm_to_md/cls_md_index.py</code>:</p>
    <ul>
      <li><b>FBS_r</b>: the value of r in the FBS module. We recommend 16.</li>
      <li><b>indexes_optimizer_args</b>: the arguments of the optimizer used in training the neuron index between the
        knowledge base and the FM.</li>
      <li><b>min_sparisty and max_sparsity</b>: in each training iteration, the knowledge base is set to a random
        sparsity and then trained (refer to dynamic neural networks). min_sparisty and max_sparsity determine the
        maximal and minimal model sizes of the generated proxy model. For example, if min_sparisty = 0 and
        max_sparsity = 0.9, the maximal proxy model is the same size as the knowledge base, and the minimal proxy
        model is 10% of the knowledge base (see the short example after this list).</li>
      <li><b>bn_cal_num_iters</b>: BN statistics are unstable during the training of dynamic neural networks (refer to
        S-Net (ICLR'19)). Therefore, before testing the accuracy of the knowledge base, its BN statistics should be
        calibrated using several iterations of inference on the test dataset (if the model has any BN layers).</li>
      <li><b>index_init</b>: how the values of the neuron index are initialized. We recommend 'zero'.</li>
    </ul>
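    <p>The sparsity bounds translate into proxy-model sizes as in the short example below; the parameter count is a
      made-up number used only to show the arithmetic.</p>
    <code>
kb_params = 21_000_000                                 # assumed knowledge-base parameter count<br>
min_sparsity, max_sparsity = 0.0, 0.9<br>
largest_proxy = int((1 - min_sparsity) * kb_params)    # 21,000,000 (same size as the knowledge base)<br>
smallest_proxy = int((1 - max_sparsity) * kb_params)   # 2,100,000 (10% of the knowledge base)
    </code>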

    <h3>3.5 Comparison Results</h3>
    <p>The comparison between EdgeFM and nine baselines on this workload is shown below. This figure also appears
      in the submitted paper.</p>
    <img style="width: 70%; margin: 0 auto;" src="5.png">

    <h2 id="clip">4. Running Example 2: Supporting a Hugging Face FM CLIP</h2>

    <h3>4.1 Settings</h3>
    <p><b>Models.</b> We use an image classification model based on CLIP from Hugging Face as an example
      to explain how to connect a Hugging Face FM to EdgeFM.</p>
    <p><b>Datasets.</b> We use the datasets <a href="https://link.springer.com/chapter/10.1007/978-3-319-46475-6_7">GTA5</a>
      and <a href="https://supervise.ly">SuperviselyPerson</a> as the source domain, and the datasets <a
        href="https://openaccess.thecvf.com/content_cvpr_2016/html/Cordts_The_Cityscapes_Dataset_CVPR_2016_paper.html">Cityscapes</a>
      and <a href="https://ieeexplore.ieee.org/abstract/document/6976983">BaiduPerson</a> as the target domain. We
      convert these semantic segmentation datasets into image classification datasets by cropping and saving the
      images in the segmentation bounding boxes.</p>
    <h3>4.2 Offline Elastic Proxy Construction</h3>
    <p>Run the following commands sequentially to pre-train the knowledge base and index:</p>
    <code>
python new_impl/cv/clip/cls.py<br>
python new_impl/cv/clip/cls_md_wo_fbs.py<br>
python new_impl/cv/clip/cls_md_index.py<br>
    </code>
    <p>Note that the file path of the model checkpoint in the last two scripts should be modified manually.</p>
    <p>Run the following command to open TensorBoard and watch the metrics (e.g. losses and accuracy) during the
      training process:</p>
    <code>
tensorboard --logdir &lt;the file path of the TensorBoard logs printed in the terminal&gt;
    </code>
    <p>Here is a TensorBoard screenshot taken while the commands above are running:</p>
    <img src="clip-index.png">
    <h3>4.3 Online Evolving Input Data Adaptation</h3>
    <p>Run the following command to evaluate EdgeFM over evolving data:</p>
    <code>
python new_impl/cv/clip/cls_online.py
    </code>
    <p>You can also launch TensorBoard to watch the retraining accuracy and time during the retraining process. Here is
      a screenshot:</p>
    <img src="clip-online.png">
    <h3>4.4 Compared with Baseline</h3>
    <p>Compared to the baseline adaptation method CUA running alone (blue), EdgeFM (red) improves accuracy by 15%. When
      facing drastically shifted domains, CUA struggles to improve accuracy through retraining, while EdgeFM notably
      recovers accuracy thanks to its distribution-adaptive proxy model.</p>
    <img style="width: 50%; margin: 0 auto;" src="clip-baseline.png" />

    <h2 id="sam">5. Running Example 3: Supporting a user-specified FM SAM (Segment Anything)</h2>

    <h3>5.1 Settings</h3>
    <p><b>Models.</b> We use the SOTA segmentation foundation model SAM. In this example, we support SAM using our
      designed standard FM API to explain how to connect a user-specified FM to EdgeFM.</p>
    <p><b>Datasets.</b> We use the datasets <a href="https://link.springer.com/chapter/10.1007/978-3-319-46475-6_7">GTA5</a>
      and <a href="https://supervise.ly">SuperviselyPerson</a> as the source domain, and the datasets <a
        href="https://openaccess.thecvf.com/content_cvpr_2016/html/Cordts_The_Cityscapes_Dataset_CVPR_2016_paper.html">Cityscapes</a>
      and <a href="https://ieeexplore.ieee.org/abstract/document/6976983">BaiduPerson</a> as the target domain.</p>
    <h3>5.2 Offline Elastic Proxy Construction</h3>
    <p>Run the following commands sequentially to pre-train the knowledge base and index:</p>
    <code>
python new_impl/cv/sam/seg.py<br>
python new_impl/cv/sam/seg_md_wo_fbs.py<br>
python new_impl/cv/sam/seg_md_index.py<br>
    </code>
    <p>Note that the file path of the model checkpoint in the last two scripts should be modified manually.</p>
    <p>Run the following command to open TensorBoard and watch the metrics (e.g. losses and accuracy) during the
      training process:</p>
    <code>
tensorboard --logdir &lt;the file path of the TensorBoard logs printed in the terminal&gt;
    </code>
    <p>Here is a TensorBoard screenshot taken while the commands above are running:</p>
    <img src="sam-index.png">
    <h3>5.3 Online Evolving Input Data Adaptation</h3>
    <p>Run the following command to evaluate EdgeFM over evolving data:</p>
    <code>
python new_impl/cv/seg/seg_online.py
    </code>
    <p>You can also launch TensorBoard to watch the retraining accuracy and time during the retraining process. Here is
      a screenshot:</p>
    <img src="sam-online.png">

    <h3>5.4 Compared with Baseline</h3>
    <p>Compared to the baseline adaptation method CUA running alone (blue), EdgeFM (red) improves accuracy by 8%. When
      facing drastically shifted domains, CUA struggles to improve accuracy through retraining, while EdgeFM notably
      recovers accuracy thanks to its distribution-adaptive proxy model.</p>
    <img style="width: 50%; margin: 0 auto;" src="sam-baseline.png" />

    <h2 id="implementation">6. Implementation (Development API Documentation)</h2>
    <p>EdgeFM is implemented in Python with 8k LOCs, and it currently targets transformers running on commodity
      edge devices in a Linux environment. Its scaling and retraining of transformers are implemented based on timm
      0.9.1 and transformers 4.30.2. Its scheduler is built upon the optimization problem solver in scikit-opt 0.6.6
      and the resource management systems in Docker 10.03.6 and K3s 1.18.12.</p>
    <p>The figure below illustrates the three steps of running an FM using EdgeFM. To facilitate the integration of a
      model, EdgeFM decouples the integration of a model (step 1) from the offline construction of the knowledge base
      and neuron index (step 2) and the online scaling and retraining of the FM (step 3). With this design, users only
      need to implement the FM API in step 1 to integrate a model. Specifically, EdgeFM supports two types of models.</p>
    <img src="Implementation.png">
    <p><b>Hugging Face FMs.</b> We implement EdgeFM to support FM APIs in the Hugging Face AI community. Taking
      AutoModel as an example, EdgeFM calls the function AutoModel.from_pretrained() to initialize an FM and calls the
      function AutoModel.forward() to perform a forward operation. EdgeFM allows users to run a Hugging Face FM using
      about 30 LOCs.</p>
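    <p>For reference, a minimal illustration of those two Hugging Face calls is shown below; the checkpoint name and
      the input sentence are arbitrary placeholders, not part of EdgeFM's code.</p>
    <code>
from transformers import AutoModel, AutoTokenizer<br>
<br>
fm = AutoModel.from_pretrained('bert-base-uncased')              # initialize an FM from the Hugging Face hub<br>
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')<br>
inputs = tokenizer('a test sentence', return_tensors='pt')<br>
fm.eval()<br>
outputs = fm.forward(**inputs)                                   # one forward operation<br>
print(outputs.last_hidden_state.shape)                           # e.g. torch.Size([1, 6, 768])
    </code>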
    <p><b>User-specified FMs.</b> EdgeFM designs a standard FM API (colored green in the figure) to unify user-specified
      FM implementations. This API mainly defines: (i) how the FM performs an inference using a given
      sample; (ii) how the accuracy of the FM is measured using a given test dataset; (iii) how to manipulate (e.g.
      compress/update/remove) a specific layer in the FM. For each FM, this API can be implemented in about 200 LOCs.</p>

    <h3 id="hugging-face-model">6.1 Supporting a Hugging Face FM</h3>

    <p>Supporting a Hugging Face model is a simplification of supporting a user-specified model, because Hugging Face
      FMs share a consistent implementation style, so repetitive implementation work can be avoided. The user only
      needs to implement the following few simple functions (a hedged sketch of such a wrapper follows the list):</p>

    <ul>

      <li>
        <code class="inline-code">def get_feature_hook(self)</code>
        <ul>
          <li>Get the PyTorch hook attached before the layer that extracts the key features.</li>
          <li><b>Output:</b> A PyTorch hook.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def get_task_head_params(self)</code>
        <ul>
          <li>Get the model parameters of the task head of the FM.</li>
          <li><b>Output:</b> The model parameters of the task head of the FM.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def get_qkv_proj_ff1_ff2_layer_names(self)</code>
        <ul>
          <li>Get a list of names, where each element is itself a list containing the names of the Q/K/V layers, the
            QKV projection layer, Feed Forward Layer 1, and Feed Forward Layer 2. For example, for Hugging Face's BERT,
            this function should return [['bert.encoder.layer.0.attention.self.query',
            'bert.encoder.layer.0.attention.self.key', 'bert.encoder.layer.0.attention.self.value',
            'bert.encoder.layer.0.attention.output.dense', 'bert.encoder.layer.0.intermediate.dense',
            'bert.encoder.layer.0.output.dense'], ['bert.encoder.layer.1.attention.self.query',
            'bert.encoder.layer.1.attention.self.key', 'bert.encoder.layer.1.attention.self.value',
            'bert.encoder.layer.1.attention.output.dense', 'bert.encoder.layer.1.intermediate.dense',
            'bert.encoder.layer.1.output.dense'], ...]</li>
          <li><b>Output:</b> A list of names.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def get_accuracy(self, test_loader)</code>
        <ul>
          <li>Measure the accuracy of the FM using the given test data loader.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>test_loader: A given test dataloader.</li>
            </ul>
          </li>
          <li><b>Output:</b> The measured accuracy.</li>
        </ul>
      </li>

    </ul>

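    <p>A minimal sketch of such a wrapper for a Hugging Face Vision Transformer is shown below. It is an
      assumption-level illustration: the class name, the checkpoint 'google/vit-base-patch16-224' and the accuracy loop
      are ours, the class does not inherit EdgeFM's actual base class, and get_feature_hook is omitted because the
      choice of feature layer is model-specific.</p>
    <code>
import torch<br>
from transformers import AutoModelForImageClassification<br>
<br>
class HuggingFaceViT:  # illustrative wrapper; EdgeFM's real base class and naming may differ<br>
&nbsp;&nbsp;def __init__(self):<br>
&nbsp;&nbsp;&nbsp;&nbsp;self.model = AutoModelForImageClassification.from_pretrained('google/vit-base-patch16-224')<br>
<br>
&nbsp;&nbsp;def get_task_head_params(self):<br>
&nbsp;&nbsp;&nbsp;&nbsp;return list(self.model.classifier.parameters())  # the classification head<br>
<br>
&nbsp;&nbsp;def get_qkv_proj_ff1_ff2_layer_names(self):<br>
&nbsp;&nbsp;&nbsp;&nbsp;# one entry per transformer block, following the module names of Hugging Face's ViT<br>
&nbsp;&nbsp;&nbsp;&nbsp;prefixes = [f'vit.encoder.layer.{i}' for i in range(len(self.model.vit.encoder.layer))]<br>
&nbsp;&nbsp;&nbsp;&nbsp;return [[f'{p}.attention.attention.query', f'{p}.attention.attention.key',<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;f'{p}.attention.attention.value', f'{p}.attention.output.dense',<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;f'{p}.intermediate.dense', f'{p}.output.dense'] for p in prefixes]<br>
<br>
&nbsp;&nbsp;@torch.no_grad()<br>
&nbsp;&nbsp;def get_accuracy(self, test_loader):<br>
&nbsp;&nbsp;&nbsp;&nbsp;self.model.eval()<br>
&nbsp;&nbsp;&nbsp;&nbsp;correct, total = 0, 0<br>
&nbsp;&nbsp;&nbsp;&nbsp;for x, y in test_loader:<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;pred = self.model(x).logits.argmax(dim=-1)<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;correct += (pred == y).sum().item()<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;total += y.numel()<br>
&nbsp;&nbsp;&nbsp;&nbsp;return correct / total
    </code>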
    <h3 id="user-specified-model">6.2 Supporting a user-specified FM</h3>
    <p>The user should implement the following functions in the standard FM API (a hedged sketch of a partial
      implementation follows the list).</p>
    <ul>

      <li>
        <code class="inline-code">def forward(self, x, *args, **kwargs)</code>
        <ul>
          <li>Let the FM perform a forward inference operation using the given sample x.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>x: A given sample.</li>
              <li>*args and **kwargs: Possible additional arguments used in the inference.</li>
            </ul>
          </li>
          <li><b>Output:</b> The inference results.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def get_accuracy(self, test_loader)</code>
        <ul>
          <li>Measure the accuracy of the FM using the given test data loader.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>test_loader: A given test dataloader.</li>
            </ul>
          </li>
          <li><b>Output:</b> The measured accuracy.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def forward_to_get_task_loss(self, x, y, *args, **kwargs)</code>
        <ul>
          <li>Let the FM perform a forward operation using the given sample x, and calculate and return the task loss.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>x: A given sample.</li>
              <li>y: The corresponding label of x.</li>
              <li>*args and **kwargs: Possible additional arguments used in the inference.</li>
            </ul>
          </li>
          <li><b>Output:</b> The calculated task loss.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def get_feature_hook(self)</code>
        <ul>
          <li>Get the PyTorch hook attached before the layer that extracts the key features.</li>
          <li><b>Output:</b> A PyTorch hook.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def get_task_head_params(self)</code>
        <ul>
          <li>Get the model parameters of the task head of the FM.</li>
          <li><b>Output:</b> The model parameters of the task head of the FM.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def add_lora_ab_to_fm(self, ab_r: int, samples: torch.Tensor)</code>
        <ul>
          <li>Add a LoRA matrix to each attention layer in the FM. The user should check that the FM's output is
            unchanged before and after the LoRA is added to the FM.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>ab_r: the factor r in LoRA.</li>
              <li>samples: A given sample for the sanity check.</li>
            </ul>
          </li>
          <li><b>Output:</b> A PyTorch hook.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def fuse_lora_and_recover_net_structure(self, samples: torch.Tensor)</code>
        <ul>
          <li>Fuse the added LoRA matrix into the corresponding attention layer in the FM, and recover the network
            structure to the original. This is invoked after LoRA fine-tuning.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>samples: A given sample for the sanity check.</li>
            </ul>
          </li>
          <li><b>Output:</b> A PyTorch hook.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def is_q_or_k_v_linear(self, layer_name: nn.Module)</code>
        <ul>
          <li>Check if the given layer is a Q/K/V Linear in the FM.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>layer_name: The name of a layer in the FM.</li>
            </ul>
          </li>
          <li><b>Output:</b> Return True if the given layer is a Q/K/V Linear in the FM.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def is_feed_forward(self, layer_name: nn.Module)</code>
        <ul>
          <li>Check if the given layer is a feed forward layer in the FM.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>layer_name: The name of a layer in the FM.</li>
            </ul>
          </li>
          <li><b>Output:</b> Return True if the given layer is a feed forward layer in the FM.</li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def prune_an_attention_layer(self, attention_layer_name, sparsity: float, samples: torch.Tensor)</code>
        <ul>
          <li>Prune an attention layer.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>attention_layer_name: The name of the target attention layer.</li>
              <li>sparsity: The pruning strength.</li>
              <li>samples: A given sample.</li>
            </ul>
          </li>
        </ul>
      </li>

      <li>
        <code class="inline-code">def prune_an_feed_forward_layer(self, feed_forward_layer_name, sparsity: float, samples: torch.Tensor)</code>
        <ul>
          <li>Prune a feed forward layer.</li>
          <li><b>Inputs:</b>
            <ul>
              <li>feed_forward_layer_name: The name of the target feed forward layer.</li>
              <li>sparsity: The pruning strength.</li>
              <li>samples: A given sample.</li>
            </ul>
          </li>
        </ul>
      </li>

    </ul>

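    <p>A partial, hedged sketch of this API is given below. The wrapped model, the layer-name patterns and the loss
      function are placeholders chosen for illustration; the LoRA and pruning functions are omitted because they depend
      on EdgeFM's internals.</p>
    <code>
import torch<br>
import torch.nn.functional as F<br>
from torch import nn<br>
<br>
class MySegmentationFM:  # partial sketch of the standard FM API, not EdgeFM's code<br>
&nbsp;&nbsp;def __init__(self, model: nn.Module):<br>
&nbsp;&nbsp;&nbsp;&nbsp;self.model = model<br>
<br>
&nbsp;&nbsp;def forward(self, x, *args, **kwargs):<br>
&nbsp;&nbsp;&nbsp;&nbsp;return self.model(x, *args, **kwargs)<br>
<br>
&nbsp;&nbsp;def forward_to_get_task_loss(self, x, y, *args, **kwargs):<br>
&nbsp;&nbsp;&nbsp;&nbsp;# assumes a per-pixel classification output; a real FM may need its own loss<br>
&nbsp;&nbsp;&nbsp;&nbsp;return F.cross_entropy(self.forward(x, *args, **kwargs), y)<br>
<br>
&nbsp;&nbsp;@torch.no_grad()<br>
&nbsp;&nbsp;def get_accuracy(self, test_loader):<br>
&nbsp;&nbsp;&nbsp;&nbsp;self.model.eval()<br>
&nbsp;&nbsp;&nbsp;&nbsp;correct, total = 0, 0<br>
&nbsp;&nbsp;&nbsp;&nbsp;for x, y in test_loader:<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;pred = self.forward(x).argmax(dim=1)<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;correct += (pred == y).sum().item()<br>
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;total += y.numel()<br>
&nbsp;&nbsp;&nbsp;&nbsp;return correct / total<br>
<br>
&nbsp;&nbsp;def is_q_or_k_v_linear(self, layer_name):<br>
&nbsp;&nbsp;&nbsp;&nbsp;# layer-name suffixes are model-specific; these are only an assumption<br>
&nbsp;&nbsp;&nbsp;&nbsp;return layer_name.endswith(('.q_proj', '.k_proj', '.v_proj'))<br>
<br>
&nbsp;&nbsp;def is_feed_forward(self, layer_name):<br>
&nbsp;&nbsp;&nbsp;&nbsp;return '.mlp.' in layer_name
    </code>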
  </div>
</body>

</html>
style.css
CHANGED
@@ -81,6 +81,7 @@ code {
 ul {
   margin-top: 5px;
   margin-bottom: 10px;
+  margin-left: 1rem;
   color: rgb(85, 85, 85);
   font-size: 18px;
 }