alessandro trinca tornidor committed on
Commit
acbbf71
1 Parent(s): 4d19eb4

[test] update inference function to also return the output mask, useful for tests (now in the saturncloud test.ipynb notebook)

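In short, the function built by `utils.app_helpers.get_inference_model_by_args` now returns three values instead of two. A minimal sketch of the new call site, using the same names as the test added below:

```python
# sketch of the updated contract: inference_fn now returns
# (output_image, output_mask, output_str) instead of (output_image, output_str)
from utils import app_helpers, constants, utils

args = app_helpers.parse_args([])
inference_fn = app_helpers.get_inference_model_by_args(args)

input_prompt, input_image_path = constants.examples[0]
output_image, output_mask, output_str = inference_fn(
    input_prompt,
    utils.ROOT / input_image_path,
)
```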
README.md CHANGED
@@ -7,7 +7,40 @@ sdk: docker
  pinned: false
  ---
 
- (Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference)
+ # exec jupyter on the remote server with port forwarding on localhost
+
+ 1. check out the repo, install a venv with jupyter
+ 2. forward the port to localhost with the private key: `ssh -i ~/.ssh/id_ecdsa_saturncloud trincuz@ssh.community.saturnenterprise.io -L 8889:localhost:8889 -N -f`
+ 3. start the jupyter-lab server
+ 4. connect to the page on localhost
+
+ ## Commands to work on saturncloud after clone and git lfs install
+ ```bash
+ cd ~/workspace/lisa-on-gpu/
+ rm -rf lisa_venv
+ python3 -m venv lisa_venv
+ ln -s lisa_venv/ venv
+ source venv/bin/activate
+ pip --version
+ which python
+ python -m pip install pip wheel --upgrade
+ python -m pip install pytest pytest-cov jupyterlab
+ python -m pip install -r requirements.txt
+ nohup jupyter-lab &
+ tail -F nohup.out
+ ```
+
+ # Jupyterlab Howto
+
+ To run the `test.ipynb` notebook you should already have:
+ - cloned the project https://huggingface.co/spaces/aletrn/lisa-on-gpu with git lfs active
+ - created and activated a virtualenv
+ - installed the jupyterlab dependencies from requirements_jupyter.txt
+ - installed the dependencies from requirements.txt
+
+ ## Hardware requirements
+ - an nvidia gpu with 10 or 12 GB of memory (a T4 should suffice)
+ - at least 16 GB of system RAM
 
  [![Gradio](https://img.shields.io/badge/Gradio-Online%20Demo-blue)](http://103.170.5.190:7860/)
  [![Open in OpenXLab](https://cdn-static.openxlab.org.cn/app-center/openxlab_app.svg)](https://openxlab.org.cn/apps/detail/openxlab-app/LISA)
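One caveat in the recipe above: the ssh tunnel forwards port 8889, while a bare `nohup jupyter-lab &` listens on 8888 by default unless a config file overrides it, so starting the server with an explicit port avoids the mismatch. A sketch (the grep pattern for the login URL is an assumption about jupyter-lab's log format):

```bash
# on the remote host: bind jupyter-lab to the port the tunnel expects
nohup jupyter-lab --port 8889 --no-browser &

# recover the login URL (with its token) from nohup.out
grep -m 1 -o "http://localhost:[0-9]*/lab?token=[0-9a-f]*" nohup.out

# on the local machine: check the forwarded port is listening before opening the browser
ss -ltn | grep 8889
```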
notebooks/test.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements_jupyter.txt ADDED
@@ -0,0 +1,4 @@
+ jupyterlab
+ ipywidgets
+ pytest
+ pytest-cov
tests/__init__.py ADDED
File without changes
tests/imgs/example1_mask_0.png ADDED

Git LFS Details

  • SHA256: aab6b5e031486029e331f6d5e30acad41b0fdd73cc544e7c241dc9966932b2ca
  • Pointer size: 129 Bytes
  • Size of remote file: 6.29 kB
tests/test_app_helpers.py ADDED
@@ -0,0 +1,88 @@
+ import logging
+ import unittest
+
+
+ class TestAppBuilders(unittest.TestCase):
+
+     def test_default_creation(self):
+         from utils import utils
+
+         placeholders = utils.create_placeholder_variables()
+         self.assertIsInstance(placeholders, dict)
+         assert placeholders["no_seg_out"].shape == (512, 512, 3)
+         assert placeholders["error_happened"].shape == (512, 512, 3)
+
+     def test_parse_args(self):
+         from utils import app_helpers
+
+         test_args_parse = app_helpers.parse_args([])
+         assert vars(test_args_parse) == {
+             'version': 'xinlai/LISA-13B-llama2-v1-explanatory',
+             'vis_save_path': './vis_output',
+             'precision': 'fp16',
+             'image_size': 1024,
+             'model_max_length': 512,
+             'lora_r': 8,
+             'vision_tower': 'openai/clip-vit-large-patch14',
+             'local_rank': 0,
+             'load_in_8bit': False,
+             'load_in_4bit': True,
+             'use_mm_start_end': True,
+             'conv_type': 'llava_v1'
+         }
+
+     def test_inference(self):
+         import cv2
+         import numpy as np
+         from utils import app_helpers, constants, utils
+
+         max_diff = 0.02
+
+         logging.info("starting...")
+         logging.warning("Remember: free some memory before running 'get_inference_model_by_args(test_args_parse)' again")
+         test_args_parse = app_helpers.parse_args([])
+         inference_fn = app_helpers.get_inference_model_by_args(test_args_parse)
+         idx_example = 0
+         input_prompt, input_image_path = constants.examples[idx_example]
+         logging.info("running inference function with input prompt '{}'.".format(input_prompt))
+         _, output_mask, output_str = inference_fn(
+             input_prompt,
+             utils.ROOT / input_image_path
+         )
+         logging.info(f"output_str: {output_str}.")
+         expected_mask = cv2.imread(
+             # mask filenames under tests/imgs are 1-based (this commit adds example1_mask_0.png)
+             str(utils.ROOT / "tests" / "imgs" / f"example{idx_example + 1}_mask_0.png"),
+             cv2.IMREAD_GRAYSCALE
+         )
+
+         tot = output_mask.size
+         count = np.sum(output_mask != expected_mask)
+         perc = 100 * count / tot
+
+         logging.info(f"percentage of differing pixels between output and expected mask: {perc:.2f}%.")
+         try:
+             assert np.array_equal(output_mask, expected_mask)
+         except AssertionError:
+             try:
+                 logging.error("failed equality assertion!")
+                 logging.info(f"assert now that the percentage difference between the ndarrays is less than {max_diff}.")
+                 assert perc < max_diff
+             except AssertionError as ae:
+                 logging.error("failed all assertions, writing debug files...")
+                 import datetime
+                 now_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+                 output_folder = utils.ROOT / "tests" / "imgs"
+                 prefix = f"broken_test_example{idx_example + 1}_{now_str}"
+                 cv2.imwrite(
+                     str(output_folder / f"{prefix}.png"),
+                     output_mask
+                 )
+                 with open(output_folder / f"{prefix}__input_prompt.txt",
+                           "w") as dst:
+                     dst.write(input_prompt)
+                 with open(output_folder / f"{prefix}__output_str.txt",
+                           "w") as dst:
+                     dst.write(output_str)
+                 logging.info(f"Written files with prefix '{prefix}' in the {output_folder} folder.")
+                 raise ae
+         logging.info("end")
utils/app_helpers.py CHANGED
@@ -17,7 +17,6 @@ from model.llava import conversation as conversation_lib
  from model.llava.mm_utils import tokenizer_image_token
  from model.segment_anything.utils.transforms import ResizeLongestSide
 
-
  placeholders = utils.create_placeholder_variables()
 
 
@@ -96,10 +95,10 @@ def set_image_precision_by_args(input_image, precision):
 
  @session_logger.set_uuid_logging
  def preprocess(
-     x,
-     pixel_mean=torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1),
-     pixel_std=torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1),
-     img_size=1024,
+     x,
+     pixel_mean=torch.Tensor([123.675, 116.28, 103.53]).view(-1, 1, 1),
+     pixel_std=torch.Tensor([58.395, 57.12, 57.375]).view(-1, 1, 1),
+     img_size=1024,
  ) -> torch.Tensor:
      """Normalize pixel values and pad to a square input."""
      logging.info("preprocess started")
@@ -161,7 +160,8 @@ def get_model(args_to_parse):
          }
      )
      _model = LISAForCausalLM.from_pretrained(
-         args_to_parse.version, low_cpu_mem_usage=True, vision_tower=args_to_parse.vision_tower, seg_token_idx=args_to_parse.seg_token_idx, **kwargs
+         args_to_parse.version, low_cpu_mem_usage=True, vision_tower=args_to_parse.vision_tower,
+         seg_token_idx=args_to_parse.seg_token_idx, **kwargs
      )
      _model.config.eos_token_id = _tokenizer.eos_token_id
      _model.config.bos_token_id = _tokenizer.bos_token_id
@@ -207,7 +207,6 @@ def get_inference_model_by_args(args_to_parse):
      @session_logger.set_uuid_logging
      def inference(input_str, input_image_pathname):
          ## filter out special chars
-
          input_str = get_cleaned_input(input_str)
          logging.info(f"input_str type: {type(input_str)}, input_image type: {type(input_image_pathname)}.")
          logging.info(f"input_str: {input_str}, input_image: {type(input_image_pathname)}.")
@@ -225,7 +224,7 @@ def get_inference_model_by_args(args_to_parse):
          prompt = utils.DEFAULT_IMAGE_TOKEN + "\n" + prompt
          if args_to_parse.use_mm_start_end:
              replace_token = (
-                 utils.DEFAULT_IM_START_TOKEN + utils.DEFAULT_IMAGE_TOKEN + utils.DEFAULT_IM_END_TOKEN
+                 utils.DEFAULT_IM_START_TOKEN + utils.DEFAULT_IMAGE_TOKEN + utils.DEFAULT_IM_END_TOKEN
              )
              prompt = prompt.replace(utils.DEFAULT_IMAGE_TOKEN, replace_token)
 
@@ -276,25 +275,28 @@ def get_inference_model_by_args(args_to_parse):
          text_output = text_output.replace("\n", "").replace("  ", " ")
          text_output = text_output.split("ASSISTANT: ")[-1]
 
-         logging.info(f"text_output type: {type(text_output)}, text_output: {text_output}.")
-         save_img = None
+         logging.info(
+             f"found {len(pred_masks)} prediction masks, "
+             f"text_output type: {type(text_output)}, text_output: {text_output}."
+         )
+         output_image = no_seg_out
+         output_mask = no_seg_out
          for i, pred_mask in enumerate(pred_masks):
-             if pred_mask.shape[0] == 0:
+             if pred_mask.shape[0] == 0 or pred_mask.shape[1] == 0:
                  continue
-
              pred_mask = pred_mask.detach().cpu().numpy()[0]
-             pred_mask = pred_mask > 0
+             pred_mask_bool = pred_mask > 0
+             output_mask = pred_mask_bool.astype(np.uint8) * 255
 
-             save_img = image_np.copy()
-             save_img[pred_mask] = (
+             output_image = image_np.copy()
+             output_image[pred_mask_bool] = (
                  image_np * 0.5
-                 + pred_mask[:, :, None].astype(np.uint8) * np.array([255, 0, 0]) * 0.5
-             )[pred_mask]
+                 + pred_mask_bool[:, :, None].astype(np.uint8) * np.array([255, 0, 0]) * 0.5
+             )[pred_mask_bool]
 
-         output_str = f"ASSISTANT: {text_output}"
-         output_image = no_seg_out if save_img is None else save_img
-         logging.info(f"output_image type: {type(output_image)}.")
-         return output_image, output_str
+         output_str = f"ASSISTANT: {text_output} ..."
+         logging.info(f"output_image type: {type(output_image)}, output_mask type: {type(output_mask)}.")
+         return output_image, output_mask, output_str
 
      logging.info("prepared inference function!")
      return inference
@@ -303,7 +305,7 @@ def get_inference_model_by_args(args_to_parse):
 
  @session_logger.set_uuid_logging
  def get_gradio_interface(
          fn_inference: Callable
- ):
+ ):
      return gr.Interface(
          fn_inference,
          inputs=[
@@ -311,7 +313,8 @@ def get_gradio_interface(
              gr.Image(type="filepath", label="Input Image")
          ],
          outputs=[
-             gr.Image(type="pil", label="Segmentation Output"),
+             gr.Image(type="pil", label="Segmentation Output"),
+             gr.Image(type="pil", label="Mask Output"),
              gr.Textbox(lines=1, placeholder=None, label="Text Output")
          ],
          title=constants.title,
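The rewritten loop above derives the returned mask from the boolean prediction (`pred_mask_bool.astype(np.uint8) * 255` gives a 0/255 grayscale image) and paints the segmented region as a 50/50 blend of the original pixels and pure red. A self-contained sketch of that blend on synthetic data (the 4x4 image and mask are made up for illustration; array names mirror the diff):

```python
import numpy as np

# stand-ins for image_np (H, W, 3 uint8) and the thresholded prediction mask
image_np = np.full((4, 4, 3), 200, dtype=np.uint8)
pred_mask_bool = np.zeros((4, 4), dtype=bool)
pred_mask_bool[1:3, 1:3] = True

# grayscale mask returned alongside the blended image: 255 inside, 0 outside
output_mask = pred_mask_bool.astype(np.uint8) * 255

# 50% image + 50% red, applied only where the mask is True
output_image = image_np.copy()
output_image[pred_mask_bool] = (
    image_np * 0.5
    + pred_mask_bool[:, :, None].astype(np.uint8) * np.array([255, 0, 0]) * 0.5
)[pred_mask_bool]

print(output_mask)
print(output_image[1, 1])  # -> [227 100 100]: half of 200 plus half of (255, 0, 0)
```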